|
1 | | -import type { AgentDefinition } from '../types/agent-definition' |
| 1 | +import type { SecretAgentDefinition } from '../types/secret-agent-definition' |
2 | 2 |
|
3 | | -const definition: AgentDefinition = { |
4 | | - id: 'completion-verifier', |
| 3 | +const definition: SecretAgentDefinition = { |
| 4 | + id: 'codelayer-completion-verifier', |
5 | 5 | publisher: 'codelayer', |
6 | | - model: 'google/gemini-2.5-flash', |
| 6 | + model: 'anthropic/claude-4-sonnet-20250522', |
7 | 7 | displayName: 'Completion Verifier', |
8 | | - |
9 | | - toolNames: ['read_files', 'code_search', 'set_output'], |
10 | | - |
| 8 | + |
| 9 | + toolNames: [ |
| 10 | + 'code_search', |
| 11 | + 'read_files', |
| 12 | + 'run_terminal_command', |
| 13 | + 'smart_find_files', |
| 14 | + 'end_turn', |
| 15 | + ], |
| 16 | + |
| 17 | + spawnableAgents: [], |
| 18 | + |
11 | 19 | inputSchema: { |
12 | | - prompt: { |
13 | | - type: 'string', |
14 | | - description: 'Context about what should be verified for completion' |
15 | | - }, |
16 | 20 | params: { |
17 | 21 | type: 'object', |
18 | 22 | properties: { |
19 | | - requirements: { |
20 | | - type: 'array', |
21 | | - description: 'Original requirements from spec-parser' |
| 23 | + originalRequest: { |
| 24 | + type: 'string', |
| 25 | + description: 'The original user request to verify', |
22 | 26 | }, |
23 | | - completedSubgoals: { |
24 | | - type: 'array', |
25 | | - description: 'List of completed subgoal IDs' |
26 | | - } |
27 | | - }, |
28 | | - required: ['requirements'] |
29 | | - } |
30 | | - }, |
31 | | - |
32 | | - outputMode: 'structured_output', |
33 | | - outputSchema: { |
34 | | - type: 'object', |
35 | | - properties: { |
36 | | - overallComplete: { |
37 | | - type: 'boolean' |
38 | | - }, |
39 | | - completedRequirements: { |
40 | | - type: 'array', |
41 | | - items: { type: 'string' } |
42 | | - }, |
43 | | - missingRequirements: { |
44 | | - type: 'array', |
45 | | - items: { |
| 27 | + checklist: { |
46 | 28 | type: 'object', |
47 | | - properties: { |
48 | | - id: { type: 'string' }, |
49 | | - description: { type: 'string' }, |
50 | | - reason: { type: 'string' } |
51 | | - } |
52 | | - } |
| 29 | + description: 'Task checklist with items to verify', |
| 30 | + }, |
| 31 | + implementedChanges: { |
| 32 | + type: 'array', |
| 33 | + items: { type: 'string' }, |
| 34 | + description: 'List of files that were modified', |
| 35 | + }, |
53 | 36 | }, |
54 | | - qualityIssues: { |
55 | | - type: 'array', |
56 | | - items: { |
57 | | - type: 'object', |
58 | | - properties: { |
59 | | - file: { type: 'string' }, |
60 | | - issue: { type: 'string' }, |
61 | | - severity: { type: 'string', enum: ['critical', 'high', 'medium', 'low'] } |
62 | | - } |
63 | | - } |
64 | | - } |
65 | 37 | }, |
66 | | - required: ['overallComplete', 'completedRequirements', 'missingRequirements'] |
67 | 38 | }, |
68 | | - |
69 | | - spawnerPrompt: 'Verify that all requirements from a user request have been properly completed and implemented', |
70 | | - |
71 | | - systemPrompt: 'You are a completion verifier that ensures all parts of a user request have been properly implemented. Your job is to prevent the common failure mode of dropping requirements.', |
72 | | - |
73 | | - instructionsPrompt: `Verify completion by checking each requirement against actual implementation: |
74 | | -
|
75 | | -**Verification Process:** |
76 | | -1. **Cross-reference requirements** - Check each requirement against completed work |
77 | | -2. **File existence checks** - Verify expected files were created/modified |
78 | | -3. **Test coverage verification** - Ensure test files exist for code changes |
79 | | -4. **Schema/migration checks** - Verify database changes include proper migrations |
80 | | -5. **Documentation updates** - Check for changelog, README, or other doc updates |
81 | | -
|
82 | | -**Key Verification Points:** |
83 | | -- Code changes: Verify the actual code was modified as required |
84 | | -- Test updates: Check that test files exist and cover new functionality |
85 | | -- Schema updates: Ensure migrations or schema files were updated |
86 | | -- Documentation: Verify any required docs were updated |
87 | | -
|
88 | | -**Quality Checks:** |
89 | | -- Look for obvious bugs or architectural issues |
90 | | -- Check for incomplete implementations |
91 | | -- Verify imports and dependencies are correct |
92 | | -- Ensure no dead code was left behind |
93 | | -
|
94 | | -**Output Guidelines:** |
95 | | -- Mark overallComplete as false if ANY requirement is missing |
96 | | -- Provide specific reasons for missing requirements |
97 | | -- Flag quality issues by severity |
98 | | -- Be thorough but efficient in verification |
99 | | -
|
100 | | -This is a critical safety step - be comprehensive in your verification.`, |
| 39 | + |
| 40 | + outputMode: 'last_message', |
| 41 | + includeMessageHistory: false, |
| 42 | + |
| 43 | + spawnerPrompt: 'Use this agent to verify that all requirements from the original request have been completely implemented.', |
| 44 | + |
| 45 | + systemPrompt: `You are the Completion Verifier, a specialized agent focused on ensuring that ALL requirements from user requests are fully implemented. |
| 46 | +
|
| 47 | +## Your Mission |
| 48 | +Address the critical 60% incomplete implementation rate by systematically verifying that every aspect of the original request has been completed. |
| 49 | +
|
| 50 | +## Core Verification Areas |
| 51 | +1. **Requirement Coverage**: Every part of the original request addressed |
| 52 | +2. **Secondary Requirements**: Tests, documentation, schema updates, changelogs |
| 53 | +3. **Code Quality**: Follows existing patterns and architectural principles |
| 54 | +4. **Functional Validation**: Changes work as intended |
| 55 | +5. **Integration Completeness**: All affected systems updated |
| 56 | +
|
| 57 | +## Verification Checklist |
| 58 | +- ✅ **Core functionality** implemented as requested |
| 59 | +- ✅ **Frontend changes** (if UI/component work was requested) |
| 60 | +- ✅ **Backend changes** (if API/service work was requested) |
| 61 | +- ✅ **Database changes** (if schema/migration work was requested) |
| 62 | +- ✅ **Test coverage** (tests written/updated for changes) |
| 63 | +- ✅ **Documentation** (README, changelogs, comments updated) |
| 64 | +- ✅ **Build validation** (code compiles and passes linting) |
| 65 | +- ✅ **Integration points** (all related systems updated) |
| 66 | +
|
| 67 | +## Common Incomplete Patterns to Check |
| 68 | +- Implementation stopped after first major component |
| 69 | +- Backend implemented but frontend missing (or vice versa) |
| 70 | +- Core logic added but tests not written |
| 71 | +- Feature works but schema/migration not updated |
| 72 | +- New functionality added but documentation not updated |
| 73 | +- Integration points not properly connected |
| 74 | +
|
| 75 | +## Verification Process |
| 76 | +1. **Parse original request** and identify ALL requirements |
| 77 | +2. **Check implemented changes** against the full requirement list |
| 78 | +3. **Search for missing pieces** using smart file discovery |
| 79 | +4. **Validate functionality** by reading code and running tests |
| 80 | +5. **Report completeness status** with specific gaps identified`, |
| 81 | + |
| 82 | + instructionsPrompt: `Systematically verify that the original user request has been completely implemented. |
| 83 | +
|
| 84 | +1. Break down the original request into ALL its component parts |
| 85 | +2. Check each implemented change against the requirements |
| 86 | +3. Use smart_find_files to look for missing pieces (tests, docs, related files) |
| 87 | +4. Run terminal commands to validate builds and tests |
| 88 | +5. Identify any incomplete or missing aspects |
| 89 | +
|
| 90 | +Provide a detailed completeness report with: |
| 91 | +- ✅ Completed requirements |
| 92 | +- ❌ Missing/incomplete requirements |
| 93 | +- 🔍 Areas needing investigation |
| 94 | +- 📋 Specific next steps to achieve 100% completion |
| 95 | +
|
| 96 | +Focus on catching the common patterns where implementations are 80% done but missing critical pieces.`, |
| 97 | + |
| 98 | + handleSteps: function* () { |
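| 99 | + // Single-step agent focused on verification: yields exactly one 'STEP' (NOTE(review): confirm one step is enough for the search / read / run-tests workflow the prompts describe) |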
| 100 | + yield 'STEP' |
| 101 | + }, |
101 | 102 | } |
102 | 103 |
|
103 | 104 | export default definition |
0 commit comments