From 742ac7fd0176fca2d055c5d29bf8876a43afb221 Mon Sep 17 00:00:00 2001 From: Suprhimp Date: Fri, 5 Jun 2026 23:47:21 +0900 Subject: [PATCH] fix: prevent empty-{} loop in review tools via optional fields + file inputs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit request_plan_review / request_code_review / request_execution_partition collapsed to an empty {} when a large plan/code string was inlined, and the MCP SDK rejected it pre-handler (-32602: required) before the recovery guard could run — leaving callers in an unrecoverable identical-retry loop. - Relax large required string fields (plan, code, approved_plan, partition workspace_root) to optional so the handler's friendly guard is reachable - Add plan_file / code_file / approved_plan_file: write large content to a file and pass a relative path; server reads it scoped + symlink-guarded - Harden safePath: apply sensitive/blocked-path checks to the symlink-resolved real path, not just the logical path (closes in-root symlink-to-secret read) - Reviewer system prompts emit compressed output to save tokens - duul-planner now runs on Opus and submits plans via plan_file - Update README / README.ko / CLAUDE.md / agent prompt; add tests + changeset Co-Authored-By: Claude Opus 4.8 --- .changeset/plan-file-escape-hatch.md | 12 ++++ .claude/agents/duul-planner.md | 28 +++++--- CLAUDE.md | 8 +-- README.ko.md | 8 +-- README.md | 8 +-- package-lock.json | 4 +- src/__tests__/inline-or-file.test.ts | 101 +++++++++++++++++++++++++++ src/prompts/code-review-system.ts | 7 ++ src/prompts/plan-review-system.ts | 7 ++ src/schemas/code-review.ts | 27 +++++-- src/schemas/execution-partition.ts | 19 +++-- src/schemas/plan-review.ts | 17 ++++- src/services/filesystem.ts | 78 ++++++++++++++++----- src/tools/code-review.ts | 53 +++++++++----- src/tools/execution-partition.ts | 43 ++++++++---- src/tools/plan-review.ts | 39 +++++++---- 16 files changed, 364 insertions(+), 95 deletions(-) create mode 100644 .changeset/plan-file-escape-hatch.md create mode 100644 src/__tests__/inline-or-file.test.ts diff --git a/.changeset/plan-file-escape-hatch.md b/.changeset/plan-file-escape-hatch.md new file mode 100644 index 0000000..c5ae199 --- /dev/null +++ b/.changeset/plan-file-escape-hatch.md @@ -0,0 +1,12 @@ +--- +"@planningo/duul": minor +--- + +Fix the recurring `-32602: plan required` (and `code`/`approved_plan` equivalents) failure where a caller's tool call collapsed to an empty `{}` and looped. + +Two-part fix: + +- **Reachable guards.** The large required string fields (`plan`, `code`, `approved_plan`) are now `optional` at the schema level, so an empty/partial call reaches the handler instead of being rejected pre-handler by the MCP SDK. Callers now get actionable retry guidance instead of an opaque `-32602` zod error. (Partition's short `workspace_root` was also relaxed to `optional` for the same reachability reason — the handler still hard-requires it.) +- **File escape hatch.** Added `plan_file` (plan review), `code_file` + `approved_plan_file` (code review), and `approved_plan_file` (execution partition). Callers can write large content to a file with a normal Write call and pass a short relative path; the server reads it (scoped, symlink-guarded, `tracked_only` bypassed for the caller's own artifact). This avoids the large-argument serialization failure that made models emit `{}`. + +The reviewer system prompts now emit free-text fields in compressed style to reduce output tokens. Exactly one of the inline field or its `*_file` companion is required. diff --git a/.claude/agents/duul-planner.md b/.claude/agents/duul-planner.md index 95012ae..d90bb68 100644 --- a/.claude/agents/duul-planner.md +++ b/.claude/agents/duul-planner.md @@ -1,7 +1,7 @@ --- name: duul-planner -description: "DUUL Phase 1 plan ping-pong agent. Writes implementation plans and iterates with the plan reviewer until APPROVE. Runs on Sonnet to save tokens." -model: sonnet +description: "DUUL Phase 1 plan ping-pong agent. Writes implementation plans and iterates with the plan reviewer until APPROVE. Runs on Opus for plan quality; writes plans in compressed caveman style to save tokens." +model: opus tools: Read, Edit, Write, Bash, Grep, Glob, mcp__duul__request_plan_review --- @@ -25,9 +25,12 @@ You will receive: - The approach and architecture - Edge cases and error handling - Dependencies and imports needed -3. **Call `request_plan_review`** with the plan. ALWAYS include: - - `workspace_root`: the workspace root path - - `plan`: your detailed plan text + + **Write the plan in compressed "caveman" style to save tokens:** drop articles (a/an/the), filler (just/really/basically), and pleasantries; prefer fragments over full sentences; use short synonyms. Keep EXACT: file paths, identifiers, function/type names, code, and the verbatim quote of the user's request. Brevity must never drop a required section or change technical meaning. +3. **Submit the plan via file (preferred for reliability).** Write the full plan markdown to `.duul/plan.md` under the workspace root using the Write tool, THEN call `request_plan_review` with `plan_file: ".duul/plan.md"` (relative path). This avoids the large-argument tool-call failure where a big inline `plan` string collapses to an empty `{}`. For a short plan you may inline `plan` instead. ALWAYS include: + - `workspace_root`: the workspace root path (required when using `plan_file`) + - `plan_file`: `".duul/plan.md"` (relative path) — OR `plan` with the full text inline for short plans + - `user_original_request`: the user's verbatim message - `project_context`: file tree, changed files, relevant code snippets - `artifact_refs`: key files the reviewer should look at - `iteration_count`: starts at 1, increment each round @@ -61,17 +64,24 @@ approved_plan: ## Tool input rules (CRITICAL) -When calling `request_plan_review`, your tool input MUST include the `plan` field with the full plan markdown as a string. Do **NOT** send an empty `{}` object — that triggers an MCP validation error (`-32602: plan required`). +A large inline `plan` string is the #1 cause of failed DUUL calls: the model tries to emit a big markdown value inside the tool's large schema and the whole argument object collapses to an empty `{}`, which the MCP server rejects (`-32602: plan required`) — then the call loops. + +**The fix: route the large plan through a file.** -**Minimum valid call:** +1. Write the full plan markdown to `.duul/plan.md` under the workspace root with the **Write** tool (Write has a tiny, reliable schema — big content goes through fine here). +2. Call `request_plan_review` with a *small* argument object that points at the file: ```json { - "plan": "## Problem\n\n\n## Files\n- path/to/file.ts: \n\n## Approach\n<...>\n\n## Edge cases\n<...>", + "plan_file": ".duul/plan.md", "workspace_root": "/absolute/path/to/repo", "user_original_request": "", "iteration_count": 1 } ``` -If you find yourself unable to write the plan text in one tool-use turn (e.g. the plan is too long), draft and finalize the plan in your thinking/scratch first, then make a single tool call with the complete `plan` string. **Never call the tool with placeholder, empty, or partial input.** If the tool returns the validation error above, you wrote an empty input — re-read your draft and call again with the full `plan` string populated. +The server reads `.duul/plan.md` and uses its contents as the plan. `plan_file` must be a **relative** path inside `workspace_root`. + +**Short plans only:** you may instead inline `plan` directly. Exactly one of `plan` or `plan_file` is required. + +**If a call ever errors:** read the error text — the server now returns actionable guidance (it no longer just says `-32602`). Do **NOT** retry the identical empty call. Switch to the `plan_file` path above. Update the file and call again with the same `plan_file` on each REVISE round. diff --git a/CLAUDE.md b/CLAUDE.md index e98bc37..0f3f7cb 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -15,11 +15,11 @@ Only activate when the user mentions **"DUUL"** (or **"두울"**) in their reque **CRITICAL:** This is a continuous, uninterrupted sequence. Do **NOT** pause between phases to ask the user "should I proceed?" or "should I implement?". The user already authorized the full loop when they requested DUUL. -### Phase 1: Upfront-plan Ping-Pong (delegated to Sonnet subagent) +### Phase 1: Upfront-plan Ping-Pong (delegated to Opus subagent) -**To save tokens, Phase 1 runs on Sonnet via the `duul-planner` subagent.** The reviewer catches any plan issues, so Sonnet is sufficient for plan authoring. +**Phase 1 runs on Opus via the `duul-planner` subagent for maximum plan quality.** To keep token cost down, the planner writes plans in compressed "caveman" style and submits the plan via a file (`plan_file`) rather than a large inline string. -1. **Launch the `duul-planner` subagent** using the Agent tool with the user's requirements, workspace root path, and any relevant context. The subagent runs on Sonnet automatically (`model: sonnet` in its definition). +1. **Launch the `duul-planner` subagent** using the Agent tool with the user's requirements, workspace root path, and any relevant context. The subagent runs on Opus automatically (`model: opus` in its definition). 2. The subagent handles the entire plan ping-pong loop internally: - Writes a detailed implementation plan - Calls `request_plan_review` and iterates on REVISE feedback @@ -35,7 +35,7 @@ Phase 2 runs on the **main agent (Opus)** for maximum code quality. 7. **Write the actual code** to the project files based on the approved plan (received from the `duul-planner` subagent). Use your edit/write tools to make real changes. 8. **Run lint if available.** Check `package.json` scripts for `lint`, `lint:fix`, or `eslint`, or check for a Makefile/config equivalent. If a lint command exists, run it with auto-fix (e.g. `npm run lint -- --fix` or `npx eslint --fix`). Fix any remaining errors before proceeding. If no lint is configured, skip this step. -9. Call `request_code_review` with the code and the approved plan. +9. Call `request_code_review` with the code and the approved plan. **For large content, avoid huge inline strings** (they can make the tool call collapse to an empty `{}` and fail validation): write the code to `.duul/code.md` and the plan to `.duul/plan.md`, then pass `code_file: ".duul/code.md"` and `approved_plan_file: ".duul/plan.md"` (relative paths) plus `workspace_root`. Inline `code`/`approved_plan` only for small content. 10. If `review_status === "incomplete"`: check `missing_context` and retry with narrower scope. 11. If `blocking_issues.length > 0` or `verdict === "REVISE"`: fix the code in the actual files, re-run lint if applicable, and call again. 12. If `requires_human_review === true`: pause and ask the user. diff --git a/README.ko.md b/README.ko.md index 70c6b7e..a026baa 100644 --- a/README.ko.md +++ b/README.ko.md @@ -25,7 +25,7 @@ DUUL은 [Model Context Protocol](https://modelcontextprotocol.io/) 서버로, MC 호출 에이전트는 각 단계에서 `APPROVE` 판정을 받을 때까지 리뷰어와 반복하고, 이후 다음 단계로 진행합니다. 이를 통해 한 LLM이 다른 LLM의 작업을 검증하는 크로스 모델 리뷰 워크플로우를 만듭니다. -**토큰 효율 설계:** 1단계(계획 작성)는 Sonnet급 서브에이전트에 위임합니다 — 리뷰어가 계획의 문제를 잡아주므로 충분합니다. 2단계(코드 구현)는 최대 코드 품질을 위해 Opus에서 실행됩니다. 이를 통해 1단계 토큰 비용이 약 80% 절감됩니다. +**토큰 효율 설계:** 두 단계 모두 최대 품질을 위해 Opus에서 실행됩니다. 비용을 낮추기 위해, 1단계 플래너는 압축된 "케이브맨" 스타일로 계획을 작성하고, 큰 계획은 거대한 인라인 문자열 대신 파일(`plan_file`)로 제출하며, 리뷰어도 같은 압축 형식으로 결과를 출력합니다. 리뷰어는 **워크스페이스 인식 파일 탐색** 기능을 갖추고 있어, `workspace_root`가 주어지면 7개의 내장 도구(파일 읽기, 코드 검색, 디렉토리 목록 등)를 사용하여 정보에 기반한 리뷰 결정을 내립니다. @@ -268,9 +268,9 @@ node scripts/token-report.mjs --plan max20 --all-time ```mermaid flowchart TD - Start(["사용자: 'DUUL로 개발 진행해줘'"]):::trigger --> Plan["구현 계획 작성\n(Sonnet 서브에이전트)"]:::sonnet + Start(["사용자: 'DUUL로 개발 진행해줘'"]):::trigger --> Plan["구현 계획 작성\n(Opus 서브에이전트)"]:::planner - subgraph Phase1["1단계: 계획 핑퐁 — Sonnet (최대 7회 반복)"] + subgraph Phase1["1단계: 계획 핑퐁 — Opus (최대 7회 반복)"] Plan --> PR["request_plan_review"] PR --> IterCheck1{반복\n제한?} IterCheck1 -- "초과" --> Human1["⏸ requires_human_review: true"] @@ -305,7 +305,7 @@ flowchart TD classDef trigger fill:#e1f5fe,stroke:#0288d1,color:#01579b classDef approved fill:#e8f5e9,stroke:#388e3c,color:#1b5e20 classDef done fill:#c8e6c9,stroke:#2e7d32,color:#1b5e20,stroke-width:2px - classDef sonnet fill:#fff3e0,stroke:#f57c00,color:#e65100 + classDef planner fill:#fff3e0,stroke:#f57c00,color:#e65100 classDef opus fill:#ede7f6,stroke:#7b1fa2,color:#4a148c ``` diff --git a/README.md b/README.md index a04e0d4..86ce27d 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ DUUL is a [Model Context Protocol](https://modelcontextprotocol.io/) server that The calling agent iterates with the reviewer on each phase until it receives an `APPROVE` verdict, then moves to the next phase. This creates a cross-model peer review workflow where one LLM checks the work of another. -**Token-efficient by design:** Phase 1 (plan authoring) is delegated to a Sonnet-class subagent, since the reviewer catches any plan issues anyway. Phase 2 (code implementation) stays on Opus for maximum code quality. This typically reduces Phase 1 token costs by ~80%. +**Token-efficient by design:** Both phases run on Opus for maximum quality. To keep cost down, the Phase 1 planner writes plans in compressed "caveman" style and submits large plans via a file (`plan_file`) instead of a giant inline string, and the reviewer emits its findings in the same compressed form. The reviewer has **workspace-aware file exploration** -- when given a `workspace_root`, it can autonomously browse the codebase using 7 built-in tools (read files, search code, list directories, etc.) to make informed review decisions instead of speculating. @@ -268,9 +268,9 @@ Reads `~/.duul/usage.jsonl` (set `DUUL_DEBUG_TOKEN=1` in your MCP env to enable ```mermaid flowchart TD - Start(["User: 'run DUUL'"]):::trigger --> Plan["Write implementation plan\n(Sonnet subagent)"]:::sonnet + Start(["User: 'run DUUL'"]):::trigger --> Plan["Write implementation plan\n(Opus subagent)"]:::planner - subgraph Phase1["Phase 1: Plan Ping-Pong — Sonnet (max 7 iterations)"] + subgraph Phase1["Phase 1: Plan Ping-Pong — Opus (max 7 iterations)"] Plan --> PR["request_plan_review"] PR --> IterCheck1{iteration\nlimit?} IterCheck1 -- "exceeded" --> Human1["⏸ requires_human_review: true"] @@ -305,7 +305,7 @@ flowchart TD classDef trigger fill:#e1f5fe,stroke:#0288d1,color:#01579b classDef approved fill:#e8f5e9,stroke:#388e3c,color:#1b5e20 classDef done fill:#c8e6c9,stroke:#2e7d32,color:#1b5e20,stroke-width:2px - classDef sonnet fill:#fff3e0,stroke:#f57c00,color:#e65100 + classDef planner fill:#fff3e0,stroke:#f57c00,color:#e65100 classDef opus fill:#ede7f6,stroke:#7b1fa2,color:#4a148c ``` diff --git a/package-lock.json b/package-lock.json index ea2105f..faa01c2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@planningo/duul", - "version": "1.0.0", + "version": "1.0.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@planningo/duul", - "version": "1.0.0", + "version": "1.0.1", "license": "MIT", "dependencies": { "@modelcontextprotocol/sdk": "^1.29.0", diff --git a/src/__tests__/inline-or-file.test.ts b/src/__tests__/inline-or-file.test.ts new file mode 100644 index 0000000..7dbc269 --- /dev/null +++ b/src/__tests__/inline-or-file.test.ts @@ -0,0 +1,101 @@ +import { test } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtempSync, writeFileSync, rmSync, symlinkSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { resolveInlineOrFile, type WorkspaceScope } from '../services/filesystem.js'; + +function makeScope(root: string): WorkspaceScope { + return { root, workingDirectories: null, linkedRoots: [], trackedOnly: false }; +} + +test('returns inline value when present', async () => { + const result = await resolveInlineOrFile({ inline: 'hello world', file: undefined, scope: null, label: 'plan' }); + assert.equal(result, 'hello world'); +}); + +test('reads from file when inline is empty', async () => { + const dir = mkdtempSync(join(tmpdir(), 'duul-inline-')); + try { + writeFileSync(join(dir, 'plan.md'), '## Plan\nfull contents from file'); + const result = await resolveInlineOrFile({ + inline: undefined, + file: 'plan.md', + scope: makeScope(dir), + label: 'plan', + }); + assert.equal(result, '## Plan\nfull contents from file'); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test('whitespace-only inline falls through to file', async () => { + const dir = mkdtempSync(join(tmpdir(), 'duul-inline-')); + try { + writeFileSync(join(dir, 'plan.md'), 'real content'); + const result = await resolveInlineOrFile({ + inline: ' \n ', + file: 'plan.md', + scope: makeScope(dir), + label: 'plan', + }); + assert.equal(result, 'real content'); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test('inline takes precedence over file', async () => { + const dir = mkdtempSync(join(tmpdir(), 'duul-inline-')); + try { + writeFileSync(join(dir, 'plan.md'), 'from file'); + const result = await resolveInlineOrFile({ + inline: 'from inline', + file: 'plan.md', + scope: makeScope(dir), + label: 'plan', + }); + assert.equal(result, 'from inline'); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test('returns undefined when neither inline nor file is provided', async () => { + const result = await resolveInlineOrFile({ inline: undefined, file: undefined, scope: null, label: 'plan' }); + assert.equal(result, undefined); +}); + +test('throws when file is given but no workspace scope is set', async () => { + await assert.rejects( + () => resolveInlineOrFile({ inline: undefined, file: 'plan.md', scope: null, label: 'plan' }), + /plan_file was provided.*no workspace_root/s, + ); +}); + +test('throws when file path escapes the workspace root', async () => { + const dir = mkdtempSync(join(tmpdir(), 'duul-inline-')); + try { + await assert.rejects( + () => resolveInlineOrFile({ inline: undefined, file: '../escape.md', scope: makeScope(dir), label: 'plan' }), + /outside project root/, + ); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + +test('blocks an in-root symlink that points at a secret (.env)', async () => { + const dir = mkdtempSync(join(tmpdir(), 'duul-inline-')); + try { + writeFileSync(join(dir, '.env'), 'SECRET=topsecret'); + symlinkSync(join(dir, '.env'), join(dir, 'innocent.md')); + await assert.rejects( + () => resolveInlineOrFile({ inline: undefined, file: 'innocent.md', scope: makeScope(dir), label: 'plan' }), + /Access denied \(sensitive file\)/, + ); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); diff --git a/src/prompts/code-review-system.ts b/src/prompts/code-review-system.ts index d463530..7374a1c 100644 --- a/src/prompts/code-review-system.ts +++ b/src/prompts/code-review-system.ts @@ -30,6 +30,13 @@ A junior developer wrote code based on an approved plan. You must verify that ev - Do NOT put actionable corrections in \`non_blocking_suggestions\` to soften the tone — if the code would be more correct or safer with the change, it belongs in \`blocking_issues\` with verdict "REVISE". - \`confidence\`: Your honest confidence (0-1). If the code is too short to fully evaluate, or context is missing, be honest about it and set \`requires_human_review: true\`. +## Output Style — Compressed (token economy) +Write every free-text VALUE (logic_validation, blocking_issues.description/suggestion, non_blocking_suggestions, vulnerabilities.description, symptom_impact prose, symptom_match_notes) in compressed "caveman" style to save tokens: +- Drop articles (a/an/the), filler (just/really/basically/actually/simply), and pleasantries. +- Prefer fragments over full sentences. Pattern: "[location] [problem]. [fix]." beats prose. +- Use short synonyms (big not extensive, fix not "implement a solution for"). +Keep EXACT and uncompressed: JSON keys, enum values (APPROVE/REVISE, severities), \`optimized_snippet\` code, file paths, identifiers, function/type names, and any quoted user text (\`user_original_request_echo\` stays verbatim). Brevity must never drop a required field, soften a blocking issue, or change technical meaning. + ## Verdict Calibration Do NOT conflate positive tone with APPROVE. Code can be "almost perfect" and still require REVISE. The verdict is determined solely by whether blocking_issues is empty: - blocking_issues is empty → APPROVE is allowed diff --git a/src/prompts/plan-review-system.ts b/src/prompts/plan-review-system.ts index bcd1235..f406dbf 100644 --- a/src/prompts/plan-review-system.ts +++ b/src/prompts/plan-review-system.ts @@ -28,6 +28,13 @@ You are reviewing a development plan submitted by a junior developer. Your job i - \`edge_cases\`: List specific scenarios the plan does not account for. - \`checklist_for_implementation\`: Concrete steps the developer must follow during coding. +## Output Style — Compressed (token economy) +Write every free-text VALUE (architectural_analysis, blocking_issues.description/suggestion, non_blocking_suggestions, edge_cases, checklist_for_implementation, symptom_impact prose, symptom_match_notes) in compressed "caveman" style to save tokens: +- Drop articles (a/an/the), filler (just/really/basically/actually/simply), and pleasantries. +- Prefer fragments over full sentences. Pattern: "[thing] [problem]. [fix]." beats prose. +- Use short synonyms (big not extensive, fix not "implement a solution for"). +Keep EXACT and uncompressed: JSON keys, enum values (APPROVE/REVISE, severities), code, file paths, identifiers, function/type names, and any quoted user text (\`user_original_request_echo\` stays verbatim). Brevity must never drop a required field, soften a blocking issue, or change technical meaning. + ## Verdict Calibration Do NOT conflate positive tone with APPROVE. A plan can be "mostly good" or "almost there" and still require REVISE. The verdict is determined solely by whether blocking_issues is empty: - blocking_issues is empty → APPROVE is allowed (but not required if you have low confidence) diff --git a/src/schemas/code-review.ts b/src/schemas/code-review.ts index 0b6f5b1..780091f 100644 --- a/src/schemas/code-review.ts +++ b/src/schemas/code-review.ts @@ -15,19 +15,36 @@ export const DependenciesSchema = z.object({ export const CodeReviewInputSchema = z.object({ code: z .string() - .min(1, 'code must not be empty') + .optional() .describe( - 'REQUIRED. The full code being reviewed (markdown code block or raw source). Must NOT be omitted or empty. ' + + 'The full code being reviewed (markdown code block or raw source). REQUIRED unless code_file is provided. ' + 'For multi-file diffs, concatenate all changed code with file headers. ' + - 'Pass actual code content here — never call this tool with an empty object.', + 'If the code is large, prefer code_file — inlining a very large code string here can make the tool call fail to serialize.', + ), + code_file: z + .string() + .optional() + .describe( + 'Relative path (within workspace_root) to a file containing the full code being reviewed, ' + + 'e.g. ".duul/code.md". Use this instead of inlining `code` when the content is large: ' + + 'write it to the file first, then pass its path here. ' + + 'Exactly one of `code` or `code_file` is required. Requires workspace_root. Must be a relative path.', ), approved_plan: z .string() - .min(1, 'approved_plan must not be empty') + .optional() .describe( - 'REQUIRED. Full text of the plan approved in Phase 1. Must NOT be omitted. ' + + 'Full text of the plan approved in Phase 1. REQUIRED unless approved_plan_file is provided. ' + 'Pass the entire approved plan content (markdown) so the reviewer can verify the code matches it.', ), + approved_plan_file: z + .string() + .optional() + .describe( + 'Relative path (within workspace_root) to a markdown file containing the approved plan, ' + + 'e.g. ".duul/plan.md". Use this instead of inlining `approved_plan` when it is large. ' + + 'Exactly one of `approved_plan` or `approved_plan_file` is required. Requires workspace_root. Must be a relative path.', + ), file_path: z.string().optional().describe('File path for contextual feedback'), dependencies: DependenciesSchema.optional().describe('Related library version info'), relevant_code: z diff --git a/src/schemas/execution-partition.ts b/src/schemas/execution-partition.ts index da3ab4c..81aa531 100644 --- a/src/schemas/execution-partition.ts +++ b/src/schemas/execution-partition.ts @@ -4,16 +4,25 @@ import { ArtifactRefSchema, ReviewerConfigSchema, IterationMetaOutputSchema, Tok export const ExecutionPartitionInputSchema = z.object({ approved_plan: z .string() - .min(1, 'approved_plan must not be empty') + .optional() + .describe( + 'Full text of the approved plan to partition into subtasks. REQUIRED unless approved_plan_file is provided. ' + + 'Pass the entire approved plan markdown so the partitioner can analyze dependencies and split work. ' + + 'If the plan is large, prefer approved_plan_file — inlining a very large string here can make the tool call fail to serialize.', + ), + approved_plan_file: z + .string() + .optional() .describe( - 'REQUIRED. Full text of the approved plan to partition into subtasks. Must NOT be omitted or empty. ' + - 'Pass the entire approved plan markdown so the partitioner can analyze dependencies and split work.', + 'Relative path (within workspace_root) to a markdown file containing the approved plan, ' + + 'e.g. ".duul/plan.md". Use this instead of inlining `approved_plan` when it is large. ' + + 'Exactly one of `approved_plan` or `approved_plan_file` is required. Must be a relative path.', ), workspace_root: z .string() - .min(1, 'workspace_root must not be empty') + .optional() .describe( - 'REQUIRED. Absolute path to the workspace root directory. Must NOT be omitted. ' + + 'Absolute path to the workspace root directory. REQUIRED (enforced by the handler). ' + 'Example: "/Users/me/project". The partitioner uses this to verify file paths exist.', ), working_directories: z diff --git a/src/schemas/plan-review.ts b/src/schemas/plan-review.ts index 07ae7b5..44c0ffc 100644 --- a/src/schemas/plan-review.ts +++ b/src/schemas/plan-review.ts @@ -33,11 +33,22 @@ export const ProjectContextSchema = z.object({ export const PlanReviewInputSchema = z.object({ plan: z .string() - .min(1, 'plan must not be empty') + .optional() .describe( - 'REQUIRED. Full implementation plan text (markdown). Must NOT be omitted or empty. ' + + 'Full implementation plan text (markdown). REQUIRED unless plan_file is provided. ' + 'Include: problem statement (quote user request), files to create/modify with paths, ' + - 'approach, edge cases, dependencies. Pass actual plan content here — never call this tool with an empty object.', + 'approach, edge cases, dependencies. ' + + 'If the plan is large, prefer writing it to a file and passing plan_file instead — ' + + 'inlining a very large plan string here can make the tool call fail to serialize.', + ), + plan_file: z + .string() + .optional() + .describe( + 'Relative path (within workspace_root) to a markdown file containing the full plan, ' + + 'e.g. ".duul/plan.md". Use this instead of inlining `plan` when the plan is large: ' + + 'write the plan to the file first, then pass its path here. ' + + 'Exactly one of `plan` or `plan_file` is required. Requires workspace_root. Must be a relative path.', ), project_context: ProjectContextSchema.optional().describe('Structured project context'), constraints: z diff --git a/src/services/filesystem.ts b/src/services/filesystem.ts index 59226bb..8e6d868 100644 --- a/src/services/filesystem.ts +++ b/src/services/filesystem.ts @@ -86,25 +86,27 @@ async function safePath( throw new Error(`Symlink escape detected: ${requestedPath} resolves outside project root`); } - // Block sensitive files - const lower = rel.toLowerCase(); - if (lower.includes('.env') && !lower.endsWith('.example')) { - throw new Error(`Access denied (sensitive file): ${requestedPath}`); - } - if (rel === 'node_modules' || rel.startsWith('node_modules/') || rel.startsWith('node_modules\\')) { - throw new Error('Access denied: node_modules'); - } - - // Block additional paths (.git, build, dist) - const topSegment = rel.split('/')[0].split('\\')[0]; - if (BLOCKED_PATHS.includes(topSegment)) { - throw new Error(`Access denied: ${topSegment}`); - } - - // Block large file extensions (.log) - if (BLOCKED_EXTENSIONS.some((ext) => lower.endsWith(ext))) { - throw new Error(`Access denied (blocked extension): ${requestedPath}`); - } + // Block sensitive files — checked against BOTH the logical requested path and + // the symlink-resolved real path, so an in-root symlink with an innocuous name + // cannot point at an in-root secret (e.g. innocent.txt -> .env, gitcfg.txt -> .git/config). + const assertNotSensitive = (candidate: string): void => { + const low = candidate.toLowerCase(); + if (low.includes('.env') && !low.endsWith('.example')) { + throw new Error(`Access denied (sensitive file): ${requestedPath}`); + } + if (candidate === 'node_modules' || candidate.startsWith('node_modules/') || candidate.startsWith('node_modules\\')) { + throw new Error('Access denied: node_modules'); + } + const topSegment = candidate.split('/')[0].split('\\')[0]; + if (BLOCKED_PATHS.includes(topSegment)) { + throw new Error(`Access denied: ${topSegment}`); + } + if (BLOCKED_EXTENSIONS.some((ext) => low.endsWith(ext))) { + throw new Error(`Access denied (blocked extension): ${requestedPath}`); + } + }; + assertNotSensitive(rel); + assertNotSensitive(realRel); return realResolved; } @@ -329,6 +331,44 @@ export async function readProjectFile( return readFile(resolved, 'utf-8'); } +/** + * Resolve a large text argument that may be supplied inline OR via a file path. + * + * Large MCP tool arguments (full plan/code markdown) are the single input that + * tool-calling models most often fail to serialize: when the intended value is + * big, the model can emit an empty `{}` for the whole argument object, which the + * MCP SDK then rejects with a `-32602` validation error — and the caller loops. + * Writing the content to a file with a normal Write call (a small, reliable + * tool schema) and passing a short relative path here sidesteps that failure. + * + * Returns the resolved text, or undefined when neither source yields content. + * Throws (with a labelled message) only when a file path was given but could + * not be read — the tool handler converts that into actionable retry guidance. + */ +export async function resolveInlineOrFile(opts: { + inline?: string | null; + file?: string | null; + scope: WorkspaceScope | null; + label: string; +}): Promise { + const { inline, file, scope, label } = opts; + if (typeof inline === 'string' && inline.trim().length > 0) { + return inline; + } + if (typeof file === 'string' && file.trim().length > 0) { + if (!scope) { + throw new Error( + `${label}_file was provided ("${file}") but no workspace_root (or project_root) is set. ` + + 'Pass workspace_root so the file can be read.', + ); + } + // Bypass tracked_only: the *_file artifact is the caller's own scratch file + // (e.g. .duul/plan.md), which is typically untracked by git. + return readProjectFile(scope.root, file, { ...scope, trackedOnly: false }); + } + return undefined; +} + export async function listProjectDirectory( projectRoot: string, dirPath: string, diff --git a/src/tools/code-review.ts b/src/tools/code-review.ts index aa33b7a..b2c41b1 100644 --- a/src/tools/code-review.ts +++ b/src/tools/code-review.ts @@ -8,7 +8,7 @@ import { import { getCodeReviewSystemPrompt, formatCodeReviewUserMessage } from '../prompts/code-review-system.js'; import { callReview } from '../services/reviewer.js'; import type { TokenUsage } from '../services/reviewer.js'; -import { resolveWorkspaceScope, getGitDiff } from '../services/filesystem.js'; +import { resolveWorkspaceScope, getGitDiff, resolveInlineOrFile } from '../services/filesystem.js'; import { computeIterationMeta, isIterationLimitExceeded, computeCostWarning } from '../services/review-limits.js'; import { logUsage } from '../services/usage-logger.js'; import { applyGates } from '../services/review-gates.js'; @@ -24,9 +24,10 @@ export function registerCodeReviewTool(server: McpServer): void { title: 'DUUL Code Review (Strict QA)', description: 'DUUL Phase 2: Submit code for strict QA review. ' + - 'REQUIRED fields: code (the full code being reviewed — do NOT leave empty), approved_plan (the Phase 1 approved plan text). ' + + 'Provide the code EITHER inline via `code` OR (preferred for large content) via `code_file`, and the ' + + 'Phase 1 plan EITHER inline via `approved_plan` OR via `approved_plan_file` (relative paths, e.g. ".duul/code.md" / ".duul/plan.md") plus `workspace_root`. ' + + 'Exactly one of code/code_file and one of approved_plan/approved_plan_file are required. ' + 'Optional: workspace_root, file_path, changed_files, artifact_refs, previous_review_id, iteration_count. ' + - 'NEVER call with an empty object — populate code and approved_plan with actual content before invoking. ' + 'Returns blocking issues, vulnerabilities, optimized snippet, or APPROVE verdict.', inputSchema: CodeReviewInputSchema, outputSchema: CodeReviewMcpOutputSchema, @@ -35,21 +36,43 @@ export function registerCodeReviewTool(server: McpServer): void { try { const args = input as CodeReviewInput; + const scope = resolveWorkspaceScope(args); + + // Resolve code and approved_plan from inline values or *_file escape hatches. + let codeText: string | undefined; + let approvedPlanText: string | undefined; + try { + codeText = await resolveInlineOrFile({ inline: args.code, file: args.code_file, scope, label: 'code' }); + approvedPlanText = await resolveInlineOrFile({ + inline: args.approved_plan, + file: args.approved_plan_file, + scope, + label: 'approved_plan', + }); + } catch (readErr: unknown) { + const reason = readErr instanceof Error ? readErr.message : String(readErr); + console.error(`[duul] code-review file read failed: ${reason}`); + return { + content: [{ type: 'text' as const, text: `ERROR: could not read a *_file argument. ${reason}` }], + isError: true, + }; + } + if ( - !args || - typeof args.code !== 'string' || - args.code.trim().length < 5 || - typeof args.approved_plan !== 'string' || - args.approved_plan.trim().length < 20 + typeof codeText !== 'string' || + codeText.trim().length < 5 || + typeof approvedPlanText !== 'string' || + approvedPlanText.trim().length < 20 ) { const message = - 'ERROR: `code` and `approved_plan` fields are both required. ' + + 'ERROR: `code` and `approved_plan` are both required. ' + '`code` must contain the actual code being reviewed (min 5 chars). ' + '`approved_plan` must contain the full plan text approved in Phase 1 (min 20 chars). ' + - 'You called request_code_review with missing or empty content. ' + - 'Retry with: { "code": "", "approved_plan": "", "workspace_root": "", "iteration_count": 1 }. ' + + 'You called request_code_review without usable content. ' + + 'Inline them — { "code": "", "approved_plan": "", ... } — or, for large content, ' + + 'write each to a file and pass { "code_file": ".duul/code.md", "approved_plan_file": ".duul/plan.md", "workspace_root": "" }. ' + 'Do NOT call this tool again with an empty input.'; - console.error(`[duul] code-review rejected: missing/empty code or approved_plan field`); + console.error(`[duul] code-review rejected: missing/empty code or approved_plan content`); return { content: [{ type: 'text' as const, text: message }], isError: true, @@ -93,8 +116,6 @@ export function registerCodeReviewTool(server: McpServer): void { }; } - const scope = resolveWorkspaceScope(args); - // Auto-generate git diff if not provided let gitDiff = args.git_diff; if (!gitDiff && scope?.root && args.changed_files?.length) { @@ -112,8 +133,8 @@ export function registerCodeReviewTool(server: McpServer): void { const systemPrompt = getCodeReviewSystemPrompt(); const userMessage = formatCodeReviewUserMessage( - args.code, - args.approved_plan, + codeText, + approvedPlanText, args.file_path, args.dependencies, args.relevant_code, diff --git a/src/tools/execution-partition.ts b/src/tools/execution-partition.ts index 1914752..accf472 100644 --- a/src/tools/execution-partition.ts +++ b/src/tools/execution-partition.ts @@ -8,7 +8,7 @@ import { import { getExecutionPartitionSystemPrompt, formatExecutionPartitionUserMessage } from '../prompts/execution-partition-system.js'; import { callReview } from '../services/reviewer.js'; import type { TokenUsage } from '../services/reviewer.js'; -import { resolveWorkspaceScope } from '../services/filesystem.js'; +import { resolveWorkspaceScope, resolveInlineOrFile } from '../services/filesystem.js'; import { computeIterationMeta, isIterationLimitExceeded, computeCostWarning } from '../services/review-limits.js'; import { logUsage } from '../services/usage-logger.js'; @@ -21,9 +21,10 @@ export function registerExecutionPartitionTool(server: McpServer): void { title: 'DUUL Execution Partition (Project Manager)', description: 'DUUL optional: Partition an approved plan into executable subtasks. ' + - 'REQUIRED fields: approved_plan (full plan markdown — do NOT leave empty), workspace_root (absolute path). ' + + 'Provide the plan EITHER inline via `approved_plan` OR (preferred for large plans) via `approved_plan_file` ' + + '(relative path, e.g. ".duul/plan.md"). `workspace_root` (absolute path) is required. ' + + 'Exactly one of approved_plan/approved_plan_file is required. ' + 'Optional: working_directories, changed_files, entrypoints, artifact_refs, max_parallelism, iteration_count. ' + - 'NEVER call with an empty object — populate approved_plan with actual plan text before invoking. ' + 'Returns dependency graph, spawn strategy, and handoff contracts.', inputSchema: ExecutionPartitionInputSchema, outputSchema: ExecutionPartitionMcpOutputSchema, @@ -32,16 +33,35 @@ export function registerExecutionPartitionTool(server: McpServer): void { try { const args = input as ExecutionPartitionInput; + const scope = resolveWorkspaceScope(args); + + // Resolve approved_plan from inline value or approved_plan_file escape hatch. + let approvedPlanText: string | undefined; + try { + approvedPlanText = await resolveInlineOrFile({ + inline: args.approved_plan, + file: args.approved_plan_file, + scope, + label: 'approved_plan', + }); + } catch (readErr: unknown) { + const reason = readErr instanceof Error ? readErr.message : String(readErr); + console.error(`[duul] execution-partition approved_plan_file read failed: ${reason}`); + return { + content: [{ type: 'text' as const, text: `ERROR: could not read approved_plan_file. ${reason}` }], + isError: true, + }; + } + if ( - !args || - typeof args.approved_plan !== 'string' || - args.approved_plan.trim().length < 20 || + typeof approvedPlanText !== 'string' || + approvedPlanText.trim().length < 20 || typeof args.workspace_root !== 'string' || args.workspace_root.trim().length === 0 ) { const message = - 'ERROR: `approved_plan` and `workspace_root` fields are both required. ' + - '`approved_plan` must contain the full plan text (min 20 chars). ' + + 'ERROR: `approved_plan` and `workspace_root` are both required. ' + + '`approved_plan` must contain the full plan text (min 20 chars) — inline it, or pass approved_plan_file (e.g. ".duul/plan.md"). ' + '`workspace_root` must be an absolute path. ' + 'You called request_execution_partition with missing or empty content. ' + 'Retry with: { "approved_plan": "", "workspace_root": "" }. ' + @@ -70,7 +90,7 @@ export function registerExecutionPartitionTool(server: McpServer): void { subtasks: [{ id: 'full-plan', title: 'Execute full plan serially (limit reached)', - goal: args.approved_plan.slice(0, 200) + '...', + goal: approvedPlanText.slice(0, 200) + '...', can_run_in_parallel: false, depends_on: [], workspace_name_hint: 'main', @@ -106,10 +126,9 @@ export function registerExecutionPartitionTool(server: McpServer): void { }; } - const scope = resolveWorkspaceScope(args); const systemPrompt = getExecutionPartitionSystemPrompt(); const userMessage = formatExecutionPartitionUserMessage( - args.approved_plan, + approvedPlanText, args.constraints, { changedFiles: args.changed_files, @@ -140,7 +159,7 @@ export function registerExecutionPartitionTool(server: McpServer): void { subtasks: [{ id: 'full-plan', title: 'Execute full plan serially', - goal: args.approved_plan.slice(0, 200) + '...', + goal: approvedPlanText.slice(0, 200) + '...', can_run_in_parallel: false, depends_on: [], workspace_name_hint: 'main', diff --git a/src/tools/plan-review.ts b/src/tools/plan-review.ts index 1efaa70..0ebd5ca 100644 --- a/src/tools/plan-review.ts +++ b/src/tools/plan-review.ts @@ -8,7 +8,7 @@ import { import { getPlanReviewSystemPrompt, formatPlanReviewUserMessage } from '../prompts/plan-review-system.js'; import { callReview } from '../services/reviewer.js'; import type { TokenUsage } from '../services/reviewer.js'; -import { resolveWorkspaceScope, getGitDiff } from '../services/filesystem.js'; +import { resolveWorkspaceScope, getGitDiff, resolveInlineOrFile } from '../services/filesystem.js'; import { computeIterationMeta, isIterationLimitExceeded, computeCostWarning } from '../services/review-limits.js'; import { logUsage } from '../services/usage-logger.js'; import { applyGates } from '../services/review-gates.js'; @@ -24,9 +24,10 @@ export function registerPlanReviewTool(server: McpServer): void { title: 'DUUL Plan Review (Senior Architect)', description: 'DUUL Phase 1: Submit an implementation plan for senior-architect review. ' + - 'REQUIRED fields: plan (full plan markdown — do NOT leave empty), workspace_root (absolute path). ' + + 'Provide the plan EITHER inline via `plan` OR (preferred for large plans) by writing it to a file ' + + 'and passing `plan_file` (relative path, e.g. ".duul/plan.md") plus `workspace_root`. ' + + 'Exactly one of plan/plan_file is required. ' + 'Optional: project_context, changed_files, artifact_refs, user_original_request, previous_review_id, iteration_count. ' + - 'NEVER call with an empty object — populate plan with your actual plan text before invoking. ' + 'Returns blocking issues, edge cases, implementation checklist, or APPROVE verdict.', inputSchema: PlanReviewInputSchema, outputSchema: PlanReviewMcpOutputSchema, @@ -35,13 +36,29 @@ export function registerPlanReviewTool(server: McpServer): void { try { const args = input as PlanReviewInput; - if (!args || typeof args.plan !== 'string' || args.plan.trim().length < 20) { + const scope = resolveWorkspaceScope(args); + + // Resolve the plan from inline `plan` or from `plan_file` (large-plan escape hatch). + let planText: string | undefined; + try { + planText = await resolveInlineOrFile({ inline: args.plan, file: args.plan_file, scope, label: 'plan' }); + } catch (readErr: unknown) { + const reason = readErr instanceof Error ? readErr.message : String(readErr); + console.error(`[duul] plan-review plan_file read failed: ${reason}`); + return { + content: [{ type: 'text' as const, text: `ERROR: could not read plan_file. ${reason}` }], + isError: true, + }; + } + + if (typeof planText !== 'string' || planText.trim().length < 20) { const message = - 'ERROR: `plan` field is required and must contain the full plan markdown (at least 20 chars). ' + - 'You called request_plan_review with missing or empty plan content. ' + - 'Retry with: { "plan": "", "workspace_root": "", "user_original_request": "", "iteration_count": 1 }. ' + - 'Do NOT call this tool again with an empty input.'; - console.error(`[duul] plan-review rejected: missing/empty plan field`); + 'ERROR: a plan is required and must contain the full plan markdown (at least 20 chars). ' + + 'You called request_plan_review without usable plan content. ' + + 'Either inline it — { "plan": "", ... } — or, for a large plan, ' + + 'write it to a file first and pass { "plan_file": ".duul/plan.md", "workspace_root": "" }. ' + + 'Always include workspace_root and user_original_request. Do NOT call this tool again with an empty input.'; + console.error(`[duul] plan-review rejected: missing/empty plan content`); return { content: [{ type: 'text' as const, text: message }], isError: true, @@ -88,8 +105,6 @@ export function registerPlanReviewTool(server: McpServer): void { }; } - const scope = resolveWorkspaceScope(args); - // Auto-generate git diff if not provided let gitDiff = args.git_diff; if (!gitDiff && scope?.root && args.changed_files?.length) { @@ -107,7 +122,7 @@ export function registerPlanReviewTool(server: McpServer): void { const systemPrompt = getPlanReviewSystemPrompt(); const userMessage = formatPlanReviewUserMessage( - args.plan, + planText, args.project_context, args.constraints, args.notes_to_reviewer,