From 4d6df5e8fca74b02a7b55d909d903159ecc80393 Mon Sep 17 00:00:00 2001 From: Mansur Azatbek Date: Mon, 15 Jun 2026 22:40:15 +0500 Subject: [PATCH 1/6] fix: stop discovery fill-tool from poisoning the doubt_review protocol parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A decoded planner session showed the model stuck in an 11-call loop on planner_doubt_review. Root cause: planner_discovery_submit took both a free-form `body` and a structured `verificationProtocol` argument, and the model wrote its own `## Verification Protocol` section into body while the wrapper also appended one from the argument. discovery.md then carried two protocol sections; extractVerificationProtocol collected the body's prose lead-in ("The existing project has these commands available:") as a phantom required command and never reached the argument's section — so doubt_review could not be satisfied. - artifact-tools: the verificationProtocol argument is now the single source of truth — buildDiscoveryMarkdown strips any protocol section the model wrote into body before appending the canonical one (no brittle reject on an exact header). - doubt-review parser hardened: extractVerificationProtocol collects only bullet list items, and extractProtocolCommand rejects prose lines ending in ':'. - Every fill-tool now echoes the canonical schema next to what was saved so the model can self-correct by comparing expected vs received shape. - next-step hint spells out submit ordering (a later step's planner_*_submit is blocked until planner_finish_step advances there) to remove the early-call friction seen in the session. - Round-trip tests reproduce the deadlock and assert each artifact survives its consuming parser/validator with no phantom or duplicate sections. Co-Authored-By: Claude Opus 4.8 --- src/runtime/artifact-tools.roundtrip.test.ts | 138 +++++++++++++++++++ src/runtime/artifact-tools.ts | 113 +++++++++++++-- src/runtime/doubt-review.ts | 10 +- src/runtime/next-step-hint.ts | 5 + 4 files changed, 253 insertions(+), 13 deletions(-) create mode 100644 src/runtime/artifact-tools.roundtrip.test.ts diff --git a/src/runtime/artifact-tools.roundtrip.test.ts b/src/runtime/artifact-tools.roundtrip.test.ts new file mode 100644 index 0000000..428c723 --- /dev/null +++ b/src/runtime/artifact-tools.roundtrip.test.ts @@ -0,0 +1,138 @@ +import { describe, expect, it } from "vitest"; +import { MockPlannerFs } from "../test/mock-fs"; +import { + buildDiscoveryMarkdown, + CANONICAL_SCHEMA, + PLANNER_ARTIFACT_TOOL_NAMES, + stripVerificationProtocolSection, +} from "./artifact-tools"; +import { + extractVerificationProtocol, + extractVerificationProtocolCommands, +} from "./doubt-review"; +import { + validatePostImplementationCounterexampleReview, + validatePreImplementationProofContract, + validateTaskMergeScopeAudit, +} from "./tdd-evidence"; +import { mergeTddMarkdown, renderTddSection, TDD_SECTIONS } from "./tdd-form"; + +// Reproduces the deadlock from the decoded pi-session: the model wrote its own +// "## Verification Protocol" section (with a prose lead-in and extra commands) +// into the discovery body while the wrapper also appended one from the +// verificationProtocol argument. The parser then collected the prose line as a +// phantom required command, so planner_doubt_review could never be satisfied. +describe("discovery_submit ↔ verification protocol parser round-trip", () => { + it("keeps the verificationProtocol argument as the single source of truth", () => { + const body = [ + "# Discovery: example", + "", + "## Findings", + "- everything already has AGENTS.md except src/test/", + "", + "### Verification Protocol", + "The existing project has these commands available:", + "- `npm run check` — biome", + "- `npm run build` — tsc", + "- `npm test` — vitest", + "- `npm run ci` — full pipeline", + "- `npm run format` — biome format", + ].join("\n"); + + const discoveryMd = buildDiscoveryMarkdown(body, [ + "npm run check", + "npm run build", + "npm test", + ]); + + // Exactly one protocol section survives, and it is the argument's. + const headings = discoveryMd + .split(/\r?\n/) + .filter( + (line) => /verification protocol/i.test(line) && /^#{1,6}\s/.test(line), + ); + expect(headings).toEqual(["## Verification Protocol"]); + + // The parser the doubt_review gate uses returns exactly the argument + // commands — no prose phantom, no ci/format the model never ran. + expect(extractVerificationProtocolCommands(discoveryMd)).toEqual([ + "npm run check", + "npm run build", + "npm test", + ]); + }); + + it("ignores prose lines under a verification protocol heading", () => { + const discoveryMd = [ + "# Discovery", + "", + "## Verification Protocol", + "The existing project has these commands available:", + "- npm run check", + "- npm test", + ].join("\n"); + // The prose lead-in is dropped; only bullet commands remain. + expect(extractVerificationProtocol(discoveryMd)).toEqual([ + "- npm run check", + "- npm test", + ]); + expect(extractVerificationProtocolCommands(discoveryMd)).toEqual([ + "npm run check", + "npm test", + ]); + }); + + it("strips a body protocol section regardless of heading level", () => { + for (const heading of [ + "## Verification Protocol", + "#### Verification Protocol", + ]) { + const stripped = stripVerificationProtocolSection( + ["## Findings", "- a", heading, "- npm run lint"].join("\n"), + ); + expect(stripped).not.toMatch(/verification protocol/i); + expect(stripped).not.toContain("npm run lint"); + expect(stripped).toContain("## Findings"); + } + }); +}); + +describe("tdd_submit ↔ tdd-evidence validators round-trip", () => { + it("renders sections that the consuming validators accept", async () => { + const updates = Object.fromEntries( + TDD_SECTIONS.map((def) => [ + def.key, + renderTddSection( + def, + Object.fromEntries( + def.fields.map((field) => [field, `concrete ${field} evidence`]), + ), + ), + ]), + ); + const content = mergeTddMarkdown( + "", + updates as Parameters[1], + ); + + const fs = new MockPlannerFs(); + const path = "/plan/tasks/task-1/tdd.md"; + await fs.writeTextAtomic(path, content); + + await expect( + validatePreImplementationProofContract(fs, path), + ).resolves.toBeNull(); + await expect( + validatePostImplementationCounterexampleReview(fs, path), + ).resolves.toBeNull(); + await expect(validateTaskMergeScopeAudit(fs, path)).resolves.toBeNull(); + }); +}); + +describe("fill-tool echo schema", () => { + it("provides a canonical schema for every artifact tool", () => { + for (const tool of PLANNER_ARTIFACT_TOOL_NAMES) { + expect(CANONICAL_SCHEMA[tool]?.trim().length).toBeGreaterThan(0); + } + }); +}); diff --git a/src/runtime/artifact-tools.ts b/src/runtime/artifact-tools.ts index d1cd9bf..fcddb67 100644 --- a/src/runtime/artifact-tools.ts +++ b/src/runtime/artifact-tools.ts @@ -58,33 +58,37 @@ export async function executePlannerArtifactTool(input: { switch (input.toolName) { case "planner_plan_submit": { const content = requiredString(params, "content"); - await input.fs.writeTextAtomic(planPaths.planMd, `${content}\n`); - return applied(input.toolName, planPaths.planMd, "Planner plan saved."); + const written = `${content}\n`; + await input.fs.writeTextAtomic(planPaths.planMd, written); + return applied( + input.toolName, + planPaths.planMd, + "Planner plan saved.", + written, + ); } case "planner_discovery_submit": { const body = requiredString(params, "body"); const protocol = requiredStringArray(params, "verificationProtocol"); - const content = [ - body, - "", - "## Verification Protocol", - ...protocol.map((command) => `- ${command}`), - ].join("\n"); - await input.fs.writeTextAtomic(planPaths.discoveryMd, `${content}\n`); + const written = `${buildDiscoveryMarkdown(body, protocol)}\n`; + await input.fs.writeTextAtomic(planPaths.discoveryMd, written); return applied( input.toolName, planPaths.discoveryMd, - "Planner discovery saved with a ## Verification Protocol section.", + "Planner discovery saved. The ## Verification Protocol section is rebuilt from the verificationProtocol argument (any protocol section written in body is dropped).", + written, ); } case "planner_summary_submit": { const content = requiredString(params, "content"); const summaryPath = join(planPaths.planDir, "final_summary.md"); - await input.fs.writeTextAtomic(summaryPath, `${content}\n`); + const written = `${content}\n`; + await input.fs.writeTextAtomic(summaryPath, written); return applied( input.toolName, summaryPath, "Planner final summary saved.", + written, ); } case "planner_tdd_submit": { @@ -111,6 +115,7 @@ export async function executePlannerArtifactTool(input: { input.toolName, tddPath, `Planner tdd.md updated (${Object.keys(updates).join(", ")}).`, + content, ); } } @@ -140,6 +145,7 @@ function applied( toolName: PlannerArtifactToolName, path: string, headline: string, + written: string, ): PlannerArtifactToolExecutionResult { return { status: "applied", @@ -147,12 +153,95 @@ function applied( text: [ headline, `Artifact: ${path}`, - "Continue the current step; the next-step hint follows after planner_finish_step.", + "", + "## Expected shape (canonical schema)", + CANONICAL_SCHEMA[toolName], + "", + "## What you submitted (saved to disk)", + "```markdown", + written.trimEnd(), + "```", + "", + "Compare the two: the saved artifact should follow the canonical shape above (headings/sections of the same kind — the exact wording of your prose is up to you). If a required section is missing or wrong, call the same tool again to overwrite it; otherwise continue. The next-step hint follows after planner_finish_step.", ].join("\n"), details: { path }, }; } +/** Assemble discovery.md from a free-form body plus the structured protocol + * commands. The verificationProtocol argument is the single source of truth for + * the ## Verification Protocol section, so any protocol section the model also + * wrote into body is stripped first — otherwise discovery.md would carry two + * `## Verification Protocol` sections and break the doubt_review protocol + * parser. */ +export function buildDiscoveryMarkdown( + body: string, + protocol: readonly string[], +): string { + return [ + stripVerificationProtocolSection(body).trimEnd(), + "", + "## Verification Protocol", + ...protocol.map((command) => `- ${command}`), + ].join("\n"); +} + +/** Remove any `verification protocol` section (heading at any level plus its + * lines, up to the next heading) from body. */ +export function stripVerificationProtocolSection(body: string): string { + const lines = body.split(/\r?\n/); + const out: string[] = []; + let skipping = false; + for (const line of lines) { + const heading = /^(#{1,6})\s+(.+?)\s*$/.exec(line.trim()); + if (heading) { + skipping = heading[2].trim().toLowerCase() === "verification protocol"; + if (skipping) continue; + } + if (!skipping) out.push(line); + } + return out.join("\n"); +} + +/** Compact reference templates echoed back so the model sees the canonical + * shape of each artifact next to what it actually wrote. */ +export const CANONICAL_SCHEMA: Record = { + planner_plan_submit: [ + "# Plan: ", + "## Goal", + "## Scope (in-scope vs out-of-scope)", + "## Constraints", + "## Risks", + "## Checks (how each task is verified)", + "## Tasks (ordered task sequence)", + ].join("\n"), + planner_discovery_submit: [ + "# Discovery: <title>", + "## Project Overview / boundaries / findings / fundamental rules", + "(for change requests: ## Post-Implementation Snapshot / Completed Work / Remaining Work)", + "", + "NOTE: Do NOT write a `## Verification Protocol` heading in body — pass", + "the commands in the verificationProtocol argument; the wrapper renders", + "`## Verification Protocol` with one `- <command>` per line. That section", + "is the single source doubt_review checks against.", + ].join("\n"), + planner_tdd_submit: [ + "# tdd.md (per active task; sections added as the lifecycle reaches them)", + `## ${TDD_SECTIONS[0].title}`, + ...TDD_SECTIONS[0].fields.map((f) => `- ${f}: <concrete evidence>`), + `## ${TDD_SECTIONS[1].title}`, + ...TDD_SECTIONS[1].fields.map((f) => `- ${f}: <concrete evidence>`), + `## ${TDD_SECTIONS[2].title}`, + ...TDD_SECTIONS[2].fields.map((f) => `- ${f}: <concrete evidence>`), + ].join("\n"), + planner_summary_submit: [ + "# Final Summary", + "## What changed", + "## Verification evidence (command → result)", + "## Follow-ups", + ].join("\n"), +}; + function blocked( toolName: PlannerArtifactToolName, text: string, diff --git a/src/runtime/doubt-review.ts b/src/runtime/doubt-review.ts index 1ea9a4c..57e381b 100644 --- a/src/runtime/doubt-review.ts +++ b/src/runtime/doubt-review.ts @@ -502,7 +502,10 @@ export function extractVerificationProtocol(content: string): string[] { collecting = title === "verification protocol"; continue; } - if (collecting && line.trim()) { + // Only bullet list items count as protocol commands. Prose lines (e.g. an + // introductory "These commands are available:") under the heading are + // ignored so they never become phantom required commands. + if (collecting && /^[-*]\s+\S/.test(line.trim())) { result.push(line.trim()); } } @@ -523,6 +526,11 @@ function extractProtocolCommand(line: string): string | null { if (/^(working directory|cwd|flags?)\s*:/i.test(normalized)) { return null; } + // A line ending in a colon is a prose heading (e.g. "Commands available:"), + // not a runnable command — never treat it as a required protocol command. + if (normalized.endsWith(":")) { + return null; + } const match = /^(test|tests|lint|format|build|ci|check|checks)\s*:\s*(.+)$/i.exec( normalized, diff --git a/src/runtime/next-step-hint.ts b/src/runtime/next-step-hint.ts index f925e19..d42ead7 100644 --- a/src/runtime/next-step-hint.ts +++ b/src/runtime/next-step-hint.ts @@ -53,6 +53,11 @@ export function buildNextStepHint(state: PlanStateRecord): string { // Name the wrappers permitted at this exact step so the model reaches for // the right planner tool instead of a built-in write/edit. lines.push(`Tools allowed now: ${tools.join(", ")}.`); + // Each planner_*_submit belongs to one step. The model often reaches for + // the next step's submit tool early and gets blocked; spell out the order. + lines.push( + "Order: only the tools listed above work at this step. A planner_*_submit for a later step is blocked until you call planner_finish_step to advance there.", + ); } lines.push( `If unsure, re-read \`${getPlannerStepStage(state.step)}.md\` or call planner_status.`, From 04cc3d378590147593c5365c37a5d849a3159f5d Mon Sep 17 00:00:00 2001 From: Mansur Azatbek <mansur62624@gmail.com> Date: Mon, 15 Jun 2026 22:42:39 +0500 Subject: [PATCH 2/6] test: harden tool-gating invariant across the full flag matrix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The invariant test only checked one flag combination (normal flags, debug on), leaving recovery/compact/user-decision states uncovered. Expand it to encode the real two-gate composition from orchestrator-gate.ts: - Normal allow_stage_machine path (debug on AND off): every guard-allowed wrapper must pass the stage-behavior gate — the deadlock-prevention invariant. - Broken / user-decision states bypass the behavior gate, so the guard set must be fixed (identical across every stage/step) — a step-scoped tool must never leak into a state where nothing checks it against stage behavior. - Compact boundary: the guard set must be exactly [planner_status]. Document the two-halves contract in src/runtime/AGENTS.md so future tool changes update both allowlists and stay covered. Any drift now fails before publish. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --- src/runtime/AGENTS.md | 2 +- src/runtime/tool-gating-invariant.test.ts | 104 +++++++++++++++++----- 2 files changed, 81 insertions(+), 25 deletions(-) diff --git a/src/runtime/AGENTS.md b/src/runtime/AGENTS.md index 7c47980..d722abc 100644 --- a/src/runtime/AGENTS.md +++ b/src/runtime/AGENTS.md @@ -25,7 +25,7 @@ Runtime domain for planner stages, model-facing status, tool wrappers, timers, r ### Do Not Touch Unless - Do not change lifecycle transitions without updating `state-machine.ts`, `state-transition.ts`, `workflow-tools.ts`, tests, and instructions. -- Do not add or re-scope a planner tool without updating BOTH allowlists that gate it: the guard policy (`guard/tool-policy.ts` `STEP_ALLOWED_TOOLS`) AND the stage behavior (`stage-behavior.ts` `expectedTools`). Both gates must pass for a tool to be usable at a step; if the guard allows a tool the behavior gate omits, the model is blocked at runtime (a deadlock when no fallback exists). The invariant is enforced by `tool-gating-invariant.test.ts`. Also update tool visibility expectations, status/instructions, and tests. +- Do not add or re-scope a planner tool without updating BOTH allowlists that gate it: the guard policy (`guard/tool-policy.ts` `STEP_ALLOWED_TOOLS`) AND the stage behavior (`stage-behavior.ts` `expectedTools`). Both gates must pass for a tool to be usable at a step; if the guard allows a tool the behavior gate omits, the model is blocked at runtime (a deadlock when no fallback exists). The two gates are composed only on the normal `allow_stage_machine` path (`orchestrator-gate.ts`); the broken/user-decision/compact states bypass the behavior gate and the guard returns a fixed set, so a step-scoped tool must never leak into those sets. Both halves are enforced by `tool-gating-invariant.test.ts` across the full flag matrix (debug on/off, broken, user-decision, compact). Also update tool visibility expectations, status/instructions, and tests. ### Domain Details - `status.ts` is the primary prompt surface for local models; it reads `PlanStateRecord` from the orchestrator and formats step rules, contract summaries, and guidance lines. diff --git a/src/runtime/tool-gating-invariant.test.ts b/src/runtime/tool-gating-invariant.test.ts index 180a35f..335908c 100644 --- a/src/runtime/tool-gating-invariant.test.ts +++ b/src/runtime/tool-gating-invariant.test.ts @@ -13,42 +13,98 @@ import { getPlannerStageStepBehavior, } from "./stage-behavior"; -// Invariant: a planner tool is usable at a step only if it passes BOTH gates — -// the guard policy (STEP_ALLOWED_TOOLS) and the stage-behavior expectedTools -// gate. So everything the guard permits MUST also pass the behavior gate; -// otherwise the guard advertises a tool the model can never actually call -// (a deadlock when no fallback exists, as happened with the artifact fill-tools). +// A planner tool is usable at a step only if it passes BOTH gates: the guard +// policy (STEP_ALLOWED_TOOLS) and the stage-behavior expectedTools gate. The +// two gates are composed in orchestrator-gate.ts, but ONLY on the normal +// `allow_stage_machine` path — i.e. when the plan is not broken, not awaiting a +// user decision, and not at a compact boundary. In those special states the +// behavior gate is bypassed and the guard returns a fixed recovery/compact set. // -// This guards against the two allowlists drifting apart for any tool — debug -// wrappers, git wrappers, contract tools, skills, or future additions. +// So the deadlock invariant has two halves, and this test enforces both so any +// drift between the allowlists (debug, git, contract, fill-tools, or future +// additions) is caught before publish. + +const STEPS = Object.entries(PLANNER_STAGE_STEPS) as Array< + [PlannerStage, readonly PlannerStep[]] +>; + describe("planner tool gating invariant", () => { - it("every guard-allowed wrapper passes the behavior gate at every step", () => { - const steps = Object.entries(PLANNER_STAGE_STEPS) as Array< - [PlannerStage, readonly PlannerStep[]] - >; - const violations: string[] = []; + // Half 1 — normal (allow_stage_machine) path: everything the guard permits + // MUST also pass the behavior gate, or the guard advertises a tool the model + // can never actually call (a deadlock when no fallback exists, as happened + // with the artifact fill-tools). Checked with debug artifacts both present + // and absent, since debugArtifactsDir changes the guard set. + for (const debugArtifactsDir of [null, "/tmp/planner-debug"]) { + it(`guard-allowed wrappers pass the behavior gate at every step (debug=${debugArtifactsDir ? "on" : "off"})`, () => { + const violations: string[] = []; + for (const [stage, stepList] of STEPS) { + for (const step of stepList) { + const allowed = getAllowedPlannerWrapperTools({ + stage, + step, + broken: false, + requiresUserDecision: false, + requiresCompact: false, + debugArtifactsDir, + }); + const behavior = getPlannerStageStepBehavior({ stage, step }); + for (const tool of allowed as readonly PlannerWrapperTool[]) { + if ( + !checkPlannerStageBehaviorWrapperTool({ behavior, tool }).allow + ) { + violations.push(`${stage}/${step}: ${tool}`); + } + } + } + } + expect(violations, violations.join("\n")).toEqual([]); + }); + } - for (const [stage, stepList] of steps) { + // Half 2 — special states bypass the behavior gate, so the guard is the sole + // gate. Their allowed set must therefore be FIXED (independent of the frozen + // stage/step); otherwise a step-scoped tool could leak into a state where + // nothing checks it against stage behavior. We pin that the set is identical + // across every stage/step for each special flag. + for (const flags of [ + { name: "broken", broken: true, requiresUserDecision: false }, + { name: "requiresUserDecision", broken: false, requiresUserDecision: true }, + ] as const) { + it(`guard set is fixed (step-independent) when ${flags.name}`, () => { + const sets = STEPS.flatMap(([stage, stepList]) => + stepList.map((step) => + [ + ...getAllowedPlannerWrapperTools({ + stage, + step, + broken: flags.broken, + requiresUserDecision: flags.requiresUserDecision, + requiresCompact: false, + debugArtifactsDir: "/tmp/planner-debug", + }), + ].sort(), + ), + ); + const first = JSON.stringify(sets[0]); + for (const set of sets) { + expect(JSON.stringify(set)).toEqual(first); + } + }); + } + + it("guard set is exactly [planner_status] at a compact boundary", () => { + for (const [stage, stepList] of STEPS) { for (const step of stepList) { - // debugArtifactsDir set so debug wrappers are included in the guard - // set and therefore checked against the behavior gate too. const allowed = getAllowedPlannerWrapperTools({ stage, step, broken: false, requiresUserDecision: false, - requiresCompact: false, + requiresCompact: true, debugArtifactsDir: "/tmp/planner-debug", }); - const behavior = getPlannerStageStepBehavior({ stage, step }); - for (const tool of allowed as readonly PlannerWrapperTool[]) { - if (!checkPlannerStageBehaviorWrapperTool({ behavior, tool }).allow) { - violations.push(`${stage}/${step}: ${tool}`); - } - } + expect([...allowed]).toEqual(["planner_status"]); } } - - expect(violations, violations.join("\n")).toEqual([]); }); }); From 1ee5e018b2b4091d2e784eaf4e6bfa3f8a7efd79 Mon Sep 17 00:00:00 2001 From: Mansur Azatbek <mansur62624@gmail.com> Date: Mon, 15 Jun 2026 22:43:56 +0500 Subject: [PATCH 3/6] ci: generate release notes against the explicit previous tag Release notes accumulated old feat/fix entries onto each new release. Root cause: the draft release used generate_release_notes:true with no baseline. GitHub anchors auto-generated notes to the last *published* (non-draft) release, but every release here is created as a draft, so the baseline stayed stale and each release re-listed every PR since the last published one. Generate the notes explicitly via `gh api releases/generate-notes` with previous_tag_name set to the previous v* tag (selected with `git tag --list 'v*' --sort=-v:refname`, excluding the just-created tag), and feed the result into the release body instead of generate_release_notes. The existing .github/release.yml categories/exclusions still apply. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --- .github/workflows/release.yml | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 912ce2e..ad933d6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -165,13 +165,35 @@ jobs: echo "url=$EXISTING_PR" >> "$GITHUB_OUTPUT" fi + - name: Generate Release Notes + id: notes + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + R_TAG=${{ needs.prepare.outputs.tag }} + # Compare against the previous v* tag explicitly. Without this, GitHub + # anchors auto-notes to the last *published* (non-draft) release; since + # every release is created as a draft, the baseline is stale and each + # new release re-lists every PR since the last published one, so old + # feat/fix entries pile onto the new ones. + PREV_TAG=$(git tag --list 'v*' --sort=-v:refname | grep -vx "$R_TAG" | head -n1) + ARGS=(-f tag_name="$R_TAG") + if [ -n "$PREV_TAG" ]; then + ARGS+=(-f previous_tag_name="$PREV_TAG") + fi + NOTES=$(gh api -X POST "repos/${{ github.repository }}/releases/generate-notes" "${ARGS[@]}" --jq '.body') + { + echo "body<<EOF_NOTES" + echo "$NOTES" + echo "EOF_NOTES" + } >> "$GITHUB_OUTPUT" + - name: Create Draft Release uses: softprops/action-gh-release@v2 with: name: ${{ env.R_NAME }} tag_name: ${{ needs.prepare.outputs.tag }} draft: true - generate_release_notes: true body: | This is a draft release. @@ -179,4 +201,8 @@ jobs: - npm package `pi-code-planner@${{ needs.prepare.outputs.version }}` is available. - Pi can install it with `pi install npm:pi-code-planner@${{ needs.prepare.outputs.version }}`. - The sync PR is ready: ${{ steps.sync_pr.outputs.url }} + + --- + + ${{ steps.notes.outputs.body }} token: ${{ secrets.GITHUB_TOKEN }} From 3ec30ff82fd45ac6b3fd918b8d76167575b54ad4 Mon Sep 17 00:00:00 2001 From: Mansur Azatbek <mansur62624@gmail.com> Date: Mon, 15 Jun 2026 22:46:42 +0500 Subject: [PATCH 4/6] fix: tolerate already-removed branches/worktrees when deleting a plan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Deleting a plan whose task worktree had been removed by hand failed with "git -C <repo> branch -d <branch> failed". The thrown GitCommandError aborted the whole deletion, so the plan — and its stale branches — stayed in the list. deletePlan now deletes managed branches and the worktree through tolerant helpers: a branch that is already gone (branchExists=false) is skipped, and any delete/remove failure is downgraded to a warning instead of throwing. The plan is pruned regardless, and the warnings are surfaced in the command result so the branch stops appearing among selectable plans. Tests cover both the already-removed branch and the failed-delete paths. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --- src/runtime/user-commands.test.ts | 66 ++++++++++++++++++++++++++++++- src/runtime/user-commands.ts | 62 +++++++++++++++++++++++++++-- 2 files changed, 123 insertions(+), 5 deletions(-) diff --git a/src/runtime/user-commands.test.ts b/src/runtime/user-commands.test.ts index 00201ea..09e8d52 100644 --- a/src/runtime/user-commands.test.ts +++ b/src/runtime/user-commands.test.ts @@ -36,6 +36,10 @@ import { executePlannerUserCommand } from "./user-commands"; class MockGitRunner implements GitRunner { status = ""; readonly calls: Array<{ name: string; input: unknown }> = []; + /** Branches reported as already gone by branchExists. */ + readonly missingBranches = new Set<string>(); + /** Branches whose deleteBranch should throw (e.g. unmerged, force:false). */ + readonly failDeleteBranches = new Set<string>(); async init(_input: GitRepoInput): Promise<void> {} async currentBranch(_input: GitRepoInput): Promise<string> { @@ -57,12 +61,17 @@ class MockGitRunner implements GitRunner { async listProjectFiles(_input: GitRepoInput): Promise<string[]> { return []; } - async branchExists(_input: GitBranchInput): Promise<boolean> { - return true; + async branchExists(input: GitBranchInput): Promise<boolean> { + return !this.missingBranches.has(input.branch); } async createBranch(_input: GitCreateBranchInput): Promise<void> {} async deleteBranch(input: GitDeleteBranchInput): Promise<void> { this.calls.push({ name: "deleteBranch", input }); + if (this.failDeleteBranches.has(input.branch)) { + throw new Error( + `git -C ${input.repoRoot} branch -d ${input.branch} failed`, + ); + } } async switchBranch(_input: GitSwitchBranchInput): Promise<void> {} async stageAll(_input: GitRepoInput): Promise<void> {} @@ -578,4 +587,57 @@ describe("planner user commands", () => { }, ]); }); + + it("completes deletion when a child branch was already removed by hand", async () => { + const { fs, git, projectPaths } = await createProjectFixture({ + activePlanId: "plan-a", + }); + // The worktree was deleted manually, so its task branch is already gone. + git.missingBranches.add("task/plan-b/task-1"); + + const result = await executePlannerUserCommand({ + fs, + git, + projectPaths, + commandName: "planner_delete", + params: { planId: "plan-b" }, + }); + + expect(result.status).toBe("applied"); + expect(result.text).toContain("was already gone"); + // The missing branch is not deleted, but the others still are. + expect( + git.calls + .filter((call) => call.name === "deleteBranch") + .map((call) => (call.input as { branch: string }).branch), + ).toEqual(["refactor/plan-b/task-1"]); + // The plan is pruned from the project despite the missing branch. + await expect(readProjectRecord(fs, projectPaths)).resolves.toMatchObject({ + plans: [expect.objectContaining({ planId: "plan-a" })], + }); + await expect( + fs.exists(createPlanStoragePaths(projectPaths, "plan-b").planDir), + ).resolves.toBe(false); + }); + + it("completes deletion and warns when a child branch delete fails", async () => { + const { fs, git, projectPaths } = await createProjectFixture({ + activePlanId: "plan-a", + }); + git.failDeleteBranches.add("task/plan-b/task-1"); + + const result = await executePlannerUserCommand({ + fs, + git, + projectPaths, + commandName: "planner_delete", + params: { planId: "plan-b" }, + }); + + expect(result.status).toBe("applied"); + expect(result.text).toContain("could not be deleted"); + await expect( + fs.exists(createPlanStoragePaths(projectPaths, "plan-b").planDir), + ).resolves.toBe(false); + }); }); diff --git a/src/runtime/user-commands.ts b/src/runtime/user-commands.ts index 8c0161b..c79e2a6 100644 --- a/src/runtime/user-commands.ts +++ b/src/runtime/user-commands.ts @@ -273,23 +273,32 @@ async function deletePlan( const projectRootExists = await input.fs.exists( input.projectPaths.projectRoot, ); + const warnings: string[] = []; if (state) { if (state.worktreePath && (await input.fs.exists(state.worktreePath))) { if (projectRootExists) { - await input.git.worktreeRemove({ + const warning = await tryWorktreeRemove({ + git: input.git, repoRoot: input.projectPaths.projectRoot, path: state.worktreePath, }); + if (warning) warnings.push(warning); } else { await input.fs.removeDir(state.worktreePath); } } if (projectRootExists) { + // Tolerate branches that were already removed (e.g. the worktree was + // deleted by hand before this command). A hard failure here would abort + // the whole deletion and leave the plan — and its stale branches — in + // the list. Downgrade to a warning and prune the plan regardless. for (const branch of managedChildBranches(state)) { - await input.git.deleteBranch({ + const warning = await tryDeleteManagedBranch({ + git: input.git, repoRoot: input.projectPaths.projectRoot, branch, }); + if (warning) warnings.push(warning); } } if (state.worktreePath) { @@ -318,12 +327,14 @@ async function deletePlan( plans: project.plans.filter((plan) => plan.planId !== planId), }; await saveProjectRecord(input.fs, input.projectPaths, nextProject); - return applied(input.commandName, `Planner plan deleted: ${planId}.`, { + const text = [`Planner plan deleted: ${planId}.`, ...warnings].join("\n"); + return applied(input.commandName, text, { project: nextProject, planId, removedPlanDir: planPaths.planDir, projectRootExists, gitCleanupSkipped: !projectRootExists, + warnings, }); } @@ -427,6 +438,51 @@ async function assertPlanSwitchable(input: { return { allow: true }; } +/** Delete a managed branch, tolerating one that is already gone. Returns a + * warning string when the branch could not be deleted cleanly (already removed, + * or git refused), so the caller can surface it and prune the plan regardless; + * returns null on a clean delete. */ +async function tryDeleteManagedBranch(input: { + git: GitRunner; + repoRoot: string; + branch: string; +}): Promise<string | null> { + try { + const exists = await input.git.branchExists({ + repoRoot: input.repoRoot, + branch: input.branch, + }); + if (!exists) { + return `Branch ${input.branch} was already gone — skipped and pruned from the plan registry.`; + } + await input.git.deleteBranch({ + repoRoot: input.repoRoot, + branch: input.branch, + }); + return null; + } catch (error) { + return `Branch ${input.branch} could not be deleted (${errorMessage(error)}); pruned from the plan registry anyway.`; + } +} + +/** Remove a worktree, tolerating one that git no longer tracks (e.g. it was + * deleted by hand). Returns a warning string on failure, null on success. */ +async function tryWorktreeRemove(input: { + git: GitRunner; + repoRoot: string; + path: string; +}): Promise<string | null> { + try { + await input.git.worktreeRemove({ + repoRoot: input.repoRoot, + path: input.path, + }); + return null; + } catch (error) { + return `Worktree ${input.path} could not be removed via git (${errorMessage(error)}); continuing with deletion.`; + } +} + function managedChildBranches(state: PlanStateRecord): string[] { const branches = [ state.activeBranches.currentTask, From a092140a883d05f7dacaa997021d1455b6d40320 Mon Sep 17 00:00:00 2001 From: Mansur Azatbek <mansur62624@gmail.com> Date: Mon, 15 Jun 2026 22:56:50 +0500 Subject: [PATCH 5/6] test: make the gate invariant bidirectional and trim over-advertised tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The forward invariant (guard ⊆ behavior) caught deadlocks where the guard lets a tool through but the behavior gate blocks it. The reverse direction was unguarded: a wrapper tool advertised in expectedTools (so it surfaces in status' "Stage Behavior" and the model reaches for it) while the guard blocks it leaves the model told to call a tool it cannot. Add the reverse invariant — every wrapper tool in expectedTools, except the always-allowed specials planner_status/planner_git_inspect, must be guard-allowed at that step. It surfaced three over-listings, all at runtime-driven steps where the guard intentionally allows (almost) nothing: - init/check_project advertised planner_report_stuck (execution-only per stuck-tools.ts). - init/prepare_storage advertised the contract wrappers + planner_git_commit. - init/create_plan_worktree advertised the contract read/route wrappers. Trim those expectedTools to match the guard. Not a live deadlock today (the model does not drive those steps), but exactly the drift that becomes one — now both directions are pinned across the full flag matrix. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --- src/runtime/stage-behavior.ts | 23 ++++++-------- src/runtime/tool-gating-invariant.test.ts | 38 +++++++++++++++++++++++ 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/src/runtime/stage-behavior.ts b/src/runtime/stage-behavior.ts index c1b325a..e0f4e65 100644 --- a/src/runtime/stage-behavior.ts +++ b/src/runtime/stage-behavior.ts @@ -96,7 +96,9 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredArtifacts: [], updatedArtifacts: [], requiredGates: [], - expectedTools: ["planner_status", "planner_report_stuck"], + // planner_report_stuck is execution-only (see stuck-tools.ts); the guard + // does not allow it during init, so it must not be advertised here. + expectedTools: ["planner_status"], commitPolicy: "forbidden", compactPolicy: "not_allowed", }), @@ -116,14 +118,10 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredArtifacts: [], updatedArtifacts: ["project.json"], requiredGates: ["git_available"], - expectedTools: [ - "planner_contract_scan", - "planner_contract_route", - "planner_contract_read", - "planner_contract_upsert", - "planner_git_commit", - "planner_status", - ], + // Storage setup is runtime-driven (guard allows no wrappers here). The + // model does contract scanning/upsert later, at discovery; listing those + // wrappers here only mis-advertised tools the guard blocks. + expectedTools: ["planner_status"], commitPolicy: "required_if_dirty", compactPolicy: "not_allowed", }), @@ -160,11 +158,8 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredArtifacts: ["plan.json", "state.json"], updatedArtifacts: ["state.json"], requiredGates: ["plan_record_exists"], - expectedTools: [ - "planner_git_inspect", - "planner_contract_route", - "planner_contract_read", - ], + // Worktree creation is runtime-driven; guard allows only git_inspect here. + expectedTools: ["planner_git_inspect"], commitPolicy: "forbidden", compactPolicy: "not_allowed", }), diff --git a/src/runtime/tool-gating-invariant.test.ts b/src/runtime/tool-gating-invariant.test.ts index 335908c..ad0b624 100644 --- a/src/runtime/tool-gating-invariant.test.ts +++ b/src/runtime/tool-gating-invariant.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it } from "vitest"; import { getAllowedPlannerWrapperTools, + PLANNER_WRAPPER_TOOLS, type PlannerWrapperTool, } from "../guard/tool-policy"; import { @@ -107,4 +108,41 @@ describe("planner tool gating invariant", () => { } } }); + + // Reverse direction: a wrapper tool the stage behavior advertises in + // expectedTools (so it shows up in status' "Stage Behavior" and the model + // reaches for it) MUST be permitted by the guard — otherwise the model is + // told to use a tool the guard blocks ("not allowed at stage/step"). The two + // always-allowed specials (planner_status, planner_git_inspect) are exempt + // because the behavior gate permits them regardless of the guard step list; + // non-wrapper workflow tools (finish_step, request/complete_compact, + // report_stuck) are gated elsewhere and never appear in the guard set. + const WRAPPER_TOOLS = new Set<string>(PLANNER_WRAPPER_TOOLS); + const GATE_EXEMPT = new Set<string>([ + "planner_status", + "planner_git_inspect", + ]); + it("every wrapper tool in expectedTools is permitted by the guard", () => { + const violations: string[] = []; + for (const [stage, stepList] of STEPS) { + for (const step of stepList) { + const guard = new Set<string>( + getAllowedPlannerWrapperTools({ + stage, + step, + broken: false, + requiresUserDecision: false, + requiresCompact: false, + debugArtifactsDir: "/tmp/planner-debug", + }), + ); + const behavior = getPlannerStageStepBehavior({ stage, step }); + for (const tool of behavior.expectedTools) { + if (!WRAPPER_TOOLS.has(tool) || GATE_EXEMPT.has(tool)) continue; + if (!guard.has(tool)) violations.push(`${stage}/${step}: ${tool}`); + } + } + } + expect(violations, violations.join("\n")).toEqual([]); + }); }); From 2aad9d3bbc4e1b0460e9085f4b733f88dc5aa662 Mon Sep 17 00:00:00 2001 From: Mansur Azatbek <mansur62624@gmail.com> Date: Mon, 15 Jun 2026 23:04:15 +0500 Subject: [PATCH 6/6] feat: echo expected-vs-received markdown for every strict planner tool The expected/received echo previously covered only the four artifact fill-tools. Extend it to every tool that writes a structured markdown artifact so the model gets the same self-correction signal everywhere. - New artifact-echo.ts holds the shared formatArtifactEcho (expected shape + what was saved), formatCanonicalSchemaHint (schema only, for tools that already echo their content), and ARTIFACT_CANONICAL_SCHEMA keyed by tool. - artifact-tools.ts now routes through the shared helper/map (no local copy). - goal_submit / questions_submit append the canonical-schema hint (they already echo the saved content for user review). - refactor_review / doubt_review / task_upsert / contract_upsert / skill_create / skill_update append the full expected-vs-received echo. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --- src/runtime/artifact-echo.test.ts | 48 +++++++ src/runtime/artifact-echo.ts | 126 +++++++++++++++++++ src/runtime/artifact-tools.roundtrip.test.ts | 4 +- src/runtime/artifact-tools.ts | 53 +------- src/runtime/contracts.ts | 7 ++ src/runtime/doubt-tools.ts | 7 ++ src/runtime/goal-tools.ts | 8 ++ src/runtime/question-tools.ts | 8 ++ src/runtime/refactor-tools.ts | 7 ++ src/runtime/skill-library.ts | 13 ++ src/runtime/task-tools.ts | 7 ++ 11 files changed, 239 insertions(+), 49 deletions(-) create mode 100644 src/runtime/artifact-echo.test.ts create mode 100644 src/runtime/artifact-echo.ts diff --git a/src/runtime/artifact-echo.test.ts b/src/runtime/artifact-echo.test.ts new file mode 100644 index 0000000..a967fd7 --- /dev/null +++ b/src/runtime/artifact-echo.test.ts @@ -0,0 +1,48 @@ +import { describe, expect, it } from "vitest"; +import { + ARTIFACT_CANONICAL_SCHEMA, + formatArtifactEcho, + formatCanonicalSchemaHint, +} from "./artifact-echo"; + +// Every strict-structure planner tool that writes markdown must have a canonical +// schema to echo back, so the "expected vs received" feedback is uniform. +const STRICT_TOOLS = [ + "planner_goal_submit", + "planner_questions_submit", + "planner_plan_submit", + "planner_discovery_submit", + "planner_tdd_submit", + "planner_summary_submit", + "planner_task_upsert", + "planner_refactor_review", + "planner_doubt_review", + "planner_contract_upsert", + "planner_skill_create", + "planner_skill_update", +]; + +describe("artifact echo", () => { + it("has a canonical schema for every strict tool", () => { + for (const tool of STRICT_TOOLS) { + expect(ARTIFACT_CANONICAL_SCHEMA[tool]?.trim().length).toBeGreaterThan(0); + } + }); + + it("echoes both expected shape and what was written", () => { + const text = formatArtifactEcho({ + canonicalSchema: "# Example\n## Section", + writtenMarkdown: "# Real\n## Done\n", + }); + expect(text).toContain("Expected shape"); + expect(text).toContain("## Section"); + expect(text).toContain("What you submitted"); + expect(text).toContain("# Real"); + }); + + it("schema-only hint shows the expected shape without a written block", () => { + const text = formatCanonicalSchemaHint("# Example\n## Section"); + expect(text).toContain("Expected shape"); + expect(text).not.toContain("What you submitted"); + }); +}); diff --git a/src/runtime/artifact-echo.ts b/src/runtime/artifact-echo.ts new file mode 100644 index 0000000..9a4cfba --- /dev/null +++ b/src/runtime/artifact-echo.ts @@ -0,0 +1,126 @@ +import { TDD_SECTIONS } from "./tdd-form"; + +/** + * Shared "expected vs received" echo appended to the result of every planner + * tool that writes a structured markdown artifact. Showing the canonical shape + * next to what was actually saved lets the model self-correct by comparison + * instead of guessing the format — the same teaching signal for all strict + * tools, free-form (goal/discovery/plan/summary/questions) and structured + * (tdd/task/refactor/doubt/contract/skill) alike. + */ +export function formatArtifactEcho(input: { + canonicalSchema: string; + writtenMarkdown: string; +}): string { + return [ + "## Expected shape (canonical schema)", + input.canonicalSchema, + "", + "## What you submitted (saved to disk)", + "```markdown", + input.writtenMarkdown.trimEnd(), + "```", + "", + "Compare the two: the saved artifact should follow the canonical shape above (the same kind of sections — your prose wording is your own). If a section is missing or wrong, call the same tool again to overwrite it; otherwise continue.", + ].join("\n"); +} + +/** + * Just the canonical-shape reference, for tools that already echo the content + * they saved (goal/questions show it for user review). Avoids printing the + * artifact twice while still giving the model the expected shape to compare. + */ +export function formatCanonicalSchemaHint(canonicalSchema: string): string { + return [ + "## Expected shape (canonical schema)", + canonicalSchema, + "", + "Make sure what you saved follows this shape; if a section is missing or wrong, call the same tool again to overwrite it.", + ].join("\n"); +} + +/** Canonical reference templates, keyed by the tool that produces the artifact. */ +export const ARTIFACT_CANONICAL_SCHEMA: Record<string, string> = { + planner_goal_submit: [ + "# Goal: <title>", + "## Outcome (what the finished work delivers)", + "## Assumptions", + "## Out of scope", + ].join("\n"), + planner_questions_submit: [ + "# Discovery Questions", + "## Status (open questions, or 'No unresolved questions')", + "## Assumptions (assumptions carried into planning)", + ].join("\n"), + planner_plan_submit: [ + "# Plan: <title>", + "## Goal", + "## Scope (in-scope vs out-of-scope)", + "## Constraints", + "## Risks", + "## Checks (how each task is verified)", + "## Tasks (ordered task sequence)", + ].join("\n"), + planner_discovery_submit: [ + "# Discovery: <title>", + "## Project Overview / boundaries / findings / fundamental rules", + "(for change requests: ## Post-Implementation Snapshot / Completed Work / Remaining Work)", + "", + "NOTE: Do NOT write a `## Verification Protocol` heading in body — pass", + "the commands in the verificationProtocol argument; the wrapper renders", + "`## Verification Protocol` with one `- <command>` per line. That section", + "is the single source doubt_review checks against.", + ].join("\n"), + planner_tdd_submit: [ + "# tdd.md (per active task; sections added as the lifecycle reaches them)", + ...TDD_SECTIONS.flatMap((section) => [ + `## ${section.title}`, + ...section.fields.map((field) => `- ${field}: <concrete evidence>`), + ]), + ].join("\n"), + planner_summary_submit: [ + "# Final Summary", + "## What changed", + "## Verification evidence (command → result)", + "## Follow-ups", + ].join("\n"), + planner_task_upsert: [ + "# Task: <title>", + "## Acceptance Criteria", + "## Scope (files/areas in and out of scope)", + "## Notes", + ].join("\n"), + planner_refactor_review: [ + "# Refactor Review", + "## Changed Surface / Complexity / Duplication / Naming & Boundaries / Edge Cases", + "## Category Reviews (per-category findings)", + "## Decision (applied changes, or why kept as-is)", + ].join("\n"), + planner_doubt_review: [ + "# Doubt Review", + "## Verification Evidence (one entry per protocol command: command/status/evidence)", + "## Possible Errors (each: riskCategory/status/proofLevel/nextAction/claim/...)", + "## Summary", + ].join("\n"), + planner_contract_upsert: [ + "# <AGENTS.md contract block>", + "## Purpose / Scope / Stable Contracts / Read First / Do Not Touch Unless", + "(only the pi-code-planner:contracts block is managed; surrounding prose is yours)", + ].join("\n"), + planner_skill_create: [ + "---", + "name: <kebab-case>", + "description: <one line>", + "---", + "# <skill title>", + "## When to use / Steps / Notes", + ].join("\n"), + planner_skill_update: [ + "---", + "name: <kebab-case>", + "description: <one line>", + "---", + "# <skill title>", + "## When to use / Steps / Notes", + ].join("\n"), +}; diff --git a/src/runtime/artifact-tools.roundtrip.test.ts b/src/runtime/artifact-tools.roundtrip.test.ts index 428c723..3663f48 100644 --- a/src/runtime/artifact-tools.roundtrip.test.ts +++ b/src/runtime/artifact-tools.roundtrip.test.ts @@ -1,8 +1,8 @@ import { describe, expect, it } from "vitest"; import { MockPlannerFs } from "../test/mock-fs"; +import { ARTIFACT_CANONICAL_SCHEMA } from "./artifact-echo"; import { buildDiscoveryMarkdown, - CANONICAL_SCHEMA, PLANNER_ARTIFACT_TOOL_NAMES, stripVerificationProtocolSection, } from "./artifact-tools"; @@ -132,7 +132,7 @@ describe("tdd_submit ↔ tdd-evidence validators round-trip", () => { describe("fill-tool echo schema", () => { it("provides a canonical schema for every artifact tool", () => { for (const tool of PLANNER_ARTIFACT_TOOL_NAMES) { - expect(CANONICAL_SCHEMA[tool]?.trim().length).toBeGreaterThan(0); + expect(ARTIFACT_CANONICAL_SCHEMA[tool]?.trim().length).toBeGreaterThan(0); } }); }); diff --git a/src/runtime/artifact-tools.ts b/src/runtime/artifact-tools.ts index fcddb67..39afbfb 100644 --- a/src/runtime/artifact-tools.ts +++ b/src/runtime/artifact-tools.ts @@ -2,6 +2,7 @@ import { join } from "node:path"; import type { GitRunner } from "../git/runner"; import type { PlannerFs } from "../storage/fs"; import type { ProjectStoragePaths } from "../storage/paths"; +import { ARTIFACT_CANONICAL_SCHEMA, formatArtifactEcho } from "./artifact-echo"; import { checkPlannerOrchestratorToolAllowed, runPlannerOrchestrator, @@ -154,15 +155,12 @@ function applied( headline, `Artifact: ${path}`, "", - "## Expected shape (canonical schema)", - CANONICAL_SCHEMA[toolName], + formatArtifactEcho({ + canonicalSchema: ARTIFACT_CANONICAL_SCHEMA[toolName], + writtenMarkdown: written, + }), "", - "## What you submitted (saved to disk)", - "```markdown", - written.trimEnd(), - "```", - "", - "Compare the two: the saved artifact should follow the canonical shape above (headings/sections of the same kind — the exact wording of your prose is up to you). If a required section is missing or wrong, call the same tool again to overwrite it; otherwise continue. The next-step hint follows after planner_finish_step.", + "The next-step hint follows after planner_finish_step.", ].join("\n"), details: { path }, }; @@ -203,45 +201,6 @@ export function stripVerificationProtocolSection(body: string): string { return out.join("\n"); } -/** Compact reference templates echoed back so the model sees the canonical - * shape of each artifact next to what it actually wrote. */ -export const CANONICAL_SCHEMA: Record<PlannerArtifactToolName, string> = { - planner_plan_submit: [ - "# Plan: <title>", - "## Goal", - "## Scope (in-scope vs out-of-scope)", - "## Constraints", - "## Risks", - "## Checks (how each task is verified)", - "## Tasks (ordered task sequence)", - ].join("\n"), - planner_discovery_submit: [ - "# Discovery: <title>", - "## Project Overview / boundaries / findings / fundamental rules", - "(for change requests: ## Post-Implementation Snapshot / Completed Work / Remaining Work)", - "", - "NOTE: Do NOT write a `## Verification Protocol` heading in body — pass", - "the commands in the verificationProtocol argument; the wrapper renders", - "`## Verification Protocol` with one `- <command>` per line. That section", - "is the single source doubt_review checks against.", - ].join("\n"), - planner_tdd_submit: [ - "# tdd.md (per active task; sections added as the lifecycle reaches them)", - `## ${TDD_SECTIONS[0].title}`, - ...TDD_SECTIONS[0].fields.map((f) => `- ${f}: <concrete evidence>`), - `## ${TDD_SECTIONS[1].title}`, - ...TDD_SECTIONS[1].fields.map((f) => `- ${f}: <concrete evidence>`), - `## ${TDD_SECTIONS[2].title}`, - ...TDD_SECTIONS[2].fields.map((f) => `- ${f}: <concrete evidence>`), - ].join("\n"), - planner_summary_submit: [ - "# Final Summary", - "## What changed", - "## Verification evidence (command → result)", - "## Follow-ups", - ].join("\n"), -}; - function blocked( toolName: PlannerArtifactToolName, text: string, diff --git a/src/runtime/contracts.ts b/src/runtime/contracts.ts index ee463b8..ae95ff5 100644 --- a/src/runtime/contracts.ts +++ b/src/runtime/contracts.ts @@ -28,6 +28,7 @@ import type { PlanStateRecord, } from "../storage/schema"; import { updatePlanState } from "../storage/state-store"; +import { ARTIFACT_CANONICAL_SCHEMA, formatArtifactEcho } from "./artifact-echo"; import { appendPlannerSection } from "./artifact-utils"; import { checkPlannerOrchestratorToolAllowed, @@ -722,6 +723,12 @@ async function contractUpsert(input: { existing === null ? "Created new AGENTS.md contract file." : "Updated existing AGENTS.md managed block and preserved non-planner content.", + "", + formatArtifactEcho({ + canonicalSchema: ARTIFACT_CANONICAL_SCHEMA.planner_contract_upsert, + writtenMarkdown: next, + }), + "", "Commit this change through planner_git_commit when the current step requires a clean worktree.", ].join("\n"), { path, diagnostics: validation.diagnostics }, diff --git a/src/runtime/doubt-tools.ts b/src/runtime/doubt-tools.ts index 3764a41..0ae0e50 100644 --- a/src/runtime/doubt-tools.ts +++ b/src/runtime/doubt-tools.ts @@ -1,6 +1,7 @@ import type { GitRunner } from "../git/runner"; import type { PlannerFs } from "../storage/fs"; import type { ProjectStoragePaths } from "../storage/paths"; +import { ARTIFACT_CANONICAL_SCHEMA, formatArtifactEcho } from "./artifact-echo"; import { DOUBT_FINDING_STATUSES, DOUBT_NEXT_ACTIONS, @@ -95,6 +96,12 @@ export async function executePlannerDoubtTool(input: { `Verify artifact: ${planPaths.verifyMd}`, `Proven bugs: ${validation.provenBugCount}`, `Needs probe: ${validation.needsProbeCount}`, + "", + formatArtifactEcho({ + canonicalSchema: ARTIFACT_CANONICAL_SCHEMA.planner_doubt_review, + writtenMarkdown: content, + }), + "", validation.provenBugCount > 0 ? "Complete finalize/doubt_review with target planning/read_context after recording the proven findings in decisions.md." : validation.needsProbeCount > 0 diff --git a/src/runtime/goal-tools.ts b/src/runtime/goal-tools.ts index f6fa6a0..782db81 100644 --- a/src/runtime/goal-tools.ts +++ b/src/runtime/goal-tools.ts @@ -4,6 +4,10 @@ import type { ProjectStoragePaths } from "../storage/paths"; import { updatePlanRecord } from "../storage/plan-store"; import { upsertProjectPlanSummary } from "../storage/project-store"; import { savePlanState } from "../storage/state-store"; +import { + ARTIFACT_CANONICAL_SCHEMA, + formatCanonicalSchemaHint, +} from "./artifact-echo"; import { checkPlannerOrchestratorToolAllowed, runPlannerOrchestrator, @@ -97,6 +101,10 @@ export async function executePlannerGoalTool( "", content.trim(), "", + formatCanonicalSchemaHint( + ARTIFACT_CANONICAL_SCHEMA.planner_goal_submit, + ), + "", "Ask the user to review the goal and explicitly approve it or request revision.", "After the user answers, call planner_goal_decide.", ].join("\n"), diff --git a/src/runtime/question-tools.ts b/src/runtime/question-tools.ts index 8bcdad5..cf93a74 100644 --- a/src/runtime/question-tools.ts +++ b/src/runtime/question-tools.ts @@ -3,6 +3,10 @@ import type { PlannerWrapperTool } from "../guard/tool-policy"; import type { PlannerFs } from "../storage/fs"; import type { ProjectStoragePaths } from "../storage/paths"; import { savePlanState } from "../storage/state-store"; +import { + ARTIFACT_CANONICAL_SCHEMA, + formatCanonicalSchemaHint, +} from "./artifact-echo"; import { checkPlannerOrchestratorToolAllowed, runPlannerOrchestrator, @@ -83,6 +87,10 @@ export async function executePlannerQuestionTool( "## Questions For User", content.trim(), "", + formatCanonicalSchemaHint( + ARTIFACT_CANONICAL_SCHEMA.planner_questions_submit, + ), + "", hasOpenQuestions ? "Show these questions to the user verbatim. Wait for the user's answers, then call planner_questions_resolve. Do not finish discovery/write_questions yet." : "No unresolved questions remain. Call planner_finish_step for discovery/write_questions.", diff --git a/src/runtime/refactor-tools.ts b/src/runtime/refactor-tools.ts index 303c514..9c038b0 100644 --- a/src/runtime/refactor-tools.ts +++ b/src/runtime/refactor-tools.ts @@ -2,6 +2,7 @@ import { join } from "node:path"; import type { GitRunner } from "../git/runner"; import type { PlannerFs } from "../storage/fs"; import type { ProjectStoragePaths } from "../storage/paths"; +import { ARTIFACT_CANONICAL_SCHEMA, formatArtifactEcho } from "./artifact-echo"; import { checkPlannerOrchestratorToolAllowed, runPlannerOrchestrator, @@ -123,6 +124,12 @@ export async function executePlannerRefactorTool(input: { "Planner refactor review written.", `Refactor artifact: ${refactorMd}`, `Decision: ${decision}`, + "", + formatArtifactEcho({ + canonicalSchema: ARTIFACT_CANONICAL_SCHEMA.planner_refactor_review, + writtenMarkdown: content, + }), + "", "If project files changed during refactor, commit through planner_git_commit before finishing.", "Then call planner_status before choosing the next planner action.", ].join("\n"), diff --git a/src/runtime/skill-library.ts b/src/runtime/skill-library.ts index fac8ced..22fe4f7 100644 --- a/src/runtime/skill-library.ts +++ b/src/runtime/skill-library.ts @@ -6,6 +6,7 @@ import type { PlannerFs } from "../storage/fs"; import { readJsonIfExists, writeJson } from "../storage/json"; import type { ProjectStoragePaths } from "../storage/paths"; import { resolveProjectStoragePaths } from "../storage/project-resolver"; +import { ARTIFACT_CANONICAL_SCHEMA, formatArtifactEcho } from "./artifact-echo"; import { checkPlannerOrchestratorToolAllowed, runPlannerOrchestrator, @@ -334,6 +335,12 @@ export async function executePlannerSkillTool( `Skill path: ${skillPath}`, `Index: ${paths.indexJson}`, `Language: ${language}`, + "", + formatArtifactEcho({ + canonicalSchema: ARTIFACT_CANONICAL_SCHEMA.planner_skill_create, + writtenMarkdown: content, + }), + "", "Pi loads planner skills through resources_discover on the next planner session start, resume, or reload.", "Continue the current planner state from planner_status; this skill is future memory, not a replacement for the current stage instructions.", ].join("\n"), @@ -416,6 +423,12 @@ export async function executePlannerSkillUpdateTool( `Skill path: ${existing.skillPath}`, `Index: ${paths.indexJson}`, `Language: ${language}`, + "", + formatArtifactEcho({ + canonicalSchema: ARTIFACT_CANONICAL_SCHEMA.planner_skill_update, + writtenMarkdown: content, + }), + "", "Pi loads updated skills through resources_discover on the next planner session start, resume, or reload.", ].join("\n"), details: { diff --git a/src/runtime/task-tools.ts b/src/runtime/task-tools.ts index 380e654..829bd7a 100644 --- a/src/runtime/task-tools.ts +++ b/src/runtime/task-tools.ts @@ -3,6 +3,7 @@ import type { PlannerFs } from "../storage/fs"; import type { ProjectStoragePaths } from "../storage/paths"; import { readPlanRecord, updatePlanRecord } from "../storage/plan-store"; import { upsertTaskArtifacts } from "../storage/task-store"; +import { ARTIFACT_CANONICAL_SCHEMA, formatArtifactEcho } from "./artifact-echo"; import { checkPlannerOrchestratorToolAllowed, runPlannerOrchestrator, @@ -89,6 +90,12 @@ export async function executePlannerTaskTool(input: { `Planner task artifacts written: ${result.record.taskId}.`, `Task JSON: ${result.paths.taskJson}`, `Task Markdown: ${result.paths.taskMd}`, + "", + formatArtifactEcho({ + canonicalSchema: ARTIFACT_CANONICAL_SCHEMA.planner_task_upsert, + writtenMarkdown: await input.fs.readText(result.paths.taskMd), + }), + "", "TDD lifecycle artifacts were created if missing. Do not create a separate testing task.", "Call planner_status before choosing the next planner action.", ].join("\n"),