m62624 · m62624 · Jun 15, 2026 · Jun 15, 2026 · Jun 15, 2026 · Jun 15, 2026
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -165,18 +165,44 @@ jobs:
             echo "url=$EXISTING_PR" >> "$GITHUB_OUTPUT"
           fi
 
+      - name: Generate Release Notes
+        id: notes
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          R_TAG: ${{ needs.prepare.outputs.tag }}
+        run: |
+          # R_TAG arrives via env rather than being inlined, so it is never shell-parsed.
+          # Compare against the previous v* tag explicitly: GitHub otherwise anchors
+          # auto-notes to the last *published* (non-draft) release, and since every
+          # release is created as a draft that baseline is stale, so each new release
+          # re-lists old feat/fix entries. grep -F matches the tag literally (no regex).
+          PREV_TAG=$(git tag --list 'v*' --sort=-v:refname | grep -Fvx -- "$R_TAG" | head -n1)
+          ARGS=(-f tag_name="$R_TAG")
+          if [ -n "$PREV_TAG" ]; then
+            ARGS+=(-f previous_tag_name="$PREV_TAG")
+          fi
+          NOTES=$(gh api -X POST "repos/$GITHUB_REPOSITORY/releases/generate-notes" "${ARGS[@]}" --jq '.body')
+          {
+            echo "body<<EOF_NOTES"
+            echo "$NOTES"
+            echo "EOF_NOTES"
+          } >> "$GITHUB_OUTPUT"
+
       - name: Create Draft Release
         uses: softprops/action-gh-release@v2
         with:
           name: ${{ env.R_NAME }}
           tag_name: ${{ needs.prepare.outputs.tag }}
           draft: true
-          generate_release_notes: true
           body: |
             This is a draft release.
 
             Before publishing, verify:
             - npm package `pi-code-planner@${{ needs.prepare.outputs.version }}` is available.
             - Pi can install it with `pi install npm:pi-code-planner@${{ needs.prepare.outputs.version }}`.
             - The sync PR is ready: ${{ steps.sync_pr.outputs.url }}
+
+            ---
+
+            ${{ steps.notes.outputs.body }}
           token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/src/runtime/AGENTS.md b/src/runtime/AGENTS.md
@@ -25,7 +25,7 @@ Runtime domain for planner stages, model-facing status, tool wrappers, timers, r
 
 ### Do Not Touch Unless
 - Do not change lifecycle transitions without updating `state-machine.ts`, `state-transition.ts`, `workflow-tools.ts`, tests, and instructions.
-- Do not add or re-scope a planner tool without updating BOTH allowlists that gate it: the guard policy (`guard/tool-policy.ts` `STEP_ALLOWED_TOOLS`) AND the stage behavior (`stage-behavior.ts` `expectedTools`). Both gates must pass for a tool to be usable at a step; if the guard allows a tool the behavior gate omits, the model is blocked at runtime (a deadlock when no fallback exists). The invariant is enforced by `tool-gating-invariant.test.ts`. Also update tool visibility expectations, status/instructions, and tests.
+- Do not add or re-scope a planner tool without updating BOTH allowlists that gate it: the guard policy (`guard/tool-policy.ts` `STEP_ALLOWED_TOOLS`) AND the stage behavior (`stage-behavior.ts` `expectedTools`). Both gates must pass for a tool to be usable at a step; if the guard allows a tool the behavior gate omits, the model is blocked at runtime (a deadlock when no fallback exists). The two gates are composed only on the normal `allow_stage_machine` path (`orchestrator-gate.ts`); the broken/user-decision/compact states bypass the behavior gate and the guard returns a fixed set, so a step-scoped tool must never leak into those sets. Both halves are enforced by `tool-gating-invariant.test.ts` across the full flag matrix (debug on/off, broken, user-decision, compact). Also update tool visibility expectations, status/instructions, and tests.
 
 ### Domain Details
 - `status.ts` is the primary prompt surface for local models; it reads `PlanStateRecord` from the orchestrator and formats step rules, contract summaries, and guidance lines.

diff --git a/src/runtime/artifact-echo.test.ts b/src/runtime/artifact-echo.test.ts
@@ -0,0 +1,48 @@
+import { describe, expect, it } from "vitest";
+import {
+	ARTIFACT_CANONICAL_SCHEMA,
+	formatArtifactEcho,
+	formatCanonicalSchemaHint,
+} from "./artifact-echo";
+
+// Names of the strict-structure planner tools that write markdown. Each one must
+// have an ARTIFACT_CANONICAL_SCHEMA entry so the "expected vs received" echo is uniform.
+const STRICT_TOOLS = [
+	"planner_goal_submit",
+	"planner_questions_submit",
+	"planner_plan_submit",
+	"planner_discovery_submit",
+	"planner_tdd_submit",
+	"planner_summary_submit",
+	"planner_task_upsert",
+	"planner_refactor_review",
+	"planner_doubt_review",
+	"planner_contract_upsert",
+	"planner_skill_create",
+	"planner_skill_update",
+];
+
+describe("artifact echo", () => {
+	it("has a canonical schema for every strict tool", () => {
+		for (const tool of STRICT_TOOLS) {
+			expect(ARTIFACT_CANONICAL_SCHEMA[tool]?.trim().length).toBeGreaterThan(0); // missing entry -> undefined, which should fail this matcher as well
+		}
+	});
+
+	it("echoes both expected shape and what was written", () => {
+		const text = formatArtifactEcho({
+			canonicalSchema: "# Example\n## Section",
+			writtenMarkdown: "# Real\n## Done\n",
+		});
+		expect(text).toContain("Expected shape");
+		expect(text).toContain("## Section");
+		expect(text).toContain("What you submitted");
+		expect(text).toContain("# Real");
+	});
+
+	it("schema-only hint shows the expected shape without a written block", () => {
+		const text = formatCanonicalSchemaHint("# Example\n## Section");
+		expect(text).toContain("Expected shape");
+		expect(text).not.toContain("What you submitted");
+	});
+});
diff --git a/src/runtime/artifact-echo.ts b/src/runtime/artifact-echo.ts
@@ -0,0 +1,126 @@
+import { TDD_SECTIONS } from "./tdd-form";
+
+/**
+ * Shared "expected vs received" echo appended to the result of every planner
+ * tool that writes a structured markdown artifact, free-form (goal/discovery/
+ * plan/summary/questions) and structured (tdd/task/refactor/doubt/contract/
+ * skill) alike. Returns one newline-joined string: the canonical shape, then
+ * the saved markdown (trailing whitespace trimmed) inside a markdown fence, so
+ * the model can self-correct by comparison instead of guessing the format.
+ */
+export function formatArtifactEcho(input: {
+	canonicalSchema: string;
+	writtenMarkdown: string;
+}): string {
+	return [
+		"## Expected shape (canonical schema)",
+		input.canonicalSchema,
+		"",
+		"## What you submitted (saved to disk)",
+		"```markdown", // NOTE(review): fixed fence; saved content containing its own ``` line would close it early
+		input.writtenMarkdown.trimEnd(),
+		"```",
+		"",
+		"Compare the two: the saved artifact should follow the canonical shape above (the same kind of sections — your prose wording is your own). If a section is missing or wrong, call the same tool again to overwrite it; otherwise continue.",
+	].join("\n");
+}
+
+/**
+ * Returns only the canonical-shape reference plus an overwrite reminder, with
+ * no copy of the artifact: for tools that already echo what they saved (goal/
+ * questions show it for user review), so it is not printed twice.
+ */
+export function formatCanonicalSchemaHint(canonicalSchema: string): string {
+	return [
+		"## Expected shape (canonical schema)",
+		canonicalSchema,
+		"",
+		"Make sure what you saved follows this shape; if a section is missing or wrong, call the same tool again to overwrite it.",
+	].join("\n");
+}
+
+/** Canonical reference templates keyed by producing tool name; planner_tdd_submit is generated from TDD_SECTIONS, the rest are static outlines. */
+export const ARTIFACT_CANONICAL_SCHEMA: Record<string, string> = {
+	planner_goal_submit: [
+		"# Goal: <title>",
+		"## Outcome      (what the finished work delivers)",
+		"## Assumptions",
+		"## Out of scope",
+	].join("\n"),
+	planner_questions_submit: [
+		"# Discovery Questions",
+		"## Status        (open questions, or 'No unresolved questions')",
+		"## Assumptions   (assumptions carried into planning)",
+	].join("\n"),
+	planner_plan_submit: [
+		"# Plan: <title>",
+		"## Goal",
+		"## Scope        (in-scope vs out-of-scope)",
+		"## Constraints",
+		"## Risks",
+		"## Checks       (how each task is verified)",
+		"## Tasks        (ordered task sequence)",
+	].join("\n"),
+	planner_discovery_submit: [
+		"# Discovery: <title>",
+		"## Project Overview / boundaries / findings / fundamental rules",
+		"(for change requests: ## Post-Implementation Snapshot / Completed Work / Remaining Work)",
+		"",
+		"NOTE: Do NOT write a `## Verification Protocol` heading in body — pass",
+		"the commands in the verificationProtocol argument; the wrapper renders",
+		"`## Verification Protocol` with one `- <command>` per line. That section",
+		"is the single source doubt_review checks against.",
+	].join("\n"),
+	planner_tdd_submit: [
+		"# tdd.md (per active task; sections added as the lifecycle reaches them)",
+		...TDD_SECTIONS.flatMap((section) => [
+			`## ${section.title}`,
+			...section.fields.map((field) => `- ${field}: <concrete evidence>`),
+		]),
+	].join("\n"),
+	planner_summary_submit: [
+		"# Final Summary",
+		"## What changed",
+		"## Verification evidence  (command → result)",
+		"## Follow-ups",
+	].join("\n"),
+	planner_task_upsert: [
+		"# Task: <title>",
+		"## Acceptance Criteria",
+		"## Scope        (files/areas in and out of scope)",
+		"## Notes",
+	].join("\n"),
+	planner_refactor_review: [
+		"# Refactor Review",
+		"## Changed Surface / Complexity / Duplication / Naming & Boundaries / Edge Cases",
+		"## Category Reviews   (per-category findings)",
+		"## Decision           (applied changes, or why kept as-is)",
+	].join("\n"),
+	planner_doubt_review: [
+		"# Doubt Review",
+		"## Verification Evidence   (one entry per protocol command: command/status/evidence)",
+		"## Possible Errors         (each: riskCategory/status/proofLevel/nextAction/claim/...)",
+		"## Summary",
+	].join("\n"),
+	planner_contract_upsert: [
+		"# <AGENTS.md contract block>",
+		"## Purpose / Scope / Stable Contracts / Read First / Do Not Touch Unless",
+		"(only the pi-code-planner:contracts block is managed; surrounding prose is yours)",
+	].join("\n"),
+	planner_skill_create: [
+		"---",
+		"name: <kebab-case>",
+		"description: <one line>",
+		"---",
+		"# <skill title>",
+		"## When to use / Steps / Notes",
+	].join("\n"),
+	planner_skill_update: [
+		"---",
+		"name: <kebab-case>",
+		"description: <one line>",
+		"---",
+		"# <skill title>",
+		"## When to use / Steps / Notes",
+	].join("\n"),
+};
diff --git a/src/runtime/artifact-tools.roundtrip.test.ts b/src/runtime/artifact-tools.roundtrip.test.ts
@@ -0,0 +1,138 @@
+import { describe, expect, it } from "vitest";
+import { MockPlannerFs } from "../test/mock-fs";
+import { ARTIFACT_CANONICAL_SCHEMA } from "./artifact-echo";
+import {
+	buildDiscoveryMarkdown,
+	PLANNER_ARTIFACT_TOOL_NAMES,
+	stripVerificationProtocolSection,
+} from "./artifact-tools";
+import {
+	extractVerificationProtocol,
+	extractVerificationProtocolCommands,
+} from "./doubt-review";
+import {
+	validatePostImplementationCounterexampleReview,
+	validatePreImplementationProofContract,
+	validateTaskMergeScopeAudit,
+} from "./tdd-evidence";
+import { mergeTddMarkdown, renderTddSection, TDD_SECTIONS } from "./tdd-form";
+
+// Reproduces the deadlock from the decoded pi-session: the model wrote its own
+// "Verification Protocol" section (a "###" heading in this fixture, with a prose
+// lead-in and extra commands) into the discovery body while the wrapper also
+// appended one from the verificationProtocol argument. The parser then collected the
+// prose line as a phantom required command, so planner_doubt_review could never pass.
+describe("discovery_submit ↔ verification protocol parser round-trip", () => {
+	it("keeps the verificationProtocol argument as the single source of truth", () => {
+		const body = [
+			"# Discovery: example",
+			"",
+			"## Findings",
+			"- everything already has AGENTS.md except src/test/",
+			"",
+			"### Verification Protocol",
+			"The existing project has these commands available:",
+			"- `npm run check` — biome",
+			"- `npm run build` — tsc",
+			"- `npm test` — vitest",
+			"- `npm run ci` — full pipeline",
+			"- `npm run format` — biome format",
+		].join("\n");
+
+		const discoveryMd = buildDiscoveryMarkdown(body, [
+			"npm run check",
+			"npm run build",
+			"npm test",
+		]);
+
+		// Exactly one protocol section survives, and it is the argument's.
+		const headings = discoveryMd
+			.split(/\r?\n/)
+			.filter(
+				(line) => /verification protocol/i.test(line) && /^#{1,6}\s/.test(line),
+			);
+		expect(headings).toEqual(["## Verification Protocol"]);
+
+		// The parser the doubt_review gate uses returns exactly the argument
+		// commands — no prose phantom, no ci/format the model never ran.
+		expect(extractVerificationProtocolCommands(discoveryMd)).toEqual([
+			"npm run check",
+			"npm run build",
+			"npm test",
+		]);
+	});
+
+	it("ignores prose lines under a verification protocol heading", () => {
+		const discoveryMd = [
+			"# Discovery",
+			"",
+			"## Verification Protocol",
+			"The existing project has these commands available:",
+			"- npm run check",
+			"- npm test",
+		].join("\n");
+		// The prose lead-in is dropped; only bullet commands remain.
+		expect(extractVerificationProtocol(discoveryMd)).toEqual([
+			"- npm run check",
+			"- npm test",
+		]);
+		expect(extractVerificationProtocolCommands(discoveryMd)).toEqual([
+			"npm run check",
+			"npm test",
+		]);
+	});
+
+	it("strips a body protocol section regardless of heading level", () => {
+		for (const heading of [
+			"## Verification Protocol",
+			"#### Verification Protocol",
+		]) {
+			const stripped = stripVerificationProtocolSection(
+				["## Findings", "- a", heading, "- npm run lint"].join("\n"),
+			);
+			expect(stripped).not.toMatch(/verification protocol/i);
+			expect(stripped).not.toContain("npm run lint");
+			expect(stripped).toContain("## Findings");
+		}
+	});
+});
+
+describe("tdd_submit ↔ tdd-evidence validators round-trip", () => {
+	it("renders sections that the consuming validators accept", async () => {
+		const updates = Object.fromEntries(
+			TDD_SECTIONS.map((def) => [
+				def.key,
+				renderTddSection(
+					def,
+					Object.fromEntries(
+						def.fields.map((field) => [field, `concrete ${field} evidence`]), // every declared field gets a non-empty placeholder value
+					),
+				),
+			]),
+		);
+		const content = mergeTddMarkdown(
+			"", // presumably the existing tdd.md content (empty here) — confirm against tdd-form
+			updates as Parameters<typeof mergeTddMarkdown>[1],
+		);
+
+		const fs = new MockPlannerFs();
+		const path = "/plan/tasks/task-1/tdd.md";
+		await fs.writeTextAtomic(path, content);
+
+		await expect(
+			validatePreImplementationProofContract(fs, path),
+		).resolves.toBeNull(); // presumably null means the validator found no problem — confirm in tdd-evidence
+		await expect(
+			validatePostImplementationCounterexampleReview(fs, path),
+		).resolves.toBeNull();
+		await expect(validateTaskMergeScopeAudit(fs, path)).resolves.toBeNull();
+	});
+});
+
+describe("fill-tool echo schema", () => {
+	it("provides a canonical schema for every artifact tool", () => {
+		for (const tool of PLANNER_ARTIFACT_TOOL_NAMES) { // names come from artifact-tools, not a hand-written list
+			expect(ARTIFACT_CANONICAL_SCHEMA[tool]?.trim().length).toBeGreaterThan(0); // missing entry -> undefined, which should fail this matcher as well
+		}
+	});
+});