diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 912ce2e..ad933d6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -165,13 +165,35 @@ jobs: echo "url=$EXISTING_PR" >> "$GITHUB_OUTPUT" fi + - name: Generate Release Notes + id: notes + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + R_TAG=${{ needs.prepare.outputs.tag }} + # Compare against the previous v* tag explicitly. Without this, GitHub + # anchors auto-notes to the last *published* (non-draft) release; since + # every release is created as a draft, the baseline is stale and each + # new release re-lists every PR since the last published one, so old + # feat/fix entries pile onto the new ones. + PREV_TAG=$(git tag --list 'v*' --sort=-v:refname | grep -vx "$R_TAG" | head -n1) + ARGS=(-f tag_name="$R_TAG") + if [ -n "$PREV_TAG" ]; then + ARGS+=(-f previous_tag_name="$PREV_TAG") + fi + NOTES=$(gh api -X POST "repos/${{ github.repository }}/releases/generate-notes" "${ARGS[@]}" --jq '.body') + { + echo "body<> "$GITHUB_OUTPUT" + - name: Create Draft Release uses: softprops/action-gh-release@v2 with: name: ${{ env.R_NAME }} tag_name: ${{ needs.prepare.outputs.tag }} draft: true - generate_release_notes: true body: | This is a draft release. @@ -179,4 +201,8 @@ jobs: - npm package `pi-code-planner@${{ needs.prepare.outputs.version }}` is available. - Pi can install it with `pi install npm:pi-code-planner@${{ needs.prepare.outputs.version }}`. - The sync PR is ready: ${{ steps.sync_pr.outputs.url }} + + --- + + ${{ steps.notes.outputs.body }} token: ${{ secrets.GITHUB_TOKEN }} diff --git a/src/runtime/AGENTS.md b/src/runtime/AGENTS.md index 7c47980..d722abc 100644 --- a/src/runtime/AGENTS.md +++ b/src/runtime/AGENTS.md @@ -25,7 +25,7 @@ Runtime domain for planner stages, model-facing status, tool wrappers, timers, r ### Do Not Touch Unless - Do not change lifecycle transitions without updating `state-machine.ts`, `state-transition.ts`, `workflow-tools.ts`, tests, and instructions. -- Do not add or re-scope a planner tool without updating BOTH allowlists that gate it: the guard policy (`guard/tool-policy.ts` `STEP_ALLOWED_TOOLS`) AND the stage behavior (`stage-behavior.ts` `expectedTools`). Both gates must pass for a tool to be usable at a step; if the guard allows a tool the behavior gate omits, the model is blocked at runtime (a deadlock when no fallback exists). The invariant is enforced by `tool-gating-invariant.test.ts`. Also update tool visibility expectations, status/instructions, and tests. +- Do not add or re-scope a planner tool without updating BOTH allowlists that gate it: the guard policy (`guard/tool-policy.ts` `STEP_ALLOWED_TOOLS`) AND the stage behavior (`stage-behavior.ts` `expectedTools`). Both gates must pass for a tool to be usable at a step; if the guard allows a tool the behavior gate omits, the model is blocked at runtime (a deadlock when no fallback exists). The two gates are composed only on the normal `allow_stage_machine` path (`orchestrator-gate.ts`); the broken/user-decision/compact states bypass the behavior gate and the guard returns a fixed set, so a step-scoped tool must never leak into those sets. Both halves are enforced by `tool-gating-invariant.test.ts` across the full flag matrix (debug on/off, broken, user-decision, compact). Also update tool visibility expectations, status/instructions, and tests. ### Domain Details - `status.ts` is the primary prompt surface for local models; it reads `PlanStateRecord` from the orchestrator and formats step rules, contract summaries, and guidance lines. diff --git a/src/runtime/artifact-echo.test.ts b/src/runtime/artifact-echo.test.ts new file mode 100644 index 0000000..a967fd7 --- /dev/null +++ b/src/runtime/artifact-echo.test.ts @@ -0,0 +1,48 @@ +import { describe, expect, it } from "vitest"; +import { + ARTIFACT_CANONICAL_SCHEMA, + formatArtifactEcho, + formatCanonicalSchemaHint, +} from "./artifact-echo"; + +// Every strict-structure planner tool that writes markdown must have a canonical +// schema to echo back, so the "expected vs received" feedback is uniform. +const STRICT_TOOLS = [ + "planner_goal_submit", + "planner_questions_submit", + "planner_plan_submit", + "planner_discovery_submit", + "planner_tdd_submit", + "planner_summary_submit", + "planner_task_upsert", + "planner_refactor_review", + "planner_doubt_review", + "planner_contract_upsert", + "planner_skill_create", + "planner_skill_update", +]; + +describe("artifact echo", () => { + it("has a canonical schema for every strict tool", () => { + for (const tool of STRICT_TOOLS) { + expect(ARTIFACT_CANONICAL_SCHEMA[tool]?.trim().length).toBeGreaterThan(0); + } + }); + + it("echoes both expected shape and what was written", () => { + const text = formatArtifactEcho({ + canonicalSchema: "# Example\n## Section", + writtenMarkdown: "# Real\n## Done\n", + }); + expect(text).toContain("Expected shape"); + expect(text).toContain("## Section"); + expect(text).toContain("What you submitted"); + expect(text).toContain("# Real"); + }); + + it("schema-only hint shows the expected shape without a written block", () => { + const text = formatCanonicalSchemaHint("# Example\n## Section"); + expect(text).toContain("Expected shape"); + expect(text).not.toContain("What you submitted"); + }); +}); diff --git a/src/runtime/artifact-echo.ts b/src/runtime/artifact-echo.ts new file mode 100644 index 0000000..9a4cfba --- /dev/null +++ b/src/runtime/artifact-echo.ts @@ -0,0 +1,126 @@ +import { TDD_SECTIONS } from "./tdd-form"; + +/** + * Shared "expected vs received" echo appended to the result of every planner + * tool that writes a structured markdown artifact. Showing the canonical shape + * next to what was actually saved lets the model self-correct by comparison + * instead of guessing the format — the same teaching signal for all strict + * tools, free-form (goal/discovery/plan/summary/questions) and structured + * (tdd/task/refactor/doubt/contract/skill) alike. + */ +export function formatArtifactEcho(input: { + canonicalSchema: string; + writtenMarkdown: string; +}): string { + return [ + "## Expected shape (canonical schema)", + input.canonicalSchema, + "", + "## What you submitted (saved to disk)", + "```markdown", + input.writtenMarkdown.trimEnd(), + "```", + "", + "Compare the two: the saved artifact should follow the canonical shape above (the same kind of sections — your prose wording is your own). If a section is missing or wrong, call the same tool again to overwrite it; otherwise continue.", + ].join("\n"); +} + +/** + * Just the canonical-shape reference, for tools that already echo the content + * they saved (goal/questions show it for user review). Avoids printing the + * artifact twice while still giving the model the expected shape to compare. + */ +export function formatCanonicalSchemaHint(canonicalSchema: string): string { + return [ + "## Expected shape (canonical schema)", + canonicalSchema, + "", + "Make sure what you saved follows this shape; if a section is missing or wrong, call the same tool again to overwrite it.", + ].join("\n"); +} + +/** Canonical reference templates, keyed by the tool that produces the artifact. */ +export const ARTIFACT_CANONICAL_SCHEMA: Record = { + planner_goal_submit: [ + "# Goal: ", + "## Outcome (what the finished work delivers)", + "## Assumptions", + "## Out of scope", + ].join("\n"), + planner_questions_submit: [ + "# Discovery Questions", + "## Status (open questions, or 'No unresolved questions')", + "## Assumptions (assumptions carried into planning)", + ].join("\n"), + planner_plan_submit: [ + "# Plan: <title>", + "## Goal", + "## Scope (in-scope vs out-of-scope)", + "## Constraints", + "## Risks", + "## Checks (how each task is verified)", + "## Tasks (ordered task sequence)", + ].join("\n"), + planner_discovery_submit: [ + "# Discovery: <title>", + "## Project Overview / boundaries / findings / fundamental rules", + "(for change requests: ## Post-Implementation Snapshot / Completed Work / Remaining Work)", + "", + "NOTE: Do NOT write a `## Verification Protocol` heading in body — pass", + "the commands in the verificationProtocol argument; the wrapper renders", + "`## Verification Protocol` with one `- <command>` per line. That section", + "is the single source doubt_review checks against.", + ].join("\n"), + planner_tdd_submit: [ + "# tdd.md (per active task; sections added as the lifecycle reaches them)", + ...TDD_SECTIONS.flatMap((section) => [ + `## ${section.title}`, + ...section.fields.map((field) => `- ${field}: <concrete evidence>`), + ]), + ].join("\n"), + planner_summary_submit: [ + "# Final Summary", + "## What changed", + "## Verification evidence (command → result)", + "## Follow-ups", + ].join("\n"), + planner_task_upsert: [ + "# Task: <title>", + "## Acceptance Criteria", + "## Scope (files/areas in and out of scope)", + "## Notes", + ].join("\n"), + planner_refactor_review: [ + "# Refactor Review", + "## Changed Surface / Complexity / Duplication / Naming & Boundaries / Edge Cases", + "## Category Reviews (per-category findings)", + "## Decision (applied changes, or why kept as-is)", + ].join("\n"), + planner_doubt_review: [ + "# Doubt Review", + "## Verification Evidence (one entry per protocol command: command/status/evidence)", + "## Possible Errors (each: riskCategory/status/proofLevel/nextAction/claim/...)", + "## Summary", + ].join("\n"), + planner_contract_upsert: [ + "# <AGENTS.md contract block>", + "## Purpose / Scope / Stable Contracts / Read First / Do Not Touch Unless", + "(only the pi-code-planner:contracts block is managed; surrounding prose is yours)", + ].join("\n"), + planner_skill_create: [ + "---", + "name: <kebab-case>", + "description: <one line>", + "---", + "# <skill title>", + "## When to use / Steps / Notes", + ].join("\n"), + planner_skill_update: [ + "---", + "name: <kebab-case>", + "description: <one line>", + "---", + "# <skill title>", + "## When to use / Steps / Notes", + ].join("\n"), +}; diff --git a/src/runtime/artifact-tools.roundtrip.test.ts b/src/runtime/artifact-tools.roundtrip.test.ts new file mode 100644 index 0000000..3663f48 --- /dev/null +++ b/src/runtime/artifact-tools.roundtrip.test.ts @@ -0,0 +1,138 @@ +import { describe, expect, it } from "vitest"; +import { MockPlannerFs } from "../test/mock-fs"; +import { ARTIFACT_CANONICAL_SCHEMA } from "./artifact-echo"; +import { + buildDiscoveryMarkdown, + PLANNER_ARTIFACT_TOOL_NAMES, + stripVerificationProtocolSection, +} from "./artifact-tools"; +import { + extractVerificationProtocol, + extractVerificationProtocolCommands, +} from "./doubt-review"; +import { + validatePostImplementationCounterexampleReview, + validatePreImplementationProofContract, + validateTaskMergeScopeAudit, +} from "./tdd-evidence"; +import { mergeTddMarkdown, renderTddSection, TDD_SECTIONS } from "./tdd-form"; + +// Reproduces the deadlock from the decoded pi-session: the model wrote its own +// "## Verification Protocol" section (with a prose lead-in and extra commands) +// into the discovery body while the wrapper also appended one from the +// verificationProtocol argument. The parser then collected the prose line as a +// phantom required command, so planner_doubt_review could never be satisfied. +describe("discovery_submit ↔ verification protocol parser round-trip", () => { + it("keeps the verificationProtocol argument as the single source of truth", () => { + const body = [ + "# Discovery: example", + "", + "## Findings", + "- everything already has AGENTS.md except src/test/", + "", + "### Verification Protocol", + "The existing project has these commands available:", + "- `npm run check` — biome", + "- `npm run build` — tsc", + "- `npm test` — vitest", + "- `npm run ci` — full pipeline", + "- `npm run format` — biome format", + ].join("\n"); + + const discoveryMd = buildDiscoveryMarkdown(body, [ + "npm run check", + "npm run build", + "npm test", + ]); + + // Exactly one protocol section survives, and it is the argument's. + const headings = discoveryMd + .split(/\r?\n/) + .filter( + (line) => /verification protocol/i.test(line) && /^#{1,6}\s/.test(line), + ); + expect(headings).toEqual(["## Verification Protocol"]); + + // The parser the doubt_review gate uses returns exactly the argument + // commands — no prose phantom, no ci/format the model never ran. + expect(extractVerificationProtocolCommands(discoveryMd)).toEqual([ + "npm run check", + "npm run build", + "npm test", + ]); + }); + + it("ignores prose lines under a verification protocol heading", () => { + const discoveryMd = [ + "# Discovery", + "", + "## Verification Protocol", + "The existing project has these commands available:", + "- npm run check", + "- npm test", + ].join("\n"); + // The prose lead-in is dropped; only bullet commands remain. + expect(extractVerificationProtocol(discoveryMd)).toEqual([ + "- npm run check", + "- npm test", + ]); + expect(extractVerificationProtocolCommands(discoveryMd)).toEqual([ + "npm run check", + "npm test", + ]); + }); + + it("strips a body protocol section regardless of heading level", () => { + for (const heading of [ + "## Verification Protocol", + "#### Verification Protocol", + ]) { + const stripped = stripVerificationProtocolSection( + ["## Findings", "- a", heading, "- npm run lint"].join("\n"), + ); + expect(stripped).not.toMatch(/verification protocol/i); + expect(stripped).not.toContain("npm run lint"); + expect(stripped).toContain("## Findings"); + } + }); +}); + +describe("tdd_submit ↔ tdd-evidence validators round-trip", () => { + it("renders sections that the consuming validators accept", async () => { + const updates = Object.fromEntries( + TDD_SECTIONS.map((def) => [ + def.key, + renderTddSection( + def, + Object.fromEntries( + def.fields.map((field) => [field, `concrete ${field} evidence`]), + ), + ), + ]), + ); + const content = mergeTddMarkdown( + "", + updates as Parameters<typeof mergeTddMarkdown>[1], + ); + + const fs = new MockPlannerFs(); + const path = "/plan/tasks/task-1/tdd.md"; + await fs.writeTextAtomic(path, content); + + await expect( + validatePreImplementationProofContract(fs, path), + ).resolves.toBeNull(); + await expect( + validatePostImplementationCounterexampleReview(fs, path), + ).resolves.toBeNull(); + await expect(validateTaskMergeScopeAudit(fs, path)).resolves.toBeNull(); + }); +}); + +describe("fill-tool echo schema", () => { + it("provides a canonical schema for every artifact tool", () => { + for (const tool of PLANNER_ARTIFACT_TOOL_NAMES) { + expect(ARTIFACT_CANONICAL_SCHEMA[tool]?.trim().length).toBeGreaterThan(0); + } + }); +}); diff --git a/src/runtime/artifact-tools.ts b/src/runtime/artifact-tools.ts index d1cd9bf..39afbfb 100644 --- a/src/runtime/artifact-tools.ts +++ b/src/runtime/artifact-tools.ts @@ -2,6 +2,7 @@ import { join } from "node:path"; import type { GitRunner } from "../git/runner"; import type { PlannerFs } from "../storage/fs"; import type { ProjectStoragePaths } from "../storage/paths"; +import { ARTIFACT_CANONICAL_SCHEMA, formatArtifactEcho } from "./artifact-echo"; import { checkPlannerOrchestratorToolAllowed, runPlannerOrchestrator, @@ -58,33 +59,37 @@ export async function executePlannerArtifactTool(input: { switch (input.toolName) { case "planner_plan_submit": { const content = requiredString(params, "content"); - await input.fs.writeTextAtomic(planPaths.planMd, `${content}\n`); - return applied(input.toolName, planPaths.planMd, "Planner plan saved."); + const written = `${content}\n`; + await input.fs.writeTextAtomic(planPaths.planMd, written); + return applied( + input.toolName, + planPaths.planMd, + "Planner plan saved.", + written, + ); } case "planner_discovery_submit": { const body = requiredString(params, "body"); const protocol = requiredStringArray(params, "verificationProtocol"); - const content = [ - body, - "", - "## Verification Protocol", - ...protocol.map((command) => `- ${command}`), - ].join("\n"); - await input.fs.writeTextAtomic(planPaths.discoveryMd, `${content}\n`); + const written = `${buildDiscoveryMarkdown(body, protocol)}\n`; + await input.fs.writeTextAtomic(planPaths.discoveryMd, written); return applied( input.toolName, planPaths.discoveryMd, - "Planner discovery saved with a ## Verification Protocol section.", + "Planner discovery saved. The ## Verification Protocol section is rebuilt from the verificationProtocol argument (any protocol section written in body is dropped).", + written, ); } case "planner_summary_submit": { const content = requiredString(params, "content"); const summaryPath = join(planPaths.planDir, "final_summary.md"); - await input.fs.writeTextAtomic(summaryPath, `${content}\n`); + const written = `${content}\n`; + await input.fs.writeTextAtomic(summaryPath, written); return applied( input.toolName, summaryPath, "Planner final summary saved.", + written, ); } case "planner_tdd_submit": { @@ -111,6 +116,7 @@ export async function executePlannerArtifactTool(input: { input.toolName, tddPath, `Planner tdd.md updated (${Object.keys(updates).join(", ")}).`, + content, ); } } @@ -140,6 +146,7 @@ function applied( toolName: PlannerArtifactToolName, path: string, headline: string, + written: string, ): PlannerArtifactToolExecutionResult { return { status: "applied", @@ -147,12 +154,53 @@ function applied( text: [ headline, `Artifact: ${path}`, - "Continue the current step; the next-step hint follows after planner_finish_step.", + "", + formatArtifactEcho({ + canonicalSchema: ARTIFACT_CANONICAL_SCHEMA[toolName], + writtenMarkdown: written, + }), + "", + "The next-step hint follows after planner_finish_step.", ].join("\n"), details: { path }, }; } +/** Assemble discovery.md from a free-form body plus the structured protocol + * commands. The verificationProtocol argument is the single source of truth for + * the ## Verification Protocol section, so any protocol section the model also + * wrote into body is stripped first — otherwise discovery.md would carry two + * `## Verification Protocol` sections and break the doubt_review protocol + * parser. */ +export function buildDiscoveryMarkdown( + body: string, + protocol: readonly string[], +): string { + return [ + stripVerificationProtocolSection(body).trimEnd(), + "", + "## Verification Protocol", + ...protocol.map((command) => `- ${command}`), + ].join("\n"); +} + +/** Remove any `verification protocol` section (heading at any level plus its + * lines, up to the next heading) from body. */ +export function stripVerificationProtocolSection(body: string): string { + const lines = body.split(/\r?\n/); + const out: string[] = []; + let skipping = false; + for (const line of lines) { + const heading = /^(#{1,6})\s+(.+?)\s*$/.exec(line.trim()); + if (heading) { + skipping = heading[2].trim().toLowerCase() === "verification protocol"; + if (skipping) continue; + } + if (!skipping) out.push(line); + } + return out.join("\n"); +} + function blocked( toolName: PlannerArtifactToolName, text: string, diff --git a/src/runtime/contracts.ts b/src/runtime/contracts.ts index ee463b8..ae95ff5 100644 --- a/src/runtime/contracts.ts +++ b/src/runtime/contracts.ts @@ -28,6 +28,7 @@ import type { PlanStateRecord, } from "../storage/schema"; import { updatePlanState } from "../storage/state-store"; +import { ARTIFACT_CANONICAL_SCHEMA, formatArtifactEcho } from "./artifact-echo"; import { appendPlannerSection } from "./artifact-utils"; import { checkPlannerOrchestratorToolAllowed, @@ -722,6 +723,12 @@ async function contractUpsert(input: { existing === null ? "Created new AGENTS.md contract file." : "Updated existing AGENTS.md managed block and preserved non-planner content.", + "", + formatArtifactEcho({ + canonicalSchema: ARTIFACT_CANONICAL_SCHEMA.planner_contract_upsert, + writtenMarkdown: next, + }), + "", "Commit this change through planner_git_commit when the current step requires a clean worktree.", ].join("\n"), { path, diagnostics: validation.diagnostics }, diff --git a/src/runtime/doubt-review.ts b/src/runtime/doubt-review.ts index 1ea9a4c..57e381b 100644 --- a/src/runtime/doubt-review.ts +++ b/src/runtime/doubt-review.ts @@ -502,7 +502,10 @@ export function extractVerificationProtocol(content: string): string[] { collecting = title === "verification protocol"; continue; } - if (collecting && line.trim()) { + // Only bullet list items count as protocol commands. Prose lines (e.g. an + // introductory "These commands are available:") under the heading are + // ignored so they never become phantom required commands. + if (collecting && /^[-*]\s+\S/.test(line.trim())) { result.push(line.trim()); } } @@ -523,6 +526,11 @@ function extractProtocolCommand(line: string): string | null { if (/^(working directory|cwd|flags?)\s*:/i.test(normalized)) { return null; } + // A line ending in a colon is a prose heading (e.g. "Commands available:"), + // not a runnable command — never treat it as a required protocol command. + if (normalized.endsWith(":")) { + return null; + } const match = /^(test|tests|lint|format|build|ci|check|checks)\s*:\s*(.+)$/i.exec( normalized, diff --git a/src/runtime/doubt-tools.ts b/src/runtime/doubt-tools.ts index 3764a41..0ae0e50 100644 --- a/src/runtime/doubt-tools.ts +++ b/src/runtime/doubt-tools.ts @@ -1,6 +1,7 @@ import type { GitRunner } from "../git/runner"; import type { PlannerFs } from "../storage/fs"; import type { ProjectStoragePaths } from "../storage/paths"; +import { ARTIFACT_CANONICAL_SCHEMA, formatArtifactEcho } from "./artifact-echo"; import { DOUBT_FINDING_STATUSES, DOUBT_NEXT_ACTIONS, @@ -95,6 +96,12 @@ export async function executePlannerDoubtTool(input: { `Verify artifact: ${planPaths.verifyMd}`, `Proven bugs: ${validation.provenBugCount}`, `Needs probe: ${validation.needsProbeCount}`, + "", + formatArtifactEcho({ + canonicalSchema: ARTIFACT_CANONICAL_SCHEMA.planner_doubt_review, + writtenMarkdown: content, + }), + "", validation.provenBugCount > 0 ? "Complete finalize/doubt_review with target planning/read_context after recording the proven findings in decisions.md." : validation.needsProbeCount > 0 diff --git a/src/runtime/goal-tools.ts b/src/runtime/goal-tools.ts index f6fa6a0..782db81 100644 --- a/src/runtime/goal-tools.ts +++ b/src/runtime/goal-tools.ts @@ -4,6 +4,10 @@ import type { ProjectStoragePaths } from "../storage/paths"; import { updatePlanRecord } from "../storage/plan-store"; import { upsertProjectPlanSummary } from "../storage/project-store"; import { savePlanState } from "../storage/state-store"; +import { + ARTIFACT_CANONICAL_SCHEMA, + formatCanonicalSchemaHint, +} from "./artifact-echo"; import { checkPlannerOrchestratorToolAllowed, runPlannerOrchestrator, @@ -97,6 +101,10 @@ export async function executePlannerGoalTool( "", content.trim(), "", + formatCanonicalSchemaHint( + ARTIFACT_CANONICAL_SCHEMA.planner_goal_submit, + ), + "", "Ask the user to review the goal and explicitly approve it or request revision.", "After the user answers, call planner_goal_decide.", ].join("\n"), diff --git a/src/runtime/next-step-hint.ts b/src/runtime/next-step-hint.ts index f925e19..d42ead7 100644 --- a/src/runtime/next-step-hint.ts +++ b/src/runtime/next-step-hint.ts @@ -53,6 +53,11 @@ export function buildNextStepHint(state: PlanStateRecord): string { // Name the wrappers permitted at this exact step so the model reaches for // the right planner tool instead of a built-in write/edit. lines.push(`Tools allowed now: ${tools.join(", ")}.`); + // Each planner_*_submit belongs to one step. The model often reaches for + // the next step's submit tool early and gets blocked; spell out the order. + lines.push( + "Order: only the tools listed above work at this step. A planner_*_submit for a later step is blocked until you call planner_finish_step to advance there.", + ); } lines.push( `If unsure, re-read \`${getPlannerStepStage(state.step)}.md\` or call planner_status.`, diff --git a/src/runtime/question-tools.ts b/src/runtime/question-tools.ts index 8bcdad5..cf93a74 100644 --- a/src/runtime/question-tools.ts +++ b/src/runtime/question-tools.ts @@ -3,6 +3,10 @@ import type { PlannerWrapperTool } from "../guard/tool-policy"; import type { PlannerFs } from "../storage/fs"; import type { ProjectStoragePaths } from "../storage/paths"; import { savePlanState } from "../storage/state-store"; +import { + ARTIFACT_CANONICAL_SCHEMA, + formatCanonicalSchemaHint, +} from "./artifact-echo"; import { checkPlannerOrchestratorToolAllowed, runPlannerOrchestrator, @@ -83,6 +87,10 @@ export async function executePlannerQuestionTool( "## Questions For User", content.trim(), "", + formatCanonicalSchemaHint( + ARTIFACT_CANONICAL_SCHEMA.planner_questions_submit, + ), + "", hasOpenQuestions ? "Show these questions to the user verbatim. Wait for the user's answers, then call planner_questions_resolve. Do not finish discovery/write_questions yet." : "No unresolved questions remain. Call planner_finish_step for discovery/write_questions.", diff --git a/src/runtime/refactor-tools.ts b/src/runtime/refactor-tools.ts index 303c514..9c038b0 100644 --- a/src/runtime/refactor-tools.ts +++ b/src/runtime/refactor-tools.ts @@ -2,6 +2,7 @@ import { join } from "node:path"; import type { GitRunner } from "../git/runner"; import type { PlannerFs } from "../storage/fs"; import type { ProjectStoragePaths } from "../storage/paths"; +import { ARTIFACT_CANONICAL_SCHEMA, formatArtifactEcho } from "./artifact-echo"; import { checkPlannerOrchestratorToolAllowed, runPlannerOrchestrator, @@ -123,6 +124,12 @@ export async function executePlannerRefactorTool(input: { "Planner refactor review written.", `Refactor artifact: ${refactorMd}`, `Decision: ${decision}`, + "", + formatArtifactEcho({ + canonicalSchema: ARTIFACT_CANONICAL_SCHEMA.planner_refactor_review, + writtenMarkdown: content, + }), + "", "If project files changed during refactor, commit through planner_git_commit before finishing.", "Then call planner_status before choosing the next planner action.", ].join("\n"), diff --git a/src/runtime/skill-library.ts b/src/runtime/skill-library.ts index fac8ced..22fe4f7 100644 --- a/src/runtime/skill-library.ts +++ b/src/runtime/skill-library.ts @@ -6,6 +6,7 @@ import type { PlannerFs } from "../storage/fs"; import { readJsonIfExists, writeJson } from "../storage/json"; import type { ProjectStoragePaths } from "../storage/paths"; import { resolveProjectStoragePaths } from "../storage/project-resolver"; +import { ARTIFACT_CANONICAL_SCHEMA, formatArtifactEcho } from "./artifact-echo"; import { checkPlannerOrchestratorToolAllowed, runPlannerOrchestrator, @@ -334,6 +335,12 @@ export async function executePlannerSkillTool( `Skill path: ${skillPath}`, `Index: ${paths.indexJson}`, `Language: ${language}`, + "", + formatArtifactEcho({ + canonicalSchema: ARTIFACT_CANONICAL_SCHEMA.planner_skill_create, + writtenMarkdown: content, + }), + "", "Pi loads planner skills through resources_discover on the next planner session start, resume, or reload.", "Continue the current planner state from planner_status; this skill is future memory, not a replacement for the current stage instructions.", ].join("\n"), @@ -416,6 +423,12 @@ export async function executePlannerSkillUpdateTool( `Skill path: ${existing.skillPath}`, `Index: ${paths.indexJson}`, `Language: ${language}`, + "", + formatArtifactEcho({ + canonicalSchema: ARTIFACT_CANONICAL_SCHEMA.planner_skill_update, + writtenMarkdown: content, + }), + "", "Pi loads updated skills through resources_discover on the next planner session start, resume, or reload.", ].join("\n"), details: { diff --git a/src/runtime/stage-behavior.ts b/src/runtime/stage-behavior.ts index c1b325a..e0f4e65 100644 --- a/src/runtime/stage-behavior.ts +++ b/src/runtime/stage-behavior.ts @@ -96,7 +96,9 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredArtifacts: [], updatedArtifacts: [], requiredGates: [], - expectedTools: ["planner_status", "planner_report_stuck"], + // planner_report_stuck is execution-only (see stuck-tools.ts); the guard + // does not allow it during init, so it must not be advertised here. + expectedTools: ["planner_status"], commitPolicy: "forbidden", compactPolicy: "not_allowed", }), @@ -116,14 +118,10 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredArtifacts: [], updatedArtifacts: ["project.json"], requiredGates: ["git_available"], - expectedTools: [ - "planner_contract_scan", - "planner_contract_route", - "planner_contract_read", - "planner_contract_upsert", - "planner_git_commit", - "planner_status", - ], + // Storage setup is runtime-driven (guard allows no wrappers here). The + // model does contract scanning/upsert later, at discovery; listing those + // wrappers here only mis-advertised tools the guard blocks. + expectedTools: ["planner_status"], commitPolicy: "required_if_dirty", compactPolicy: "not_allowed", }), @@ -160,11 +158,8 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredArtifacts: ["plan.json", "state.json"], updatedArtifacts: ["state.json"], requiredGates: ["plan_record_exists"], - expectedTools: [ - "planner_git_inspect", - "planner_contract_route", - "planner_contract_read", - ], + // Worktree creation is runtime-driven; guard allows only git_inspect here. + expectedTools: ["planner_git_inspect"], commitPolicy: "forbidden", compactPolicy: "not_allowed", }), diff --git a/src/runtime/task-tools.ts b/src/runtime/task-tools.ts index 380e654..829bd7a 100644 --- a/src/runtime/task-tools.ts +++ b/src/runtime/task-tools.ts @@ -3,6 +3,7 @@ import type { PlannerFs } from "../storage/fs"; import type { ProjectStoragePaths } from "../storage/paths"; import { readPlanRecord, updatePlanRecord } from "../storage/plan-store"; import { upsertTaskArtifacts } from "../storage/task-store"; +import { ARTIFACT_CANONICAL_SCHEMA, formatArtifactEcho } from "./artifact-echo"; import { checkPlannerOrchestratorToolAllowed, runPlannerOrchestrator, @@ -89,6 +90,12 @@ export async function executePlannerTaskTool(input: { `Planner task artifacts written: ${result.record.taskId}.`, `Task JSON: ${result.paths.taskJson}`, `Task Markdown: ${result.paths.taskMd}`, + "", + formatArtifactEcho({ + canonicalSchema: ARTIFACT_CANONICAL_SCHEMA.planner_task_upsert, + writtenMarkdown: await input.fs.readText(result.paths.taskMd), + }), + "", "TDD lifecycle artifacts were created if missing. Do not create a separate testing task.", "Call planner_status before choosing the next planner action.", ].join("\n"), diff --git a/src/runtime/tool-gating-invariant.test.ts b/src/runtime/tool-gating-invariant.test.ts index 180a35f..ad0b624 100644 --- a/src/runtime/tool-gating-invariant.test.ts +++ b/src/runtime/tool-gating-invariant.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it } from "vitest"; import { getAllowedPlannerWrapperTools, + PLANNER_WRAPPER_TOOLS, type PlannerWrapperTool, } from "../guard/tool-policy"; import { @@ -13,42 +14,135 @@ import { getPlannerStageStepBehavior, } from "./stage-behavior"; -// Invariant: a planner tool is usable at a step only if it passes BOTH gates — -// the guard policy (STEP_ALLOWED_TOOLS) and the stage-behavior expectedTools -// gate. So everything the guard permits MUST also pass the behavior gate; -// otherwise the guard advertises a tool the model can never actually call -// (a deadlock when no fallback exists, as happened with the artifact fill-tools). +// A planner tool is usable at a step only if it passes BOTH gates: the guard +// policy (STEP_ALLOWED_TOOLS) and the stage-behavior expectedTools gate. The +// two gates are composed in orchestrator-gate.ts, but ONLY on the normal +// `allow_stage_machine` path — i.e. when the plan is not broken, not awaiting a +// user decision, and not at a compact boundary. In those special states the +// behavior gate is bypassed and the guard returns a fixed recovery/compact set. // -// This guards against the two allowlists drifting apart for any tool — debug -// wrappers, git wrappers, contract tools, skills, or future additions. +// So the deadlock invariant has two halves, and this test enforces both so any +// drift between the allowlists (debug, git, contract, fill-tools, or future +// additions) is caught before publish. + +const STEPS = Object.entries(PLANNER_STAGE_STEPS) as Array< + [PlannerStage, readonly PlannerStep[]] +>; + describe("planner tool gating invariant", () => { - it("every guard-allowed wrapper passes the behavior gate at every step", () => { - const steps = Object.entries(PLANNER_STAGE_STEPS) as Array< - [PlannerStage, readonly PlannerStep[]] - >; - const violations: string[] = []; + // Half 1 — normal (allow_stage_machine) path: everything the guard permits + // MUST also pass the behavior gate, or the guard advertises a tool the model + // can never actually call (a deadlock when no fallback exists, as happened + // with the artifact fill-tools). Checked with debug artifacts both present + // and absent, since debugArtifactsDir changes the guard set. + for (const debugArtifactsDir of [null, "/tmp/planner-debug"]) { + it(`guard-allowed wrappers pass the behavior gate at every step (debug=${debugArtifactsDir ? "on" : "off"})`, () => { + const violations: string[] = []; + for (const [stage, stepList] of STEPS) { + for (const step of stepList) { + const allowed = getAllowedPlannerWrapperTools({ + stage, + step, + broken: false, + requiresUserDecision: false, + requiresCompact: false, + debugArtifactsDir, + }); + const behavior = getPlannerStageStepBehavior({ stage, step }); + for (const tool of allowed as readonly PlannerWrapperTool[]) { + if ( + !checkPlannerStageBehaviorWrapperTool({ behavior, tool }).allow + ) { + violations.push(`${stage}/${step}: ${tool}`); + } + } + } + } + expect(violations, violations.join("\n")).toEqual([]); + }); + } + + // Half 2 — special states bypass the behavior gate, so the guard is the sole + // gate. Their allowed set must therefore be FIXED (independent of the frozen + // stage/step); otherwise a step-scoped tool could leak into a state where + // nothing checks it against stage behavior. We pin that the set is identical + // across every stage/step for each special flag. + for (const flags of [ + { name: "broken", broken: true, requiresUserDecision: false }, + { name: "requiresUserDecision", broken: false, requiresUserDecision: true }, + ] as const) { + it(`guard set is fixed (step-independent) when ${flags.name}`, () => { + const sets = STEPS.flatMap(([stage, stepList]) => + stepList.map((step) => + [ + ...getAllowedPlannerWrapperTools({ + stage, + step, + broken: flags.broken, + requiresUserDecision: flags.requiresUserDecision, + requiresCompact: false, + debugArtifactsDir: "/tmp/planner-debug", + }), + ].sort(), + ), + ); + const first = JSON.stringify(sets[0]); + for (const set of sets) { + expect(JSON.stringify(set)).toEqual(first); + } + }); + } - for (const [stage, stepList] of steps) { + it("guard set is exactly [planner_status] at a compact boundary", () => { + for (const [stage, stepList] of STEPS) { for (const step of stepList) { - // debugArtifactsDir set so debug wrappers are included in the guard - // set and therefore checked against the behavior gate too. const allowed = getAllowedPlannerWrapperTools({ stage, step, broken: false, requiresUserDecision: false, - requiresCompact: false, + requiresCompact: true, debugArtifactsDir: "/tmp/planner-debug", }); + expect([...allowed]).toEqual(["planner_status"]); + } + } + }); + + // Reverse direction: a wrapper tool the stage behavior advertises in + // expectedTools (so it shows up in status' "Stage Behavior" and the model + // reaches for it) MUST be permitted by the guard — otherwise the model is + // told to use a tool the guard blocks ("not allowed at stage/step"). The two + // always-allowed specials (planner_status, planner_git_inspect) are exempt + // because the behavior gate permits them regardless of the guard step list; + // non-wrapper workflow tools (finish_step, request/complete_compact, + // report_stuck) are gated elsewhere and never appear in the guard set. + const WRAPPER_TOOLS = new Set<string>(PLANNER_WRAPPER_TOOLS); + const GATE_EXEMPT = new Set<string>([ + "planner_status", + "planner_git_inspect", + ]); + it("every wrapper tool in expectedTools is permitted by the guard", () => { + const violations: string[] = []; + for (const [stage, stepList] of STEPS) { + for (const step of stepList) { + const guard = new Set<string>( + getAllowedPlannerWrapperTools({ + stage, + step, + broken: false, + requiresUserDecision: false, + requiresCompact: false, + debugArtifactsDir: "/tmp/planner-debug", + }), + ); const behavior = getPlannerStageStepBehavior({ stage, step }); - for (const tool of allowed as readonly PlannerWrapperTool[]) { - if (!checkPlannerStageBehaviorWrapperTool({ behavior, tool }).allow) { - violations.push(`${stage}/${step}: ${tool}`); - } + for (const tool of behavior.expectedTools) { + if (!WRAPPER_TOOLS.has(tool) || GATE_EXEMPT.has(tool)) continue; + if (!guard.has(tool)) violations.push(`${stage}/${step}: ${tool}`); } } } - expect(violations, violations.join("\n")).toEqual([]); }); }); diff --git a/src/runtime/user-commands.test.ts b/src/runtime/user-commands.test.ts index 00201ea..09e8d52 100644 --- a/src/runtime/user-commands.test.ts +++ b/src/runtime/user-commands.test.ts @@ -36,6 +36,10 @@ import { executePlannerUserCommand } from "./user-commands"; class MockGitRunner implements GitRunner { status = ""; readonly calls: Array<{ name: string; input: unknown }> = []; + /** Branches reported as already gone by branchExists. */ + readonly missingBranches = new Set<string>(); + /** Branches whose deleteBranch should throw (e.g. unmerged, force:false). */ + readonly failDeleteBranches = new Set<string>(); async init(_input: GitRepoInput): Promise<void> {} async currentBranch(_input: GitRepoInput): Promise<string> { @@ -57,12 +61,17 @@ class MockGitRunner implements GitRunner { async listProjectFiles(_input: GitRepoInput): Promise<string[]> { return []; } - async branchExists(_input: GitBranchInput): Promise<boolean> { - return true; + async branchExists(input: GitBranchInput): Promise<boolean> { + return !this.missingBranches.has(input.branch); } async createBranch(_input: GitCreateBranchInput): Promise<void> {} async deleteBranch(input: GitDeleteBranchInput): Promise<void> { this.calls.push({ name: "deleteBranch", input }); + if (this.failDeleteBranches.has(input.branch)) { + throw new Error( + `git -C ${input.repoRoot} branch -d ${input.branch} failed`, + ); + } } async switchBranch(_input: GitSwitchBranchInput): Promise<void> {} async stageAll(_input: GitRepoInput): Promise<void> {} @@ -578,4 +587,57 @@ describe("planner user commands", () => { }, ]); }); + + it("completes deletion when a child branch was already removed by hand", async () => { + const { fs, git, projectPaths } = await createProjectFixture({ + activePlanId: "plan-a", + }); + // The worktree was deleted manually, so its task branch is already gone. + git.missingBranches.add("task/plan-b/task-1"); + + const result = await executePlannerUserCommand({ + fs, + git, + projectPaths, + commandName: "planner_delete", + params: { planId: "plan-b" }, + }); + + expect(result.status).toBe("applied"); + expect(result.text).toContain("was already gone"); + // The missing branch is not deleted, but the others still are. + expect( + git.calls + .filter((call) => call.name === "deleteBranch") + .map((call) => (call.input as { branch: string }).branch), + ).toEqual(["refactor/plan-b/task-1"]); + // The plan is pruned from the project despite the missing branch. + await expect(readProjectRecord(fs, projectPaths)).resolves.toMatchObject({ + plans: [expect.objectContaining({ planId: "plan-a" })], + }); + await expect( + fs.exists(createPlanStoragePaths(projectPaths, "plan-b").planDir), + ).resolves.toBe(false); + }); + + it("completes deletion and warns when a child branch delete fails", async () => { + const { fs, git, projectPaths } = await createProjectFixture({ + activePlanId: "plan-a", + }); + git.failDeleteBranches.add("task/plan-b/task-1"); + + const result = await executePlannerUserCommand({ + fs, + git, + projectPaths, + commandName: "planner_delete", + params: { planId: "plan-b" }, + }); + + expect(result.status).toBe("applied"); + expect(result.text).toContain("could not be deleted"); + await expect( + fs.exists(createPlanStoragePaths(projectPaths, "plan-b").planDir), + ).resolves.toBe(false); + }); }); diff --git a/src/runtime/user-commands.ts b/src/runtime/user-commands.ts index 8c0161b..c79e2a6 100644 --- a/src/runtime/user-commands.ts +++ b/src/runtime/user-commands.ts @@ -273,23 +273,32 @@ async function deletePlan( const projectRootExists = await input.fs.exists( input.projectPaths.projectRoot, ); + const warnings: string[] = []; if (state) { if (state.worktreePath && (await input.fs.exists(state.worktreePath))) { if (projectRootExists) { - await input.git.worktreeRemove({ + const warning = await tryWorktreeRemove({ + git: input.git, repoRoot: input.projectPaths.projectRoot, path: state.worktreePath, }); + if (warning) warnings.push(warning); } else { await input.fs.removeDir(state.worktreePath); } } if (projectRootExists) { + // Tolerate branches that were already removed (e.g. the worktree was + // deleted by hand before this command). A hard failure here would abort + // the whole deletion and leave the plan — and its stale branches — in + // the list. Downgrade to a warning and prune the plan regardless. for (const branch of managedChildBranches(state)) { - await input.git.deleteBranch({ + const warning = await tryDeleteManagedBranch({ + git: input.git, repoRoot: input.projectPaths.projectRoot, branch, }); + if (warning) warnings.push(warning); } } if (state.worktreePath) { @@ -318,12 +327,14 @@ async function deletePlan( plans: project.plans.filter((plan) => plan.planId !== planId), }; await saveProjectRecord(input.fs, input.projectPaths, nextProject); - return applied(input.commandName, `Planner plan deleted: ${planId}.`, { + const text = [`Planner plan deleted: ${planId}.`, ...warnings].join("\n"); + return applied(input.commandName, text, { project: nextProject, planId, removedPlanDir: planPaths.planDir, projectRootExists, gitCleanupSkipped: !projectRootExists, + warnings, }); } @@ -427,6 +438,51 @@ async function assertPlanSwitchable(input: { return { allow: true }; } +/** Delete a managed branch, tolerating one that is already gone. Returns a + * warning string when the branch could not be deleted cleanly (already removed, + * or git refused), so the caller can surface it and prune the plan regardless; + * returns null on a clean delete. */ +async function tryDeleteManagedBranch(input: { + git: GitRunner; + repoRoot: string; + branch: string; +}): Promise<string | null> { + try { + const exists = await input.git.branchExists({ + repoRoot: input.repoRoot, + branch: input.branch, + }); + if (!exists) { + return `Branch ${input.branch} was already gone — skipped and pruned from the plan registry.`; + } + await input.git.deleteBranch({ + repoRoot: input.repoRoot, + branch: input.branch, + }); + return null; + } catch (error) { + return `Branch ${input.branch} could not be deleted (${errorMessage(error)}); pruned from the plan registry anyway.`; + } +} + +/** Remove a worktree, tolerating one that git no longer tracks (e.g. it was + * deleted by hand). Returns a warning string on failure, null on success. */ +async function tryWorktreeRemove(input: { + git: GitRunner; + repoRoot: string; + path: string; +}): Promise<string | null> { + try { + await input.git.worktreeRemove({ + repoRoot: input.repoRoot, + path: input.path, + }); + return null; + } catch (error) { + return `Worktree ${input.path} could not be removed via git (${errorMessage(error)}); continuing with deletion.`; + } +} + function managedChildBranches(state: PlanStateRecord): string[] { const branches = [ state.activeBranches.currentTask,