Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -165,18 +165,44 @@ jobs:
echo "url=$EXISTING_PR" >> "$GITHUB_OUTPUT"
fi

- name: Generate Release Notes
  id: notes
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Expression values are handed to the script through the environment
    # rather than expanded into the script text, so a value containing shell
    # metacharacters cannot change what the script runs.
    R_TAG: ${{ needs.prepare.outputs.tag }}
    REPO: ${{ github.repository }}
  run: |
    # Compare against the previous v* tag explicitly. Without this, GitHub
    # anchors auto-notes to the last *published* (non-draft) release; since
    # every release is created as a draft, the baseline is stale and each
    # new release re-lists every PR since the last published one, so old
    # feat/fix entries pile onto the new ones.
    #
    # `grep -F` matches the tag literally (the dots in a version are not
    # regex wildcards). `|| true` keeps the step alive under `pipefail`
    # when there is no earlier tag; PREV_TAG is then simply empty.
    PREV_TAG=$(git tag --list 'v*' --sort=-v:refname | grep -vxF -- "$R_TAG" | head -n1 || true)
    ARGS=(-f tag_name="$R_TAG")
    if [ -n "$PREV_TAG" ]; then
      ARGS+=(-f previous_tag_name="$PREV_TAG")
    fi
    NOTES=$(gh api -X POST "repos/$REPO/releases/generate-notes" "${ARGS[@]}" --jq '.body')
    # Multi-line step output: name<<DELIMITER, value lines, DELIMITER.
    {
      echo "body<<EOF_NOTES"
      echo "$NOTES"
      echo "EOF_NOTES"
    } >> "$GITHUB_OUTPUT"

- name: Create Draft Release
  uses: softprops/action-gh-release@v2
  with:
    name: ${{ env.R_NAME }}
    tag_name: ${{ needs.prepare.outputs.tag }}
    draft: true
    # Release notes are embedded below from steps.notes.outputs.body.
    # generate_release_notes is intentionally left off: when it is on, the
    # action appends its own auto-generated notes after this body, which
    # would list the changes a second time.
    body: |
      This is a draft release.

      Before publishing, verify:
      - npm package `pi-code-planner@${{ needs.prepare.outputs.version }}` is available.
      - Pi can install it with `pi install npm:pi-code-planner@${{ needs.prepare.outputs.version }}`.
      - The sync PR is ready: ${{ steps.sync_pr.outputs.url }}

      ---

      ${{ steps.notes.outputs.body }}
    token: ${{ secrets.GITHUB_TOKEN }}
2 changes: 1 addition & 1 deletion src/runtime/AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Runtime domain for planner stages, model-facing status, tool wrappers, timers, r

### Do Not Touch Unless
- Do not change lifecycle transitions without updating `state-machine.ts`, `state-transition.ts`, `workflow-tools.ts`, tests, and instructions.
- Do not add or re-scope a planner tool without updating BOTH allowlists that gate it: the guard policy (`guard/tool-policy.ts` `STEP_ALLOWED_TOOLS`) AND the stage behavior (`stage-behavior.ts` `expectedTools`). Both gates must pass for a tool to be usable at a step; if the guard allows a tool the behavior gate omits, the model is blocked at runtime (a deadlock when no fallback exists). The two gates are composed only on the normal `allow_stage_machine` path (`orchestrator-gate.ts`); the broken/user-decision/compact states bypass the behavior gate and the guard returns a fixed set, so a step-scoped tool must never leak into those sets. Both halves are enforced by `tool-gating-invariant.test.ts` across the full flag matrix (debug on/off, broken, user-decision, compact). Also update tool visibility expectations, status/instructions, and tests.

### Domain Details
- `status.ts` is the primary prompt surface for local models; it reads `PlanStateRecord` from the orchestrator and formats step rules, contract summaries, and guidance lines.
Expand Down
48 changes: 48 additions & 0 deletions src/runtime/artifact-echo.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { describe, expect, it } from "vitest";
import {
ARTIFACT_CANONICAL_SCHEMA,
formatArtifactEcho,
formatCanonicalSchemaHint,
} from "./artifact-echo";

// The strict-structure planner tools that write markdown. Each one needs a
// canonical schema to echo back so the "expected vs received" feedback is the
// same for all of them.
const STRICT_TOOLS: readonly string[] = [
  "planner_goal_submit",
  "planner_questions_submit",
  "planner_plan_submit",
  "planner_discovery_submit",
  "planner_tdd_submit",
  "planner_summary_submit",
  "planner_task_upsert",
  "planner_refactor_review",
  "planner_doubt_review",
  "planner_contract_upsert",
  "planner_skill_create",
  "planner_skill_update",
];

describe("artifact echo", () => {
  it("has a canonical schema for every strict tool", () => {
    // A missing entry yields undefined here and fails the numeric matcher.
    STRICT_TOOLS.forEach((toolName) => {
      const schema = ARTIFACT_CANONICAL_SCHEMA[toolName];
      expect(schema?.trim().length).toBeGreaterThan(0);
    });
  });

  it("echoes both expected shape and what was written", () => {
    const echoed = formatArtifactEcho({
      canonicalSchema: "# Example\n## Section",
      writtenMarkdown: "# Real\n## Done\n",
    });
    // Both halves of the comparison must appear: schema first, artifact second.
    const expectedFragments = [
      "Expected shape",
      "## Section",
      "What you submitted",
      "# Real",
    ];
    for (const fragment of expectedFragments) {
      expect(echoed).toContain(fragment);
    }
  });

  it("schema-only hint shows the expected shape without a written block", () => {
    const hint = formatCanonicalSchemaHint("# Example\n## Section");
    expect(hint).toContain("Expected shape");
    expect(hint).not.toContain("What you submitted");
  });
});
126 changes: 126 additions & 0 deletions src/runtime/artifact-echo.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import { TDD_SECTIONS } from "./tdd-form";

/**
 * Shared "expected vs received" echo appended to the result of every planner
 * tool that writes a structured markdown artifact. Showing the canonical shape
 * next to what was actually saved lets the model self-correct by comparison
 * instead of guessing the format — the same teaching signal for all strict
 * tools, free-form (goal/discovery/plan/summary/questions) and structured
 * (tdd/task/refactor/doubt/contract/skill) alike.
 *
 * The saved artifact is wrapped in a fenced `markdown` block after trailing
 * whitespace is trimmed. The fence is at least three backticks and always one
 * longer than the longest backtick run inside the artifact, so code fences in
 * the artifact itself cannot terminate the echo early.
 *
 * @param input.canonicalSchema - Reference template, emitted verbatim.
 * @param input.writtenMarkdown - The markdown that was saved.
 * @returns The echo text: schema section, fenced artifact, and a closing
 *   instruction, joined with newlines.
 */
export function formatArtifactEcho(input: {
  canonicalSchema: string;
  writtenMarkdown: string;
}): string {
  const written = input.writtenMarkdown.trimEnd();

  // A fenced block ends at the first backtick run that is at least as long as
  // its opener, so the opener has to out-length every run in the content.
  let longestBacktickRun = 0;
  for (const run of written.match(/`+/g) ?? []) {
    longestBacktickRun = Math.max(longestBacktickRun, run.length);
  }
  const fence = "`".repeat(Math.max(3, longestBacktickRun + 1));

  return [
    "## Expected shape (canonical schema)",
    input.canonicalSchema,
    "",
    "## What you submitted (saved to disk)",
    `${fence}markdown`,
    written,
    fence,
    "",
    "Compare the two: the saved artifact should follow the canonical shape above (the same kind of sections — your prose wording is your own). If a section is missing or wrong, call the same tool again to overwrite it; otherwise continue.",
  ].join("\n");
}

/**
 * Renders only the canonical-shape reference. Intended for tools that already
 * echo the content they saved (goal/questions show it for user review), so the
 * artifact is not printed twice while the model still gets the expected shape
 * to compare against.
 *
 * @param canonicalSchema - Reference template, emitted verbatim.
 * @returns Heading, schema, a blank line, and a one-line reminder.
 */
export function formatCanonicalSchemaHint(canonicalSchema: string): string {
  const heading = "## Expected shape (canonical schema)";
  const reminder =
    "Make sure what you saved follows this shape; if a section is missing or wrong, call the same tool again to overwrite it.";
  return `${heading}\n${canonicalSchema}\n\n${reminder}`;
}

/** Joins template rows into one newline-separated snippet. */
const schemaLines = (...rows: string[]): string => rows.join("\n");

/** Skill files use the same template for both the create and update tools. */
const SKILL_CANONICAL_SCHEMA = schemaLines(
  "---",
  "name: <kebab-case>",
  "description: <one line>",
  "---",
  "# <skill title>",
  "## When to use / Steps / Notes",
);

/**
 * Canonical reference templates, keyed by the name of the tool that produces
 * the artifact. The tdd template is derived from `TDD_SECTIONS`: one heading
 * per section followed by one bullet per field.
 */
export const ARTIFACT_CANONICAL_SCHEMA: Record<string, string> = {
  planner_goal_submit: schemaLines(
    "# Goal: <title>",
    "## Outcome (what the finished work delivers)",
    "## Assumptions",
    "## Out of scope",
  ),
  planner_questions_submit: schemaLines(
    "# Discovery Questions",
    "## Status (open questions, or 'No unresolved questions')",
    "## Assumptions (assumptions carried into planning)",
  ),
  planner_plan_submit: schemaLines(
    "# Plan: <title>",
    "## Goal",
    "## Scope (in-scope vs out-of-scope)",
    "## Constraints",
    "## Risks",
    "## Checks (how each task is verified)",
    "## Tasks (ordered task sequence)",
  ),
  planner_discovery_submit: schemaLines(
    "# Discovery: <title>",
    "## Project Overview / boundaries / findings / fundamental rules",
    "(for change requests: ## Post-Implementation Snapshot / Completed Work / Remaining Work)",
    "",
    "NOTE: Do NOT write a `## Verification Protocol` heading in body — pass",
    "the commands in the verificationProtocol argument; the wrapper renders",
    "`## Verification Protocol` with one `- <command>` per line. That section",
    "is the single source doubt_review checks against.",
  ),
  planner_tdd_submit: schemaLines(
    "# tdd.md (per active task; sections added as the lifecycle reaches them)",
    ...TDD_SECTIONS.flatMap((def) => [
      `## ${def.title}`,
      ...def.fields.map((name) => `- ${name}: <concrete evidence>`),
    ]),
  ),
  planner_summary_submit: schemaLines(
    "# Final Summary",
    "## What changed",
    "## Verification evidence (command → result)",
    "## Follow-ups",
  ),
  planner_task_upsert: schemaLines(
    "# Task: <title>",
    "## Acceptance Criteria",
    "## Scope (files/areas in and out of scope)",
    "## Notes",
  ),
  planner_refactor_review: schemaLines(
    "# Refactor Review",
    "## Changed Surface / Complexity / Duplication / Naming & Boundaries / Edge Cases",
    "## Category Reviews (per-category findings)",
    "## Decision (applied changes, or why kept as-is)",
  ),
  planner_doubt_review: schemaLines(
    "# Doubt Review",
    "## Verification Evidence (one entry per protocol command: command/status/evidence)",
    "## Possible Errors (each: riskCategory/status/proofLevel/nextAction/claim/...)",
    "## Summary",
  ),
  planner_contract_upsert: schemaLines(
    "# <AGENTS.md contract block>",
    "## Purpose / Scope / Stable Contracts / Read First / Do Not Touch Unless",
    "(only the pi-code-planner:contracts block is managed; surrounding prose is yours)",
  ),
  planner_skill_create: SKILL_CANONICAL_SCHEMA,
  planner_skill_update: SKILL_CANONICAL_SCHEMA,
};
138 changes: 138 additions & 0 deletions src/runtime/artifact-tools.roundtrip.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import { describe, expect, it } from "vitest";
import { MockPlannerFs } from "../test/mock-fs";
import { ARTIFACT_CANONICAL_SCHEMA } from "./artifact-echo";
import {
buildDiscoveryMarkdown,
PLANNER_ARTIFACT_TOOL_NAMES,
stripVerificationProtocolSection,
} from "./artifact-tools";
import {
extractVerificationProtocol,
extractVerificationProtocolCommands,
} from "./doubt-review";
import {
validatePostImplementationCounterexampleReview,
validatePreImplementationProofContract,
validateTaskMergeScopeAudit,
} from "./tdd-evidence";
import { mergeTddMarkdown, renderTddSection, TDD_SECTIONS } from "./tdd-form";

// Regression for a deadlock seen in a decoded pi-session. The model put its own
// "## Verification Protocol" section (prose lead-in plus extra commands) into
// the discovery body, and the wrapper appended another one built from the
// verificationProtocol argument. The parser then treated the prose line as a
// required command that did not exist, so planner_doubt_review could never be
// satisfied.
describe("discovery_submit ↔ verification protocol parser round-trip", () => {
  it("keeps the verificationProtocol argument as the single source of truth", () => {
    const argumentCommands = ["npm run check", "npm run build", "npm test"];
    const body = [
      "# Discovery: example",
      "",
      "## Findings",
      "- everything already has AGENTS.md except src/test/",
      "",
      "### Verification Protocol",
      "The existing project has these commands available:",
      "- `npm run check` — biome",
      "- `npm run build` — tsc",
      "- `npm test` — vitest",
      "- `npm run ci` — full pipeline",
      "- `npm run format` — biome format",
    ].join("\n");

    // Pass a copy so the expectation list below cannot be affected by the call.
    const discoveryMd = buildDiscoveryMarkdown(body, [...argumentCommands]);

    // Exactly one protocol section survives, and it is the argument's.
    const isProtocolHeading = (line: string): boolean =>
      /^#{1,6}\s/.test(line) && /verification protocol/i.test(line);
    const protocolHeadings = discoveryMd.split(/\r?\n/).filter(isProtocolHeading);
    expect(protocolHeadings).toEqual(["## Verification Protocol"]);

    // The parser the doubt_review gate uses returns exactly the argument
    // commands — no prose phantom, no ci/format the model never ran.
    const parsedCommands = extractVerificationProtocolCommands(discoveryMd);
    expect(parsedCommands).toEqual(argumentCommands);
  });

  it("ignores prose lines under a verification protocol heading", () => {
    const markdownLines = [
      "# Discovery",
      "",
      "## Verification Protocol",
      "The existing project has these commands available:",
      "- npm run check",
      "- npm test",
    ];
    const discoveryMd = markdownLines.join("\n");

    // The prose lead-in is dropped; only bullet commands remain.
    const protocolLines = extractVerificationProtocol(discoveryMd);
    expect(protocolLines).toEqual(["- npm run check", "- npm test"]);

    const protocolCommands = extractVerificationProtocolCommands(discoveryMd);
    expect(protocolCommands).toEqual(["npm run check", "npm test"]);
  });

  it("strips a body protocol section regardless of heading level", () => {
    const headingVariants = [
      "## Verification Protocol",
      "#### Verification Protocol",
    ];
    headingVariants.forEach((heading) => {
      const body = ["## Findings", "- a", heading, "- npm run lint"].join("\n");
      const stripped = stripVerificationProtocolSection(body);

      // The heading and its command go; the unrelated section stays.
      expect(stripped).not.toMatch(/verification protocol/i);
      expect(stripped).not.toContain("npm run lint");
      expect(stripped).toContain("## Findings");
    });
  });
});

describe("tdd_submit ↔ tdd-evidence validators round-trip", () => {
  it("renders sections that the consuming validators accept", async () => {
    // Fill every field of a section with placeholder evidence text.
    const placeholderValues = (def: (typeof TDD_SECTIONS)[number]) =>
      Object.fromEntries(
        def.fields.map((field) => [field, `concrete ${field} evidence`]),
      );

    // One rendered section per definition, keyed by the section key.
    const updates = Object.fromEntries(
      TDD_SECTIONS.map((def) => [
        def.key,
        renderTddSection(def, placeholderValues(def)),
      ]),
    );
    const content = mergeTddMarkdown(
      "",
      updates as Parameters<typeof mergeTddMarkdown>[1],
    );

    const plannerFs = new MockPlannerFs();
    const tddPath = "/plan/tasks/task-1/tdd.md";
    await plannerFs.writeTextAtomic(tddPath, content);

    // Each validator is expected to resolve to null for the rendered file.
    const proofContract = await validatePreImplementationProofContract(
      plannerFs,
      tddPath,
    );
    expect(proofContract).toBeNull();

    const counterexampleReview =
      await validatePostImplementationCounterexampleReview(plannerFs, tddPath);
    expect(counterexampleReview).toBeNull();

    const mergeScopeAudit = await validateTaskMergeScopeAudit(
      plannerFs,
      tddPath,
    );
    expect(mergeScopeAudit).toBeNull();
  });
});

describe("fill-tool echo schema", () => {
  it("provides a canonical schema for every artifact tool", () => {
    // A tool without an entry yields undefined and fails the numeric matcher.
    for (const toolName of PLANNER_ARTIFACT_TOOL_NAMES) {
      const schema = ARTIFACT_CANONICAL_SCHEMA[toolName];
      const trimmedLength = schema?.trim().length;
      expect(trimmedLength).toBeGreaterThan(0);
    }
  });
});
Loading
Loading