From 64a1102a7a0d50f6fb87356c1c2b2c54501569bf Mon Sep 17 00:00:00 2001
From: Mansur Azatbek
Date: Mon, 15 Jun 2026 23:56:32 +0500
Subject: [PATCH 1/3] fix: block model from simulating /planner-finish via
planner_finish_step
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Two new guards in validateWorkflowExit:
1. done/await_user_acceptance blocks planner_finish_step unless the
target is handle_change_request — the model must tell the user to
run /planner-finish; it cannot advance the internal done steps itself.
2. Internal done steps (prepare_output_branch, merge_or_export_result,
cleanup_worktree, mark_done, cleanup_plan_files) always block
planner_finish_step — /planner-finish drives them atomically.
Root cause: user typed /planner-finish as chat text (not as a CLI
slash command); model treated it as acceptance and called
planner_finish_step six times through internal steps without doing any
real work, leaving the plan in done/cleanup_plan_files/running limbo.
Co-Authored-By: Claude Sonnet 4.6
---
src/runtime/workflow-tools.test.ts | 113 +++++++++++++++++++++++++++++
src/runtime/workflow-tools.ts | 26 +++++++
2 files changed, 139 insertions(+)
diff --git a/src/runtime/workflow-tools.test.ts b/src/runtime/workflow-tools.test.ts
index d89c897..5ac0309 100644
--- a/src/runtime/workflow-tools.test.ts
+++ b/src/runtime/workflow-tools.test.ts
@@ -662,4 +662,117 @@ describe("workflowToolTransition", () => {
expect(blocked.result.status).toBe("blocked");
expect(blocked.text).toContain("must be proven_bug or needs_probe");
});
+
+ it("blocks planner_finish_step at done/await_user_acceptance unless targeting handle_change_request", async () => {
+ const fs = new MockPlannerFs();
+ const git = new MockGitRunner();
+ const projectPaths = createProjectStoragePaths({
+ agentDir: "/agent",
+ projectRoot: "/repo/app",
+ });
+ const planPaths = createPlanStoragePaths(projectPaths, "plan-a");
+ const worktreePath = "/repo/app/.pi/pi-code-planner/worktrees/plan-a";
+ await ensureProjectRecord(fs, projectPaths);
+ await initializePlanFiles(
+ fs,
+ planPaths,
+ createPlanRecord({ planId: "plan-a", title: "Plan A" }),
+ );
+ await fs.mkdirp(worktreePath);
+ await initializePlanState(fs, planPaths, {
+ ...createInitialPlanState({
+ baseBranch: "main",
+ planBranch: "plan/plan-a",
+ worktreePath,
+ }),
+ stage: "done",
+ step: "await_user_acceptance",
+ stepStatus: "running",
+ currentBranch: "plan/plan-a",
+ });
+ await setActivePlan(fs, projectPaths, "plan-a");
+
+ const blockedNoTarget = await executePlannerWorkflowTool({
+ fs,
+ git,
+ projectPaths,
+ toolName: "planner_finish_step",
+ params: {},
+ });
+ expect(blockedNoTarget.result.status).toBe("blocked");
+ expect(blockedNoTarget.text).toContain("/planner-finish");
+
+ const blockedWrongTarget = await executePlannerWorkflowTool({
+ fs,
+ git,
+ projectPaths,
+ toolName: "planner_finish_step",
+ params: { nextStage: "done", nextStep: "prepare_output_branch" },
+ });
+ expect(blockedWrongTarget.result.status).toBe("blocked");
+ expect(blockedWrongTarget.text).toContain("/planner-finish");
+
+ const allowed = await executePlannerWorkflowTool({
+ fs,
+ git,
+ projectPaths,
+ toolName: "planner_finish_step",
+ params: { nextStage: "done", nextStep: "handle_change_request" },
+ });
+ expect(allowed.result.status).toBe("applied");
+ expect(allowed.result.state).toMatchObject({
+ stage: "done",
+ step: "handle_change_request",
+ });
+ });
+
+ it("blocks planner_finish_step at internal done steps (prepare_output_branch, etc.)", async () => {
+ const internalSteps = [
+ "prepare_output_branch",
+ "merge_or_export_result",
+ "cleanup_worktree",
+ "mark_done",
+ "cleanup_plan_files",
+ ] as const;
+
+ for (const step of internalSteps) {
+ const fs = new MockPlannerFs();
+ const git = new MockGitRunner();
+ const projectPaths = createProjectStoragePaths({
+ agentDir: "/agent",
+ projectRoot: "/repo/app",
+ });
+ const planPaths = createPlanStoragePaths(projectPaths, "plan-a");
+ const worktreePath = "/repo/app/.pi/pi-code-planner/worktrees/plan-a";
+ await ensureProjectRecord(fs, projectPaths);
+ await initializePlanFiles(
+ fs,
+ planPaths,
+ createPlanRecord({ planId: "plan-a", title: "Plan A" }),
+ );
+ await fs.mkdirp(worktreePath);
+ await initializePlanState(fs, planPaths, {
+ ...createInitialPlanState({
+ baseBranch: "main",
+ planBranch: "plan/plan-a",
+ worktreePath,
+ }),
+ stage: "done",
+ step,
+ stepStatus: "running",
+ currentBranch: "plan/plan-a",
+ });
+ await setActivePlan(fs, projectPaths, "plan-a");
+
+ const result = await executePlannerWorkflowTool({
+ fs,
+ git,
+ projectPaths,
+ toolName: "planner_finish_step",
+ params: {},
+ });
+ expect(result.result.status, `step=${step}`).toBe("blocked");
+ expect(result.text, `step=${step}`).toContain("/planner-finish");
+ }
+ });
});
diff --git a/src/runtime/workflow-tools.ts b/src/runtime/workflow-tools.ts
index 88c5fe4..f69f2c5 100644
--- a/src/runtime/workflow-tools.ts
+++ b/src/runtime/workflow-tools.ts
@@ -240,6 +240,14 @@ function fallbackWorkflowToolTransition(
}
}
+const INTERNAL_DONE_STEPS = new Set([
+ "prepare_output_branch",
+ "merge_or_export_result",
+ "cleanup_worktree",
+ "mark_done",
+ "cleanup_plan_files",
+]);
+
async function validateWorkflowExit(input: {
fs: PlannerFs;
git: GitRunner;
@@ -284,6 +292,24 @@ async function validateWorkflowExit(input: {
: "Discovery questions are still unresolved. Show them to the user verbatim, wait for answers, and call planner_questions_resolve before finishing discovery/write_questions.")
);
}
+ if (state.stage === "done" && INTERNAL_DONE_STEPS.has(state.step)) {
+ return "Internal done steps are managed by /planner-finish. Do not call planner_finish_step here — run /planner-finish from done/await_user_acceptance instead.";
+ }
+ if (
+ state.stage === "done" &&
+ state.step === "await_user_acceptance" &&
+ state.stepStatus === "running"
+ ) {
+ const target =
+ input.transition.type === "finish_step"
+ ? input.transition.next
+ : undefined;
+ const isChangeRequest =
+ target?.stage === "done" && target.step === "handle_change_request";
+ if (!isChangeRequest) {
+ return "Run /planner-finish (the CLI slash command) to accept the result — do not advance done/await_user_acceptance via planner_finish_step. Only change requests are allowed here: call planner_finish_step with target {stage: 'done', step: 'handle_change_request'}.";
+ }
+ }
if (state.stage === "done" && state.step === "handle_change_request") {
return (
(await requireArtifactIncludes(
From 3a9081f82602cbbf7dc09534524dbfe55cba9c6f Mon Sep 17 00:00:00 2001
From: Mansur Azatbek
Date: Mon, 15 Jun 2026 23:59:48 +0500
Subject: [PATCH 2/3] docs: rewrite README with Mermaid state diagram and full
stage/step reference
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
New sections:
- Mermaid flowchart showing all 8 stages, execution loop, recovery path,
and /planner-finish exit — rendered natively on GitHub
- Stage/step tables for all 52 steps with descriptions of what each does
- "How Local Models Are Kept on Track" — 10 enforcement mechanisms:
persisted state, dual-gate tool allowlist, exit-condition validation,
echo expected-vs-received, Verification Protocol enforcement, compact
checkpoints, AGENTS.md contracts, skill library, Git isolation, recovery
Goal: give anyone reading the repo a full mental model of what the
extension does and why each piece exists, without reading the source.
Co-Authored-By: Claude Sonnet 4.6
---
README.md | 242 +++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 221 insertions(+), 21 deletions(-)
diff --git a/README.md b/README.md
index 4e2f0f4..4940ed2 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
-An experimental [Pi](https://github.com/badlogic/pi-mono) extension for local coding models. Adds a persisted state machine so long tasks survive context compaction, Git branching, and approval steps without you babysitting the session.
+An experimental [Pi](https://github.com/badlogic/pi-mono) extension for local coding models. Adds a persisted state machine so long tasks survive context compaction, Git branching, and user approval steps without you babysitting the session.
However you run it, the unit that matters is **Pi + this extension driving a local model**. With that in mind, read this plainly:
@@ -34,22 +34,230 @@ Open Pi inside a Git project and run `/planner-create`.
---
-## Workflow
+## State Machine Overview
+
+The extension drives the model through a fixed sequence of stages. Each stage contains a set of steps. The model cannot skip stages, call out-of-order tools, or advance past a gate without satisfying its exit conditions.
+
+```mermaid
+flowchart TD
+ S([user runs /planner-create]) --> INIT
+
+ INIT["**init** — 7 steps\nbootstrap worktree and plan record"]
+ INTAKE["**intake** — 2 steps\nwrite and approve goal"]
+ DISCOVERY["**discovery** — 4 steps\nscan project, write verification protocol"]
+ PLANNING["**planning** — 7 steps\nwrite plan.md, split into tasks"]
+ EXECUTION["**execution** — 12 steps per task\nTDD → implement → contracts → refactor → merge"]
+ FINALIZE["**finalize** — 6 steps\nintegration check, doubt review, summary"]
+ DONE["**done** — 8 steps\npresent result, await user acceptance"]
+ RECOVERY["**recovery** — 6 steps\ndiagnose and repair broken state"]
+ OUT([output/<plan-id> branch])
+
+ INIT --> INTAKE
+ INTAKE --> DISCOVERY
+ DISCOVERY --> PLANNING
+ PLANNING --> EXECUTION
+ EXECUTION -->|"select next task"| EXECUTION
+ EXECUTION -->|"all tasks done"| FINALIZE
+ FINALIZE --> DONE
+ DONE -->|"/planner-finish"| OUT
+ DONE -->|"change request"| PLANNING
+
+ INIT & INTAKE & DISCOVERY & PLANNING & EXECUTION & FINALIZE -.->|"broken / stuck"| RECOVERY
+ RECOVERY -.->|"resume"| INIT
+```
+
+The model never touches the diagram directly — it only ever calls the tool for the current step, and the gate advances the pointer.
+
+---
+
+## Stages and Steps
+
+52 steps total across 8 stages.
+
+### init — 7 steps
+
+Runs once when `/planner-create` is called. Fully automated — the model does not drive these steps.
+
+| Step | What happens |
+| --- | --- |
+| `check_project` | Verify a Git repo exists |
+| `check_git` | Ensure Git is usable; init if needed |
+| `prepare_storage` | Create `.pi/pi-code-planner/` storage directories |
+| `choose_worktree_location` | Select worktree path |
+| `create_plan_record` | Write `plan.json` and `project.json` |
+| `create_plan_worktree` | `git worktree add` for the plan branch |
+| `enter_intake` | Transition to intake stage |
+
+### intake — 2 steps
+
+The model reads the user's request and writes a normalized goal. The user must approve before planning begins.
+
+| Step | What happens |
+| --- | --- |
+| `draft_goal` | Model writes `goal.md` via `planner_goal_submit` |
+| `await_goal_approval` | Model presents goal; user approves or revises via `planner_goal_decide` |
+
+### discovery — 4 steps
+
+The model scans the project, reads contracts, and produces a `discovery.md` artifact that persists across all future compactions.
+
+| Step | What happens |
+| --- | --- |
+| `scan_project_structure` | Read files, AGENTS.md contracts, run checks; write `discovery.md` with a **Verification Protocol** (exact commands to prove work is correct) |
+| `write_questions` | Write and resolve open questions in `questions.md` |
+| `compact_discovery` | Compact checkpoint — Pi compacts context here |
+| `enter_planning` | Transition to planning stage |
+
+The **Verification Protocol** is critical: it locks down the exact commands (`cargo test`, `npm run ci`, etc.) that every `doubt_review` submission must prove passed. The parser enforces this — if a command is missing from evidence, the submission is blocked.
+
+### planning — 7 steps
+
+The model writes a plan and splits it into atomic tasks. Each task gets its own `task.md` artifact.
+
+| Step | What happens |
+| --- | --- |
+| `read_context` | Route AGENTS.md contracts relevant to the goal |
+| `draft_plan` | Write `plan.md` via `planner_plan_submit` |
+| `split_tasks` | Identify atomic tasks from the plan |
+| `write_task_files` | Write each `task.md` via `planner_task_upsert` |
+| `verify_plan` | Self-check: all tasks have acceptance criteria and scope |
+| `compact_planning` | Compact checkpoint |
+| `enter_execution` | Transition to execution stage |
+
+### execution — 12 steps (repeated per task)
+
+The main loop. Each task cycles through all 12 steps before the next task begins.
+
+| Step | What happens |
+| --- | --- |
+| `prepare_task` | Create `task//` branch via `planner_git_create_task_branch` |
+| `write_tdd_plan` | Write pre-implementation TDD plan: failing signal, production path, success signal |
+| `write_tests` | Write or locate tests; commit via `planner_git_commit` |
+| `run_failing_tests` | Verify tests fail (or exist) before implementation |
+| `implement_task` | Implement; commit incrementally |
+| `contract_check` | Assess AGENTS.md impact; upsert contracts if needed |
+| `refactor_task` | Review changed surface for complexity and naming |
+| `run_final_tests` | Run full verification protocol; tests must pass |
+| `capture_skill` | Record reusable technique in skill library if warranted |
+| `merge_task_to_plan` | Merge task branch into plan branch via `planner_git_merge_task_to_plan` |
+| `compact_task` | Compact checkpoint |
+| `select_next_task` | Pick next pending task or exit to finalize |
+
+### finalize — 6 steps
+
+Integration check and adversarial review of the complete plan branch before presenting to the user.
+
+| Step | What happens |
+| --- | --- |
+| `verify_plan_branch` | Run full test suite on the merged plan branch |
+| `compact_before_doubt` | Compact checkpoint — doubt review starts with a clean context |
+| `doubt_review` | Adversarial audit via `planner_doubt_review`: every item in the Verification Protocol must appear in evidence; every possible bug must be classified as proven, needs_probe, or dismissed with proof |
+| `write_final_summary` | Write `final_summary.md` |
+| `compact_finalize` | Compact checkpoint |
+| `enter_done` | Transition to done stage |
+
+### done — 8 steps
+
+The model presents the result and waits. It cannot advance to the internal export/cleanup steps on its own — those are driven exclusively by `/planner-finish`.
+
+| Step | What happens |
+| --- | --- |
+| `present_result` | Model shows summary, commits, and output options |
+| `await_user_acceptance` | Model waits; only the user can proceed (via `/planner-finish`) or request changes |
+| `handle_change_request` | Model records corrections and returns to `planning/read_context` |
+| `prepare_output_branch` | *(internal — /planner-finish only)* Create `output/` |
+| `merge_or_export_result` | *(internal — /planner-finish only)* Merge plan branch to output |
+| `cleanup_worktree` | *(internal — /planner-finish only)* Remove the plan worktree |
+| `mark_done` | *(internal — /planner-finish only)* Clear active plan record |
+| `cleanup_plan_files` | *(internal — /planner-finish only)* Remove plan artifacts |
+
+> The model is explicitly blocked from calling `planner_finish_step` to enter any internal step. Attempting to simulate `/planner-finish` via tools results in a gate error.
+
+### recovery — 6 steps
+
+Entered automatically when the plan sets `broken=true` or `requiresUserDecision=true`. The model inspects state, classifies the problem, and either repairs or asks the user before resuming.
+
+| Step | What happens |
+| --- | --- |
+| `read_state` | Read `state.json` and surface current position |
+| `inspect_git` | Check branch, worktree, and diff state |
+| `compare_expected_actual` | Diff expected vs actual file and branch state |
+| `classify_recovery` | Determine if recovery is safe or requires user decision |
+| `ask_user_if_destructive` | Present risk to user and wait for explicit approval |
+| `repair_or_resume` | Apply repairs; call `planner_recovery_resume` to return |
+
+---
+
+## How Local Models Are Kept on Track
+
+Running a local model on a multi-hour task without supervision requires more than good prompting. Here is what the extension actually enforces.
+
+### 1. Persisted state — compaction survival
+
+All plan state lives in JSON and Markdown artifacts on disk, not in chat. After every context compaction, the model calls `planner_status`, which reloads the current stage, step, and active task from `state.json`. The conversation is not the source of truth; the artifacts are. A compaction is a checkpoint, not a reset.
+
+### 2. Dual-gate tool allowlist
+
+Every planner tool call passes through two independent gates composed in sequence:
+
+- **Policy gate** (`tool-policy.ts`): For the current `{stage, step}`, returns the exact set of allowed wrapper tools. Anything not on the list is blocked before any logic runs.
+- **Behavior gate** (`stage-behavior.ts`): For each step, declares `expectedTools[]`. The model cannot call a tool that the step's contract does not expect, even if the policy gate would allow it.
+
+Both gates must pass. The model cannot call `planner_task_upsert` during `doubt_review`, `planner_doubt_review` during `planning`, or anything outside the current step's declared scope.
+
+### 3. Exit-condition validation
+
+`planner_finish_step` is gated by `validateWorkflowExit`. The model cannot leave a step until its exit conditions are satisfied:
+
+- `discovery/scan_project_structure`: `discovery.md` must contain `## Verification Protocol` with at least one command.
+- `discovery/write_questions`: questions must be explicitly resolved.
+- `finalize/doubt_review`: every Verification Protocol command must appear in `verificationEvidence`; no finding may have `status: possible` without a `proofLevel`.
+- `done/handle_change_request`: `decisions.md`, `plan.md`, and `discovery.md` must each contain required sections.
+- `done/await_user_acceptance`: `planner_finish_step` is blocked entirely unless targeting `handle_change_request` — the model must instruct the user to run `/planner-finish`.
+
+### 4. Echo expected vs received
+
+Every planner tool that writes structured Markdown returns two blocks in its result:
```
-user request
- → normalize and approve goal
- → scan AGENTS.md contracts, inspect relevant files
- → persist discovery.md
- → write plan.md, split into tasks
- → for each task: write tests → implement → update AGENTS.md contracts → refactor → verify → merge
- → verify integrated plan branch
- → doubt_review: prove or disprove possible errors
- → ask user to accept
- → export output/ branch with full task history
+## Expected shape (canonical schema)
+
+
+## What you submitted (saved to disk)
+
```
-After compaction, the model calls `planner_status`, reloads from persisted JSON/Markdown artifacts, and continues. Chat is not the source of truth — artifacts are.
+This applies to all 12 strict-structure tools: `planner_goal_submit`, `planner_questions_submit`, `planner_plan_submit`, `planner_discovery_submit`, `planner_tdd_submit`, `planner_summary_submit`, `planner_task_upsert`, `planner_refactor_review`, `planner_doubt_review`, `planner_contract_upsert`, `planner_skill_create`, `planner_skill_update`.
+
+The model can self-correct by comparing what it wrote against the expected schema without reading the file again.
+
+### 5. Verification Protocol enforcement
+
+During `discovery/scan_project_structure`, the model writes the project's exact verification commands (test, lint, build, format) into `## Verification Protocol` in `discovery.md`. During `finalize/doubt_review`, the parser extracts those commands and requires each one to appear in `verificationEvidence`. If the model skips a command or adds a phantom one, the submission is blocked.
+
+`planner_discovery_submit` is the single writer of this section — any protocol content in the `body` argument is stripped and replaced by the canonical section built from the `verificationProtocol[]` argument. Parser and writer share the same invariant.
+
+### 6. Compact checkpoints
+
+Compact boundaries (`compact_discovery`, `compact_planning`, `compact_task`, `compact_before_doubt`, `compact_finalize`) are baked into the state machine. The model calls `planner_request_compact`, Pi compacts the context, and the model calls `planner_complete_compact` to resume. The gate blocks all other tools at compact boundaries until the compact/resume cycle completes.
+
+### 7. AGENTS.md contracts
+
+The planner treats `AGENTS.md` files as local architecture contracts — model-facing memory routed by topic rather than file path. Before planning, the model calls `planner_contract_route` to fetch only the contracts relevant to the current goal. After each task, `planner_contract_check` determines whether the implementation changed any architectural surface and updates contracts if needed.
+
+Contracts are written only through `planner_contract_upsert`. The planner tracks touched files in `state.json` and keeps baselines so `/planner-finish` can offer to remove or restore them.
+
+### 8. Skill library
+
+When a task reveals a non-obvious technique (a workaround, a tricky API pattern, a testing approach), the model can write it to the skill library via `planner_skill_create`. Before future tasks, `planner_skill_create` is allowed only at `capture_skill` — the step explicitly reserved for this. Skills are searchable via `/planner-skills`.
+
+### 9. Git isolation
+
+Each plan gets a dedicated worktree on a `plan/` branch. Task work happens on short-lived `task//` branches that are removed after merge. Raw `git` is blocked while a plan is active — the model uses `planner_git_*` wrappers. This keeps the plan's history clean and prevents the model from accidentally touching the base branch.
+
+### 10. Recovery stage
+
+If the model calls `planner_report_stuck`, or if an internal invariant fails, the plan sets `broken=true` and the tool allowlist collapses to the recovery set. The model then walks through `recovery/*` steps to diagnose and repair before resuming. The user is always consulted before any destructive recovery action.
---
@@ -88,14 +296,6 @@ See [SETTINGS.md](SETTINGS.md) for the full reference — worktree, compact, idl
---
-## AGENTS.md Contracts
-
-The planner treats `AGENTS.md` files as local architecture contracts — durable model-facing memory that routes the model through the project without reading irrelevant code first. Inspired by [DOX](https://github.com/agent0ai/dox).
-
-Contracts are written only through `planner_contract_upsert`. The planner tracks touched files in `state.json` and keeps baselines so `/planner-finish` can remove or restore them.
-
----
-
## Development
```bash
From 6b6b142f035a5935e805e6fc86811aba47d8f85f Mon Sep 17 00:00:00 2001
From: Mansur Azatbek
Date: Tue, 16 Jun 2026 00:05:19 +0500
Subject: [PATCH 3/3] docs: restore DOX attribution in AGENTS.md contracts
section
Co-Authored-By: Claude Sonnet 4.6
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 4940ed2..61dff59 100644
--- a/README.md
+++ b/README.md
@@ -243,7 +243,7 @@ Compact boundaries (`compact_discovery`, `compact_planning`, `compact_task`, `co
### 7. AGENTS.md contracts
-The planner treats `AGENTS.md` files as local architecture contracts — model-facing memory routed by topic rather than file path. Before planning, the model calls `planner_contract_route` to fetch only the contracts relevant to the current goal. After each task, `planner_contract_check` determines whether the implementation changed any architectural surface and updates contracts if needed.
+The planner treats `AGENTS.md` files as local architecture contracts — model-facing memory routed by topic rather than file path. Inspired by [DOX](https://github.com/agent0ai/dox). Before planning, the model calls `planner_contract_route` to fetch only the contracts relevant to the current goal. After each task, `planner_contract_check` determines whether the implementation changed any architectural surface and updates contracts if needed.
Contracts are written only through `planner_contract_upsert`. The planner tracks touched files in `state.json` and keeps baselines so `/planner-finish` can offer to remove or restore them.