From 2dc76f94eaff0f1232eff71f664cb746f0abe1c7 Mon Sep 17 00:00:00 2001 From: Mansur Azatbek Date: Mon, 15 Jun 2026 18:51:54 +0500 Subject: [PATCH 1/2] fix: register fill-tools in stage-behavior expectedTools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Task 3 added the artifact fill-tools to the guard policy (STEP_ALLOWED_TOOLS) but not to the parallel expectedTools allowlist in stage-behavior.ts, so checkPlannerStageBehaviorWrapperTool blocked them with "Stage behavior does not list as expected at /". For discovery/plan/summary the model could fall back to a built-in write, but for tdd.md — where edit/write is blocked — it was a hard deadlock. Add each fill-tool to expectedTools at exactly the steps where the guard policy already allows it: planner_discovery_submit (scan_project_structure, handle_change_request), planner_plan_submit (draft_plan, handle_change_request), planner_tdd_submit (every execution step that writes tdd.md), planner_summary_submit (write_final_summary). artifactBehavior gains an optional extraTools param for draft_plan. Add a regression test asserting every fill-tool is permitted by BOTH the guard policy and the stage-behavior gate at each of its steps. Co-Authored-By: Claude Opus 4.8 --- src/runtime/artifact-tools.gating.test.ts | 72 +++++++++++++++++++++++ src/runtime/stage-behavior.ts | 21 ++++++- 2 files changed, 90 insertions(+), 3 deletions(-) create mode 100644 src/runtime/artifact-tools.gating.test.ts diff --git a/src/runtime/artifact-tools.gating.test.ts b/src/runtime/artifact-tools.gating.test.ts new file mode 100644 index 0000000..b64b1cc --- /dev/null +++ b/src/runtime/artifact-tools.gating.test.ts @@ -0,0 +1,72 @@ +import { describe, expect, it } from "vitest"; +import { + getAllowedPlannerWrapperTools, + type PlannerWrapperTool, +} from "../guard/tool-policy"; +import type { PlannerStage, PlannerStep } from "../storage/schema"; +import { + checkPlannerStageBehaviorWrapperTool, + getPlannerStageStepBehavior, +} from "./stage-behavior"; + +// Each structured fill-tool must be permitted by BOTH allowlists at every step +// where it is used: the guard policy (STEP_ALLOWED_TOOLS) and the stage-behavior +// expectedTools gate. Missing either one deadlocks the model — and for tdd.md, +// where built-in edit/write is blocked, there is no fallback at all. +const FILL_TOOL_STEPS: Array<{ + stage: PlannerStage; + step: PlannerStep; + tool: PlannerWrapperTool; +}> = [ + { + stage: "discovery", + step: "scan_project_structure", + tool: "planner_discovery_submit", + }, + { stage: "planning", step: "draft_plan", tool: "planner_plan_submit" }, + { stage: "execution", step: "write_tdd_plan", tool: "planner_tdd_submit" }, + { stage: "execution", step: "write_tests", tool: "planner_tdd_submit" }, + { stage: "execution", step: "run_failing_tests", tool: "planner_tdd_submit" }, + { stage: "execution", step: "implement_task", tool: "planner_tdd_submit" }, + { stage: "execution", step: "contract_check", tool: "planner_tdd_submit" }, + { stage: "execution", step: "refactor_task", tool: "planner_tdd_submit" }, + { stage: "execution", step: "run_final_tests", tool: "planner_tdd_submit" }, + { + stage: "execution", + step: "merge_task_to_plan", + tool: "planner_tdd_submit", + }, + { + stage: "finalize", + step: "write_final_summary", + tool: "planner_summary_submit", + }, + { stage: "done", step: "handle_change_request", tool: "planner_plan_submit" }, + { + stage: "done", + step: "handle_change_request", + tool: "planner_discovery_submit", + }, +]; + +describe("artifact fill-tool gating", () => { + for (const { stage, step, tool } of FILL_TOOL_STEPS) { + it(`${tool} is allowed by both gates at ${stage}/${step}`, () => { + const policyAllowed = getAllowedPlannerWrapperTools({ + stage, + step, + broken: false, + requiresUserDecision: false, + requiresCompact: false, + debugArtifactsDir: null, + }); + expect(policyAllowed).toContain(tool); + + const behaviorDecision = checkPlannerStageBehaviorWrapperTool({ + behavior: getPlannerStageStepBehavior({ stage, step }), + tool, + }); + expect(behaviorDecision.allow).toBe(true); + }); + } +}); diff --git a/src/runtime/stage-behavior.ts b/src/runtime/stage-behavior.ts index bd8bffd..976f0e8 100644 --- a/src/runtime/stage-behavior.ts +++ b/src/runtime/stage-behavior.ts @@ -207,6 +207,7 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredGates: ["plan_worktree_exists"], expectedTools: [ "planner_status", + "planner_discovery_submit", "planner_contract_scan", "planner_contract_route", "planner_contract_read", @@ -256,6 +257,7 @@ export const PLANNER_STAGE_BEHAVIOR = { "draft_plan", ["plan.md"], ["discovery.md"], + ["planner_plan_submit"], ), split_tasks: artifactBehavior( "planning", @@ -314,6 +316,7 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredGates: ["active_task_selected", "task_branch_ready"], expectedTools: [ "planner_status", + "planner_tdd_submit", "planner_report_stuck", "planner_skill_create", "planner_skill_update", @@ -332,6 +335,7 @@ export const PLANNER_STAGE_BEHAVIOR = { updatedArtifacts: ["tdd.md"], requiredGates: ["tdd_plan_written"], expectedTools: [ + "planner_tdd_submit", "planner_git_inspect", "planner_git_commit", "planner_report_stuck", @@ -353,6 +357,7 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredGates: ["tests_written_first"], expectedTools: [ "planner_status", + "planner_tdd_submit", "planner_report_stuck", "planner_skill_create", "planner_skill_update", @@ -371,6 +376,7 @@ export const PLANNER_STAGE_BEHAVIOR = { updatedArtifacts: ["tdd.md"], requiredGates: ["failing_signal_recorded"], expectedTools: [ + "planner_tdd_submit", "planner_git_commit", "planner_contract_route", "planner_contract_read", @@ -392,6 +398,7 @@ export const PLANNER_STAGE_BEHAVIOR = { updatedArtifacts: ["tdd.md", "contracts/manifest.json"], requiredGates: ["task_implemented"], expectedTools: [ + "planner_tdd_submit", "planner_contract_check", "planner_contract_upsert", "planner_contract_route", @@ -411,6 +418,7 @@ export const PLANNER_STAGE_BEHAVIOR = { updatedArtifacts: ["refactor.md"], requiredGates: ["task_implemented", "contract_check_completed"], expectedTools: [ + "planner_tdd_submit", "planner_refactor_review", "planner_git_commit", "planner_contract_check", @@ -435,6 +443,7 @@ export const PLANNER_STAGE_BEHAVIOR = { updatedArtifacts: ["refactor.md"], requiredGates: ["refactor_checked"], expectedTools: [ + "planner_tdd_submit", "planner_git_commit", "planner_report_stuck", "planner_skill_create", @@ -468,7 +477,7 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredArtifacts: ["refactor.md"], updatedArtifacts: ["state.json"], requiredGates: ["final_tests_passed"], - expectedTools: ["planner_git_merge_task_to_plan"], + expectedTools: ["planner_tdd_submit", "planner_git_merge_task_to_plan"], commitPolicy: "forbidden", compactPolicy: "not_allowed", }), @@ -532,6 +541,7 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredGates: ["plan_branch_verified"], expectedTools: [ "planner_status", + "planner_summary_submit", "planner_skill_create", "planner_skill_update", "planner_contract_check", @@ -578,7 +588,11 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredArtifacts: ["decisions.md"], updatedArtifacts: ["plan.md", "decisions.md", "discovery.md", "state.json"], requiredGates: ["user_acceptance_required"], - expectedTools: ["planner_finish_step"], + expectedTools: [ + "planner_finish_step", + "planner_plan_submit", + "planner_discovery_submit", + ], commitPolicy: "forbidden", compactPolicy: "not_allowed", }), @@ -738,6 +752,7 @@ function artifactBehavior( step: PlannerStep, updatedArtifacts: readonly PlannerBehaviorArtifact[], requiredArtifacts: readonly PlannerBehaviorArtifact[], + extraTools: readonly string[] = [], ): PlannerStageStepBehavior { return behavior(stage, step, { projectAccess: "planner_artifacts", @@ -745,7 +760,7 @@ function artifactBehavior( requiredArtifacts, updatedArtifacts, requiredGates: [], - expectedTools: ["planner_status"], + expectedTools: ["planner_status", ...extraTools], commitPolicy: "forbidden", compactPolicy: "not_allowed", }); From aa09448a13ec6f9b458ec1fd05731a5b13f46926 Mon Sep 17 00:00:00 2001 From: Mansur Azatbek Date: Mon, 15 Jun 2026 18:56:29 +0500 Subject: [PATCH 2/2] fix: sync contract wrappers into stage-behavior expectedTools + invariant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A general gating invariant (new tool-gating-invariant.test.ts) revealed that the guard policy and stage-behavior expectedTools had drifted for contract helpers too — not only the new fill-tools. The guard allowed these at steps where the behavior gate blocked them, so the model hit "Stage behavior does not list " if it tried: - planning/draft_plan, planning/split_tasks: planner_contract_route/read - finalize/verify_plan_branch, finalize/write_final_summary: planner_contract_route/read - done/present_result, done/await_user_acceptance: planner_contract_decide These are read-only/decision helpers (non-deadlocking, the model can proceed without them) but the instructions explicitly tell it to route and read contracts there, so the behavior gate was the side that was wrong. Add the missing tools to expectedTools. Debug wrappers were already consistent across both gates. The invariant test now asserts that at every step, every guard-allowed wrapper also passes the behavior gate. Document the dual-allowlist requirement in src/runtime/AGENTS.md so future tool changes update both. Co-Authored-By: Claude Opus 4.8 --- src/runtime/AGENTS.md | 2 +- src/runtime/stage-behavior.ts | 14 ++++-- src/runtime/tool-gating-invariant.test.ts | 54 +++++++++++++++++++++++ 3 files changed, 66 insertions(+), 4 deletions(-) create mode 100644 src/runtime/tool-gating-invariant.test.ts diff --git a/src/runtime/AGENTS.md b/src/runtime/AGENTS.md index a28e8ec..7c47980 100644 --- a/src/runtime/AGENTS.md +++ b/src/runtime/AGENTS.md @@ -25,7 +25,7 @@ Runtime domain for planner stages, model-facing status, tool wrappers, timers, r ### Do Not Touch Unless - Do not change lifecycle transitions without updating `state-machine.ts`, `state-transition.ts`, `workflow-tools.ts`, tests, and instructions. -- Do not add a new planner tool without updating guard policy, tool visibility expectations, status/instructions, and tests. +- Do not add or re-scope a planner tool without updating BOTH allowlists that gate it: the guard policy (`guard/tool-policy.ts` `STEP_ALLOWED_TOOLS`) AND the stage behavior (`stage-behavior.ts` `expectedTools`). Both gates must pass for a tool to be usable at a step; if the guard allows a tool the behavior gate omits, the model is blocked at runtime (a deadlock when no fallback exists). The invariant is enforced by `tool-gating-invariant.test.ts`. Also update tool visibility expectations, status/instructions, and tests. ### Domain Details - `status.ts` is the primary prompt surface for local models; it reads `PlanStateRecord` from the orchestrator and formats step rules, contract summaries, and guidance lines. diff --git a/src/runtime/stage-behavior.ts b/src/runtime/stage-behavior.ts index 976f0e8..c1b325a 100644 --- a/src/runtime/stage-behavior.ts +++ b/src/runtime/stage-behavior.ts @@ -257,13 +257,14 @@ export const PLANNER_STAGE_BEHAVIOR = { "draft_plan", ["plan.md"], ["discovery.md"], - ["planner_plan_submit"], + ["planner_plan_submit", "planner_contract_route", "planner_contract_read"], ), split_tasks: artifactBehavior( "planning", "split_tasks", ["plan.md"], ["plan.md"], + ["planner_contract_route", "planner_contract_read"], ), write_task_files: behavior("planning", "write_task_files", { projectAccess: "planner_artifacts", @@ -503,7 +504,11 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredArtifacts: ["plan.md"], updatedArtifacts: ["verify.md"], requiredGates: ["next_task_decided"], - expectedTools: ["planner_git_inspect"], + expectedTools: [ + "planner_git_inspect", + "planner_contract_route", + "planner_contract_read", + ], commitPolicy: "forbidden", compactPolicy: "not_allowed", }), @@ -546,6 +551,8 @@ export const PLANNER_STAGE_BEHAVIOR = { "planner_skill_update", "planner_contract_check", "planner_contract_upsert", + "planner_contract_route", + "planner_contract_read", ], commitPolicy: "forbidden", compactPolicy: "not_allowed", @@ -568,6 +575,7 @@ export const PLANNER_STAGE_BEHAVIOR = { "planner_status", "planner_skill_create", "planner_skill_update", + "planner_contract_decide", ], commitPolicy: "forbidden", compactPolicy: "not_allowed", @@ -578,7 +586,7 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredArtifacts: ["final_summary.md"], updatedArtifacts: ["decisions.md"], requiredGates: ["user_acceptance_required"], - expectedTools: ["planner_status"], + expectedTools: ["planner_status", "planner_contract_decide"], commitPolicy: "forbidden", compactPolicy: "not_allowed", }), diff --git a/src/runtime/tool-gating-invariant.test.ts b/src/runtime/tool-gating-invariant.test.ts new file mode 100644 index 0000000..180a35f --- /dev/null +++ b/src/runtime/tool-gating-invariant.test.ts @@ -0,0 +1,54 @@ +import { describe, expect, it } from "vitest"; +import { + getAllowedPlannerWrapperTools, + type PlannerWrapperTool, +} from "../guard/tool-policy"; +import { + PLANNER_STAGE_STEPS, + type PlannerStage, + type PlannerStep, +} from "../storage/schema"; +import { + checkPlannerStageBehaviorWrapperTool, + getPlannerStageStepBehavior, +} from "./stage-behavior"; + +// Invariant: a planner tool is usable at a step only if it passes BOTH gates — +// the guard policy (STEP_ALLOWED_TOOLS) and the stage-behavior expectedTools +// gate. So everything the guard permits MUST also pass the behavior gate; +// otherwise the guard advertises a tool the model can never actually call +// (a deadlock when no fallback exists, as happened with the artifact fill-tools). +// +// This guards against the two allowlists drifting apart for any tool — debug +// wrappers, git wrappers, contract tools, skills, or future additions. +describe("planner tool gating invariant", () => { + it("every guard-allowed wrapper passes the behavior gate at every step", () => { + const steps = Object.entries(PLANNER_STAGE_STEPS) as Array< + [PlannerStage, readonly PlannerStep[]] + >; + const violations: string[] = []; + + for (const [stage, stepList] of steps) { + for (const step of stepList) { + // debugArtifactsDir set so debug wrappers are included in the guard + // set and therefore checked against the behavior gate too. + const allowed = getAllowedPlannerWrapperTools({ + stage, + step, + broken: false, + requiresUserDecision: false, + requiresCompact: false, + debugArtifactsDir: "/tmp/planner-debug", + }); + const behavior = getPlannerStageStepBehavior({ stage, step }); + for (const tool of allowed as readonly PlannerWrapperTool[]) { + if (!checkPlannerStageBehaviorWrapperTool({ behavior, tool }).allow) { + violations.push(`${stage}/${step}: ${tool}`); + } + } + } + } + + expect(violations, violations.join("\n")).toEqual([]); + }); +});