diff --git a/src/runtime/AGENTS.md b/src/runtime/AGENTS.md index a28e8ec..7c47980 100644 --- a/src/runtime/AGENTS.md +++ b/src/runtime/AGENTS.md @@ -25,7 +25,7 @@ Runtime domain for planner stages, model-facing status, tool wrappers, timers, r ### Do Not Touch Unless - Do not change lifecycle transitions without updating `state-machine.ts`, `state-transition.ts`, `workflow-tools.ts`, tests, and instructions. -- Do not add a new planner tool without updating guard policy, tool visibility expectations, status/instructions, and tests. +- Do not add or re-scope a planner tool without updating BOTH allowlists that gate it: the guard policy (`guard/tool-policy.ts` `STEP_ALLOWED_TOOLS`) AND the stage behavior (`stage-behavior.ts` `expectedTools`). Both gates must pass for a tool to be usable at a step; if the guard allows a tool the behavior gate omits, the model is blocked at runtime (a deadlock when no fallback exists). The invariant is enforced by `tool-gating-invariant.test.ts`. Also update tool visibility expectations, status/instructions, and tests. ### Domain Details - `status.ts` is the primary prompt surface for local models; it reads `PlanStateRecord` from the orchestrator and formats step rules, contract summaries, and guidance lines. diff --git a/src/runtime/artifact-tools.gating.test.ts b/src/runtime/artifact-tools.gating.test.ts new file mode 100644 index 0000000..b64b1cc --- /dev/null +++ b/src/runtime/artifact-tools.gating.test.ts @@ -0,0 +1,72 @@ +import { describe, expect, it } from "vitest"; +import { + getAllowedPlannerWrapperTools, + type PlannerWrapperTool, +} from "../guard/tool-policy"; +import type { PlannerStage, PlannerStep } from "../storage/schema"; +import { + checkPlannerStageBehaviorWrapperTool, + getPlannerStageStepBehavior, +} from "./stage-behavior"; + +// Each structured fill-tool must be permitted by BOTH allowlists at every step +// where it is used: the guard policy (STEP_ALLOWED_TOOLS) and the stage-behavior +// expectedTools gate. Missing either one deadlocks the model — and for tdd.md, +// where built-in edit/write is blocked, there is no fallback at all. +const FILL_TOOL_STEPS: Array<{ + stage: PlannerStage; + step: PlannerStep; + tool: PlannerWrapperTool; +}> = [ + { + stage: "discovery", + step: "scan_project_structure", + tool: "planner_discovery_submit", + }, + { stage: "planning", step: "draft_plan", tool: "planner_plan_submit" }, + { stage: "execution", step: "write_tdd_plan", tool: "planner_tdd_submit" }, + { stage: "execution", step: "write_tests", tool: "planner_tdd_submit" }, + { stage: "execution", step: "run_failing_tests", tool: "planner_tdd_submit" }, + { stage: "execution", step: "implement_task", tool: "planner_tdd_submit" }, + { stage: "execution", step: "contract_check", tool: "planner_tdd_submit" }, + { stage: "execution", step: "refactor_task", tool: "planner_tdd_submit" }, + { stage: "execution", step: "run_final_tests", tool: "planner_tdd_submit" }, + { + stage: "execution", + step: "merge_task_to_plan", + tool: "planner_tdd_submit", + }, + { + stage: "finalize", + step: "write_final_summary", + tool: "planner_summary_submit", + }, + { stage: "done", step: "handle_change_request", tool: "planner_plan_submit" }, + { + stage: "done", + step: "handle_change_request", + tool: "planner_discovery_submit", + }, +]; + +describe("artifact fill-tool gating", () => { + for (const { stage, step, tool } of FILL_TOOL_STEPS) { + it(`${tool} is allowed by both gates at ${stage}/${step}`, () => { + const policyAllowed = getAllowedPlannerWrapperTools({ + stage, + step, + broken: false, + requiresUserDecision: false, + requiresCompact: false, + debugArtifactsDir: null, + }); + expect(policyAllowed).toContain(tool); + + const behaviorDecision = checkPlannerStageBehaviorWrapperTool({ + behavior: getPlannerStageStepBehavior({ stage, step }), + tool, + }); + expect(behaviorDecision.allow).toBe(true); + }); + } +}); diff --git a/src/runtime/stage-behavior.ts b/src/runtime/stage-behavior.ts index bd8bffd..c1b325a 100644 --- a/src/runtime/stage-behavior.ts +++ b/src/runtime/stage-behavior.ts @@ -207,6 +207,7 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredGates: ["plan_worktree_exists"], expectedTools: [ "planner_status", + "planner_discovery_submit", "planner_contract_scan", "planner_contract_route", "planner_contract_read", @@ -256,12 +257,14 @@ export const PLANNER_STAGE_BEHAVIOR = { "draft_plan", ["plan.md"], ["discovery.md"], + ["planner_plan_submit", "planner_contract_route", "planner_contract_read"], ), split_tasks: artifactBehavior( "planning", "split_tasks", ["plan.md"], ["plan.md"], + ["planner_contract_route", "planner_contract_read"], ), write_task_files: behavior("planning", "write_task_files", { projectAccess: "planner_artifacts", @@ -314,6 +317,7 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredGates: ["active_task_selected", "task_branch_ready"], expectedTools: [ "planner_status", + "planner_tdd_submit", "planner_report_stuck", "planner_skill_create", "planner_skill_update", @@ -332,6 +336,7 @@ export const PLANNER_STAGE_BEHAVIOR = { updatedArtifacts: ["tdd.md"], requiredGates: ["tdd_plan_written"], expectedTools: [ + "planner_tdd_submit", "planner_git_inspect", "planner_git_commit", "planner_report_stuck", @@ -353,6 +358,7 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredGates: ["tests_written_first"], expectedTools: [ "planner_status", + "planner_tdd_submit", "planner_report_stuck", "planner_skill_create", "planner_skill_update", @@ -371,6 +377,7 @@ export const PLANNER_STAGE_BEHAVIOR = { updatedArtifacts: ["tdd.md"], requiredGates: ["failing_signal_recorded"], expectedTools: [ + "planner_tdd_submit", "planner_git_commit", "planner_contract_route", "planner_contract_read", @@ -392,6 +399,7 @@ export const PLANNER_STAGE_BEHAVIOR = { updatedArtifacts: ["tdd.md", "contracts/manifest.json"], requiredGates: ["task_implemented"], expectedTools: [ + "planner_tdd_submit", "planner_contract_check", "planner_contract_upsert", "planner_contract_route", @@ -411,6 +419,7 @@ export const PLANNER_STAGE_BEHAVIOR = { updatedArtifacts: ["refactor.md"], requiredGates: ["task_implemented", "contract_check_completed"], expectedTools: [ + "planner_tdd_submit", "planner_refactor_review", "planner_git_commit", "planner_contract_check", @@ -435,6 +444,7 @@ export const PLANNER_STAGE_BEHAVIOR = { updatedArtifacts: ["refactor.md"], requiredGates: ["refactor_checked"], expectedTools: [ + "planner_tdd_submit", "planner_git_commit", "planner_report_stuck", "planner_skill_create", @@ -468,7 +478,7 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredArtifacts: ["refactor.md"], updatedArtifacts: ["state.json"], requiredGates: ["final_tests_passed"], - expectedTools: ["planner_git_merge_task_to_plan"], + expectedTools: ["planner_tdd_submit", "planner_git_merge_task_to_plan"], commitPolicy: "forbidden", compactPolicy: "not_allowed", }), @@ -494,7 +504,11 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredArtifacts: ["plan.md"], updatedArtifacts: ["verify.md"], requiredGates: ["next_task_decided"], - expectedTools: ["planner_git_inspect"], + expectedTools: [ + "planner_git_inspect", + "planner_contract_route", + "planner_contract_read", + ], commitPolicy: "forbidden", compactPolicy: "not_allowed", }), @@ -532,10 +546,13 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredGates: ["plan_branch_verified"], expectedTools: [ "planner_status", + "planner_summary_submit", "planner_skill_create", "planner_skill_update", "planner_contract_check", "planner_contract_upsert", + "planner_contract_route", + "planner_contract_read", ], commitPolicy: "forbidden", compactPolicy: "not_allowed", @@ -558,6 +575,7 @@ export const PLANNER_STAGE_BEHAVIOR = { "planner_status", "planner_skill_create", "planner_skill_update", + "planner_contract_decide", ], commitPolicy: "forbidden", compactPolicy: "not_allowed", @@ -568,7 +586,7 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredArtifacts: ["final_summary.md"], updatedArtifacts: ["decisions.md"], requiredGates: ["user_acceptance_required"], - expectedTools: ["planner_status"], + expectedTools: ["planner_status", "planner_contract_decide"], commitPolicy: "forbidden", compactPolicy: "not_allowed", }), @@ -578,7 +596,11 @@ export const PLANNER_STAGE_BEHAVIOR = { requiredArtifacts: ["decisions.md"], updatedArtifacts: ["plan.md", "decisions.md", "discovery.md", "state.json"], requiredGates: ["user_acceptance_required"], - expectedTools: ["planner_finish_step"], + expectedTools: [ + "planner_finish_step", + "planner_plan_submit", + "planner_discovery_submit", + ], commitPolicy: "forbidden", compactPolicy: "not_allowed", }), @@ -738,6 +760,7 @@ function artifactBehavior( step: PlannerStep, updatedArtifacts: readonly PlannerBehaviorArtifact[], requiredArtifacts: readonly PlannerBehaviorArtifact[], + extraTools: readonly string[] = [], ): PlannerStageStepBehavior { return behavior(stage, step, { projectAccess: "planner_artifacts", @@ -745,7 +768,7 @@ function artifactBehavior( requiredArtifacts, updatedArtifacts, requiredGates: [], - expectedTools: ["planner_status"], + expectedTools: ["planner_status", ...extraTools], commitPolicy: "forbidden", compactPolicy: "not_allowed", }); diff --git a/src/runtime/tool-gating-invariant.test.ts b/src/runtime/tool-gating-invariant.test.ts new file mode 100644 index 0000000..180a35f --- /dev/null +++ b/src/runtime/tool-gating-invariant.test.ts @@ -0,0 +1,54 @@ +import { describe, expect, it } from "vitest"; +import { + getAllowedPlannerWrapperTools, + type PlannerWrapperTool, +} from "../guard/tool-policy"; +import { + PLANNER_STAGE_STEPS, + type PlannerStage, + type PlannerStep, +} from "../storage/schema"; +import { + checkPlannerStageBehaviorWrapperTool, + getPlannerStageStepBehavior, +} from "./stage-behavior"; + +// Invariant: a planner tool is usable at a step only if it passes BOTH gates — +// the guard policy (STEP_ALLOWED_TOOLS) and the stage-behavior expectedTools +// gate. So everything the guard permits MUST also pass the behavior gate; +// otherwise the guard advertises a tool the model can never actually call +// (a deadlock when no fallback exists, as happened with the artifact fill-tools). +// +// This guards against the two allowlists drifting apart for any tool — debug +// wrappers, git wrappers, contract tools, skills, or future additions. +describe("planner tool gating invariant", () => { + it("every guard-allowed wrapper passes the behavior gate at every step", () => { + const steps = Object.entries(PLANNER_STAGE_STEPS) as Array< + [PlannerStage, readonly PlannerStep[]] + >; + const violations: string[] = []; + + for (const [stage, stepList] of steps) { + for (const step of stepList) { + // debugArtifactsDir set so debug wrappers are included in the guard + // set and therefore checked against the behavior gate too. + const allowed = getAllowedPlannerWrapperTools({ + stage, + step, + broken: false, + requiresUserDecision: false, + requiresCompact: false, + debugArtifactsDir: "/tmp/planner-debug", + }); + const behavior = getPlannerStageStepBehavior({ stage, step }); + for (const tool of allowed as readonly PlannerWrapperTool[]) { + if (!checkPlannerStageBehaviorWrapperTool({ behavior, tool }).allow) { + violations.push(`${stage}/${step}: ${tool}`); + } + } + } + } + + expect(violations, violations.join("\n")).toEqual([]); + }); +});