Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/runtime/AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Runtime domain for planner stages, model-facing status, tool wrappers, timers, r

### Do Not Touch Unless
- Do not change lifecycle transitions without updating `state-machine.ts`, `state-transition.ts`, `workflow-tools.ts`, tests, and instructions.
- Do not add a new planner tool without updating guard policy, tool visibility expectations, status/instructions, and tests.
- Do not add or re-scope a planner tool without updating BOTH allowlists that gate it: the guard policy (`guard/tool-policy.ts` `STEP_ALLOWED_TOOLS`) AND the stage behavior (`stage-behavior.ts` `expectedTools`). Both gates must pass for a tool to be usable at a step; if the guard allows a tool the behavior gate omits, the model is blocked at runtime (a deadlock when no fallback exists). The invariant is enforced by `tool-gating-invariant.test.ts`. Also update tool visibility expectations, status/instructions, and tests.

### Domain Details
- `status.ts` is the primary prompt surface for local models; it reads `PlanStateRecord` from the orchestrator and formats step rules, contract summaries, and guidance lines.
Expand Down
72 changes: 72 additions & 0 deletions src/runtime/artifact-tools.gating.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import { describe, expect, it } from "vitest";
import {
getAllowedPlannerWrapperTools,
type PlannerWrapperTool,
} from "../guard/tool-policy";
import type { PlannerStage, PlannerStep } from "../storage/schema";
import {
checkPlannerStageBehaviorWrapperTool,
getPlannerStageStepBehavior,
} from "./stage-behavior";

// Each structured fill-tool must be permitted by BOTH allowlists at every step
// where it is used: the guard policy (STEP_ALLOWED_TOOLS) and the stage-behavior
// expectedTools gate. Missing either one deadlocks the model — and for tdd.md,
// where built-in edit/write is blocked, there is no fallback at all.
const FILL_TOOL_STEPS: Array<{
stage: PlannerStage;
step: PlannerStep;
tool: PlannerWrapperTool;
}> = [
{
stage: "discovery",
step: "scan_project_structure",
tool: "planner_discovery_submit",
},
{ stage: "planning", step: "draft_plan", tool: "planner_plan_submit" },
{ stage: "execution", step: "write_tdd_plan", tool: "planner_tdd_submit" },
{ stage: "execution", step: "write_tests", tool: "planner_tdd_submit" },
{ stage: "execution", step: "run_failing_tests", tool: "planner_tdd_submit" },
{ stage: "execution", step: "implement_task", tool: "planner_tdd_submit" },
{ stage: "execution", step: "contract_check", tool: "planner_tdd_submit" },
{ stage: "execution", step: "refactor_task", tool: "planner_tdd_submit" },
{ stage: "execution", step: "run_final_tests", tool: "planner_tdd_submit" },
{
stage: "execution",
step: "merge_task_to_plan",
tool: "planner_tdd_submit",
},
{
stage: "finalize",
step: "write_final_summary",
tool: "planner_summary_submit",
},
{ stage: "done", step: "handle_change_request", tool: "planner_plan_submit" },
{
stage: "done",
step: "handle_change_request",
tool: "planner_discovery_submit",
},
];

describe("artifact fill-tool gating", () => {
for (const { stage, step, tool } of FILL_TOOL_STEPS) {
it(`${tool} is allowed by both gates at ${stage}/${step}`, () => {
const policyAllowed = getAllowedPlannerWrapperTools({
stage,
step,
broken: false,
requiresUserDecision: false,
requiresCompact: false,
debugArtifactsDir: null,
});
expect(policyAllowed).toContain(tool);

const behaviorDecision = checkPlannerStageBehaviorWrapperTool({
behavior: getPlannerStageStepBehavior({ stage, step }),
tool,
});
expect(behaviorDecision.allow).toBe(true);
});
}
});
33 changes: 28 additions & 5 deletions src/runtime/stage-behavior.ts
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ export const PLANNER_STAGE_BEHAVIOR = {
requiredGates: ["plan_worktree_exists"],
expectedTools: [
"planner_status",
"planner_discovery_submit",
"planner_contract_scan",
"planner_contract_route",
"planner_contract_read",
Expand Down Expand Up @@ -256,12 +257,14 @@ export const PLANNER_STAGE_BEHAVIOR = {
"draft_plan",
["plan.md"],
["discovery.md"],
["planner_plan_submit", "planner_contract_route", "planner_contract_read"],
),
split_tasks: artifactBehavior(
"planning",
"split_tasks",
["plan.md"],
["plan.md"],
["planner_contract_route", "planner_contract_read"],
),
write_task_files: behavior("planning", "write_task_files", {
projectAccess: "planner_artifacts",
Expand Down Expand Up @@ -314,6 +317,7 @@ export const PLANNER_STAGE_BEHAVIOR = {
requiredGates: ["active_task_selected", "task_branch_ready"],
expectedTools: [
"planner_status",
"planner_tdd_submit",
"planner_report_stuck",
"planner_skill_create",
"planner_skill_update",
Expand All @@ -332,6 +336,7 @@ export const PLANNER_STAGE_BEHAVIOR = {
updatedArtifacts: ["tdd.md"],
requiredGates: ["tdd_plan_written"],
expectedTools: [
"planner_tdd_submit",
"planner_git_inspect",
"planner_git_commit",
"planner_report_stuck",
Expand All @@ -353,6 +358,7 @@ export const PLANNER_STAGE_BEHAVIOR = {
requiredGates: ["tests_written_first"],
expectedTools: [
"planner_status",
"planner_tdd_submit",
"planner_report_stuck",
"planner_skill_create",
"planner_skill_update",
Expand All @@ -371,6 +377,7 @@ export const PLANNER_STAGE_BEHAVIOR = {
updatedArtifacts: ["tdd.md"],
requiredGates: ["failing_signal_recorded"],
expectedTools: [
"planner_tdd_submit",
"planner_git_commit",
"planner_contract_route",
"planner_contract_read",
Expand All @@ -392,6 +399,7 @@ export const PLANNER_STAGE_BEHAVIOR = {
updatedArtifacts: ["tdd.md", "contracts/manifest.json"],
requiredGates: ["task_implemented"],
expectedTools: [
"planner_tdd_submit",
"planner_contract_check",
"planner_contract_upsert",
"planner_contract_route",
Expand All @@ -411,6 +419,7 @@ export const PLANNER_STAGE_BEHAVIOR = {
updatedArtifacts: ["refactor.md"],
requiredGates: ["task_implemented", "contract_check_completed"],
expectedTools: [
"planner_tdd_submit",
"planner_refactor_review",
"planner_git_commit",
"planner_contract_check",
Expand All @@ -435,6 +444,7 @@ export const PLANNER_STAGE_BEHAVIOR = {
updatedArtifacts: ["refactor.md"],
requiredGates: ["refactor_checked"],
expectedTools: [
"planner_tdd_submit",
"planner_git_commit",
"planner_report_stuck",
"planner_skill_create",
Expand Down Expand Up @@ -468,7 +478,7 @@ export const PLANNER_STAGE_BEHAVIOR = {
requiredArtifacts: ["refactor.md"],
updatedArtifacts: ["state.json"],
requiredGates: ["final_tests_passed"],
expectedTools: ["planner_git_merge_task_to_plan"],
expectedTools: ["planner_tdd_submit", "planner_git_merge_task_to_plan"],
commitPolicy: "forbidden",
compactPolicy: "not_allowed",
}),
Expand All @@ -494,7 +504,11 @@ export const PLANNER_STAGE_BEHAVIOR = {
requiredArtifacts: ["plan.md"],
updatedArtifacts: ["verify.md"],
requiredGates: ["next_task_decided"],
expectedTools: ["planner_git_inspect"],
expectedTools: [
"planner_git_inspect",
"planner_contract_route",
"planner_contract_read",
],
commitPolicy: "forbidden",
compactPolicy: "not_allowed",
}),
Expand Down Expand Up @@ -532,10 +546,13 @@ export const PLANNER_STAGE_BEHAVIOR = {
requiredGates: ["plan_branch_verified"],
expectedTools: [
"planner_status",
"planner_summary_submit",
"planner_skill_create",
"planner_skill_update",
"planner_contract_check",
"planner_contract_upsert",
"planner_contract_route",
"planner_contract_read",
],
commitPolicy: "forbidden",
compactPolicy: "not_allowed",
Expand All @@ -558,6 +575,7 @@ export const PLANNER_STAGE_BEHAVIOR = {
"planner_status",
"planner_skill_create",
"planner_skill_update",
"planner_contract_decide",
],
commitPolicy: "forbidden",
compactPolicy: "not_allowed",
Expand All @@ -568,7 +586,7 @@ export const PLANNER_STAGE_BEHAVIOR = {
requiredArtifacts: ["final_summary.md"],
updatedArtifacts: ["decisions.md"],
requiredGates: ["user_acceptance_required"],
expectedTools: ["planner_status"],
expectedTools: ["planner_status", "planner_contract_decide"],
commitPolicy: "forbidden",
compactPolicy: "not_allowed",
}),
Expand All @@ -578,7 +596,11 @@ export const PLANNER_STAGE_BEHAVIOR = {
requiredArtifacts: ["decisions.md"],
updatedArtifacts: ["plan.md", "decisions.md", "discovery.md", "state.json"],
requiredGates: ["user_acceptance_required"],
expectedTools: ["planner_finish_step"],
expectedTools: [
"planner_finish_step",
"planner_plan_submit",
"planner_discovery_submit",
],
commitPolicy: "forbidden",
compactPolicy: "not_allowed",
}),
Expand Down Expand Up @@ -738,14 +760,15 @@ function artifactBehavior(
step: PlannerStep,
updatedArtifacts: readonly PlannerBehaviorArtifact[],
requiredArtifacts: readonly PlannerBehaviorArtifact[],
extraTools: readonly string[] = [],
): PlannerStageStepBehavior {
return behavior(stage, step, {
projectAccess: "planner_artifacts",
actions: ["write_artifacts"],
requiredArtifacts,
updatedArtifacts,
requiredGates: [],
expectedTools: ["planner_status"],
expectedTools: ["planner_status", ...extraTools],
commitPolicy: "forbidden",
compactPolicy: "not_allowed",
});
Expand Down
54 changes: 54 additions & 0 deletions src/runtime/tool-gating-invariant.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import { describe, expect, it } from "vitest";
import {
getAllowedPlannerWrapperTools,
type PlannerWrapperTool,
} from "../guard/tool-policy";
import {
PLANNER_STAGE_STEPS,
type PlannerStage,
type PlannerStep,
} from "../storage/schema";
import {
checkPlannerStageBehaviorWrapperTool,
getPlannerStageStepBehavior,
} from "./stage-behavior";

// Invariant: a planner tool is usable at a step only if it passes BOTH gates —
// the guard policy (STEP_ALLOWED_TOOLS) and the stage-behavior expectedTools
// gate. So everything the guard permits MUST also pass the behavior gate;
// otherwise the guard advertises a tool the model can never actually call
// (a deadlock when no fallback exists, as happened with the artifact fill-tools).
//
// This guards against the two allowlists drifting apart for any tool — debug
// wrappers, git wrappers, contract tools, skills, or future additions.
describe("planner tool gating invariant", () => {
it("every guard-allowed wrapper passes the behavior gate at every step", () => {
const steps = Object.entries(PLANNER_STAGE_STEPS) as Array<
[PlannerStage, readonly PlannerStep[]]
>;
const violations: string[] = [];

for (const [stage, stepList] of steps) {
for (const step of stepList) {
// debugArtifactsDir set so debug wrappers are included in the guard
// set and therefore checked against the behavior gate too.
const allowed = getAllowedPlannerWrapperTools({
stage,
step,
broken: false,
requiresUserDecision: false,
requiresCompact: false,
debugArtifactsDir: "/tmp/planner-debug",
});
const behavior = getPlannerStageStepBehavior({ stage, step });
for (const tool of allowed as readonly PlannerWrapperTool[]) {
if (!checkPlannerStageBehaviorWrapperTool({ behavior, tool }).allow) {
violations.push(`${stage}/${step}: ${tool}`);
}
}
}
}

expect(violations, violations.join("\n")).toEqual([]);
});
});
Loading