From 1fd07b73ce532e34f5f2db71f302e895805edaee Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 06:34:38 -0300 Subject: [PATCH 01/17] docs: add json settings implementation plan --- docs/changelog/260413.md | 20 ++ .../json-settings/01-why-json-settings.md | 132 +++++++++ docs/plans/json-settings/02-policy-model.md | 273 +++++++++++++++++ .../03-code-and-payload-changes.md | 250 ++++++++++++++++ .../04-migration-testing-and-risks.md | 200 +++++++++++++ docs/plans/json-settings/README.md | 85 ++++++ docs/plans/json-settings/TODO.md | 276 ++++++++++++++++++ 7 files changed, 1236 insertions(+) create mode 100644 docs/plans/json-settings/01-why-json-settings.md create mode 100644 docs/plans/json-settings/02-policy-model.md create mode 100644 docs/plans/json-settings/03-code-and-payload-changes.md create mode 100644 docs/plans/json-settings/04-migration-testing-and-risks.md create mode 100644 docs/plans/json-settings/README.md create mode 100644 docs/plans/json-settings/TODO.md diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 454b7c1..2c3ea4f 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -65,3 +65,23 @@ Reworked the package docs into a multi-file operator guide covering flow, runtim ### QA Notes - N/A + +## 260413-06:34:01 - Add JSON settings implementation plan + +### Summary +Added a structured planning packet for moving story-automator contracts to snapshot-backed JSON settings. + +### Added +- Added a dedicated plan set covering goals, architecture, code and payload touchpoints, migration strategy, testing strategy, and risk controls for the JSON settings refactor. +- Added a sequential implementation TODO with dependencies, phase boundaries, and done criteria for executing the refactor in bounded slices. 
+ +### Files +- `docs/plans/json-settings/README.md` +- `docs/plans/json-settings/01-why-json-settings.md` +- `docs/plans/json-settings/02-policy-model.md` +- `docs/plans/json-settings/03-code-and-payload-changes.md` +- `docs/plans/json-settings/04-migration-testing-and-risks.md` +- `docs/plans/json-settings/TODO.md` + +### QA Notes +- N/A diff --git a/docs/plans/json-settings/01-why-json-settings.md b/docs/plans/json-settings/01-why-json-settings.md new file mode 100644 index 0000000..bfdba0b --- /dev/null +++ b/docs/plans/json-settings/01-why-json-settings.md @@ -0,0 +1,132 @@ +# Why JSON Settings + +## Problem + +The current system already behaves like it has a policy layer, but that policy is scattered. + +Examples: + +- step prompts are assembled inline in `source/src/story_automator/commands/tmux.py` +- parse contracts are hard-coded in `source/src/story_automator/commands/orchestrator_parse.py` +- retry limits and escalation budgets are hard-coded in `source/src/story_automator/commands/orchestrator.py` +- review completion logic is fixed in `source/src/story_automator/core/review_verify.py` +- step asset discovery is encoded in `source/src/story_automator/core/workflow_paths.py` +- human-facing loop rules live in payload docs under `payload/.claude/skills/bmad-story-automator/` + +That creates four problems: + +1. the real contract is hard to see in one place +2. docs and runtime can drift +3. customization requires source edits +4. 
resume determinism is fragile if behavior depends on live files + +## Goals + +The implementation should make these customizable: + +- per-step prompt templates +- parse contracts and output schema expectations +- success verifier thresholds and source order +- bounded loop settings such as retry counts and review max cycles +- step asset resolution rules + +It should also preserve: + +- zero-config current behavior +- install layout +- current runtime engine model +- deterministic resume/replay + +## Non-Goals + +This work should not become: + +- a generic DSL +- a plugin system for arbitrary verifier code +- a graph workflow engine +- a rewrite of tmux/session execution + +## Why JSON + +This repo should choose JSON settings instead of YAML for the machine contract. + +Reasons: + +1. No new dependency. + The repo currently has no YAML parser dependency in the Python package. JSON keeps the runtime dependency-free. + +2. Existing code already speaks JSON. + `state.py`, `orchestrator_parse.py`, agent config helpers, and multiple command surfaces already pass JSON around. + +3. Snapshot determinism is simpler. + Stable sorting, hashing, and byte-for-byte snapshots are easier with JSON. + +4. Parse schemas are already a JSON-shaped concept. + Moving step parse contracts into `.json` files is a natural fit. + +5. Settings are machine-facing. + Long prose belongs in markdown/XML files anyway, so readability pressure on the main settings file is lower than it would be for a human-authored workflow language. + +## Why Not YAML First + +YAML would be nicer for comments and long-form hand editing, but it adds cost now: + +- new parsing dependency or hand-rolled parser +- more edge cases around scalars and lists +- more work to normalize and hash deterministically + +If operator ergonomics later require comments, the safer follow-up is JSONC or a small translator layer, not immediate YAML adoption. 
+ +## Existing Repo Fit + +The repo already has a natural home for settings files: + +- `payload/.claude/skills/bmad-story-automator/data/` + +That directory already holds: + +- rules docs +- retry docs +- complexity JSON +- prompt-related docs +- monitoring docs + +Adding JSON policy and parse files there follows the existing layout instead of inventing a new storage pattern. + +## Architectural Principle + +Use: + +```text +declarative contracts ++ imperative engine +``` + +Declarative: + +- prompts +- parse schema +- verifier parameters +- asset path candidates +- loop budgets + +Imperative: + +- tmux spawning +- session capture +- crash/stuck detection +- file reads/writes +- snapshot creation +- verifier execution + +## Success Standard + +This refactor is worth doing only if it makes behavior easier to change without making runtime behavior harder to trust. + +Practical success means: + +- changing prompt text means editing a payload file or override, not Python +- changing review completion thresholds means editing JSON settings, not Python +- changing retry budgets means editing JSON settings, not env-only knobs +- resume always uses the same effective contract as the run start + diff --git a/docs/plans/json-settings/02-policy-model.md b/docs/plans/json-settings/02-policy-model.md new file mode 100644 index 0000000..56acdb6 --- /dev/null +++ b/docs/plans/json-settings/02-policy-model.md @@ -0,0 +1,273 @@ +# Policy Model + +## Target Shape + +Introduce one new concept: the runtime policy. + +It has three layers: + +1. bundled default policy +2. optional project override policy +3. effective snapshot written at orchestration start + +Only the snapshot is allowed to drive an in-flight run. 

## File Locations

### Bundled default policy

```text
payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json
```

### Bundled prompt templates

```text
payload/.claude/skills/bmad-story-automator/data/prompts/
  create.md
  dev.md
  auto.md
  review.md
  retro.md
```

### Bundled parse contracts

```text
payload/.claude/skills/bmad-story-automator/data/parse/
  create.json
  dev.json
  auto.json
  review.json
  retro.json
```

### Project override

```text
_bmad/bmm/story-automator.policy.json
```

### Effective snapshot

```text
_bmad-output/story-automator/policy-snapshots/{story_id}-{policy_hash}.json
```

### Review machine contract

```text
payload/.claude/skills/bmad-story-automator-review/contract.json
```

## Data Flow

```mermaid
flowchart TD
    A["Bundled policy JSON"] --> D["runtime_policy.py"]
    B["Project override JSON"] --> D
    D --> E["Resolve paths + validate + merge"]
    E --> F["Write effective snapshot JSON"]
    F --> G["State doc stores path + hash"]
    F --> H["tmux build-cmd"]
    F --> I["orchestrator_parse"]
    F --> J["success verifiers"]
    F --> K["orchestrator budgets"]
```

## Core Rules

### Rule 1

Bundled defaults must reproduce current behavior exactly.

### Rule 2

Project overrides can customize values, but cannot register new executable code.

### Rule 3

Resume must use the snapshot from state, never live re-merge.

### Rule 4

Prompt text, parse contracts, and verifier thresholds are data.

### Rule 5

tmux lifecycle, monitor logic, file IO, and verifier execution remain Python.

## Merge Rules

Use deterministic merge semantics:

- maps: deep merge
- arrays: replace
- scalars: override
- unknown top-level keys: reject with validation error

This keeps overrides predictable and makes snapshots stable. 
+ +## JSON Schema Shape + +High-level example: + +```json +{ + "version": 1, + "snapshot": { + "relativeDir": "_bmad-output/story-automator/policy-snapshots" + }, + "runtime": { + "parser": { + "provider": "claude", + "model": "haiku", + "timeoutSeconds": 120 + }, + "merge": { + "maps": "deep", + "arrays": "replace" + } + }, + "workflow": { + "sequence": ["create", "dev", "auto", "review"], + "optional": { + "auto": { + "skipWhenOverride": "skipAutomate" + } + }, + "repeat": { + "review": { + "maxCycles": 5, + "successVerifier": "review_completion", + "onIncomplete": "retry", + "onExhausted": "escalate" + } + }, + "crash": { + "maxRetries": 2, + "onExhausted": "escalate" + }, + "triggers": [ + { + "name": "retrospective_on_epic_complete", + "after": "review", + "verifier": "epic_complete", + "run": "retro", + "blocking": false, + "forceAgent": "claude" + } + ] + }, + "steps": { + "create": { + "label": "create-story", + "assets": { + "skillName": "bmad-create-story", + "workflowCandidates": ["workflow.md", "workflow.yaml"], + "instructionsCandidates": ["discover-inputs.md"], + "checklistCandidates": ["checklist.md"], + "templateCandidates": ["template.md"], + "required": ["skill", "workflow"] + }, + "prompt": { + "templateFile": "data/prompts/create.md", + "interactionMode": "autonomous" + }, + "parse": { + "schemaFile": "data/parse/create.json" + }, + "success": { + "verifier": "create_story_artifact", + "config": { + "glob": "_bmad-output/implementation-artifacts/{story_prefix}-*.md", + "expectedMatches": 1 + } + } + }, + "review": { + "label": "code-review", + "assets": { + "skillName": "bmad-story-automator-review", + "workflowCandidates": ["workflow.yaml", "workflow.md"], + "instructionsCandidates": ["instructions.xml"], + "checklistCandidates": ["checklist.md"], + "required": ["skill", "workflow"] + }, + "prompt": { + "templateFile": "data/prompts/review.md", + "interactionMode": "autonomous", + "acceptExtraInstruction": true, + "defaultExtraInstruction": 
"auto-fix all issues without prompting" + }, + "parse": { + "schemaFile": "data/parse/review.json" + }, + "success": { + "verifier": "review_completion", + "contractFile": ".claude/skills/bmad-story-automator-review/contract.json" + } + } + } +} +``` + +## Named Verifiers + +Initial verifier set should stay small: + +- `create_story_artifact` +- `session_exit` +- `review_completion` +- `epic_complete` + +No custom verifier registration in settings. + +## Prompt Template Rules + +Prompt templates should support simple substitution only: + +- `{{story_id}}` +- `{{story_prefix}}` +- `{{skill_path}}` +- `{{workflow_path}}` +- `{{instructions_line}}` +- `{{checklist_line}}` +- `{{template_line}}` +- `{{extra_instruction}}` + +No loops, no conditions beyond small optional-line helpers in Python. + +## Success Contract Rules + +The runtime should use settings to decide: + +- which verifier to run +- which config to pass it +- which sources to trust first +- which statuses count as done or incomplete + +It should not use session output alone as final truth except for explicitly simple verifiers like `session_exit`. + +## State Doc Metadata + +State frontmatter should store only: + +- `policyVersion` +- `policySnapshotFile` +- `policySnapshotHash` +- `legacyPolicy` when needed + +The state file should not store the full merged policy blob. + +## Why The Snapshot Matters + +Without a pinned snapshot, these changes become unsafe: + +- payload update +- project override edit +- prompt tweak during an in-flight run +- verifier threshold change after preflight + +The snapshot prevents those mutations from changing the behavior of a resumed orchestration. 
+ diff --git a/docs/plans/json-settings/03-code-and-payload-changes.md b/docs/plans/json-settings/03-code-and-payload-changes.md new file mode 100644 index 0000000..85b8b66 --- /dev/null +++ b/docs/plans/json-settings/03-code-and-payload-changes.md @@ -0,0 +1,250 @@ +# Code And Payload Changes + +## Implementation Principle + +Keep files under control. Avoid one giant refactor file. + +Recommended new source modules: + +- `source/src/story_automator/core/runtime_policy.py` +- `source/src/story_automator/core/success_verifiers.py` + +Keep prompt rendering small enough to live in existing command modules unless it grows past a reasonable size. + +## Source Changes + +### New: `source/src/story_automator/core/runtime_policy.py` + +Responsibilities: + +- load bundled default policy JSON +- load optional project override JSON +- merge deterministically +- validate structure +- resolve step asset paths +- write effective snapshot JSON +- load policy from snapshot during resume +- expose helpers such as `load_effective_policy()` and `step_contract()` + +Notes: + +- this module is the only policy merge point +- it should normalize relative paths against project root or installed skill root +- it should reject unknown verifier names and invalid step references early + +### New: `source/src/story_automator/core/success_verifiers.py` + +Responsibilities: + +- named verifier registry +- `session_exit` +- `create_story_artifact` +- `review_completion` +- `epic_complete` + +Notes: + +- keep `verify_code_review_completion()` as a backward-compatible wrapper +- verifier config comes from policy, verifier execution stays in Python + +### `source/src/story_automator/commands/tmux.py` + +Replace hard-coded prompt assembly with policy-driven prompt rendering. 
+ +Changes: + +- stop building prompts from inline string map +- load step contract from snapshot or effective policy +- render step prompt from `prompt.templateFile` +- use policy-driven step label instead of `_automate_workflow_label()` +- shrink `_build_retro_prompt()` into data-backed template usage +- make `monitor-session` call the configured verifier, not a permanent review special case + +Keep in Python: + +- Codex/Claude CLI invocation +- `CODEX_HOME` setup +- `tmux` session lifecycle +- heartbeat/status logic + +### `source/src/story_automator/commands/orchestrator_parse.py` + +Replace the `if step == ...` schema tree. + +Changes: + +- read `parse.schemaFile` and optional parser prompt template +- inject `label` and schema into parser call +- validate returned JSON against required keys from schema +- preserve current command output shape + +### `source/src/story_automator/core/review_verify.py` + +Reduce it to a compatibility wrapper. + +Changes: + +- keep current public function +- delegate to `success_verifiers.review_completion` +- allow contract-driven status values and source order + +### `source/src/story_automator/commands/orchestrator.py` + +Move hard-coded budgets into policy. + +Changes: + +- `review-loop` limit comes from `workflow.repeat.review.maxCycles` +- `session-crash` limit comes from `workflow.crash.maxRetries` +- story creation validation becomes part of `create_story_artifact` +- escalation actions stay in Python + +### `source/src/story_automator/commands/state.py` + +Add policy metadata at state document creation. + +Changes: + +- write `policyVersion` +- write `policySnapshotFile` +- write `policySnapshotHash` +- optionally write `legacyPolicy` +- surface these in state summary and validation + +Do not: + +- embed full policy JSON in frontmatter + +### `source/src/story_automator/core/frontmatter.py` + +Keep changes minimal. 
+ +Possible work: + +- teach state readers to return new scalar metadata +- no nested policy parser + +### `source/src/story_automator/core/workflow_paths.py` + +Refactor into policy-backed asset resolution. + +Changes: + +- resolve explicit path or candidate list from policy +- distinguish required vs optional assets +- fail fast for missing required assets +- preserve compatibility wrappers where useful + +Important fix: + +- required assets must no longer silently return the first candidate string when nothing exists + +## Payload Changes + +### New: `payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json` + +This is the default machine contract for the installed skill. + +### New: `payload/.claude/skills/bmad-story-automator/data/prompts/*.md` + +Add prompt templates for: + +- create +- dev +- auto +- review +- retro + +### New: `payload/.claude/skills/bmad-story-automator/data/parse/*.json` + +Add step parse contracts for: + +- create +- dev +- auto +- review +- retro + +### `payload/.claude/skills/bmad-story-automator/workflow.md` + +Keep this human-facing and orchestration-facing. + +Changes: + +- reference the runtime policy file explicitly +- describe the current sequence as the default policy, not the only possible future shape +- align wording with policy terms: prompt contract, parse contract, success verifier, snapshot + +### `payload/.claude/skills/bmad-story-automator-review/workflow.yaml` + +Add a machine contract pointer, for example: + +- `contract: "./contract.json"` + +### New: `payload/.claude/skills/bmad-story-automator-review/contract.json` + +Store structured review completion semantics: + +- blocking severity +- allowed done values +- allowed in-progress values +- source order +- sprint sync expectations + +### `payload/.claude/skills/bmad-story-automator-review/instructions.xml` + +Keep the adversarial review behavior, but align it with autonomous mode. 
+ +Changes: + +- stop relying on prompt folklore to override user-choice branches +- make automatic fix behavior driven by explicit interaction mode +- keep review prose separate from machine completion rules + +## Installer And Packaging Impact + +### `install.sh` + +Likely no logic change needed because the installer already copies the whole payload tree. + +Needed checks: + +- verify new payload files exist after install +- update smoke tests to assert new data files are present + +### `package.json` + +Likely no change needed because `payload/` and `source/` are already in `files`. + +## Verification Surface Changes + +### `scripts/smoke-test.sh` + +Must update smoke coverage for: + +- installed policy JSON presence +- installed prompt template presence +- installed parse JSON presence +- prompt-building behavior still matching default policy +- policy-backed build-cmd output for create/auto/review/retro + +### Suggested new tests under `source/tests/` + +- `test_runtime_policy.py` +- `test_success_verifiers.py` +- `test_orchestrator_parse.py` +- `test_state_policy_metadata.py` + +Use stdlib `unittest` unless a dependency-free alternative is clearly better. + +## Recommended Module Boundaries + +To keep files under roughly 500 LOC: + +- `runtime_policy.py`: load, merge, validate, snapshot, resolve +- `success_verifiers.py`: registry and concrete verifiers +- `tmux.py`: session behavior plus prompt rendering entrypoint only +- `orchestrator_parse.py`: parser command plus schema validation + +If `runtime_policy.py` grows too large, split only after phase 1 lands. 
+ diff --git a/docs/plans/json-settings/04-migration-testing-and-risks.md b/docs/plans/json-settings/04-migration-testing-and-risks.md new file mode 100644 index 0000000..93e4c93 --- /dev/null +++ b/docs/plans/json-settings/04-migration-testing-and-risks.md @@ -0,0 +1,200 @@ +# Migration, Testing, And Risks + +## Compatibility Plan + +### Default behavior + +Bundled default JSON settings must preserve today's behavior exactly. + +That includes: + +- prompt wording +- asset path candidate order +- parser labels and required fields +- review completion fallback to story file status +- review max cycles +- crash retry count +- retrospective forcing Claude + +### Old state docs + +If a state document has no policy metadata: + +- resume in legacy mode +- load bundled defaults +- mark the run summary with `legacyPolicy: true` + +This is the only safe fallback for pre-refactor state docs. + +### New state docs + +If a state document has: + +- `policySnapshotFile` +- `policySnapshotHash` + +then resume must: + +- load the snapshot +- verify the hash +- fail validation if snapshot missing or mismatched + +Do not silently fall back to live defaults for a new-format state doc. + +### Legacy env vars + +For one release cycle, continue to honor: + +- `MAX_REVIEW_CYCLES` +- `MAX_CRASH_RETRIES` + +But resolve them once at orchestration start and bake the effective values into the snapshot. + +That preserves old operator habits without breaking deterministic resume. + +## Test Strategy + +### Principle + +Add focused Python tests for new policy behavior, then keep the smoke suite as the installer/integration safety net. + +### Recommended Test Harness + +Use stdlib `unittest` first. 
+ +Reasons: + +- no new dependency +- enough for merge/validation/path-resolution tests +- enough for verifier tests with temporary directories + +### New Python Test Coverage + +### `test_runtime_policy.py` + +Cases: + +- bundled default loads +- project override deep-merges maps +- arrays replace cleanly +- invalid step name rejected +- invalid verifier name rejected +- required asset missing fails +- snapshot hash stable + +### `test_success_verifiers.py` + +Cases: + +- `create_story_artifact` returns fail for 0 matches +- `create_story_artifact` returns pass for 1 match +- `create_story_artifact` returns fail for runaway multiple matches +- `review_completion` passes on sprint status done +- `review_completion` falls back to story file `Status: done` +- `review_completion` fails on in-progress/unknown +- `epic_complete` respects sprint status values + +### `test_orchestrator_parse.py` + +Cases: + +- parse schema loads from step contract +- invalid schema file rejected +- invalid child JSON rejected +- output shape remains compatible + +### `test_state_policy_metadata.py` + +Cases: + +- state doc writes policy metadata +- summary surfaces policy metadata +- legacy state without policy metadata remains valid + +### Smoke Test Updates + +Extend `scripts/smoke-test.sh` to verify: + +- installed `data/orchestration-policy.json` +- installed prompt template files +- installed parse JSON files +- `tmux-wrapper build-cmd` still emits expected default text +- review prompt still defaults to automatic fixes in autonomous mode + +### Verify Command Updates + +Recommended future command shape: + +```bash +python3 -m unittest discover -s source/tests +npm run test:smoke +npm run pack:dry-run +``` + +Then fold that into `npm run verify`. 
+ +## Risk Register + +| Risk | Why it matters | Mitigation | +|------|----------------|------------| +| Prompt drift changes agent behavior | Equivalent wording is not actually equivalent for model behavior | Golden prompt tests against current defaults | +| Snapshot ignored on resume | Live payload changes mutate in-flight run behavior | Resume path must require snapshot for new-format states | +| Review still asks the user in autonomous mode | Current review workflow prose still has a menu branch | Add explicit interaction-mode contract and payload alignment | +| Required asset silent fallback | Missing workflow may look valid until runtime | Resolver must fail closed for required assets | +| Custom statuses cause false positives | Review completion may pass with wrong values | Contract validation + verifier tests | +| Optional auto skill incomplete | Step contract may claim assets that do not exist | Required/optional separation in resolver | +| Policy file grows too complex | Moderate refactor turns into new engine | Keep bounded primitives only | + +## Rollout Strategy + +### Phase 1 + +Land: + +- policy loader +- bundled default policy +- project override support +- pinned snapshot +- prompt templates +- parse contracts +- policy-backed retry budgets + +Keep: + +- fixed engine shape +- existing review special logic if needed for the first slice + +### Phase 2 + +Land: + +- verifier registry +- policy-backed `monitor-session` verifier dispatch +- `contract.json` for review +- review payload alignment for autonomous mode + +### Phase 3 + +Land: + +- policy-backed bounded loop config +- optional-step and trigger wiring +- cleanup of old hard-coded helpers + +## Phase Exit Criteria + +Phase 1 exit: + +- zero-config build-cmd output matches baseline +- snapshot created and stored in state +- parse schemas load from JSON files + +Phase 2 exit: + +- review completion no longer special-cased in `monitor-session` +- review contract is structured and tested + +Phase 3 
exit: + +- retry/repeat/trigger policy comes from snapshot +- docs and runtime use the same terms diff --git a/docs/plans/json-settings/README.md b/docs/plans/json-settings/README.md new file mode 100644 index 0000000..6490892 --- /dev/null +++ b/docs/plans/json-settings/README.md @@ -0,0 +1,85 @@ +# JSON Settings Plan + +Purpose: move prompt text, parse contracts, verifier thresholds, and bounded loop rules out of scattered Python constants and into deterministic JSON settings, without replacing the existing runtime engine. + +## Summary + +This plan chooses: + +- JSON for machine settings +- markdown/XML for long prompt and workflow prose +- bundled defaults plus optional project override plus pinned snapshot +- named Python verifiers, not arbitrary expressions +- bounded workflow primitives, not user-defined workflow graphs + +That gives most of the configurability value with moderate risk. + +## Why This Exists + +Today the behavior is split across: + +- `source/src/story_automator/commands/tmux.py` +- `source/src/story_automator/commands/orchestrator_parse.py` +- `source/src/story_automator/commands/orchestrator.py` +- `source/src/story_automator/core/review_verify.py` +- `source/src/story_automator/core/workflow_paths.py` +- `payload/.claude/skills/bmad-story-automator/workflow.md` +- `payload/.claude/skills/bmad-story-automator-review/workflow.yaml` +- `payload/.claude/skills/bmad-story-automator-review/instructions.xml` + +That split is the main source of drift risk. This packet defines one implementation path to pull the machine contract into JSON settings while keeping the current engine intact. + +## Doc Map + +- [01-why-json-settings.md](./01-why-json-settings.md) + Problem, goals, non-goals, and why JSON is the right fit for this repo. +- [02-policy-model.md](./02-policy-model.md) + Target architecture, file locations, merge rules, schema shape, and data/runtime boundaries. 
+- [03-code-and-payload-changes.md](./03-code-and-payload-changes.md) + Exact source and payload touchpoints, including new modules and file-by-file changes. +- [04-migration-testing-and-risks.md](./04-migration-testing-and-risks.md) + Compatibility plan, resume semantics, test strategy, and risk controls. +- [TODO.md](./TODO.md) + Sequential execution checklist with dependencies and exit criteria. + +## Read Order + +1. Read [01-why-json-settings.md](./01-why-json-settings.md) +2. Read [02-policy-model.md](./02-policy-model.md) +3. Read [03-code-and-payload-changes.md](./03-code-and-payload-changes.md) +4. Read [04-migration-testing-and-risks.md](./04-migration-testing-and-risks.md) +5. Execute [TODO.md](./TODO.md) top to bottom + +## Core Decision + +The implementation should use this model: + +```text +bundled default policy + + optional project override + = effective runtime policy + -> pinned snapshot at orchestration start + -> state doc stores pointer + hash + -> all resume/replay uses snapshot only +``` + +## Definition Of Done + +This plan is complete when the implementation can: + +- customize step prompts without editing Python +- customize parse schemas without editing Python +- customize verifier thresholds and retry budgets without editing Python +- keep zero-config behavior identical to today +- resume from a pinned snapshot even if payload or override files later change +- reject invalid settings safely + +## Out Of Scope + +This plan does not try to deliver: + +- arbitrary user-defined workflow graphs +- custom Python or shell expressions in config +- a general workflow interpreter +- rich nested policy blobs embedded in frontmatter + diff --git a/docs/plans/json-settings/TODO.md b/docs/plans/json-settings/TODO.md new file mode 100644 index 0000000..9270b2f --- /dev/null +++ b/docs/plans/json-settings/TODO.md @@ -0,0 +1,276 @@ +# TODO + +Execute in order. Do not skip ahead unless the dependency line says it is safe. + +## Phase 0: Baseline + +1. 
[ ] Capture current behavior baselines. + Files: `source/src/story_automator/commands/tmux.py`, `source/src/story_automator/commands/orchestrator_parse.py`, `source/src/story_automator/commands/orchestrator.py`, `source/src/story_automator/core/review_verify.py` + Actions: + - run `npm run verify` + - capture `tmux-wrapper build-cmd` output for `create`, `auto`, `review`, `retro` + - note current review/crash limits and review completion behavior + Done when: + - baseline commands are saved in working notes + - current default behavior is explicit before edits start + +2. [ ] Freeze the target JSON settings shape. + Depends on: 1 + Files: `docs/plans/json-settings/02-policy-model.md` + Actions: + - confirm final top-level keys + - confirm snapshot file path + - confirm verifier names + Done when: + - no open schema ambiguity remains + +## Phase 1: Policy Loader And Default Policy + +3. [ ] Add bundled default policy JSON and data directories. + Depends on: 2 + Files: + - `payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json` + - `payload/.claude/skills/bmad-story-automator/data/prompts/*.md` + - `payload/.claude/skills/bmad-story-automator/data/parse/*.json` + Actions: + - encode current behavior exactly + - keep prompt wording as close to current strings as possible + Done when: + - payload contains complete default machine contract + +4. [ ] Implement `runtime_policy.py`. + Depends on: 3 + Files: + - `source/src/story_automator/core/runtime_policy.py` + Actions: + - load bundled policy + - load optional `_bmad/bmm/story-automator.policy.json` + - deep-merge maps, replace arrays + - validate known keys and verifier names + - resolve relative paths + - write stable snapshot JSON with hash + Done when: + - one call can return effective policy plus snapshot metadata + +5. [ ] Refactor required/optional asset resolution behind policy. 
+ Depends on: 4 + Files: + - `source/src/story_automator/core/workflow_paths.py` + - `source/src/story_automator/core/runtime_policy.py` + Actions: + - move candidate-list resolution behind policy + - fail closed on missing required assets + - preserve compatibility wrappers where helpful + Done when: + - required assets never silently resolve to non-existent placeholders + +6. [ ] Add state metadata for policy snapshots. + Depends on: 4 + Files: + - `source/src/story_automator/commands/state.py` + - `source/src/story_automator/core/frontmatter.py` + Actions: + - write `policyVersion` + - write `policySnapshotFile` + - write `policySnapshotHash` + - add `legacyPolicy` handling + Done when: + - new state docs point at a snapshot instead of embedding policy + +## Phase 2: Prompt And Parse Externalization + +7. [ ] Replace hard-coded tmux prompts with template rendering. + Depends on: 4, 5, 6 + Files: + - `source/src/story_automator/commands/tmux.py` + Actions: + - load step contract from effective policy + - render prompt from template file + - preserve current Codex/Claude boot logic + - preserve current default prompt text behavior + Done when: + - `build-cmd` no longer uses the hard-coded prompt map + +8. [ ] Replace hard-coded parse schema switch with policy-backed contracts. + Depends on: 4 + Files: + - `source/src/story_automator/commands/orchestrator_parse.py` + Actions: + - load step parse schema JSON + - render parser prompt from label + schema + - validate returned JSON against required keys + Done when: + - parser behavior comes from data files, not `if step == ...` + +9. [ ] Move retry budgets into policy-backed reads. + Depends on: 4 + Files: + - `source/src/story_automator/commands/orchestrator.py` + Actions: + - source review max cycles from policy + - source crash retry limit from policy + - remove direct env-default dependence from active run behavior + Done when: + - budgets come from effective snapshot + +## Phase 3: Success Verifiers + +10. 
[ ] Add verifier registry and concrete implementations. + Depends on: 4 + Files: + - `source/src/story_automator/core/success_verifiers.py` + - `source/src/story_automator/core/review_verify.py` + Actions: + - implement `session_exit` + - implement `create_story_artifact` + - implement `review_completion` + - implement `epic_complete` + - keep backward-compatible wrapper for existing review helper + Done when: + - verifiers are selected by name and tested independently + +11. [ ] Wire `monitor-session` to policy-backed verifier dispatch. + Depends on: 7, 10 + Files: + - `source/src/story_automator/commands/tmux.py` + Actions: + - remove permanent review special case + - use step contract `success.verifier` + - pass verifier config and story context + Done when: + - completion logic is step-driven, not `workflow == "review"` driven + +12. [ ] Fold create story validation into `create_story_artifact`. + Depends on: 10, 11 + Files: + - `source/src/story_automator/commands/orchestrator.py` + - `source/src/story_automator/core/success_verifiers.py` + Actions: + - remove duplicated create validation trigger logic + - route create pass/fail through verifier + Done when: + - create success semantics exist in one place only + +## Phase 4: Review Payload Alignment + +13. [ ] Add structured review contract file. + Depends on: 3 + Files: + - `payload/.claude/skills/bmad-story-automator-review/contract.json` + - `payload/.claude/skills/bmad-story-automator-review/workflow.yaml` + Actions: + - move machine completion semantics into JSON + - make workflow point to the contract + Done when: + - review machine truth is no longer hidden inside prose only + +14. [ ] Align review instructions with autonomous mode. 
+ Depends on: 13 + Files: + - `payload/.claude/skills/bmad-story-automator-review/instructions.xml` + Actions: + - remove reliance on prompt folklore for auto-fix behavior + - make automatic fix path explicit for autonomous mode + Done when: + - review payload no longer contradicts runtime prompt defaults + +15. [ ] Update main workflow prose to reference runtime policy. + Depends on: 3 + Files: + - `payload/.claude/skills/bmad-story-automator/workflow.md` + Actions: + - reference `orchestration-policy.json` + - describe fixed loop as default policy + - align terms with runtime policy language + Done when: + - payload docs and runtime use the same contract vocabulary + +## Phase 5: Testing + +16. [ ] Add Python unit tests for policy and verifiers. + Depends on: 4, 8, 10 + Files: + - `source/tests/test_runtime_policy.py` + - `source/tests/test_success_verifiers.py` + - `source/tests/test_orchestrator_parse.py` + - `source/tests/test_state_policy_metadata.py` + Actions: + - use stdlib `unittest` + - cover merge, validation, snapshot, verifier behavior, parser loading + Done when: + - policy-specific behavior has direct automated coverage + +17. [ ] Update smoke tests for installed policy assets and defaults. + Depends on: 7, 8, 11, 13, 14, 15 + Files: + - `scripts/smoke-test.sh` + Actions: + - assert policy JSON exists after install + - assert prompt templates and parse files exist + - assert default prompt output still matches baseline expectations + Done when: + - installer/integration behavior remains covered end to end + +18. [ ] Update local verify flow. + Depends on: 16, 17 + Files: + - `package.json` + - `docs/development.md` + Actions: + - add Python unit test command + - fold it into `npm run verify` + - document new verify sequence + Done when: + - one verify command covers unit + smoke + package dry run + +## Phase 6: Compatibility And Cleanup + +19. [ ] Implement legacy resume behavior and strict new-state validation. 
+ Depends on: 6, 10, 11 + Files: + - `source/src/story_automator/commands/state.py` + - `source/src/story_automator/core/runtime_policy.py` + - any resume path using state metadata + Actions: + - old state without snapshot -> legacy defaults + `legacyPolicy: true` + - new state with missing snapshot -> validation failure + Done when: + - resume is deterministic and explicit in both modes + +20. [ ] Preserve env compatibility for one release cycle. + Depends on: 9 + Files: + - `source/src/story_automator/core/runtime_policy.py` + - docs as needed + Actions: + - read legacy env vars once at orchestration start + - bake effective values into snapshot + - document deprecation path + Done when: + - old env knobs still work without mutating resumed runs + +21. [ ] Remove or shrink obsolete hard-coded helpers. + Depends on: 7, 8, 9, 10, 11 + Files: + - `source/src/story_automator/commands/tmux.py` + - `source/src/story_automator/commands/orchestrator_parse.py` + - `source/src/story_automator/commands/orchestrator.py` + Actions: + - delete dead prompt-schema branches + - remove stale helpers after tests pass + Done when: + - no duplicate machine contract remains in code + +## Final Gate + +22. [ ] Run full verification and review default behavior drift. 
+ Depends on: 1 through 21 + Actions: + - run Python unit tests + - run `npm run verify` + - compare prompt baselines against phase 0 captures + - review installed payload tree manually once + Done when: + - zero-config behavior matches baseline + - customization surfaces work + - resume uses snapshots only From af8c1d6c31939a72d78612faa9b976b7ae980c5c Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 07:29:35 -0300 Subject: [PATCH 02/17] feat: add JSON runtime policy foundation --- docs/changelog/260413.md | 35 +++ docs/development.md | 4 +- package.json | 3 +- .../bmad-story-automator-review/contract.json | 7 + .../bmad-story-automator-review/workflow.yaml | 1 + .../data/orchestration-policy.json | 146 ++++++++++ .../bmad-story-automator/data/parse/auto.json | 10 + .../data/parse/create.json | 10 + .../bmad-story-automator/data/parse/dev.json | 10 + .../data/parse/retro.json | 8 + .../data/parse/review.json | 15 + .../bmad-story-automator/data/prompts/auto.md | 4 + .../data/prompts/create.md | 7 + .../bmad-story-automator/data/prompts/dev.md | 4 + .../data/prompts/retro.md | 33 +++ .../data/prompts/review.md | 4 + .../templates/state-document.md | 4 + .../skills/bmad-story-automator/workflow.md | 2 + scripts/smoke-test.sh | 6 + .../story_automator/commands/orchestrator.py | 10 +- .../commands/orchestrator_parse.py | 53 ++-- source/src/story_automator/commands/state.py | 12 +- source/src/story_automator/commands/tmux.py | 165 ++--------- .../story_automator/core/runtime_policy.py | 265 ++++++++++++++++++ .../story_automator/core/workflow_paths.py | 118 +------- source/tests/test_orchestrator_parse.py | 99 +++++++ source/tests/test_runtime_policy.py | 93 ++++++ source/tests/test_state_policy_metadata.py | 138 +++++++++ 28 files changed, 998 insertions(+), 268 deletions(-) create mode 100644 payload/.claude/skills/bmad-story-automator-review/contract.json create mode 100644 
payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json create mode 100644 payload/.claude/skills/bmad-story-automator/data/parse/auto.json create mode 100644 payload/.claude/skills/bmad-story-automator/data/parse/create.json create mode 100644 payload/.claude/skills/bmad-story-automator/data/parse/dev.json create mode 100644 payload/.claude/skills/bmad-story-automator/data/parse/retro.json create mode 100644 payload/.claude/skills/bmad-story-automator/data/parse/review.json create mode 100644 payload/.claude/skills/bmad-story-automator/data/prompts/auto.md create mode 100644 payload/.claude/skills/bmad-story-automator/data/prompts/create.md create mode 100644 payload/.claude/skills/bmad-story-automator/data/prompts/dev.md create mode 100644 payload/.claude/skills/bmad-story-automator/data/prompts/retro.md create mode 100644 payload/.claude/skills/bmad-story-automator/data/prompts/review.md create mode 100644 source/src/story_automator/core/runtime_policy.py create mode 100644 source/tests/test_orchestrator_parse.py create mode 100644 source/tests/test_runtime_policy.py create mode 100644 source/tests/test_state_policy_metadata.py diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 2c3ea4f..556a30e 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -85,3 +85,38 @@ Added a structured planning packet for moving story-automator contracts to snaps ### QA Notes - N/A + +## 260413-07:29:16 - Add JSON runtime policy foundation + +### Summary +Implemented the first JSON-settings slice with bundled policy data, snapshot-backed state metadata, and policy-driven prompt/parse wiring. + +### Added +- Added a runtime policy loader with deterministic merge, asset resolution, environment compatibility, and snapshot writing. +- Added bundled orchestration policy, prompt templates, parse contracts, and a structured review contract file. 
+- Added Python unit coverage for policy loading, parser contracts, and state snapshot metadata. + +### Changed +- Changed `tmux-wrapper build-cmd` to render prompts from policy-backed templates instead of inline string maps. +- Changed parser contract loading, retry budget reads, smoke coverage, and `npm run verify` to use the new JSON-policy foundation. +- Changed orchestration state documents to persist `policyVersion`, `policySnapshotFile`, and `policySnapshotHash`. + +### Files +- `source/src/story_automator/core/runtime_policy.py` +- `source/src/story_automator/core/workflow_paths.py` +- `source/src/story_automator/commands/tmux.py` +- `source/src/story_automator/commands/orchestrator_parse.py` +- `source/src/story_automator/commands/orchestrator.py` +- `source/src/story_automator/commands/state.py` +- `payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json` +- `payload/.claude/skills/bmad-story-automator/data/prompts/` +- `payload/.claude/skills/bmad-story-automator/data/parse/` +- `payload/.claude/skills/bmad-story-automator-review/contract.json` +- `scripts/smoke-test.sh` +- `package.json` +- `docs/development.md` +- `docs/changelog/260413.md` +- `source/tests/` + +### QA Notes +- N/A diff --git a/docs/development.md b/docs/development.md index 2307d52..ba9ef4b 100644 --- a/docs/development.md +++ b/docs/development.md @@ -13,6 +13,7 @@ PYTHONPATH=source/src python3 -m story_automator --help `npm run verify` expands to: +- `npm run test:python` - `npm run pack:dry-run` - `npm run test:smoke` @@ -25,13 +26,14 @@ The smoke suite validates: - required and optional dependency handling - legacy backup behavior - installed skill layout +- installed runtime policy, prompt templates, and parse contracts - prompt-building behavior for Claude and Codex child sessions ## Repo Verification Flow ```mermaid flowchart TD - A["Edit installer, payload, or runtime"] --> B["Run python helper sanity checks"] + A["Edit installer, payload, or runtime"] --> 
B["Run npm run test:python"] B --> C["Run npm run test:smoke"] C --> D["Run npm run pack:dry-run"] D --> E["Run npm run verify"] diff --git a/package.json b/package.json index 4cac0ac..4ba537f 100644 --- a/package.json +++ b/package.json @@ -17,8 +17,9 @@ ], "scripts": { "pack:dry-run": "npm pack --dry-run", + "test:python": "PYTHONPATH=source/src python3 -m unittest discover -s source/tests", "test:smoke": "bash scripts/smoke-test.sh", - "verify": "npm run pack:dry-run && npm run test:smoke" + "verify": "npm run test:python && npm run pack:dry-run && npm run test:smoke" }, "engines": { "node": ">=18" diff --git a/payload/.claude/skills/bmad-story-automator-review/contract.json b/payload/.claude/skills/bmad-story-automator-review/contract.json new file mode 100644 index 0000000..946bae5 --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator-review/contract.json @@ -0,0 +1,7 @@ +{ + "blockingSeverity": ["critical"], + "doneValues": ["done"], + "inProgressValues": ["in-progress", "in_progress", "review", "qa"], + "sourceOrder": ["sprint-status.yaml", "story-file"], + "syncSprintStatus": true +} diff --git a/payload/.claude/skills/bmad-story-automator-review/workflow.yaml b/payload/.claude/skills/bmad-story-automator-review/workflow.yaml index 05b5347..f7c9283 100644 --- a/payload/.claude/skills/bmad-story-automator-review/workflow.yaml +++ b/payload/.claude/skills/bmad-story-automator-review/workflow.yaml @@ -16,4 +16,5 @@ sprint_status: "{implementation_artifacts}/sprint-status.yaml" # Workflow components instructions: "./instructions.xml" validation: "./checklist.md" +contract: "./contract.json" standalone: true diff --git a/payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json b/payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json new file mode 100644 index 0000000..1699f1b --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json @@ -0,0 +1,146 @@ +{ + "version": 1, + 
"snapshot": { + "relativeDir": "_bmad-output/story-automator/policy-snapshots" + }, + "runtime": { + "parser": { + "provider": "claude", + "model": "haiku", + "timeoutSeconds": 120 + }, + "merge": { + "maps": "deep", + "arrays": "replace" + } + }, + "workflow": { + "sequence": ["create", "dev", "auto", "review", "retro"], + "repeat": { + "review": { + "maxCycles": 5, + "successVerifier": "review_completion", + "onIncomplete": "retry", + "onExhausted": "escalate" + } + }, + "crash": { + "maxRetries": 2, + "onExhausted": "escalate" + } + }, + "steps": { + "create": { + "label": "create-story", + "assets": { + "skillName": "bmad-create-story", + "workflowCandidates": ["workflow.md", "workflow.yaml"], + "instructionsCandidates": ["discover-inputs.md"], + "checklistCandidates": ["checklist.md"], + "templateCandidates": ["template.md"], + "required": ["skill", "workflow"] + }, + "prompt": { + "templateFile": "data/prompts/create.md", + "interactionMode": "autonomous" + }, + "parse": { + "schemaFile": "data/parse/create.json" + }, + "success": { + "verifier": "create_story_artifact", + "config": { + "glob": "_bmad-output/implementation-artifacts/{story_prefix}-*.md", + "expectedMatches": 1 + } + } + }, + "dev": { + "label": "dev-story", + "assets": { + "skillName": "bmad-dev-story", + "workflowCandidates": ["workflow.md", "workflow.yaml"], + "instructionsCandidates": [], + "checklistCandidates": ["checklist.md"], + "templateCandidates": [], + "required": ["skill", "workflow"] + }, + "prompt": { + "templateFile": "data/prompts/dev.md", + "interactionMode": "autonomous" + }, + "parse": { + "schemaFile": "data/parse/dev.json" + }, + "success": { + "verifier": "session_exit" + } + }, + "auto": { + "label": "qa-generate-e2e-tests", + "assets": { + "skillName": "bmad-qa-generate-e2e-tests", + "workflowCandidates": ["workflow.md", "workflow.yaml"], + "instructionsCandidates": [], + "checklistCandidates": ["checklist.md"], + "templateCandidates": [], + "required": [] + }, + 
"prompt": { + "templateFile": "data/prompts/auto.md", + "interactionMode": "autonomous" + }, + "parse": { + "schemaFile": "data/parse/auto.json" + }, + "success": { + "verifier": "session_exit" + } + }, + "review": { + "label": "code-review", + "assets": { + "skillName": "bmad-story-automator-review", + "workflowCandidates": ["workflow.yaml", "workflow.md"], + "instructionsCandidates": ["instructions.xml"], + "checklistCandidates": ["checklist.md"], + "templateCandidates": [], + "required": ["skill", "workflow"] + }, + "prompt": { + "templateFile": "data/prompts/review.md", + "interactionMode": "autonomous", + "acceptExtraInstruction": true, + "defaultExtraInstruction": "auto-fix all issues without prompting" + }, + "parse": { + "schemaFile": "data/parse/review.json" + }, + "success": { + "verifier": "review_completion", + "contractFile": ".claude/skills/bmad-story-automator-review/contract.json" + } + }, + "retro": { + "label": "retrospective", + "assets": { + "skillName": "bmad-retrospective", + "workflowCandidates": ["workflow.md", "workflow.yaml"], + "instructionsCandidates": [], + "checklistCandidates": [], + "templateCandidates": [], + "required": ["skill", "workflow"] + }, + "prompt": { + "templateFile": "data/prompts/retro.md", + "interactionMode": "autonomous" + }, + "parse": { + "schemaFile": "data/parse/retro.json" + }, + "success": { + "verifier": "epic_complete" + } + } + } +} diff --git a/payload/.claude/skills/bmad-story-automator/data/parse/auto.json b/payload/.claude/skills/bmad-story-automator/data/parse/auto.json new file mode 100644 index 0000000..ba9a61e --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/parse/auto.json @@ -0,0 +1,10 @@ +{ + "requiredKeys": ["status", "tests_added", "coverage_improved", "summary", "next_action"], + "schema": { + "status": "SUCCESS|FAILURE|AMBIGUOUS", + "tests_added": "integer", + "coverage_improved": "true|false", + "summary": "brief description", + "next_action": "proceed|retry|escalate" + } 
+} diff --git a/payload/.claude/skills/bmad-story-automator/data/parse/create.json b/payload/.claude/skills/bmad-story-automator/data/parse/create.json new file mode 100644 index 0000000..9c420f6 --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/parse/create.json @@ -0,0 +1,10 @@ +{ + "requiredKeys": ["status", "story_created", "story_file", "summary", "next_action"], + "schema": { + "status": "SUCCESS|FAILURE|AMBIGUOUS", + "story_created": "true|false", + "story_file": "path or null", + "summary": "brief description", + "next_action": "proceed|retry|escalate" + } +} diff --git a/payload/.claude/skills/bmad-story-automator/data/parse/dev.json b/payload/.claude/skills/bmad-story-automator/data/parse/dev.json new file mode 100644 index 0000000..3d02f30 --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/parse/dev.json @@ -0,0 +1,10 @@ +{ + "requiredKeys": ["status", "tests_passed", "build_passed", "summary", "next_action"], + "schema": { + "status": "SUCCESS|FAILURE|AMBIGUOUS", + "tests_passed": "true|false", + "build_passed": "true|false", + "summary": "brief description", + "next_action": "proceed|retry|escalate" + } +} diff --git a/payload/.claude/skills/bmad-story-automator/data/parse/retro.json b/payload/.claude/skills/bmad-story-automator/data/parse/retro.json new file mode 100644 index 0000000..3b9ed5a --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/parse/retro.json @@ -0,0 +1,8 @@ +{ + "requiredKeys": ["status", "summary", "next_action"], + "schema": { + "status": "SUCCESS|FAILURE|AMBIGUOUS", + "summary": "brief description", + "next_action": "proceed|retry|escalate" + } +} diff --git a/payload/.claude/skills/bmad-story-automator/data/parse/review.json b/payload/.claude/skills/bmad-story-automator/data/parse/review.json new file mode 100644 index 0000000..cfa86cd --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/parse/review.json @@ -0,0 +1,15 @@ +{ + "requiredKeys": ["status", 
"issues_found", "all_fixed", "summary", "next_action"], + "schema": { + "status": "SUCCESS|FAILURE|AMBIGUOUS", + "issues_found": { + "critical": "integer", + "high": "integer", + "medium": "integer", + "low": "integer" + }, + "all_fixed": "true|false", + "summary": "brief description", + "next_action": "proceed|retry|escalate" + } +} diff --git a/payload/.claude/skills/bmad-story-automator/data/prompts/auto.md b/payload/.claude/skills/bmad-story-automator/data/prompts/auto.md new file mode 100644 index 0000000..28911dd --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/prompts/auto.md @@ -0,0 +1,4 @@ +Execute the BMAD {{label}} workflow for story {{story_id}}. + +{{skill_line}}{{workflow_line}}{{instructions_line}}{{checklist_line}}Story file: _bmad-output/implementation-artifacts/{{story_prefix}}-*.md +Auto-apply all discovered gaps in tests. diff --git a/payload/.claude/skills/bmad-story-automator/data/prompts/create.md b/payload/.claude/skills/bmad-story-automator/data/prompts/create.md new file mode 100644 index 0000000..cf9b745 --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/prompts/create.md @@ -0,0 +1,7 @@ +Execute the BMAD create-story workflow for story {{story_id}}. + +{{skill_line}}{{workflow_line}}{{instructions_line}}{{template_line}}{{checklist_line}}Create story file at: _bmad-output/implementation-artifacts/{{story_prefix}}-*.md + +Story ID: {{story_id}} + +#YOLO - Do NOT wait for user input. diff --git a/payload/.claude/skills/bmad-story-automator/data/prompts/dev.md b/payload/.claude/skills/bmad-story-automator/data/prompts/dev.md new file mode 100644 index 0000000..a9eaa27 --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/prompts/dev.md @@ -0,0 +1,4 @@ +Execute the BMAD dev-story workflow for story {{story_id}}. + +{{skill_line}}{{workflow_line}}{{instructions_line}}{{checklist_line}}Story file: _bmad-output/implementation-artifacts/{{story_prefix}}-*.md +Implement all tasks marked [ ]. 
Run tests. Update checkboxes. diff --git a/payload/.claude/skills/bmad-story-automator/data/prompts/retro.md b/payload/.claude/skills/bmad-story-automator/data/prompts/retro.md new file mode 100644 index 0000000..82724af --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/prompts/retro.md @@ -0,0 +1,33 @@ +Execute the BMAD retrospective workflow for epic {{story_id}}. + +{{skill_line}}{{workflow_line}}{{instructions_line}}Run the retrospective in #YOLO mode. +Assume the user will NOT provide any input to the retrospective directly. +For ALL prompts that expect user input, make reasonable autonomous decisions based on: +- Sprint status data +- Story files and their dev notes +- Previous retrospective if available +- Architecture and PRD documents + +Key behaviors: +- When asked to confirm epic number: auto-confirm based on sprint-status +- When asked for observations: synthesize from story analysis +- When asked for decisions: make data-driven choices +- When presented menus: select the most appropriate option based on context +- Skip all "WAIT for user" instructions - continue autonomously + +After the retrospective has run and created documents, you MUST: +1. Create a list of documentation that may need updates based on implementation learnings +2. For each doc in the list, verify whether updates are actually needed by: + - Reading the current doc content + - Comparing against actual implementation code + - Checking for discrepancies between doc and code +3. Update docs that have verified discrepancies +4. Discard proposed updates where code matches docs + +Focus on these doc types: +- Architecture decisions that changed during implementation +- API documentation that diverged from specs +- README files with outdated instructions +- Configuration documentation + +EVERYTHING SHOULD BE AUTOMATED. THIS IS NOT A SESSION WHERE YOU SHOULD BE EXPECTING USER INPUT. 
diff --git a/payload/.claude/skills/bmad-story-automator/data/prompts/review.md b/payload/.claude/skills/bmad-story-automator/data/prompts/review.md new file mode 100644 index 0000000..960d18f --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/prompts/review.md @@ -0,0 +1,4 @@ +Execute the story-automator review workflow for story {{story_id}}. + +{{skill_line}}{{workflow_line}}{{instructions_line}}{{checklist_line}}Story file: _bmad-output/implementation-artifacts/{{story_prefix}}-*.md +Review implementation, find issues, fix them automatically. {{extra_instruction}} diff --git a/payload/.claude/skills/bmad-story-automator/templates/state-document.md b/payload/.claude/skills/bmad-story-automator/templates/state-document.md index 80f17ea..50657d8 100644 --- a/payload/.claude/skills/bmad-story-automator/templates/state-document.md +++ b/payload/.claude/skills/bmad-story-automator/templates/state-document.md @@ -18,6 +18,10 @@ overrides: customInstructions: "" # User-provided instructions for orchestration agentsFile: "" # Deterministic per-story agent selections complexityFile: "" # Persisted story complexity data +policyVersion: 0 +policySnapshotFile: "" +policySnapshotHash: "" +legacyPolicy: false # Agent Configuration (v3.0.0) agentConfig: diff --git a/payload/.claude/skills/bmad-story-automator/workflow.md b/payload/.claude/skills/bmad-story-automator/workflow.md index baa9b2d..6d430ec 100644 --- a/payload/.claude/skills/bmad-story-automator/workflow.md +++ b/payload/.claude/skills/bmad-story-automator/workflow.md @@ -20,6 +20,8 @@ outputFolder: '{output_folder}/story-automator' **Meta-Context:** This orchestrator spawns and monitors other workflows (create-story, dev-story, automate, code-review, retrospective) in isolated T-Mux sessions. It tracks state for full resumability and escalates to the user only when autonomous decisions cannot be made. +**Runtime Policy:** Machine settings live in `data/orchestration-policy.json`. 
Prompt contracts, parse contracts, retry budgets, and verifier selection should follow the pinned policy snapshot written at orchestration start. + --- ## MULTI-EPIC SUPPORT diff --git a/scripts/smoke-test.sh b/scripts/smoke-test.sh index 9af6456..0e0f0fb 100755 --- a/scripts/smoke-test.sh +++ b/scripts/smoke-test.sh @@ -207,10 +207,16 @@ verify_common_install() { assert_file "$story_dir/workflow.md" assert_file "$story_dir/scripts/story-automator" assert_file "$story_dir/src/story_automator/cli.py" + assert_file "$story_dir/data/orchestration-policy.json" + assert_file "$story_dir/data/prompts/create.md" + assert_file "$story_dir/data/prompts/review.md" + assert_file "$story_dir/data/parse/create.json" + assert_file "$story_dir/data/parse/review.json" assert_file "$story_dir/pyproject.toml" assert_file "$story_dir/README.md" assert_file "$review_dir/SKILL.md" assert_file "$review_dir/instructions.xml" + assert_file "$review_dir/contract.json" assert_contains "name: bmad-story-automator" "$story_dir/SKILL.md" assert_contains "Follow the instructions in ./workflow.md." 
"$story_dir/SKILL.md" diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 0639746..11d2bb1 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -6,6 +6,7 @@ from pathlib import Path from story_automator.core.frontmatter import extract_last_action, find_frontmatter_value, find_frontmatter_value_case, parse_frontmatter +from story_automator.core.runtime_policy import crash_max_retries, load_effective_policy, review_max_cycles from story_automator.core.review_verify import verify_code_review_completion from story_automator.core.sprint import sprint_status_epic, sprint_status_get from story_automator.core.story_keys import normalize_story_key, sprint_status_file @@ -244,6 +245,10 @@ def _state_summary(args: list[str]) -> int: "currentStep": find_frontmatter_value(args[0], "currentStep"), "status": find_frontmatter_value(args[0], "status"), "lastUpdated": find_frontmatter_value(args[0], "lastUpdated"), + "policyVersion": find_frontmatter_value(args[0], "policyVersion"), + "policySnapshotFile": find_frontmatter_value(args[0], "policySnapshotFile"), + "policySnapshotHash": find_frontmatter_value(args[0], "policySnapshotHash"), + "legacyPolicy": find_frontmatter_value(args[0], "legacyPolicy"), "lastAction": extract_last_action(args[0]), } ) @@ -278,9 +283,10 @@ def _state_update(args: list[str]) -> int: def _escalate(args: list[str]) -> int: trigger = args[0] if args else "" context = args[1] if len(args) > 1 else "" + policy = load_effective_policy(get_project_root()) if trigger == "review-loop": cycles = _parse_context_int(context, "cycles") - limit = int(os.environ.get("MAX_REVIEW_CYCLES", "5")) + limit = review_max_cycles(policy) if cycles >= limit: print_json({"escalate": True, "reason": f"Review loop exceeded max cycles ({cycles}/{limit})"}) else: @@ -288,7 +294,7 @@ def _escalate(args: list[str]) -> int: return 0 if 
trigger == "session-crash": retries = _parse_context_int(context, "retries") - limit = int(os.environ.get("MAX_CRASH_RETRIES", "2")) + limit = crash_max_retries(policy) if retries >= limit: print_json({"escalate": True, "reason": f"Session crashed after {retries} retries"}) else: diff --git a/source/src/story_automator/commands/orchestrator_parse.py b/source/src/story_automator/commands/orchestrator_parse.py index 72f89e9..6a95226 100644 --- a/source/src/story_automator/commands/orchestrator_parse.py +++ b/source/src/story_automator/commands/orchestrator_parse.py @@ -2,6 +2,7 @@ import json +from story_automator.core.runtime_policy import load_effective_policy, step_contract from story_automator.core.utils import COMMAND_TIMEOUT_EXIT, extract_json_line, print_json, read_text, run_cmd, trim_lines @@ -22,7 +23,13 @@ def parse_output_action(args: list[str]) -> int: print('{"status":"error","reason":"output file not found or empty"}') return 1 lines = trim_lines(content)[:150] - prompt = _build_parse_prompt(step, "\n".join(lines)) + try: + contract = step_contract(load_effective_policy(), step) + parse_contract = _load_parse_contract(contract) + except (FileNotFoundError, json.JSONDecodeError, ValueError): + print_json({"status": "error", "reason": "parse_contract_invalid"}) + return 1 + prompt = _build_parse_prompt(contract, parse_contract, "\n".join(lines)) result = run_cmd( "claude", "-p", @@ -41,28 +48,36 @@ def parse_output_action(args: list[str]) -> int: print_json({"status": "error", "reason": "sub-agent returned invalid json"}) return 1 try: - json.loads(json_line) + payload = json.loads(json_line) except json.JSONDecodeError: print_json({"status": "error", "reason": "sub-agent returned invalid json"}) return 1 - print(json_line) + if not _has_required_keys(payload, parse_contract.get("requiredKeys") or []): + print_json({"status": "error", "reason": "sub-agent returned invalid json"}) + return 1 + print(json.dumps(payload, separators=(",", ":"))) return 0 -def 
_build_parse_prompt(step: str, content: str) -> str: - if step == "create": - schema = '{"status":"SUCCESS|FAILURE|AMBIGUOUS","story_created":true/false,"story_file":"path or null","summary":"brief description","next_action":"proceed|retry|escalate"}' - label = "create-story" - elif step == "dev": - schema = '{"status":"SUCCESS|FAILURE|AMBIGUOUS","tests_passed":true/false,"build_passed":true/false,"summary":"brief description","next_action":"proceed|retry|escalate"}' - label = "dev-story" - elif step == "auto": - schema = '{"status":"SUCCESS|FAILURE|AMBIGUOUS","tests_added":N,"coverage_improved":true/false,"summary":"brief description","next_action":"proceed|retry|escalate"}' - label = "automate-tests" - elif step == "review": - schema = '{"status":"SUCCESS|FAILURE|AMBIGUOUS","issues_found":{"critical":N,"high":N,"medium":N,"low":N},"all_fixed":true/false,"summary":"brief description","next_action":"proceed|retry|escalate"}' - label = "code-review" - else: - schema = '{"status":"SUCCESS|FAILURE|AMBIGUOUS","summary":"brief description","next_action":"proceed|retry|escalate"}' - label = "session" +def _load_parse_contract(contract: dict[str, object]) -> dict[str, object]: + parse = contract.get("parse") or {} + payload = json.loads(read_text(str(parse.get("schemaPath") or ""))) + if not isinstance(payload, dict): + raise ValueError("invalid parse schema") + if not isinstance(payload.get("requiredKeys"), list): + raise ValueError("invalid parse schema") + if not isinstance(payload.get("schema"), dict): + raise ValueError("invalid parse schema") + return payload + + +def _build_parse_prompt(contract: dict[str, object], parse_contract: dict[str, object], content: str) -> str: + label = str(contract.get("label") or "session") + schema = json.dumps(parse_contract.get("schema") or {}, separators=(",", ":")) return f"Analyze this {label} session output. 
Return JSON only:\n{schema}\n\nSession output:\n---\n{content}\n---" + + +def _has_required_keys(payload: object, required_keys: list[object]) -> bool: + if not isinstance(payload, dict): + return False + return all(isinstance(key, str) and key in payload for key in required_keys) diff --git a/source/src/story_automator/commands/state.py b/source/src/story_automator/commands/state.py index feb17c9..ede9786 100644 --- a/source/src/story_automator/commands/state.py +++ b/source/src/story_automator/commands/state.py @@ -6,7 +6,8 @@ from typing import Any from ..core.frontmatter import extract_frontmatter, parse_simple_frontmatter -from ..core.utils import count_matches, ensure_dir, file_exists, now_utc, now_utc_z, read_text, write_json +from ..core.runtime_policy import PolicyError, load_policy_for_state, snapshot_effective_policy +from ..core.utils import count_matches, ensure_dir, file_exists, get_project_root, now_utc, now_utc_z, read_text, write_json def cmd_build_state_doc(args: list[str]) -> int: @@ -42,6 +43,7 @@ def cmd_build_state_doc(args: list[str]) -> int: epic = str(config.get("epic") or "epic") safe_epic = re.sub(r"[^a-zA-Z0-9]+", "-", epic).strip("-") or "epic" output_path = Path(output_folder) / f"orchestration-{safe_epic}-{stamp}.md" + snapshot = snapshot_effective_policy(get_project_root()) text = read_text(template) replacements: dict[str, Any] = { "epic": config.get("epic", ""), @@ -56,6 +58,10 @@ def cmd_build_state_doc(args: list[str]) -> int: "aiCommand": config.get("aiCommand", ""), "agentsFile": config.get("agentsFile", ""), "complexityFile": config.get("complexityFile", ""), + "policyVersion": snapshot["policyVersion"], + "policySnapshotFile": snapshot["policySnapshotFile"], + "policySnapshotHash": snapshot["policySnapshotHash"], + "legacyPolicy": False, } overrides = config.get("overrides", {}) if isinstance(config.get("overrides"), dict) else {} text = re.sub( @@ -228,5 +234,9 @@ def required(key: str, validator: Any = None) -> None: 
required("status", lambda value: isinstance(value, str) and value in allowed) required("lastUpdated", lambda value: isinstance(value, str) and re.search(r"\d{4}-\d{2}-\d{2}T", value)) required("aiCommand") + try: + load_policy_for_state(state) + except PolicyError as exc: + issues.append(str(exc)) write_json({"ok": True, "structure": "issues" if issues else "ok", "issues": issues}) return 0 diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py index 0daa637..00e24c8 100644 --- a/source/src/story_automator/commands/tmux.py +++ b/source/src/story_automator/commands/tmux.py @@ -6,6 +6,7 @@ import time from pathlib import Path +from story_automator.core.runtime_policy import load_effective_policy, step_contract from story_automator.core.review_verify import verify_code_review_completion from story_automator.core.utils import ( atomic_write, @@ -21,13 +22,6 @@ read_text, run_cmd, ) -from story_automator.core.workflow_paths import ( - create_story_workflow_paths, - dev_story_workflow_paths, - retrospective_workflow_paths, - review_workflow_paths, - testarch_automate_workflow_paths, -) def cmd_tmux_wrapper(args: list[str]) -> int: @@ -191,12 +185,10 @@ def _build_cmd(args: list[str]) -> int: agent = agent or agent_type() story_prefix = story_id.replace(".", "-") root = get_project_root() - create_paths = create_story_workflow_paths(root) - dev_paths = dev_story_workflow_paths(root) - auto_paths = testarch_automate_workflow_paths(root) - review_paths = review_workflow_paths(root) - retro_paths = retrospective_workflow_paths(root) - auto_label = _automate_workflow_label(auto_paths.workflow) + if step not in {"create", "dev", "auto", "review", "retro"}: + print(f"Unknown step type: {step}", file=__import__("sys").stderr) + return 1 + policy = load_effective_policy(root) ai_command = os.environ.get("AI_COMMAND") if ai_command and not os.environ.get("AI_AGENT"): cli = ai_command @@ -204,92 +196,7 @@ def _build_cmd(args: 
list[str]) -> int: cli = agent_cli(agent) else: cli = "codex exec" - if step not in {"create", "dev", "auto", "review", "retro"}: - print(f"Unknown step type: {step}", file=__import__("sys").stderr) - return 1 - create_extra = "" - if create_paths.instructions: - create_extra += f"Then read: {create_paths.instructions}\n" - if create_paths.template: - create_extra += f"Use template: {create_paths.template}\n" - if create_paths.checklist: - create_extra += f"Validate with: {create_paths.checklist}\n" - - dev_extra = "" - if dev_paths.instructions: - dev_extra += f"Then read: {dev_paths.instructions}\n" - if dev_paths.checklist: - dev_extra += f"Validate with: {dev_paths.checklist}\n" - - auto_extra = "" - if auto_paths.skill: - auto_extra += f"READ this skill first: {auto_paths.skill}\n" - if auto_paths.workflow: - auto_extra += f"READ this workflow file next: {auto_paths.workflow}\n" - if auto_paths.instructions: - auto_extra += f"Then read: {auto_paths.instructions}\n" - if auto_paths.checklist: - auto_extra += f"Validate with: {auto_paths.checklist}\n" - - review_extra = "" - if review_paths.instructions: - review_extra += f"Then read: {review_paths.instructions}\n" - if review_paths.checklist: - review_extra += f"Validate with: {review_paths.checklist}\n" - - retro_extra = "" - if retro_paths.instructions: - retro_extra += f"Then read: {retro_paths.instructions}\n" - - prompt = { - "create": ( - ( - f"Execute the BMAD create-story workflow for story {story_id}.\n\n" - f"READ this skill first: {create_paths.skill}\n" - f"READ this workflow file next: {create_paths.workflow}\n" - ) - + create_extra - + ( - f"Create story file at: _bmad-output/implementation-artifacts/{story_prefix}-*.md\n\n" - f"Story ID: {story_id}\n\n#YOLO - Do NOT wait for user input." 
- ) - ), - "dev": ( - ( - f"Execute the BMAD dev-story workflow for story {story_id}.\n\n" - f"READ this skill first: {dev_paths.skill}\n" - f"READ this workflow file next: {dev_paths.workflow}\n" - ) - + dev_extra - + ( - f"Story file: _bmad-output/implementation-artifacts/{story_prefix}-*.md\n" - "Implement all tasks marked [ ]. Run tests. Update checkboxes." - ) - ), - "auto": ( - ( - f"Execute the BMAD {auto_label} workflow for story {story_id}.\n\n" - ) - + auto_extra - + ( - f"Story file: _bmad-output/implementation-artifacts/{story_prefix}-*.md\n" - "Auto-apply all discovered gaps in tests." - ) - ), - "review": ( - ( - f"Execute the story-automator review workflow for story {story_id}.\n\n" - f"READ this skill first: {review_paths.skill}\n" - f"READ this workflow file next: {review_paths.workflow}\n" - ) - + review_extra - + ( - f"Story file: _bmad-output/implementation-artifacts/{story_prefix}-*.md\n" - f"Review implementation, find issues, fix them automatically. {extra or 'auto-fix all issues without prompting'}" - ) - ), - "retro": _build_retro_prompt(story_id, retro_paths, retro_extra), - }[step] + prompt = _render_step_prompt(step_contract(policy, step), story_id, story_prefix, extra) escaped = prompt.replace("\\", "\\\\").replace('"', '\\"') if agent == "codex" and not ai_command: codex_home = f"/tmp/sa-codex-home-{project_hash(root)}" @@ -318,48 +225,28 @@ def skill_prefix(agent: str) -> str: return "none" if agent == "codex" else "bmad-" -def _build_retro_prompt(epic_number: str, retro_paths, retro_extra: str) -> str: - return ( - ( - f"Execute the BMAD retrospective workflow for epic {epic_number}.\n\n" - f"READ this skill first: {retro_paths.skill}\n" - f"READ this workflow file next: {retro_paths.workflow}\n" - ) - + retro_extra - + ( - "Run the retrospective in #YOLO mode.\n" - "Assume the user will NOT provide any input to the retrospective directly.\n" - "For ALL prompts that expect user input, make reasonable autonomous decisions based 
on:\n" - "- Sprint status data\n" - "- Story files and their dev notes\n" - "- Previous retrospective if available\n" - "- Architecture and PRD documents\n\n" - "Key behaviors:\n" - "- When asked to confirm epic number: auto-confirm based on sprint-status\n" - "- When asked for observations: synthesize from story analysis\n" - "- When asked for decisions: make data-driven choices\n" - "- When presented menus: select the most appropriate option based on context\n" - '- Skip all "WAIT for user" instructions - continue autonomously\n\n' - "After the retrospective has run and created documents, you MUST:\n" - "1. Create a list of documentation that may need updates based on implementation learnings\n" - "2. For each doc in the list, verify whether updates are actually needed by:\n" - " - Reading the current doc content\n" - " - Comparing against actual implementation code\n" - " - Checking for discrepancies between doc and code\n" - "3. Update docs that have verified discrepancies\n" - "4. Discard proposed updates where code matches docs\n\n" - "Focus on these doc types:\n" - "- Architecture decisions that changed during implementation\n" - "- API documentation that diverged from specs\n" - "- README files with outdated instructions\n" - "- Configuration documentation\n\n" - "EVERYTHING SHOULD BE AUTOMATED. THIS IS NOT A SESSION WHERE YOU SHOULD BE EXPECTING USER INPUT." 
- ) - ) +def _render_step_prompt(contract: dict[str, object], story_id: str, story_prefix: str, extra_instruction: str) -> str: + prompt_cfg = contract.get("prompt") or {} + assets = (contract.get("assets") or {}).get("files") or {} + template = read_text(str(prompt_cfg.get("templatePath") or "")) + replacements = { + "{{story_id}}": story_id, + "{{story_prefix}}": story_prefix, + "{{label}}": str(contract.get("label") or ""), + "{{skill_line}}": _prompt_line("READ this skill first", str(assets.get("skill") or "")), + "{{workflow_line}}": _prompt_line("READ this workflow file next", str(assets.get("workflow") or "")), + "{{instructions_line}}": _prompt_line("Then read", str(assets.get("instructions") or "")), + "{{checklist_line}}": _prompt_line("Validate with", str(assets.get("checklist") or "")), + "{{template_line}}": _prompt_line("Use template", str(assets.get("template") or "")), + "{{extra_instruction}}": extra_instruction.strip() or str(prompt_cfg.get("defaultExtraInstruction") or ""), + } + for key, value in replacements.items(): + template = template.replace(key, value) + return template -def _automate_workflow_label(workflow_path: str) -> str: - return "qa-generate-e2e-tests" if "qa-generate-e2e-tests" in workflow_path else "qa-generate-e2e-tests" +def _prompt_line(prefix: str, value: str) -> str: + return f"{prefix}: {value}\n" if value else "" def generate_session_name(step: str, epic: str, story_id: str, cycle: str = "") -> str: diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py new file mode 100644 index 0000000..ca472a0 --- /dev/null +++ b/source/src/story_automator/core/runtime_policy.py @@ -0,0 +1,265 @@ +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import Any + +from .frontmatter import parse_simple_frontmatter +from .utils import ensure_dir, get_project_root, iso_now, md5_hex8, read_text, write_atomic + +VALID_TOP_LEVEL_KEYS = 
{"version", "snapshot", "runtime", "workflow", "steps"} +VALID_STEP_NAMES = {"create", "dev", "auto", "review", "retro"} +VALID_VERIFIERS = {"create_story_artifact", "session_exit", "review_completion", "epic_complete"} +VALID_ASSET_NAMES = {"skill", "workflow", "instructions", "checklist", "template"} + + +class PolicyError(ValueError): + pass + + +def load_effective_policy(project_root: str | None = None) -> dict[str, Any]: + root = Path(project_root or get_project_root()).resolve() + bundle_root = bundled_skill_root(root) + bundled = _read_json(bundle_root / "data" / "orchestration-policy.json") + override_path = root / "_bmad" / "bmm" / "story-automator.policy.json" + override = _read_json(override_path) if override_path.is_file() else {} + policy = _deep_merge(bundled, override) + _apply_legacy_env(policy) + _validate_policy_shape(policy) + _resolve_policy_paths(policy, project_root=root, bundle_root=bundle_root) + return policy + + +def snapshot_effective_policy(project_root: str | None = None) -> dict[str, Any]: + root = Path(project_root or get_project_root()).resolve() + policy = load_effective_policy(str(root)) + snapshot_dir = root / _snapshot_relative_dir(policy) + ensure_dir(snapshot_dir) + stable_json = _stable_policy_json(policy) + snapshot_hash = md5_hex8(stable_json) + stamp = iso_now().replace("-", "").replace(":", "").replace("T", "-").replace("Z", "") + snapshot_path = snapshot_dir / f"{stamp}-{snapshot_hash}.json" + write_atomic(snapshot_path, stable_json) + return { + "policy": policy, + "policyVersion": policy.get("version", 1), + "policySnapshotHash": snapshot_hash, + "policySnapshotFile": _display_path(snapshot_path, root), + } + + +def load_policy_snapshot( + snapshot_file: str, + *, + project_root: str | None = None, + expected_hash: str = "", +) -> dict[str, Any]: + root = Path(project_root or get_project_root()).resolve() + path = Path(snapshot_file) + if not path.is_absolute(): + path = root / path + if not path.is_file(): + raise 
PolicyError(f"policy snapshot missing: {path}") + raw = read_text(path) + actual_hash = md5_hex8(raw) + if expected_hash and actual_hash != expected_hash: + raise PolicyError(f"policy snapshot hash mismatch: expected {expected_hash}, got {actual_hash}") + policy = json.loads(raw) + _validate_policy_shape(policy) + return policy + + +def load_policy_for_state(state_file: str | Path, project_root: str | None = None) -> dict[str, Any]: + root = Path(project_root or get_project_root()).resolve() + fields = parse_simple_frontmatter(read_text(state_file)) + snapshot_file = str(fields.get("policySnapshotFile") or "").strip() + snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() + if snapshot_file or snapshot_hash: + if not snapshot_file or not snapshot_hash: + raise PolicyError("state policy metadata incomplete") + return load_policy_snapshot(snapshot_file, project_root=str(root), expected_hash=snapshot_hash) + return load_effective_policy(str(root)) + + +def step_contract(policy: dict[str, Any], step: str) -> dict[str, Any]: + contract = (policy.get("steps") or {}).get(step) + if not isinstance(contract, dict): + raise PolicyError(f"unknown step: {step}") + return contract + + +def review_max_cycles(policy: dict[str, Any]) -> int: + repeat = ((policy.get("workflow") or {}).get("repeat") or {}).get("review") or {} + return int(repeat.get("maxCycles", 5)) + + +def crash_max_retries(policy: dict[str, Any]) -> int: + crash = ((policy.get("workflow") or {}).get("crash")) or {} + return int(crash.get("maxRetries", 2)) + + +def bundled_skill_root(project_root: str | Path | None = None) -> Path: + root = Path(project_root or get_project_root()).resolve() + installed = root / ".claude" / "skills" / "bmad-story-automator" + if (installed / "data" / "orchestration-policy.json").is_file(): + return installed + for parent in Path(__file__).resolve().parents: + candidate = parent / "payload" / ".claude" / "skills" / "bmad-story-automator" + if (candidate / "data" / 
"orchestration-policy.json").is_file(): + return candidate + raise PolicyError("bundled policy not found") + + +def _read_json(path: str | Path) -> dict[str, Any]: + payload = json.loads(read_text(path)) + if not isinstance(payload, dict): + raise PolicyError(f"policy json must be an object: {path}") + return payload + + +def _deep_merge(base: Any, override: Any) -> Any: + if isinstance(base, dict) and isinstance(override, dict): + merged = dict(base) + for key, value in override.items(): + merged[key] = _deep_merge(merged[key], value) if key in merged else value + return merged + if isinstance(override, list): + return list(override) + return override + + +def _apply_legacy_env(policy: dict[str, Any]) -> None: + review_cycles = os.environ.get("MAX_REVIEW_CYCLES") + crash_retries = os.environ.get("MAX_CRASH_RETRIES") + if review_cycles: + policy.setdefault("workflow", {}).setdefault("repeat", {}).setdefault("review", {})["maxCycles"] = int(review_cycles) + if crash_retries: + policy.setdefault("workflow", {}).setdefault("crash", {})["maxRetries"] = int(crash_retries) + + +def _validate_policy_shape(policy: dict[str, Any]) -> None: + unknown_keys = sorted(set(policy) - VALID_TOP_LEVEL_KEYS) + if unknown_keys: + raise PolicyError(f"unknown top-level policy keys: {', '.join(unknown_keys)}") + steps = policy.get("steps") + if not isinstance(steps, dict): + raise PolicyError("steps must be an object") + unknown_steps = sorted(set(steps) - VALID_STEP_NAMES) + if unknown_steps: + raise PolicyError(f"unknown step names: {', '.join(unknown_steps)}") + sequence = ((policy.get("workflow") or {}).get("sequence")) or [] + if not isinstance(sequence, list) or not all(isinstance(item, str) for item in sequence): + raise PolicyError("workflow.sequence must be a string array") + for step in sequence: + if step not in steps: + raise PolicyError(f"workflow.sequence references missing step: {step}") + for name, contract in steps.items(): + if not isinstance(contract, dict): + raise 
PolicyError(f"step contract must be an object: {name}") + verifier = str(((contract.get("success") or {}).get("verifier")) or "") + if verifier not in VALID_VERIFIERS: + raise PolicyError(f"invalid verifier for {name}: {verifier}") + required = ((contract.get("assets") or {}).get("required")) or [] + if not isinstance(required, list) or any(item not in VALID_ASSET_NAMES for item in required): + raise PolicyError(f"invalid required assets for {name}") + + +def _resolve_policy_paths(policy: dict[str, Any], *, project_root: Path, bundle_root: Path) -> None: + for name, contract in (policy.get("steps") or {}).items(): + assets = contract.setdefault("assets", {}) + assets["files"] = _resolve_step_assets(name, assets, project_root) + prompt = contract.setdefault("prompt", {}) + template_file = str(prompt.get("templateFile") or "").strip() + if not template_file: + raise PolicyError(f"missing prompt template for {name}") + prompt["templatePath"] = _resolve_data_path(template_file, project_root=project_root, bundle_root=bundle_root) + parse = contract.setdefault("parse", {}) + schema_file = str(parse.get("schemaFile") or "").strip() + if not schema_file: + raise PolicyError(f"missing parse schema for {name}") + parse["schemaPath"] = _resolve_data_path(schema_file, project_root=project_root, bundle_root=bundle_root) + success = contract.setdefault("success", {}) + contract_file = str(success.get("contractFile") or "").strip() + if contract_file: + success["contractPath"] = _resolve_data_path(contract_file, project_root=project_root, bundle_root=bundle_root) + + +def _resolve_step_assets(step: str, assets: dict[str, Any], project_root: Path) -> dict[str, str]: + skill_name = str(assets.get("skillName") or "").strip() + if not skill_name: + raise PolicyError(f"missing skillName for {step}") + skill_dir = project_root / ".claude" / "skills" / skill_name + required = set(assets.get("required") or []) + files = { + "skill": _resolve_required_file(skill_dir / "SKILL.md", 
project_root, required, "skill", step), + "workflow": _resolve_candidate_file(skill_dir, assets.get("workflowCandidates"), project_root, required, "workflow", step), + "instructions": _resolve_candidate_file(skill_dir, assets.get("instructionsCandidates"), project_root, required, "instructions", step), + "checklist": _resolve_candidate_file(skill_dir, assets.get("checklistCandidates"), project_root, required, "checklist", step), + "template": _resolve_candidate_file(skill_dir, assets.get("templateCandidates"), project_root, required, "template", step), + } + if ("skill" not in required and "workflow" not in required) and bool(files["skill"]) != bool(files["workflow"]): + files["skill"] = "" + files["workflow"] = "" + return files + + +def _resolve_required_file(path: Path, project_root: Path, required: set[str], asset: str, step: str) -> str: + if path.is_file(): + return _display_path(path, project_root) + if asset in required: + raise PolicyError(f"missing required {asset} asset for {step}: {path}") + return "" + + +def _resolve_candidate_file( + skill_dir: Path, + candidates: Any, + project_root: Path, + required: set[str], + asset: str, + step: str, +) -> str: + if not isinstance(candidates, list): + candidates = [] + for name in candidates: + if not isinstance(name, str) or not name: + continue + path = skill_dir / name + if path.is_file(): + return _display_path(path, project_root) + if asset in required: + searched = ", ".join(str(skill_dir / str(name)) for name in candidates if isinstance(name, str) and name) + raise PolicyError(f"missing required {asset} asset for {step}: {searched}") + return "" + + +def _resolve_data_path(path_value: str, *, project_root: Path, bundle_root: Path) -> str: + raw = Path(path_value) + if raw.is_absolute(): + if not raw.is_file(): + raise PolicyError(f"policy data file missing: {raw}") + return str(raw) + for base in (bundle_root, project_root): + candidate = (base / raw).resolve() + if candidate.is_file(): + return 
str(candidate) + raise PolicyError(f"policy data file missing: {path_value}") + + +def _snapshot_relative_dir(policy: dict[str, Any]) -> str: + relative_dir = str((policy.get("snapshot") or {}).get("relativeDir") or "").strip() + if not relative_dir: + raise PolicyError("snapshot.relativeDir missing") + return relative_dir + + +def _stable_policy_json(policy: dict[str, Any]) -> str: + return json.dumps(policy, indent=2, sort_keys=True) + "\n" + + +def _display_path(path: Path, project_root: Path) -> str: + try: + return str(path.resolve().relative_to(project_root.resolve())) + except ValueError: + return str(path.resolve()) diff --git a/source/src/story_automator/core/workflow_paths.py b/source/src/story_automator/core/workflow_paths.py index 13bb279..3fa47c6 100644 --- a/source/src/story_automator/core/workflow_paths.py +++ b/source/src/story_automator/core/workflow_paths.py @@ -1,9 +1,8 @@ from __future__ import annotations from dataclasses import dataclass -from pathlib import Path -from story_automator.core.utils import get_project_root +from story_automator.core.runtime_policy import load_effective_policy, step_contract @dataclass(frozen=True) @@ -15,123 +14,32 @@ class WorkflowPaths: template: str = "" -def _first_existing_relative_path(*candidates: str, project_root: str | None = None) -> str: - root = Path(project_root or get_project_root()) - for rel in candidates: - if rel and (root / rel).exists(): - return rel - for rel in candidates: - if rel: - return rel - return "" - - -def _existing_relative_path_or_empty(*candidates: str, project_root: str | None = None) -> str: - root = Path(project_root or get_project_root()) - for rel in candidates: - if rel and (root / rel).exists(): - return rel - return "" - - -def _skill_file(skill_name: str) -> str: - return f".claude/skills/{skill_name}/SKILL.md" - - -def _workflow_file(skill_name: str, *names: str, project_root: str | None = None) -> str: - return _first_existing_relative_path( - 
*(f".claude/skills/{skill_name}/{name}" for name in names), - project_root=project_root, - ) - - -def _optional_file(skill_name: str, *names: str, project_root: str | None = None) -> str: - return _existing_relative_path_or_empty( - *(f".claude/skills/{skill_name}/{name}" for name in names), - project_root=project_root, - ) - - -def _paired_optional_workflow_paths( - skill_name: str, - *, - workflow_names: tuple[str, ...], - checklist_names: tuple[str, ...] = (), - project_root: str | None = None, -) -> WorkflowPaths: - skill = _existing_relative_path_or_empty(_skill_file(skill_name), project_root=project_root) - workflow = _existing_relative_path_or_empty( - *(f".claude/skills/{skill_name}/{name}" for name in workflow_names), - project_root=project_root, - ) - if not skill or not workflow: - return WorkflowPaths() +def _paths_for_step(step: str, project_root: str | None = None) -> WorkflowPaths: + files = (step_contract(load_effective_policy(project_root), step).get("assets") or {}).get("files") or {} return WorkflowPaths( - skill=skill, - workflow=workflow, - checklist=_existing_relative_path_or_empty( - *(f".claude/skills/{skill_name}/{name}" for name in checklist_names), - project_root=project_root, - ), + skill=str(files.get("skill") or ""), + workflow=str(files.get("workflow") or ""), + instructions=str(files.get("instructions") or ""), + checklist=str(files.get("checklist") or ""), + template=str(files.get("template") or ""), ) def create_story_workflow_paths(project_root: str | None = None) -> WorkflowPaths: - return WorkflowPaths( - skill=_first_existing_relative_path(_skill_file("bmad-create-story"), project_root=project_root), - workflow=_workflow_file("bmad-create-story", "workflow.md", "workflow.yaml", project_root=project_root), - instructions=_optional_file("bmad-create-story", "discover-inputs.md", project_root=project_root), - checklist=_optional_file("bmad-create-story", "checklist.md", project_root=project_root), - 
template=_optional_file("bmad-create-story", "template.md", project_root=project_root), - ) + return _paths_for_step("create", project_root) def dev_story_workflow_paths(project_root: str | None = None) -> WorkflowPaths: - return WorkflowPaths( - skill=_first_existing_relative_path(_skill_file("bmad-dev-story"), project_root=project_root), - workflow=_workflow_file("bmad-dev-story", "workflow.md", "workflow.yaml", project_root=project_root), - instructions="", - checklist=_optional_file("bmad-dev-story", "checklist.md", project_root=project_root), - ) + return _paths_for_step("dev", project_root) def retrospective_workflow_paths(project_root: str | None = None) -> WorkflowPaths: - return WorkflowPaths( - skill=_first_existing_relative_path(_skill_file("bmad-retrospective"), project_root=project_root), - workflow=_workflow_file("bmad-retrospective", "workflow.md", "workflow.yaml", project_root=project_root), - instructions="", - ) + return _paths_for_step("retro", project_root) def review_workflow_paths(project_root: str | None = None) -> WorkflowPaths: - return WorkflowPaths( - skill=_first_existing_relative_path( - _skill_file("bmad-story-automator-review"), - project_root=project_root, - ), - workflow=_workflow_file( - "bmad-story-automator-review", - "workflow.yaml", - "workflow.md", - project_root=project_root, - ), - instructions=_optional_file( - "bmad-story-automator-review", - "instructions.xml", - project_root=project_root, - ), - checklist=_optional_file( - "bmad-story-automator-review", - "checklist.md", - project_root=project_root, - ), - ) + return _paths_for_step("review", project_root) def testarch_automate_workflow_paths(project_root: str | None = None) -> WorkflowPaths: - return _paired_optional_workflow_paths( - "bmad-qa-generate-e2e-tests", - workflow_names=("workflow.md", "workflow.yaml"), - checklist_names=("checklist.md",), - project_root=project_root, - ) + return _paths_for_step("auto", project_root) diff --git 
a/source/tests/test_orchestrator_parse.py b/source/tests/test_orchestrator_parse.py new file mode 100644 index 0000000..0d5c1e3 --- /dev/null +++ b/source/tests/test_orchestrator_parse.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +import io +import json +import shutil +import tempfile +import unittest +from contextlib import redirect_stdout +from pathlib import Path +from unittest.mock import patch + +from story_automator.commands.orchestrator_parse import parse_output_action +from story_automator.core.utils import CommandResult + + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +class OrchestratorParseTests(unittest.TestCase): + def setUp(self) -> None: + self.tmp = tempfile.TemporaryDirectory() + self.project_root = Path(self.tmp.name) + self._install_bundle() + self._install_required_skills() + self.output_file = self.project_root / "session.txt" + self.output_file.write_text("session output\n", encoding="utf-8") + + def tearDown(self) -> None: + self.tmp.cleanup() + + def test_parse_schema_loads_from_step_contract(self) -> None: + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), patch( + "story_automator.commands.orchestrator_parse.run_cmd", + return_value=CommandResult('{"status":"SUCCESS","story_created":true,"story_file":"x","summary":"ok","next_action":"proceed"}', 0), + ), redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "create"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["story_created"]) + + def test_invalid_schema_file_rejected(self) -> None: + override_dir = self.project_root / "_bmad" / "bmm" + override_dir.mkdir(parents=True) + (override_dir / "story-automator.policy.json").write_text( + json.dumps({"steps": {"create": {"parse": {"schemaFile": "missing.json"}}}}), + encoding="utf-8", + ) + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), 
redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "create"]) + self.assertEqual(code, 1) + self.assertEqual(json.loads(stdout.getvalue())["reason"], "parse_contract_invalid") + + def test_invalid_child_json_rejected(self) -> None: + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), patch( + "story_automator.commands.orchestrator_parse.run_cmd", + return_value=CommandResult("not json", 0), + ), redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "create"]) + self.assertEqual(code, 1) + self.assertEqual(json.loads(stdout.getvalue())["reason"], "sub-agent returned invalid json") + + def test_output_shape_remains_compatible(self) -> None: + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), patch( + "story_automator.commands.orchestrator_parse.run_cmd", + return_value=CommandResult('{"status":"SUCCESS","issues_found":{"critical":0,"high":0,"medium":1,"low":0},"all_fixed":true,"summary":"ok","next_action":"proceed"}', 0), + ), redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "review"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertIn("issues_found", payload) + self.assertIn("all_fixed", payload) + + def _install_bundle(self) -> None: + source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" + source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" + target_root = self.project_root / ".claude" / "skills" + target_root.mkdir(parents=True, exist_ok=True) + shutil.copytree(source_skill, target_root / "bmad-story-automator") + shutil.copytree(source_review, target_root / "bmad-story-automator-review") + + def _install_required_skills(self) -> None: + for name in ("bmad-create-story", "bmad-dev-story", "bmad-retrospective", "bmad-qa-generate-e2e-tests"): + skill_dir = self.project_root / ".claude" / 
"skills" / name + skill_dir.mkdir(parents=True, exist_ok=True) + (skill_dir / "SKILL.md").write_text(f"# {name}\n", encoding="utf-8") + (skill_dir / "workflow.md").write_text(f"# {name}\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-create-story" / "discover-inputs.md").write_text("# discover\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-create-story" / "checklist.md").write_text("# checklist\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-create-story" / "template.md").write_text("# template\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-dev-story" / "checklist.md").write_text("# checklist\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-qa-generate-e2e-tests" / "checklist.md").write_text("# checklist\n", encoding="utf-8") + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/test_runtime_policy.py b/source/tests/test_runtime_policy.py new file mode 100644 index 0000000..8b64f3b --- /dev/null +++ b/source/tests/test_runtime_policy.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +import json +import shutil +import tempfile +import unittest +from pathlib import Path + +from story_automator.core.runtime_policy import PolicyError, load_effective_policy, snapshot_effective_policy + + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +class RuntimePolicyTests(unittest.TestCase): + def setUp(self) -> None: + self.tmp = tempfile.TemporaryDirectory() + self.project_root = Path(self.tmp.name) + self._install_bundle() + self._install_required_skills() + + def tearDown(self) -> None: + self.tmp.cleanup() + + def test_bundled_default_loads(self) -> None: + policy = load_effective_policy(str(self.project_root)) + self.assertEqual(policy["version"], 1) + self.assertEqual(policy["steps"]["review"]["success"]["verifier"], "review_completion") + + def test_project_override_deep_merges_and_arrays_replace(self) -> 
None: + self._write_override( + { + "workflow": {"sequence": ["create", "review"]}, + "steps": {"review": {"prompt": {"defaultExtraInstruction": "fix critical issues only"}}}, + } + ) + policy = load_effective_policy(str(self.project_root)) + self.assertEqual(policy["workflow"]["sequence"], ["create", "review"]) + self.assertEqual(policy["steps"]["review"]["prompt"]["defaultExtraInstruction"], "fix critical issues only") + + def test_invalid_step_name_rejected(self) -> None: + self._write_override({"steps": {"ship": {"success": {"verifier": "session_exit"}}}}) + with self.assertRaises(PolicyError): + load_effective_policy(str(self.project_root)) + + def test_invalid_verifier_name_rejected(self) -> None: + self._write_override({"steps": {"review": {"success": {"verifier": "nope"}}}}) + with self.assertRaises(PolicyError): + load_effective_policy(str(self.project_root)) + + def test_required_asset_missing_fails(self) -> None: + shutil.rmtree(self.project_root / ".claude" / "skills" / "bmad-create-story") + with self.assertRaises(PolicyError): + load_effective_policy(str(self.project_root)) + + def test_snapshot_hash_stable(self) -> None: + first = snapshot_effective_policy(str(self.project_root)) + second = snapshot_effective_policy(str(self.project_root)) + self.assertEqual(first["policySnapshotHash"], second["policySnapshotHash"]) + + def _install_bundle(self) -> None: + source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" + source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" + target_root = self.project_root / ".claude" / "skills" + target_root.mkdir(parents=True, exist_ok=True) + shutil.copytree(source_skill, target_root / "bmad-story-automator") + shutil.copytree(source_review, target_root / "bmad-story-automator-review") + + def _install_required_skills(self) -> None: + self._make_skill( + "bmad-create-story", + extras={"discover-inputs.md": "# discover\n", "checklist.md": "# checklist\n", 
"template.md": "# template\n"}, + ) + self._make_skill("bmad-dev-story", extras={"checklist.md": "# checklist\n"}) + self._make_skill("bmad-retrospective") + self._make_skill("bmad-qa-generate-e2e-tests", extras={"checklist.md": "# checklist\n"}) + + def _make_skill(self, name: str, *, extras: dict[str, str] | None = None) -> None: + skill_dir = self.project_root / ".claude" / "skills" / name + skill_dir.mkdir(parents=True, exist_ok=True) + (skill_dir / "SKILL.md").write_text(f"# {name}\n", encoding="utf-8") + (skill_dir / "workflow.md").write_text(f"# {name}\n", encoding="utf-8") + for rel, content in (extras or {}).items(): + (skill_dir / rel).write_text(content, encoding="utf-8") + + def _write_override(self, payload: dict[str, object]) -> None: + override_dir = self.project_root / "_bmad" / "bmm" + override_dir.mkdir(parents=True, exist_ok=True) + (override_dir / "story-automator.policy.json").write_text(json.dumps(payload), encoding="utf-8") + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py new file mode 100644 index 0000000..5f4528b --- /dev/null +++ b/source/tests/test_state_policy_metadata.py @@ -0,0 +1,138 @@ +from __future__ import annotations + +import io +import json +import shutil +import tempfile +import unittest +from contextlib import redirect_stdout +from pathlib import Path + +from story_automator.commands.orchestrator import cmd_orchestrator_helper +from story_automator.commands.state import cmd_build_state_doc, cmd_validate_state + + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +class StatePolicyMetadataTests(unittest.TestCase): + def setUp(self) -> None: + self.tmp = tempfile.TemporaryDirectory() + self.project_root = Path(self.tmp.name) + self.output_dir = self.project_root / "_bmad-output" / "story-automator" + self._install_bundle() + self._install_required_skills() + + def tearDown(self) -> None: + self.tmp.cleanup() + + def 
test_state_doc_writes_policy_metadata(self) -> None: + stdout = io.StringIO() + template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_build_state_doc( + [ + "--template", + str(template), + "--output-folder", + str(self.output_dir), + "--config-json", + json.dumps(self._config()), + ] + ) + self.assertEqual(code, 0) + state_file = Path(json.loads(stdout.getvalue())["path"]) + text = state_file.read_text(encoding="utf-8") + self.assertIn("policySnapshotFile:", text) + self.assertIn("policySnapshotHash:", text) + + def test_summary_surfaces_policy_metadata(self) -> None: + state_file = self._build_state() + stdout = io.StringIO() + with redirect_stdout(stdout): + code = cmd_orchestrator_helper(["state-summary", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["policySnapshotFile"]) + self.assertTrue(payload["policySnapshotHash"]) + + def test_legacy_state_without_policy_metadata_remains_valid(self) -> None: + legacy = self.project_root / "legacy.md" + legacy.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_state(["--state", str(legacy)]) + self.assertEqual(code, 0) + self.assertEqual(json.loads(stdout.getvalue())["structure"], "ok") + + def _build_state(self) -> Path: + stdout = io.StringIO() + template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" + with patch_env(self.project_root), redirect_stdout(stdout): + cmd_build_state_doc( + [ + "--template", + str(template), + "--output-folder", + str(self.output_dir), + "--config-json", + json.dumps(self._config()), + ] + ) 
+ return Path(json.loads(stdout.getvalue())["path"]) + + def _config(self) -> dict[str, object]: + return { + "epic": "1", + "epicName": "Epic 1", + "storyRange": ["1.1"], + "status": "READY", + "aiCommand": "claude --dangerously-skip-permissions", + } + + def _install_bundle(self) -> None: + source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" + source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" + target_root = self.project_root / ".claude" / "skills" + target_root.mkdir(parents=True, exist_ok=True) + shutil.copytree(source_skill, target_root / "bmad-story-automator") + shutil.copytree(source_review, target_root / "bmad-story-automator-review") + + def _install_required_skills(self) -> None: + for name in ("bmad-create-story", "bmad-dev-story", "bmad-retrospective", "bmad-qa-generate-e2e-tests"): + skill_dir = self.project_root / ".claude" / "skills" / name + skill_dir.mkdir(parents=True, exist_ok=True) + (skill_dir / "SKILL.md").write_text(f"# {name}\n", encoding="utf-8") + (skill_dir / "workflow.md").write_text(f"# {name}\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-create-story" / "discover-inputs.md").write_text("# discover\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-create-story" / "checklist.md").write_text("# checklist\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-create-story" / "template.md").write_text("# template\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-dev-story" / "checklist.md").write_text("# checklist\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-qa-generate-e2e-tests" / "checklist.md").write_text("# checklist\n", encoding="utf-8") + + +class patch_env: + def __init__(self, project_root: Path) -> None: + self.project_root = str(project_root) + self.previous = None + + def __enter__(self) -> None: + import os + + self.previous = 
os.environ.get("PROJECT_ROOT") + os.environ["PROJECT_ROOT"] = self.project_root + + def __exit__(self, exc_type, exc, tb) -> None: + import os + + if self.previous is None: + os.environ.pop("PROJECT_ROOT", None) + else: + os.environ["PROJECT_ROOT"] = self.previous + + +if __name__ == "__main__": + unittest.main() From 01a3e1cc308c170c0dc982f88eea15bef5760fa5 Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 07:55:40 -0300 Subject: [PATCH 03/17] fix: harden runtime policy snapshot handling --- docs/changelog/260413.md | 27 ++++++ .../story_automator/commands/orchestrator.py | 12 ++- .../commands/orchestrator_parse.py | 14 ++- source/src/story_automator/commands/tmux.py | 9 +- .../story_automator/core/runtime_policy.py | 96 ++++++++++++++++++- source/tests/test_orchestrator_parse.py | 44 +++++++++ source/tests/test_runtime_policy.py | 46 ++++++++- source/tests/test_state_policy_metadata.py | 24 +++++ 8 files changed, 259 insertions(+), 13 deletions(-) diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 556a30e..56731d9 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -120,3 +120,30 @@ Implemented the first JSON-settings slice with bundled policy data, snapshot-bac ### QA Notes - N/A + +## 260413-07:55:28 - Harden runtime policy snapshot handling + +### Summary +Fixed the follow-up review findings around snapshot consumption, policy validation, and stale marker fallback behavior. + +### Fixed +- Fixed runtime policy consumers to honor pinned state snapshots for prompt building, parser contract loading, and escalation budgets when a state file is available. +- Fixed malformed override JSON and invalid nested policy shapes to fail through controlled validation paths instead of crashing later with raw exceptions. +- Fixed implicit marker/env state lookup to fall back safely when the referenced state file is missing or the marker payload is malformed. 
+- Fixed `tmux-wrapper build-cmd` to strip `--state-file` from prompt text instead of leaking the flag into child instructions. + +### Changed +- Added regression tests covering snapshot reuse after override changes, invalid nested workflow shapes, malformed marker files, and state-aware prompt/build behavior. + +### Files +- `source/src/story_automator/core/runtime_policy.py` +- `source/src/story_automator/commands/tmux.py` +- `source/src/story_automator/commands/orchestrator.py` +- `source/src/story_automator/commands/orchestrator_parse.py` +- `source/tests/test_runtime_policy.py` +- `source/tests/test_orchestrator_parse.py` +- `source/tests/test_state_policy_metadata.py` +- `docs/changelog/260413.md` + +### QA Notes +- N/A diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 11d2bb1..31ac9db 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -6,7 +6,7 @@ from pathlib import Path from story_automator.core.frontmatter import extract_last_action, find_frontmatter_value, find_frontmatter_value_case, parse_frontmatter -from story_automator.core.runtime_policy import crash_max_retries, load_effective_policy, review_max_cycles +from story_automator.core.runtime_policy import crash_max_retries, load_runtime_policy, review_max_cycles from story_automator.core.review_verify import verify_code_review_completion from story_automator.core.sprint import sprint_status_epic, sprint_status_get from story_automator.core.story_keys import normalize_story_key, sprint_status_file @@ -283,7 +283,15 @@ def _state_update(args: list[str]) -> int: def _escalate(args: list[str]) -> int: trigger = args[0] if args else "" context = args[1] if len(args) > 1 else "" - policy = load_effective_policy(get_project_root()) + state_file = "" + idx = 2 + while idx < len(args): + if args[idx] == "--state-file" and idx + 1 < len(args): + state_file = 
args[idx + 1] + idx += 2 + continue + idx += 1 + policy = load_runtime_policy(get_project_root(), state_file=state_file) if trigger == "review-loop": cycles = _parse_context_int(context, "cycles") limit = review_max_cycles(policy) diff --git a/source/src/story_automator/commands/orchestrator_parse.py b/source/src/story_automator/commands/orchestrator_parse.py index 6a95226..c809cfd 100644 --- a/source/src/story_automator/commands/orchestrator_parse.py +++ b/source/src/story_automator/commands/orchestrator_parse.py @@ -2,7 +2,7 @@ import json -from story_automator.core.runtime_policy import load_effective_policy, step_contract +from story_automator.core.runtime_policy import PolicyError, load_runtime_policy, step_contract from story_automator.core.utils import COMMAND_TIMEOUT_EXIT, extract_json_line, print_json, read_text, run_cmd, trim_lines @@ -14,6 +14,14 @@ def parse_output_action(args: list[str]) -> int: print('{"status":"error","reason":"output file not found or empty"}') return 1 output_file, step = args[:2] + state_file = "" + idx = 2 + while idx < len(args): + if args[idx] == "--state-file" and idx + 1 < len(args): + state_file = args[idx + 1] + idx += 2 + continue + idx += 1 try: content = read_text(output_file) except FileNotFoundError: @@ -24,9 +32,9 @@ def parse_output_action(args: list[str]) -> int: return 1 lines = trim_lines(content)[:150] try: - contract = step_contract(load_effective_policy(), step) + contract = step_contract(load_runtime_policy(state_file=state_file), step) parse_contract = _load_parse_contract(contract) - except (FileNotFoundError, json.JSONDecodeError, ValueError): + except (FileNotFoundError, json.JSONDecodeError, ValueError, PolicyError): print_json({"status": "error", "reason": "parse_contract_invalid"}) return 1 prompt = _build_parse_prompt(contract, parse_contract, "\n".join(lines)) diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py index 00e24c8..329014d 100644 --- 
a/source/src/story_automator/commands/tmux.py +++ b/source/src/story_automator/commands/tmux.py @@ -6,7 +6,7 @@ import time from pathlib import Path -from story_automator.core.runtime_policy import load_effective_policy, step_contract +from story_automator.core.runtime_policy import load_runtime_policy, step_contract from story_automator.core.review_verify import verify_code_review_completion from story_automator.core.utils import ( atomic_write, @@ -175,11 +175,16 @@ def _build_cmd(args: list[str]) -> int: extra = "" tail = args[2:] idx = 0 + state_file = "" while idx < len(tail): if tail[idx] == "--agent" and idx + 1 < len(tail): agent = tail[idx + 1] idx += 2 continue + if tail[idx] == "--state-file" and idx + 1 < len(tail): + state_file = tail[idx + 1] + idx += 2 + continue extra = f"{extra} {tail[idx]}".strip() idx += 1 agent = agent or agent_type() @@ -188,7 +193,7 @@ def _build_cmd(args: list[str]) -> int: if step not in {"create", "dev", "auto", "review", "retro"}: print(f"Unknown step type: {step}", file=__import__("sys").stderr) return 1 - policy = load_effective_policy(root) + policy = load_runtime_policy(root, state_file=state_file) ai_command = os.environ.get("AI_COMMAND") if ai_command and not os.environ.get("AI_AGENT"): cli = ai_command diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index ca472a0..ea3a536 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ b/source/src/story_automator/core/runtime_policy.py @@ -31,6 +31,18 @@ def load_effective_policy(project_root: str | None = None) -> dict[str, Any]: return policy +def load_runtime_policy(project_root: str | None = None, state_file: str | Path | None = None) -> dict[str, Any]: + root = Path(project_root or get_project_root()).resolve() + resolved_state, source = resolve_policy_state_file(root, state_file) + if resolved_state: + try: + return load_policy_for_state(resolved_state, project_root=str(root)) + except 
(FileNotFoundError, PolicyError): + if source == "explicit": + raise + return load_effective_policy(str(root)) + + def snapshot_effective_policy(project_root: str | None = None) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() policy = load_effective_policy(str(root)) @@ -65,8 +77,12 @@ def load_policy_snapshot( actual_hash = md5_hex8(raw) if expected_hash and actual_hash != expected_hash: raise PolicyError(f"policy snapshot hash mismatch: expected {expected_hash}, got {actual_hash}") - policy = json.loads(raw) + try: + policy = json.loads(raw) + except json.JSONDecodeError as exc: + raise PolicyError(f"policy json invalid: {path}") from exc _validate_policy_shape(policy) + _resolve_policy_paths(policy, project_root=root, bundle_root=bundled_skill_root(root)) return policy @@ -82,6 +98,26 @@ def load_policy_for_state(state_file: str | Path, project_root: str | None = Non return load_effective_policy(str(root)) +def resolve_policy_state_file(project_root: str | Path | None = None, state_file: str | Path | None = None) -> tuple[str, str]: + root = Path(project_root or get_project_root()).resolve() + explicit = Path(state_file).expanduser() if state_file else None + if explicit: + return str(_resolve_state_path(root, explicit)), "explicit" + env_state = os.environ.get("STORY_AUTOMATOR_STATE_FILE", "").strip() + if env_state: + return str(_resolve_state_path(root, Path(env_state).expanduser())), "env" + marker = root / ".claude" / ".story-automator-active" + if marker.is_file(): + try: + payload = _read_json(marker) + except PolicyError: + return "", "" + marker_state = str(payload.get("stateFile") or "").strip() + if marker_state: + return str(_resolve_state_path(root, Path(marker_state).expanduser())), "marker" + return "", "" + + def step_contract(policy: dict[str, Any], step: str) -> dict[str, Any]: contract = (policy.get("steps") or {}).get(step) if not isinstance(contract, dict): @@ -112,7 +148,10 @@ def 
bundled_skill_root(project_root: str | Path | None = None) -> Path: def _read_json(path: str | Path) -> dict[str, Any]: - payload = json.loads(read_text(path)) + try: + payload = json.loads(read_text(path)) + except json.JSONDecodeError as exc: + raise PolicyError(f"policy json invalid: {path}") from exc if not isinstance(payload, dict): raise PolicyError(f"policy json must be an object: {path}") return payload @@ -142,25 +181,40 @@ def _validate_policy_shape(policy: dict[str, Any]) -> None: unknown_keys = sorted(set(policy) - VALID_TOP_LEVEL_KEYS) if unknown_keys: raise PolicyError(f"unknown top-level policy keys: {', '.join(unknown_keys)}") + snapshot = _expect_optional_dict(policy, "snapshot") + if "snapshot" in policy and "relativeDir" in snapshot and not isinstance(snapshot.get("relativeDir"), str): + raise PolicyError("snapshot.relativeDir must be a string") + workflow = _expect_optional_dict(policy, "workflow") + repeat = _expect_optional_nested_dict(workflow, "repeat", "workflow") + review = _expect_optional_nested_dict(repeat, "review", "workflow.repeat") + crash = _expect_optional_nested_dict(workflow, "crash", "workflow") steps = policy.get("steps") if not isinstance(steps, dict): raise PolicyError("steps must be an object") unknown_steps = sorted(set(steps) - VALID_STEP_NAMES) if unknown_steps: raise PolicyError(f"unknown step names: {', '.join(unknown_steps)}") - sequence = ((policy.get("workflow") or {}).get("sequence")) or [] + sequence = (workflow.get("sequence")) or [] if not isinstance(sequence, list) or not all(isinstance(item, str) for item in sequence): raise PolicyError("workflow.sequence must be a string array") + if "maxCycles" in review and not isinstance(review.get("maxCycles"), int): + raise PolicyError("workflow.repeat.review.maxCycles must be an integer") + if "maxRetries" in crash and not isinstance(crash.get("maxRetries"), int): + raise PolicyError("workflow.crash.maxRetries must be an integer") for step in sequence: if step not in 
steps: raise PolicyError(f"workflow.sequence references missing step: {step}") for name, contract in steps.items(): if not isinstance(contract, dict): raise PolicyError(f"step contract must be an object: {name}") + assets = _expect_step_dict(contract, "assets", name) + _expect_step_dict(contract, "prompt", name) + _expect_step_dict(contract, "parse", name) + _expect_step_dict(contract, "success", name) verifier = str(((contract.get("success") or {}).get("verifier")) or "") if verifier not in VALID_VERIFIERS: raise PolicyError(f"invalid verifier for {name}: {verifier}") - required = ((contract.get("assets") or {}).get("required")) or [] + required = (assets.get("required")) or [] if not isinstance(required, list) or any(item not in VALID_ASSET_NAMES for item in required): raise PolicyError(f"invalid required assets for {name}") @@ -248,7 +302,8 @@ def _resolve_data_path(path_value: str, *, project_root: Path, bundle_root: Path def _snapshot_relative_dir(policy: dict[str, Any]) -> str: - relative_dir = str((policy.get("snapshot") or {}).get("relativeDir") or "").strip() + snapshot = _expect_optional_dict(policy, "snapshot") + relative_dir = str(snapshot.get("relativeDir") or "").strip() if not relative_dir: raise PolicyError("snapshot.relativeDir missing") return relative_dir @@ -263,3 +318,34 @@ def _display_path(path: Path, project_root: Path) -> str: return str(path.resolve().relative_to(project_root.resolve())) except ValueError: return str(path.resolve()) + + +def _resolve_state_path(project_root: Path, path: Path) -> Path: + return path if path.is_absolute() else project_root / path + + +def _expect_optional_dict(payload: dict[str, Any], key: str) -> dict[str, Any]: + value = payload.get(key) + if value is None: + return {} + if not isinstance(value, dict): + raise PolicyError(f"{key} must be an object") + return value + + +def _expect_step_dict(contract: dict[str, Any], key: str, step: str) -> dict[str, Any]: + value = contract.get(key) + if value is None: + 
return {} + if not isinstance(value, dict): + raise PolicyError(f"{step}.{key} must be an object") + return value + + +def _expect_optional_nested_dict(payload: dict[str, Any], key: str, label: str) -> dict[str, Any]: + value = payload.get(key) + if value is None: + return {} + if not isinstance(value, dict): + raise PolicyError(f"{label}.{key} must be an object") + return value diff --git a/source/tests/test_orchestrator_parse.py b/source/tests/test_orchestrator_parse.py index 0d5c1e3..99abaf5 100644 --- a/source/tests/test_orchestrator_parse.py +++ b/source/tests/test_orchestrator_parse.py @@ -9,6 +9,7 @@ from pathlib import Path from unittest.mock import patch +from story_automator.commands.state import cmd_build_state_doc from story_automator.commands.orchestrator_parse import parse_output_action from story_automator.core.utils import CommandResult @@ -74,6 +75,23 @@ def test_output_shape_remains_compatible(self) -> None: self.assertIn("issues_found", payload) self.assertIn("all_fixed", payload) + def test_state_file_keeps_pinned_parse_contract_after_override_changes(self) -> None: + state_file = self._build_state() + override_dir = self.project_root / "_bmad" / "bmm" + override_dir.mkdir(parents=True, exist_ok=True) + (override_dir / "story-automator.policy.json").write_text( + json.dumps({"steps": {"create": {"parse": {"schemaFile": "missing.json"}}}}), + encoding="utf-8", + ) + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), patch( + "story_automator.commands.orchestrator_parse.run_cmd", + return_value=CommandResult('{"status":"SUCCESS","story_created":true,"story_file":"x","summary":"ok","next_action":"proceed"}', 0), + ), redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "create", "--state-file", str(state_file)]) + self.assertEqual(code, 0) + self.assertTrue(json.loads(stdout.getvalue())["story_created"]) + def _install_bundle(self) -> None: source_skill = REPO_ROOT / "payload" 
/ ".claude" / "skills" / "bmad-story-automator" source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" @@ -94,6 +112,32 @@ def _install_required_skills(self) -> None: (self.project_root / ".claude" / "skills" / "bmad-dev-story" / "checklist.md").write_text("# checklist\n", encoding="utf-8") (self.project_root / ".claude" / "skills" / "bmad-qa-generate-e2e-tests" / "checklist.md").write_text("# checklist\n", encoding="utf-8") + def _build_state(self) -> Path: + output_dir = self.project_root / "_bmad-output" / "story-automator" + output_dir.mkdir(parents=True, exist_ok=True) + stdout = io.StringIO() + template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), redirect_stdout(stdout): + cmd_build_state_doc( + [ + "--template", + str(template), + "--output-folder", + str(output_dir), + "--config-json", + json.dumps( + { + "epic": "1", + "epicName": "Epic 1", + "storyRange": ["1.1"], + "status": "READY", + "aiCommand": "claude --dangerously-skip-permissions", + } + ), + ] + ) + return Path(json.loads(stdout.getvalue())["path"]) + if __name__ == "__main__": unittest.main() diff --git a/source/tests/test_runtime_policy.py b/source/tests/test_runtime_policy.py index 8b64f3b..3979f54 100644 --- a/source/tests/test_runtime_policy.py +++ b/source/tests/test_runtime_policy.py @@ -6,7 +6,7 @@ import unittest from pathlib import Path -from story_automator.core.runtime_policy import PolicyError, load_effective_policy, snapshot_effective_policy +from story_automator.core.runtime_policy import PolicyError, load_effective_policy, load_runtime_policy, snapshot_effective_policy REPO_ROOT = Path(__file__).resolve().parents[2] @@ -58,6 +58,50 @@ def test_snapshot_hash_stable(self) -> None: second = snapshot_effective_policy(str(self.project_root)) self.assertEqual(first["policySnapshotHash"], second["policySnapshotHash"]) 
+ def test_malformed_override_json_raises_policy_error(self) -> None: + override_dir = self.project_root / "_bmad" / "bmm" + override_dir.mkdir(parents=True, exist_ok=True) + (override_dir / "story-automator.policy.json").write_text("{bad json", encoding="utf-8") + with self.assertRaises(PolicyError): + load_effective_policy(str(self.project_root)) + + def test_invalid_assets_type_rejected(self) -> None: + self._write_override({"steps": {"review": {"assets": []}}}) + with self.assertRaises(PolicyError): + load_effective_policy(str(self.project_root)) + + def test_invalid_workflow_and_snapshot_types_rejected(self) -> None: + self._write_override({"workflow": [], "snapshot": []}) + with self.assertRaises(PolicyError): + load_effective_policy(str(self.project_root)) + + def test_invalid_nested_workflow_types_rejected(self) -> None: + self._write_override({"workflow": {"repeat": [1], "crash": [2]}}) + with self.assertRaises(PolicyError): + load_effective_policy(str(self.project_root)) + + def test_snapshot_reload_re_resolves_paths_for_new_root(self) -> None: + snapshot = snapshot_effective_policy(str(self.project_root)) + copied_root = Path(self.tmp.name) / "copied" + shutil.copytree(self.project_root, copied_root) + policy = load_runtime_policy(str(copied_root), state_file=str(copied_root / snapshot["policySnapshotFile"])) + template_path = policy["steps"]["create"]["prompt"]["templatePath"] + self.assertTrue(str(copied_root) in template_path) + + def test_missing_marker_state_falls_back_to_effective_policy(self) -> None: + marker = self.project_root / ".claude" / ".story-automator-active" + marker.parent.mkdir(parents=True, exist_ok=True) + marker.write_text(json.dumps({"stateFile": "missing.md"}), encoding="utf-8") + policy = load_runtime_policy(str(self.project_root)) + self.assertEqual(policy["workflow"]["repeat"]["review"]["maxCycles"], 5) + + def test_malformed_marker_falls_back_to_effective_policy(self) -> None: + marker = self.project_root / ".claude" / 
".story-automator-active" + marker.parent.mkdir(parents=True, exist_ok=True) + marker.write_text("{bad json", encoding="utf-8") + policy = load_runtime_policy(str(self.project_root)) + self.assertEqual(policy["workflow"]["repeat"]["review"]["maxCycles"], 5) + def _install_bundle(self) -> None: source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" diff --git a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index 5f4528b..72301ca 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -10,6 +10,7 @@ from story_automator.commands.orchestrator import cmd_orchestrator_helper from story_automator.commands.state import cmd_build_state_doc, cmd_validate_state +from story_automator.commands.tmux import _build_cmd REPO_ROOT = Path(__file__).resolve().parents[2] @@ -68,6 +69,29 @@ def test_legacy_state_without_policy_metadata_remains_valid(self) -> None: self.assertEqual(code, 0) self.assertEqual(json.loads(stdout.getvalue())["structure"], "ok") + def test_escalate_uses_pinned_snapshot_when_state_file_provided(self) -> None: + state_file = self._build_state() + override_dir = self.project_root / "_bmad" / "bmm" + override_dir.mkdir(parents=True, exist_ok=True) + (override_dir / "story-automator.policy.json").write_text( + json.dumps({"workflow": {"repeat": {"review": {"maxCycles": 1}}}}), + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["escalate", "review-loop", "cycles=2", "--state-file", str(state_file)]) + self.assertEqual(code, 0) + self.assertFalse(json.loads(stdout.getvalue())["escalate"]) + + def test_build_cmd_does_not_treat_state_file_flag_as_prompt_text(self) -> None: + state_file = self._build_state() + stdout = io.StringIO() + with 
patch_env(self.project_root), redirect_stdout(stdout): + code = _build_cmd(["review", "1.1", "--state-file", str(state_file)]) + self.assertEqual(code, 0) + rendered = stdout.getvalue() + self.assertNotIn("--state-file", rendered) + def _build_state(self) -> Path: stdout = io.StringIO() template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" From e84e4b612a25dee59855ff49436054bf557cf6dc Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 08:06:04 -0300 Subject: [PATCH 04/17] feat: wire policy-backed success verifiers --- docs/changelog/260413.md | 24 ++ .../story_automator/commands/orchestrator.py | 7 +- source/src/story_automator/commands/tmux.py | 62 ++++- .../src/story_automator/core/review_verify.py | 42 +-- .../story_automator/core/success_verifiers.py | 240 ++++++++++++++++++ source/tests/test_success_verifiers.py | 194 ++++++++++++++ 6 files changed, 533 insertions(+), 36 deletions(-) create mode 100644 source/src/story_automator/core/success_verifiers.py create mode 100644 source/tests/test_success_verifiers.py diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 56731d9..faaca3e 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -1,5 +1,29 @@ # Changelog - 260413 +## 260413-08:05:51 - Wire policy-backed success verifiers + +### Summary +Moved review completion checks onto the JSON policy contract and routed monitor verification through the named verifier registry. + +### Added +- Added a shared success verifier registry covering session exit, story artifact creation, review completion, and epic completion. +- Added unit coverage for contract-driven review verification, create artifact matching, epic completion, pinned snapshot reuse, and monitor dispatch. 
+ +### Changed +- Changed `monitor-session` to resolve the active step's `success.verifier` from policy, accept `--state-file`, and verify completion through the configured verifier instead of a hard-coded review branch. +- Changed `verify-code-review` to resolve review completion from the pinned state snapshot when provided, so review verification stays aligned with the active runtime policy. + +### Files +- `source/src/story_automator/core/success_verifiers.py` +- `source/src/story_automator/core/review_verify.py` +- `source/src/story_automator/commands/tmux.py` +- `source/src/story_automator/commands/orchestrator.py` +- `source/tests/test_success_verifiers.py` +- `docs/changelog/260413.md` + +### QA Notes +- N/A + ## 260413-11:35:00 - Verify packed npx install path ### Summary diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 31ac9db..4e69b22 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -378,7 +378,12 @@ def _verify_code_review(args: list[str]) -> int: if not args: print_json({"verified": False, "reason": "story_key_required"}) return 1 - payload = verify_code_review_completion(get_project_root(), args[0]) + state_file = "" + tail = args[1:] + for idx, arg in enumerate(tail): + if arg == "--state-file" and idx + 1 < len(tail): + state_file = tail[idx + 1] + payload = verify_code_review_completion(get_project_root(), args[0], state_file=state_file or None) print_json(payload) return 0 if bool(payload.get("verified")) else 1 diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py index 329014d..c6ddbeb 100644 --- a/source/src/story_automator/commands/tmux.py +++ b/source/src/story_automator/commands/tmux.py @@ -6,8 +6,8 @@ import time from pathlib import Path -from story_automator.core.runtime_policy import load_runtime_policy, step_contract -from 
story_automator.core.review_verify import verify_code_review_completion +from story_automator.core.runtime_policy import PolicyError, load_runtime_policy, step_contract +from story_automator.core.success_verifiers import resolve_success_contract, run_success_verifier from story_automator.core.utils import ( atomic_write, command_exists, @@ -652,7 +652,7 @@ def cmd_monitor_session(args: list[str]) -> int: return 1 if args[0] in {"--help", "-h"}: print("Usage: monitor-session [options]") - print("Options: --max-polls N --initial-wait N --project-root PATH --timeout MIN --verbose --json --agent TYPE --workflow TYPE --story-key KEY") + print("Options: --max-polls N --initial-wait N --project-root PATH --timeout MIN --verbose --json --agent TYPE --workflow TYPE --story-key KEY --state-file PATH") return 0 session = args[0] max_polls = 30 @@ -662,6 +662,7 @@ def cmd_monitor_session(args: list[str]) -> int: agent = os.environ.get("AI_AGENT", "claude") workflow = "dev" story_key = "" + state_file = "" project_root = get_project_root() idx = 1 while idx < len(args): @@ -692,6 +693,10 @@ def cmd_monitor_session(args: list[str]) -> int: story_key = args[idx + 1] idx += 2 continue + elif arg == "--state-file" and idx + 1 < len(args): + state_file = args[idx + 1] + idx += 2 + continue elif arg == "--project-root" and idx + 1 < len(args): project_root = args[idx + 1] idx += 2 @@ -713,11 +718,26 @@ def cmd_monitor_session(args: list[str]) -> int: state = str(status["session_state"]) if state == "completed": output = session_status(session, full=True, codex=agent == "codex", project_root=project_root)["active_task"] - if workflow == "review" and story_key: - verified = verify_code_review_completion(project_root, story_key) + verification = _verify_monitor_completion( + workflow, + project_root=project_root, + story_key=story_key, + output_file=str(output), + state_file=state_file or None, + ) + if verification is not None: + verified, verifier_name = verification if 
bool(verified.get("verified")): - return _emit_monitor(json_output, "completed", last_done, last_total, str(output), "verified_complete") - return _emit_monitor(json_output, "incomplete", last_done, last_total, str(output), "workflow_not_verified") + reason = "normal_completion" if verifier_name == "session_exit" else "verified_complete" + return _emit_monitor(json_output, "completed", last_done, last_total, str(output), reason) + return _emit_monitor( + json_output, + "incomplete", + last_done, + last_total, + str(output), + str(verified.get("reason") or "workflow_not_verified"), + ) return _emit_monitor(json_output, "completed", last_done, last_total, str(output), "normal_completion") if state == "crashed": crashed = session_status(session, full=True, codex=agent == "codex", project_root=project_root) @@ -745,3 +765,31 @@ def _emit_monitor(json_output: bool, state: str, done: int, total: int, output_f else: print(f"{state},{done},{total},{output_file},{reason}") return 0 + + +def _verify_monitor_completion( + workflow: str, + *, + project_root: str, + story_key: str, + output_file: str, + state_file: str | Path | None = None, +) -> tuple[dict[str, object], str] | None: + try: + contract = resolve_success_contract(project_root, workflow, state_file=state_file) + except (FileNotFoundError, PolicyError): + return ({"verified": False, "reason": "verifier_contract_invalid"}, "") + verifier_name = str(contract.get("verifier") or "").strip() + if not verifier_name: + return None + try: + result = run_success_verifier( + verifier_name, + project_root=project_root, + story_key=story_key, + output_file=output_file, + contract=contract, + ) + except PolicyError: + return ({"verified": False, "reason": "verifier_contract_invalid"}, verifier_name) + return (result, verifier_name) diff --git a/source/src/story_automator/core/review_verify.py b/source/src/story_automator/core/review_verify.py index d321bcb..5975c69 100644 --- a/source/src/story_automator/core/review_verify.py 
+++ b/source/src/story_automator/core/review_verify.py @@ -1,34 +1,20 @@ from __future__ import annotations from pathlib import Path +from typing import Any -from .frontmatter import find_frontmatter_value_case -from .sprint import sprint_status_get -from .story_keys import normalize_story_key +from .success_verifiers import resolve_success_contract, review_completion -def verify_code_review_completion(project_root: str, story_key: str) -> dict[str, object]: - norm = normalize_story_key(project_root, story_key) - if norm is None: - return {"verified": False, "reason": "could_not_normalize_key", "input": story_key} - status = sprint_status_get(project_root, norm.id) - if status.done: - return {"verified": True, "story": norm.key, "sprint_status": "done", "source": "sprint-status.yaml"} - matches = sorted((Path(project_root) / "_bmad-output" / "implementation-artifacts").glob(f"{norm.prefix}-*.md")) - story_status = find_frontmatter_value_case(matches[0], "Status") if matches else "" - if story_status == "done": - return { - "verified": True, - "story": norm.key, - "sprint_status": status.status, - "story_file_status": "done", - "source": "story-file", - "note": "sprint_status_not_updated", - } - return { - "verified": False, - "story": norm.key, - "sprint_status": status.status, - "story_file_status": story_status or "unknown", - "reason": "workflow_not_complete", - } +def verify_code_review_completion( + project_root: str, + story_key: str, + *, + success_contract: dict[str, Any] | None = None, + state_file: str | Path | None = None, +) -> dict[str, object]: + try: + contract = success_contract or resolve_success_contract(project_root, "review", state_file=state_file) + return review_completion(project_root=project_root, story_key=story_key, contract=contract) + except (FileNotFoundError, ValueError) as exc: + return {"verified": False, "reason": "review_contract_invalid", "input": story_key, "error": str(exc)} diff --git 
a/source/src/story_automator/core/success_verifiers.py b/source/src/story_automator/core/success_verifiers.py new file mode 100644 index 0000000..0d596f8 --- /dev/null +++ b/source/src/story_automator/core/success_verifiers.py @@ -0,0 +1,240 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Callable + +from .frontmatter import find_frontmatter_value_case +from .runtime_policy import PolicyError, load_runtime_policy, step_contract +from .sprint import sprint_status_epic, sprint_status_get +from .story_keys import normalize_story_key +from .utils import read_text + +ALLOWED_REVIEW_CONTRACT_KEYS = {"blockingSeverity", "doneValues", "inProgressValues", "sourceOrder", "syncSprintStatus"} +ALLOWED_REVIEW_SOURCES = {"sprint-status.yaml", "story-file"} +DEFAULT_REVIEW_CONTRACT = { + "blockingSeverity": ["critical"], + "doneValues": ["done"], + "inProgressValues": ["in-progress", "in_progress", "review", "qa"], + "sourceOrder": ["sprint-status.yaml", "story-file"], + "syncSprintStatus": True, +} + + +def resolve_success_contract(project_root: str, step: str, *, state_file: str | Path | None = None) -> dict[str, Any]: + policy = load_runtime_policy(project_root, state_file=state_file) + success = step_contract(policy, step).get("success") or {} + if not isinstance(success, dict): + raise PolicyError(f"invalid success contract for {step}") + return success + + +def run_success_verifier( + name: str, + *, + project_root: str, + story_key: str = "", + output_file: str = "", + contract: dict[str, Any] | None = None, +) -> dict[str, object]: + verifier = VERIFIERS.get(name) + if verifier is None: + raise PolicyError(f"unknown success verifier: {name}") + return verifier(project_root=project_root, story_key=story_key, output_file=output_file, contract=contract or {}) + + +def session_exit( + *, + project_root: str, + story_key: str = "", + output_file: str = "", + contract: dict[str, Any] | None = None, +) -> dict[str, 
object]: + payload: dict[str, object] = {"verified": True, "source": "session_exit"} + if story_key: + payload["story"] = story_key + if output_file: + payload["outputFile"] = output_file + return payload + + +def create_story_artifact( + *, + project_root: str, + story_key: str, + output_file: str = "", + contract: dict[str, Any] | None = None, +) -> dict[str, object]: + norm = normalize_story_key(project_root, story_key) + if norm is None: + return {"verified": False, "reason": "could_not_normalize_key", "input": story_key} + config = _success_config(contract) + raw_glob = str(config.get("glob") or "_bmad-output/implementation-artifacts/{story_prefix}-*.md") + expected = int(config.get("expectedMatches", 1)) + pattern = _format_story_pattern(raw_glob, norm) + matches = sorted(Path(project_root).glob(pattern)) + payload: dict[str, object] = { + "verified": len(matches) == expected, + "story": norm.key, + "source": "artifact_glob", + "pattern": pattern, + "expectedMatches": expected, + "actualMatches": len(matches), + "matches": [str(match) for match in matches], + } + if not bool(payload["verified"]): + payload["reason"] = "unexpected_story_artifact_count" + return payload + + +def review_completion( + *, + project_root: str, + story_key: str, + output_file: str = "", + contract: dict[str, Any] | None = None, +) -> dict[str, object]: + norm = normalize_story_key(project_root, story_key) + if norm is None: + return {"verified": False, "reason": "could_not_normalize_key", "input": story_key} + review_contract = _load_review_contract(project_root, contract or {}) + done_values = {value.lower() for value in review_contract["doneValues"]} + sprint = sprint_status_get(project_root, norm.id) + story_file = _story_artifact_path(project_root, norm.prefix) + story_status = find_frontmatter_value_case(story_file, "Status") if story_file else "" + for source in review_contract["sourceOrder"]: + if source == "sprint-status.yaml" and sprint.status.lower() in done_values: + 
return { + "verified": True, + "story": norm.key, + "sprint_status": sprint.status, + "story_file_status": story_status or "unknown", + "source": "sprint-status.yaml", + } + if source == "story-file" and story_status.lower() in done_values: + payload: dict[str, object] = { + "verified": True, + "story": norm.key, + "sprint_status": sprint.status, + "story_file_status": story_status, + "source": "story-file", + } + if review_contract["syncSprintStatus"] and not sprint.done: + payload["note"] = "sprint_status_not_updated" + return payload + return { + "verified": False, + "story": norm.key, + "sprint_status": sprint.status, + "story_file_status": story_status or "unknown", + "reason": "workflow_not_complete", + } + + +def epic_complete( + *, + project_root: str, + story_key: str, + output_file: str = "", + contract: dict[str, Any] | None = None, +) -> dict[str, object]: + norm = normalize_story_key(project_root, story_key) + if norm is None: + return {"verified": False, "reason": "could_not_normalize_key", "input": story_key} + epic = norm.id.split(".", 1)[0] + stories, done = sprint_status_epic(project_root, epic) + if not stories: + return {"verified": False, "epic": epic, "reason": "no_stories_found", "source": "sprint-status.yaml"} + return { + "verified": done == len(stories), + "epic": epic, + "story": norm.key, + "totalStories": len(stories), + "doneStories": done, + "source": "sprint-status.yaml", + **({} if done == len(stories) else {"reason": "epic_incomplete"}), + } + + +def _success_config(contract: dict[str, Any] | None) -> dict[str, Any]: + config = (contract or {}).get("config") or {} + if not isinstance(config, dict): + raise PolicyError("success.config must be an object") + return config + + +def _format_story_pattern(pattern: str, story) -> str: + return ( + pattern.replace("{story_prefix}", story.prefix) + .replace("{story_id}", story.id) + .replace("{story_key}", story.key) + ) + + +def _story_artifact_path(project_root: str, story_prefix: str) -> 
Path | None: + matches = sorted((Path(project_root) / "_bmad-output" / "implementation-artifacts").glob(f"{story_prefix}-*.md")) + return matches[0] if matches else None + + +def _load_review_contract(project_root: str, contract: dict[str, Any]) -> dict[str, Any]: + merged = dict(DEFAULT_REVIEW_CONTRACT) + contract_path = str(contract.get("contractPath") or "").strip() + if contract_path: + path = Path(contract_path) + if not path.is_absolute(): + path = Path(project_root) / path + try: + payload = json.loads(read_text(path)) + except json.JSONDecodeError as exc: + raise PolicyError(f"review contract json invalid: {path}") from exc + if not isinstance(payload, dict): + raise PolicyError(f"review contract must be an object: {path}") + merged.update(payload) + inline = _inline_review_contract(contract) + merged.update(inline) + _validate_review_contract(merged) + return { + "blockingSeverity": [str(value).strip() for value in merged["blockingSeverity"] if str(value).strip()], + "doneValues": [str(value).strip() for value in merged["doneValues"] if str(value).strip()], + "inProgressValues": [str(value).strip() for value in merged["inProgressValues"] if str(value).strip()], + "sourceOrder": [str(value).strip() for value in merged["sourceOrder"] if str(value).strip()], + "syncSprintStatus": bool(merged["syncSprintStatus"]), + } + + +def _inline_review_contract(contract: dict[str, Any]) -> dict[str, Any]: + inline: dict[str, Any] = {} + config = contract.get("config") + if isinstance(config, dict): + for key in ALLOWED_REVIEW_CONTRACT_KEYS: + if key in config: + inline[key] = config[key] + for key in ALLOWED_REVIEW_CONTRACT_KEYS: + if key in contract: + inline[key] = contract[key] + return inline + + +def _validate_review_contract(contract: dict[str, Any]) -> None: + unknown_keys = sorted(set(contract) - ALLOWED_REVIEW_CONTRACT_KEYS) + if unknown_keys: + raise PolicyError(f"unknown review contract keys: {', '.join(unknown_keys)}") + for key in ("blockingSeverity", 
"doneValues", "inProgressValues", "sourceOrder"): + values = contract.get(key) + if not isinstance(values, list) or not all(isinstance(value, str) for value in values): + raise PolicyError(f"review contract {key} must be a string array") + if not isinstance(contract.get("syncSprintStatus"), bool): + raise PolicyError("review contract syncSprintStatus must be a boolean") + invalid_sources = sorted({value for value in contract["sourceOrder"] if value not in ALLOWED_REVIEW_SOURCES}) + if invalid_sources: + raise PolicyError(f"review contract sourceOrder contains unknown sources: {', '.join(invalid_sources)}") + + +VerifierFn = Callable[..., dict[str, object]] + +VERIFIERS: dict[str, VerifierFn] = { + "create_story_artifact": create_story_artifact, + "session_exit": session_exit, + "review_completion": review_completion, + "epic_complete": epic_complete, +} diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py new file mode 100644 index 0000000..9071bdb --- /dev/null +++ b/source/tests/test_success_verifiers.py @@ -0,0 +1,194 @@ +from __future__ import annotations + +import io +import json +import shutil +import tempfile +import unittest +from contextlib import redirect_stdout +from pathlib import Path + +from story_automator.commands.state import cmd_build_state_doc +from story_automator.commands.tmux import _verify_monitor_completion +from story_automator.core.review_verify import verify_code_review_completion +from story_automator.core.runtime_policy import PolicyError +from story_automator.core.success_verifiers import create_story_artifact, epic_complete, review_completion + + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +class SuccessVerifierTests(unittest.TestCase): + def setUp(self) -> None: + self.tmp = tempfile.TemporaryDirectory() + self.project_root = Path(self.tmp.name) + self.output_dir = self.project_root / "_bmad-output" / "story-automator" + self.artifacts_dir = self.project_root / "_bmad-output" / 
"implementation-artifacts" + self._install_bundle() + self._install_required_skills() + + def tearDown(self) -> None: + self.tmp.cleanup() + + def test_create_story_artifact_matches_configured_glob(self) -> None: + self._write_story("1-2-example", status="draft") + payload = create_story_artifact( + project_root=str(self.project_root), + story_key="1.2", + contract={"config": {"glob": "_bmad-output/implementation-artifacts/{story_prefix}-*.md", "expectedMatches": 1}}, + ) + self.assertTrue(payload["verified"]) + self.assertEqual(payload["actualMatches"], 1) + + def test_review_completion_uses_contract_done_values(self) -> None: + self._write_story("1-2-example", status="approved") + contract = self._write_review_contract( + {"doneValues": ["approved"], "sourceOrder": ["story-file"], "syncSprintStatus": False} + ) + payload = review_completion( + project_root=str(self.project_root), + story_key="1.2", + contract={"contractPath": str(contract)}, + ) + self.assertTrue(payload["verified"]) + self.assertEqual(payload["source"], "story-file") + self.assertNotIn("note", payload) + + def test_review_completion_rejects_invalid_contract(self) -> None: + contract = self._write_review_contract({"sourceOrder": ["bad-source"]}) + with self.assertRaises(PolicyError): + review_completion( + project_root=str(self.project_root), + story_key="1.2", + contract={"contractPath": str(contract)}, + ) + + def test_epic_complete_checks_sprint_status(self) -> None: + self._write_sprint_status("1-1-story-one: done\n1-2-story-two: done\n") + payload = epic_complete(project_root=str(self.project_root), story_key="1.2") + self.assertTrue(payload["verified"]) + self.assertEqual(payload["doneStories"], 2) + + def test_review_wrapper_uses_pinned_state_snapshot(self) -> None: + self._write_story("1-2-example", status="approved") + state_file = self._build_state() + self._write_override( + { + "steps": { + "review": { + "success": { + "config": {"doneValues": ["approved"], "sourceOrder": 
["story-file"], "syncSprintStatus": False} + } + } + } + } + ) + payload = verify_code_review_completion(str(self.project_root), "1.2", state_file=state_file) + self.assertFalse(payload["verified"]) + self.assertEqual(payload["reason"], "workflow_not_complete") + + def test_monitor_dispatch_uses_review_verifier_from_contract(self) -> None: + self._write_story("1-2-example", status="done") + result = _verify_monitor_completion( + "review", + project_root=str(self.project_root), + story_key="1.2", + output_file="/tmp/session.txt", + ) + self.assertIsNotNone(result) + payload, verifier = result or ({}, "") + self.assertEqual(verifier, "review_completion") + self.assertTrue(payload["verified"]) + + def _build_state(self) -> Path: + stdout = io.StringIO() + template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" + with patch_env(self.project_root), redirect_stdout(stdout): + cmd_build_state_doc( + [ + "--template", + str(template), + "--output-folder", + str(self.output_dir), + "--config-json", + json.dumps( + { + "epic": "1", + "epicName": "Epic 1", + "storyRange": ["1.2"], + "status": "READY", + "aiCommand": "claude --dangerously-skip-permissions", + } + ), + ] + ) + return Path(json.loads(stdout.getvalue())["path"]) + + def _install_bundle(self) -> None: + source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" + source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" + target_root = self.project_root / ".claude" / "skills" + target_root.mkdir(parents=True, exist_ok=True) + shutil.copytree(source_skill, target_root / "bmad-story-automator") + shutil.copytree(source_review, target_root / "bmad-story-automator-review") + + def _install_required_skills(self) -> None: + self._make_skill( + "bmad-create-story", + extras={"discover-inputs.md": "# discover\n", "checklist.md": "# checklist\n", "template.md": "# template\n"}, + ) + 
self._make_skill("bmad-dev-story", extras={"checklist.md": "# checklist\n"}) + self._make_skill("bmad-retrospective") + self._make_skill("bmad-qa-generate-e2e-tests", extras={"checklist.md": "# checklist\n"}) + + def _make_skill(self, name: str, *, extras: dict[str, str] | None = None) -> None: + skill_dir = self.project_root / ".claude" / "skills" / name + skill_dir.mkdir(parents=True, exist_ok=True) + (skill_dir / "SKILL.md").write_text(f"# {name}\n", encoding="utf-8") + (skill_dir / "workflow.md").write_text(f"# {name}\n", encoding="utf-8") + for rel, content in (extras or {}).items(): + (skill_dir / rel).write_text(content, encoding="utf-8") + + def _write_story(self, stem: str, *, status: str) -> Path: + self.artifacts_dir.mkdir(parents=True, exist_ok=True) + path = self.artifacts_dir / f"{stem}.md" + path.write_text(f"---\nStatus: {status}\nTitle: Story\n---\n", encoding="utf-8") + return path + + def _write_sprint_status(self, content: str) -> None: + self.artifacts_dir.mkdir(parents=True, exist_ok=True) + (self.artifacts_dir / "sprint-status.yaml").write_text(content, encoding="utf-8") + + def _write_review_contract(self, payload: dict[str, object]) -> Path: + path = self.project_root / "review-contract.json" + path.write_text(json.dumps(payload), encoding="utf-8") + return path + + def _write_override(self, payload: dict[str, object]) -> None: + override_dir = self.project_root / "_bmad" / "bmm" + override_dir.mkdir(parents=True, exist_ok=True) + (override_dir / "story-automator.policy.json").write_text(json.dumps(payload), encoding="utf-8") + + +class patch_env: + def __init__(self, project_root: Path) -> None: + self.project_root = str(project_root) + self.previous = None + + def __enter__(self) -> None: + import os + + self.previous = os.environ.get("PROJECT_ROOT") + os.environ["PROJECT_ROOT"] = self.project_root + + def __exit__(self, exc_type, exc, tb) -> None: + import os + + if self.previous is None: + os.environ.pop("PROJECT_ROOT", None) + else: + 
os.environ["PROJECT_ROOT"] = self.previous + + +if __name__ == "__main__": + unittest.main() From f12226a7464ac45bb96b1b7008276a0acee6132e Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 08:35:30 -0300 Subject: [PATCH 05/17] fix: harden success verifier review repairs --- docs/changelog/260413.md | 25 ++++++++ .../data/code-review-loop.md | 8 +-- scripts/smoke-test.sh | 5 ++ .../story_automator/core/runtime_policy.py | 48 +++++++++++--- .../story_automator/core/success_verifiers.py | 62 ++++++++++++++----- source/tests/test_success_verifiers.py | 45 ++++++++++++++ 6 files changed, 166 insertions(+), 27 deletions(-) diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index faaca3e..232a993 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -24,6 +24,31 @@ Moved review completion checks onto the JSON policy contract and routed monitor ### QA Notes - N/A +## 260413-08:34:25 - Harden success verifier review fixes + +### Summary +Closed the review-loop findings around contract-safe verifier loading, snapshot handoff coverage, and verifier config failure modes. + +### Fixed +- Fixed review verification to load only step-local success contract paths so unrelated missing skill assets no longer turn completed reviews into `review_contract_invalid`. +- Fixed review contract validation to reject empty or whitespace-only completion arrays and fixed verifier numeric parsing to reject malformed or boolean `expectedMatches`. +- Fixed retrospective completion checks to accept the real bare epic identifier used by the retro step. + +### Changed +- Changed the shipped code-review loop to pass `--state-file` through create/build, monitor, parse, and verify commands so pinned policy snapshots stay consistent end to end. +- Changed smoke coverage and unit tests to pin each state-file handoff and the new verifier hardening paths. 
+ +### Files +- `source/src/story_automator/core/runtime_policy.py` +- `source/src/story_automator/core/success_verifiers.py` +- `source/tests/test_success_verifiers.py` +- `payload/.claude/skills/bmad-story-automator/data/code-review-loop.md` +- `scripts/smoke-test.sh` +- `docs/changelog/260413.md` + +### QA Notes +- `npm run verify` + ## 260413-11:35:00 - Verify packed npx install path ### Summary diff --git a/payload/.claude/skills/bmad-story-automator/data/code-review-loop.md b/payload/.claude/skills/bmad-story-automator/data/code-review-loop.md index e20bfe2..0723fae 100644 --- a/payload/.claude/skills/bmad-story-automator/data/code-review-loop.md +++ b/payload/.claude/skills/bmad-story-automator/data/code-review-loop.md @@ -56,7 +56,7 @@ scripts="$(printf "%s" "{project_root}/.claude/skills/bmad-story-automator/scrip session_name=$("$scripts" tmux-wrapper spawn review {epic} {story_id} \ --agent "$review_agent" \ --cycle $reviewCycle \ - --command "$("$scripts" tmux-wrapper build-cmd review {story_id} --agent "$review_agent")") + --command "$("$scripts" tmux-wrapper build-cmd review {story_id} --agent "$review_agent" --state-file "$state_file")") ``` ### 2. 
Monitor Session with Verification (v2.2) @@ -66,7 +66,7 @@ session_name=$("$scripts" tmux-wrapper spawn review {epic} {story_id} \ # Pass --workflow and --story-key for completion verification result=$("$scripts" monitor-session "$session_name" --json --verbose \ --agent "$review_agent" \ - --workflow review --story-key {story_id}) + --workflow review --story-key {story_id} --state-file "$state_file") final_state=$(echo "$result" | jq -r '.final_state') output_file=$(echo "$result" | jq -r '.output_file') ``` @@ -77,7 +77,7 @@ output_file=$(echo "$result" | jq -r '.output_file') ```bash # Sub-agent parsing (haiku, 99% cheaper than main context) -parsed=$("$scripts" orchestrator-helper parse-output "$output_file" review) +parsed=$("$scripts" orchestrator-helper parse-output "$output_file" review --state-file "$state_file") ``` ### 4. Verify Sprint Status @@ -159,6 +159,6 @@ file_status=$("$scripts" orchestrator-helper story-file-status {story_id}) Check if code-review actually completed: ```bash -"$scripts" orchestrator-helper verify-code-review {story_id} +"$scripts" orchestrator-helper verify-code-review {story_id} --state-file "$state_file" # Returns: {"verified":true/false, "sprint_status":"...", ...} ``` diff --git a/scripts/smoke-test.sh b/scripts/smoke-test.sh index 0e0f0fb..ee74970 100755 --- a/scripts/smoke-test.sh +++ b/scripts/smoke-test.sh @@ -241,6 +241,11 @@ verify_common_install() { assert_contains "outside .claude/skills/" "$review_dir/instructions.xml" assert_contains 'installed helper at `scripts/story-automator`' "$story_dir/data/scripts-reference.md" assert_not_contains "bin/" "$story_dir/data/monitoring-pattern.md" + assert_contains 'state-file "$state_file"' "$story_dir/data/code-review-loop.md" + assert_contains 'build-cmd review {story_id} --agent "$review_agent" --state-file "$state_file"' "$story_dir/data/code-review-loop.md" + assert_contains 'workflow review --story-key {story_id} --state-file "$state_file"' 
"$story_dir/data/code-review-loop.md" + assert_contains 'parse-output "$output_file" review --state-file "$state_file"' "$story_dir/data/code-review-loop.md" + assert_contains 'verify-code-review {story_id} --state-file "$state_file"' "$story_dir/data/code-review-loop.md" } verify_qa_prompts() { diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index ea3a536..93c0c62 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ b/source/src/story_automator/core/runtime_policy.py @@ -18,7 +18,7 @@ class PolicyError(ValueError): pass -def load_effective_policy(project_root: str | None = None) -> dict[str, Any]: +def load_effective_policy(project_root: str | None = None, *, resolve_assets: bool = True) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() bundle_root = bundled_skill_root(root) bundled = _read_json(bundle_root / "data" / "orchestration-policy.json") @@ -27,20 +27,28 @@ def load_effective_policy(project_root: str | None = None) -> dict[str, Any]: policy = _deep_merge(bundled, override) _apply_legacy_env(policy) _validate_policy_shape(policy) - _resolve_policy_paths(policy, project_root=root, bundle_root=bundle_root) + if resolve_assets: + _resolve_policy_paths(policy, project_root=root, bundle_root=bundle_root) + else: + _resolve_success_paths(policy, project_root=root, bundle_root=bundle_root) return policy -def load_runtime_policy(project_root: str | None = None, state_file: str | Path | None = None) -> dict[str, Any]: +def load_runtime_policy( + project_root: str | None = None, + state_file: str | Path | None = None, + *, + resolve_assets: bool = True, +) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() resolved_state, source = resolve_policy_state_file(root, state_file) if resolved_state: try: - return load_policy_for_state(resolved_state, project_root=str(root)) + return load_policy_for_state(resolved_state, 
project_root=str(root), resolve_assets=resolve_assets) except (FileNotFoundError, PolicyError): if source == "explicit": raise - return load_effective_policy(str(root)) + return load_effective_policy(str(root), resolve_assets=resolve_assets) def snapshot_effective_policy(project_root: str | None = None) -> dict[str, Any]: @@ -66,6 +74,7 @@ def load_policy_snapshot( *, project_root: str | None = None, expected_hash: str = "", + resolve_assets: bool = True, ) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() path = Path(snapshot_file) @@ -82,11 +91,19 @@ def load_policy_snapshot( except json.JSONDecodeError as exc: raise PolicyError(f"policy json invalid: {path}") from exc _validate_policy_shape(policy) - _resolve_policy_paths(policy, project_root=root, bundle_root=bundled_skill_root(root)) + if resolve_assets: + _resolve_policy_paths(policy, project_root=root, bundle_root=bundled_skill_root(root)) + else: + _resolve_success_paths(policy, project_root=root, bundle_root=bundled_skill_root(root)) return policy -def load_policy_for_state(state_file: str | Path, project_root: str | None = None) -> dict[str, Any]: +def load_policy_for_state( + state_file: str | Path, + project_root: str | None = None, + *, + resolve_assets: bool = True, +) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() fields = parse_simple_frontmatter(read_text(state_file)) snapshot_file = str(fields.get("policySnapshotFile") or "").strip() @@ -94,8 +111,13 @@ def load_policy_for_state(state_file: str | Path, project_root: str | None = Non if snapshot_file or snapshot_hash: if not snapshot_file or not snapshot_hash: raise PolicyError("state policy metadata incomplete") - return load_policy_snapshot(snapshot_file, project_root=str(root), expected_hash=snapshot_hash) - return load_effective_policy(str(root)) + return load_policy_snapshot( + snapshot_file, + project_root=str(root), + expected_hash=snapshot_hash, + resolve_assets=resolve_assets, + ) + 
return load_effective_policy(str(root), resolve_assets=resolve_assets) def resolve_policy_state_file(project_root: str | Path | None = None, state_file: str | Path | None = None) -> tuple[str, str]: @@ -239,6 +261,14 @@ def _resolve_policy_paths(policy: dict[str, Any], *, project_root: Path, bundle_ success["contractPath"] = _resolve_data_path(contract_file, project_root=project_root, bundle_root=bundle_root) +def _resolve_success_paths(policy: dict[str, Any], *, project_root: Path, bundle_root: Path) -> None: + for contract in (policy.get("steps") or {}).values(): + success = contract.setdefault("success", {}) + contract_file = str(success.get("contractFile") or "").strip() + if contract_file: + success["contractPath"] = _resolve_data_path(contract_file, project_root=project_root, bundle_root=bundle_root) + + def _resolve_step_assets(step: str, assets: dict[str, Any], project_root: Path) -> dict[str, str]: skill_name = str(assets.get("skillName") or "").strip() if not skill_name: diff --git a/source/src/story_automator/core/success_verifiers.py b/source/src/story_automator/core/success_verifiers.py index 0d596f8..4a5cf42 100644 --- a/source/src/story_automator/core/success_verifiers.py +++ b/source/src/story_automator/core/success_verifiers.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import re from pathlib import Path from typing import Any, Callable @@ -22,7 +23,7 @@ def resolve_success_contract(project_root: str, step: str, *, state_file: str | Path | None = None) -> dict[str, Any]: - policy = load_runtime_policy(project_root, state_file=state_file) + policy = load_runtime_policy(project_root, state_file=state_file, resolve_assets=False) success = step_contract(policy, step).get("success") or {} if not isinstance(success, dict): raise PolicyError(f"invalid success contract for {step}") @@ -70,7 +71,7 @@ def create_story_artifact( return {"verified": False, "reason": "could_not_normalize_key", "input": story_key} config = 
_success_config(contract) raw_glob = str(config.get("glob") or "_bmad-output/implementation-artifacts/{story_prefix}-*.md") - expected = int(config.get("expectedMatches", 1)) + expected = _parse_int(config.get("expectedMatches", 1), "success.config.expectedMatches", minimum=0) pattern = _format_story_pattern(raw_glob, norm) matches = sorted(Path(project_root).glob(pattern)) payload: dict[str, object] = { @@ -138,17 +139,16 @@ def epic_complete( output_file: str = "", contract: dict[str, Any] | None = None, ) -> dict[str, object]: - norm = normalize_story_key(project_root, story_key) - if norm is None: + epic = _epic_identifier(project_root, story_key) + if not epic: return {"verified": False, "reason": "could_not_normalize_key", "input": story_key} - epic = norm.id.split(".", 1)[0] stories, done = sprint_status_epic(project_root, epic) if not stories: return {"verified": False, "epic": epic, "reason": "no_stories_found", "source": "sprint-status.yaml"} return { "verified": done == len(stories), "epic": epic, - "story": norm.key, + "story": story_key, "totalStories": len(stories), "doneStories": done, "source": "sprint-status.yaml", @@ -193,13 +193,7 @@ def _load_review_contract(project_root: str, contract: dict[str, Any]) -> dict[s inline = _inline_review_contract(contract) merged.update(inline) _validate_review_contract(merged) - return { - "blockingSeverity": [str(value).strip() for value in merged["blockingSeverity"] if str(value).strip()], - "doneValues": [str(value).strip() for value in merged["doneValues"] if str(value).strip()], - "inProgressValues": [str(value).strip() for value in merged["inProgressValues"] if str(value).strip()], - "sourceOrder": [str(value).strip() for value in merged["sourceOrder"] if str(value).strip()], - "syncSprintStatus": bool(merged["syncSprintStatus"]), - } + return _sanitize_review_contract(merged) def _inline_review_contract(contract: dict[str, Any]) -> dict[str, Any]: @@ -225,11 +219,51 @@ def 
_validate_review_contract(contract: dict[str, Any]) -> None: raise PolicyError(f"review contract {key} must be a string array") if not isinstance(contract.get("syncSprintStatus"), bool): raise PolicyError("review contract syncSprintStatus must be a boolean") - invalid_sources = sorted({value for value in contract["sourceOrder"] if value not in ALLOWED_REVIEW_SOURCES}) + if not _sanitize_string_list(contract["doneValues"]): + raise PolicyError("review contract doneValues must not be empty") + source_order = _sanitize_string_list(contract["sourceOrder"]) + if not source_order: + raise PolicyError("review contract sourceOrder must not be empty") + invalid_sources = sorted({value for value in source_order if value not in ALLOWED_REVIEW_SOURCES}) if invalid_sources: raise PolicyError(f"review contract sourceOrder contains unknown sources: {', '.join(invalid_sources)}") +def _parse_int(value: Any, field: str, *, minimum: int | None = None) -> int: + if isinstance(value, bool): + raise PolicyError(f"{field} must be an integer") + try: + parsed = int(value) + except (TypeError, ValueError) as exc: + raise PolicyError(f"{field} must be an integer") from exc + if minimum is not None and parsed < minimum: + raise PolicyError(f"{field} must be >= {minimum}") + return parsed + + +def _epic_identifier(project_root: str, story_key: str) -> str: + if re.fullmatch(r"\d+", story_key): + return story_key + norm = normalize_story_key(project_root, story_key) + if norm is None: + return "" + return norm.id.split(".", 1)[0] + + +def _sanitize_review_contract(contract: dict[str, Any]) -> dict[str, Any]: + return { + "blockingSeverity": _sanitize_string_list(contract["blockingSeverity"]), + "doneValues": _sanitize_string_list(contract["doneValues"]), + "inProgressValues": _sanitize_string_list(contract["inProgressValues"]), + "sourceOrder": _sanitize_string_list(contract["sourceOrder"]), + "syncSprintStatus": contract["syncSprintStatus"], + } + + +def _sanitize_string_list(values: 
list[str]) -> list[str]: + return [value.strip() for value in values if value.strip()] + + VerifierFn = Callable[..., dict[str, object]] VERIFIERS: dict[str, VerifierFn] = { diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index 9071bdb..a007dde 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -63,12 +63,34 @@ def test_review_completion_rejects_invalid_contract(self) -> None: contract={"contractPath": str(contract)}, ) + def test_review_completion_rejects_empty_contract_lists(self) -> None: + with self.assertRaises(PolicyError): + review_completion( + project_root=str(self.project_root), + story_key="1.2", + contract={"doneValues": [], "sourceOrder": []}, + ) + + def test_review_completion_rejects_whitespace_only_done_values(self) -> None: + with self.assertRaises(PolicyError): + review_completion( + project_root=str(self.project_root), + story_key="1.2", + contract={"doneValues": [" "], "sourceOrder": ["story-file"]}, + ) + def test_epic_complete_checks_sprint_status(self) -> None: self._write_sprint_status("1-1-story-one: done\n1-2-story-two: done\n") payload = epic_complete(project_root=str(self.project_root), story_key="1.2") self.assertTrue(payload["verified"]) self.assertEqual(payload["doneStories"], 2) + def test_epic_complete_accepts_bare_epic_id(self) -> None: + self._write_sprint_status("1-1-story-one: done\n1-2-story-two: done\n") + payload = epic_complete(project_root=str(self.project_root), story_key="1") + self.assertTrue(payload["verified"]) + self.assertEqual(payload["epic"], "1") + def test_review_wrapper_uses_pinned_state_snapshot(self) -> None: self._write_story("1-2-example", status="approved") state_file = self._build_state() @@ -87,6 +109,13 @@ def test_review_wrapper_uses_pinned_state_snapshot(self) -> None: self.assertFalse(payload["verified"]) self.assertEqual(payload["reason"], "workflow_not_complete") + def 
test_review_wrapper_ignores_unrelated_missing_assets(self) -> None: + shutil.rmtree(self.project_root / ".claude" / "skills" / "bmad-create-story") + self._write_story("1-2-example", status="done") + payload = verify_code_review_completion(str(self.project_root), "1.2") + self.assertTrue(payload["verified"]) + self.assertEqual(payload["source"], "story-file") + def test_monitor_dispatch_uses_review_verifier_from_contract(self) -> None: self._write_story("1-2-example", status="done") result = _verify_monitor_completion( @@ -100,6 +129,22 @@ def test_monitor_dispatch_uses_review_verifier_from_contract(self) -> None: self.assertEqual(verifier, "review_completion") self.assertTrue(payload["verified"]) + def test_create_story_artifact_rejects_invalid_expected_matches(self) -> None: + with self.assertRaises(PolicyError): + create_story_artifact( + project_root=str(self.project_root), + story_key="1.2", + contract={"config": {"expectedMatches": "abc"}}, + ) + + def test_create_story_artifact_rejects_boolean_expected_matches(self) -> None: + with self.assertRaises(PolicyError): + create_story_artifact( + project_root=str(self.project_root), + story_key="1.2", + contract={"config": {"expectedMatches": False}}, + ) + def _build_state(self) -> Path: stdout = io.StringIO() template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" From 5ea85409861fb052c63eec3f760844f9d49061da Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 08:39:52 -0300 Subject: [PATCH 06/17] refactor: route create validation through verifier --- docs/changelog/260413.md | 27 ++++++++++++++ docs/cli-reference.md | 7 ++++ .../data/monitoring-pattern.md | 12 ++----- .../data/retry-fallback-implementation.md | 5 ++- .../data/scripts-reference.md | 3 +- .../steps-c/step-03-execute.md | 12 +++---- scripts/smoke-test.sh | 4 +++ .../story_automator/commands/orchestrator.py | 36 ++++++++++++++++++- 
source/tests/test_success_verifiers.py | 23 ++++++++++++ 9 files changed, 108 insertions(+), 21 deletions(-) diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 232a993..c915f58 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -24,6 +24,33 @@ Moved review completion checks onto the JSON policy contract and routed monitor ### QA Notes - N/A +## 260413-08:39:42 - Route create validation through shared verifier + +### Summary +Removed the duplicate create-story file counting path and exposed the shared verifier registry as the public success-check interface. + +### Added +- Added `orchestrator-helper verify-step` so workflow steps can run the configured success verifier directly, with optional pinned `--state-file` and `--output-file` inputs. + +### Changed +- Changed the shipped create workflow, retry guidance, monitoring pattern, and CLI docs to use `verify-step create ... --state-file "$state_file"` instead of `validate-story-creation check`. +- Changed docs and smoke coverage to pin the new create verifier command forms and their state-file handoff. +- Changed regression tests to cover `verify-step create` and pinned create-policy snapshot reuse. 
+ +### Files +- `source/src/story_automator/commands/orchestrator.py` +- `source/tests/test_success_verifiers.py` +- `payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md` +- `payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md` +- `payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md` +- `payload/.claude/skills/bmad-story-automator/data/scripts-reference.md` +- `docs/cli-reference.md` +- `scripts/smoke-test.sh` +- `docs/changelog/260413.md` + +### QA Notes +- `npm run verify` + ## 260413-08:34:25 - Harden success verifier review fixes ### Summary diff --git a/docs/cli-reference.md b/docs/cli-reference.md index 8e24b5a..6b83009 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -62,6 +62,7 @@ Critical rule: - `orchestrator-helper state-summary` - `orchestrator-helper state-update` - `orchestrator-helper marker create|remove|check|heartbeat` +- `orchestrator-helper verify-step` - `orchestrator-helper verify-code-review` - `orchestrator-helper get-epic-stories` - `orchestrator-helper check-epic-complete` @@ -110,6 +111,12 @@ session="$("$scripts" tmux-wrapper spawn review 1 1.2 --agent claude --command " "$scripts" orchestrator-helper agents-resolve --state-file "$state_file" --story 1.2 --task review ``` +### Verify Create Success + +```bash +"$scripts" orchestrator-helper verify-step create 1.2 --state-file "$state_file" +``` + ## Read Next - [Agents And Monitoring](./agents-and-monitoring.md) diff --git a/payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md b/payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md index 6d13f4d..530a613 100644 --- a/payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md +++ b/payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md @@ -105,17 +105,11 @@ next_action=$(echo "$parsed" | jq -r '.next_action') "$scripts" orchestrator-helper escalate ``` -### $scripts validate-story-creation 
+### $scripts orchestrator-helper verify-step ```bash -# Count before session -before=$("$scripts" validate-story-creation count 5.3) - -# ... run create-story session ... - -# Count after and validate -after=$("$scripts" validate-story-creation count 5.3) -"$scripts" validate-story-creation check 5.3 --before $before --after $after +# Validate create-story via the shared success verifier +"$scripts" orchestrator-helper verify-step create 5.3 --state-file "$state_file" ``` --- diff --git a/payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md b/payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md index 982505f..10e6f4b 100644 --- a/payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md +++ b/payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md @@ -75,9 +75,8 @@ fi ### Create Story ```bash -after=$("$scripts" validate-story-creation count {story_id}) -validation=$("$scripts" validate-story-creation check {story_id} --before $before --after $after) -validation_passed=$(echo "$validation" | jq -r '.valid') +validation=$("$scripts" orchestrator-helper verify-step create {story_id} --state-file "$state_file") +validation_passed=$(echo "$validation" | jq -r '.verified') ``` ### Dev Story diff --git a/payload/.claude/skills/bmad-story-automator/data/scripts-reference.md b/payload/.claude/skills/bmad-story-automator/data/scripts-reference.md index 7e5dc24..bbc2ec6 100644 --- a/payload/.claude/skills/bmad-story-automator/data/scripts-reference.md +++ b/payload/.claude/skills/bmad-story-automator/data/scripts-reference.md @@ -12,9 +12,10 @@ All operations use the installed helper at `scripts/story-automator` (usually vi | `$scripts codex-status-check` | Codex-specific status with heartbeat (v2.4.0) | | `$scripts heartbeat-check` | CPU-based process heartbeat detection | | `$scripts orchestrator-helper` | Sprint-status, parsing, markers | +| `$scripts 
orchestrator-helper verify-step` | Shared success verifier checks per step | | `$scripts orchestrator-helper agents-build` | Deterministic agents file generation | | `$scripts orchestrator-helper agents-resolve` | Agent lookup per story/task via state file or direct agents file | -| `$scripts validate-story-creation` | Story file count validation | +| `$scripts validate-story-creation` | Legacy story file count validation | | `$scripts commit-story` | Deterministic git commit with JSON output | ## Usage Pattern diff --git a/payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md b/payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md index 1d11781..d7e60fa 100644 --- a/payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md +++ b/payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md @@ -127,26 +127,24 @@ If multiple logs exist, run one grep/regex pass across all log files and forward **Apply retry/fallback pattern from `{retryStrategy}`:** Up to 5 attempts, alternating agents, network-aware delays. 
```bash -before=$("$scripts" validate-story-creation count {story_id}) # Retry loop: see {retryStrategy} session=$("$scripts" tmux-wrapper spawn create {epic} {story_id} \ --agent "$current_agent" \ - --command "$("$scripts" tmux-wrapper build-cmd create {story_id} --agent "$current_agent")") + --command "$("$scripts" tmux-wrapper build-cmd create {story_id} --agent "$current_agent" --state-file "$state_file")") result=$("$scripts" monitor-session "$session" --json --agent "$current_agent") "$scripts" tmux-wrapper kill "$session" -after=$("$scripts" validate-story-creation count {story_id}) -validation=$("$scripts" validate-story-creation check {story_id} --before $before --after $after) +validation=$("$scripts" orchestrator-helper verify-step create {story_id} --state-file "$state_file") ``` -- If `validation.valid == true`: +- If `validation.verified == true`: ```bash # Update Story Progress: mark create-story done tmp_state=$(mktemp) sed "s/^| ${story_id} |.*$/| ${story_id} | done | - | - | - | - | in-progress |/" "$state_file" > "$tmp_state" && mv "$tmp_state" "$state_file" ``` → proceed to B -- If `validation.valid == false` AND attempts < 5 → retry with next agent (see `{retryStrategy}`) -- If `validation.valid == false` AND attempts == 5 → escalate (all retries exhausted) +- If `validation.verified == false` AND attempts < 5 → retry with next agent (see `{retryStrategy}`) +- If `validation.verified == false` AND attempts == 5 → escalate (all retries exhausted) ### B. 
Dev Story diff --git a/scripts/smoke-test.sh b/scripts/smoke-test.sh index ee74970..44e44ef 100755 --- a/scripts/smoke-test.sh +++ b/scripts/smoke-test.sh @@ -246,6 +246,10 @@ verify_common_install() { assert_contains 'workflow review --story-key {story_id} --state-file "$state_file"' "$story_dir/data/code-review-loop.md" assert_contains 'parse-output "$output_file" review --state-file "$state_file"' "$story_dir/data/code-review-loop.md" assert_contains 'verify-code-review {story_id} --state-file "$state_file"' "$story_dir/data/code-review-loop.md" + assert_contains 'orchestrator-helper verify-step create {story_id} --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" + assert_contains 'validation_passed=$(echo "$validation" | jq -r '\''.verified'\'')' "$story_dir/data/retry-fallback-implementation.md" + assert_contains 'orchestrator-helper verify-step create 5.3 --state-file "$state_file"' "$story_dir/data/monitoring-pattern.md" + assert_contains '| `$scripts orchestrator-helper verify-step` | Shared success verifier checks per step |' "$story_dir/data/scripts-reference.md" } verify_qa_prompts() { diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 4e69b22..694430a 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -6,8 +6,9 @@ from pathlib import Path from story_automator.core.frontmatter import extract_last_action, find_frontmatter_value, find_frontmatter_value_case, parse_frontmatter -from story_automator.core.runtime_policy import crash_max_retries, load_runtime_policy, review_max_cycles +from story_automator.core.runtime_policy import PolicyError, crash_max_retries, load_runtime_policy, review_max_cycles from story_automator.core.review_verify import verify_code_review_completion +from story_automator.core.success_verifiers import resolve_success_contract, run_success_verifier from 
story_automator.core.sprint import sprint_status_epic, sprint_status_get from story_automator.core.story_keys import normalize_story_key, sprint_status_file from story_automator.core.utils import ( @@ -51,6 +52,7 @@ def cmd_orchestrator_helper(args: list[str]) -> int: "commit-ready": _commit_ready, "normalize-key": _normalize_key, "story-file-status": _story_file_status, + "verify-step": _verify_step, "verify-code-review": _verify_code_review, "check-epic-complete": check_epic_complete_action, "get-epic-stories": get_epic_stories_action, @@ -86,6 +88,7 @@ def _usage(code: int) -> int: print(" commit-ready ", file=target) print(" normalize-key [--to id|key|prefix|json]", file=target) print(" story-file-status ", file=target) + print(" verify-step [--state-file path] [--output-file path]", file=target) print(" verify-code-review ", file=target) print(" check-epic-complete [--state-file path]", file=target) print(" get-epic-stories [--state-file path]", file=target) @@ -388,6 +391,37 @@ def _verify_code_review(args: list[str]) -> int: return 0 if bool(payload.get("verified")) else 1 +def _verify_step(args: list[str]) -> int: + if len(args) < 2: + print_json({"verified": False, "reason": "step_and_story_required"}) + return 1 + step, story_key = args[:2] + state_file = "" + output_file = "" + tail = args[2:] + for idx, arg in enumerate(tail): + if arg == "--state-file" and idx + 1 < len(tail): + state_file = tail[idx + 1] + elif arg == "--output-file" and idx + 1 < len(tail): + output_file = tail[idx + 1] + try: + contract = resolve_success_contract(get_project_root(), step, state_file=state_file or None) + verifier = str(contract.get("verifier") or "").strip() + if not verifier: + raise PolicyError(f"missing success verifier for {step}") + payload = run_success_verifier( + verifier, + project_root=get_project_root(), + story_key=story_key, + output_file=output_file, + contract=contract, + ) + except (FileNotFoundError, PolicyError) as exc: + payload = {"verified": 
False, "step": step, "input": story_key, "reason": "verifier_contract_invalid", "error": str(exc)} + print_json(payload) + return 0 if bool(payload.get("verified")) else 1 + + def _parse_context_int(context: str, key: str) -> int: match = re.search(rf"{re.escape(key)}=(\d+)", context) return int(match.group(1)) if match else 0 diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index a007dde..0ab5f04 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -8,6 +8,7 @@ from contextlib import redirect_stdout from pathlib import Path +from story_automator.commands.orchestrator import cmd_orchestrator_helper from story_automator.commands.state import cmd_build_state_doc from story_automator.commands.tmux import _verify_monitor_completion from story_automator.core.review_verify import verify_code_review_completion @@ -129,6 +130,28 @@ def test_monitor_dispatch_uses_review_verifier_from_contract(self) -> None: self.assertEqual(verifier, "review_completion") self.assertTrue(payload["verified"]) + def test_verify_step_create_uses_shared_verifier(self) -> None: + self._write_story("1-2-example", status="draft") + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["verify-step", "create", "1.2"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["verified"]) + self.assertEqual(payload["source"], "artifact_glob") + + def test_verify_step_create_uses_pinned_snapshot(self) -> None: + self._write_story("1-2-example", status="draft") + state_file = self._build_state() + self._write_override({"steps": {"create": {"success": {"config": {"expectedMatches": 2}}}}}) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["verify-step", "create", "1.2", "--state-file", str(state_file)]) + self.assertEqual(code, 0) + payload = 
json.loads(stdout.getvalue()) + self.assertTrue(payload["verified"]) + self.assertEqual(payload["expectedMatches"], 1) + def test_create_story_artifact_rejects_invalid_expected_matches(self) -> None: with self.assertRaises(PolicyError): create_story_artifact( From 1b8e2d03dc390f1051916c1e7a7e2902f8ccabdb Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 08:49:08 -0300 Subject: [PATCH 07/17] fix: restore verify-step retry contract --- docs/changelog/260413.md | 24 +++++++++++++++++++ .../data/monitoring-pattern.md | 20 +++++++++------- .../data/retry-fallback-implementation.md | 2 +- scripts/smoke-test.sh | 4 ++++ .../story_automator/commands/orchestrator.py | 4 +++- source/tests/test_success_verifiers.py | 9 +++++++ 6 files changed, 52 insertions(+), 11 deletions(-) diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index c915f58..861257f 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -1,5 +1,29 @@ # Changelog - 260413 +## 260413-09:14:32 - Restore verify-step retry contract + +### Summary +Restored the shared verifier CLI contract so create-step retries can inspect JSON failures without aborting, and aligned the shipped create examples with the pinned verifier path. + +### Fixed +- Fixed `orchestrator-helper verify-step` to return JSON with exit code `0` for ordinary verification failures, preserving retry loops that branch on `.verified` instead of shell exit status. +- Fixed the create monitoring quick reference and retry wrapper examples to keep `--state-file` attached through build and monitor handoff. + +### Changed +- Changed smoke coverage and unit tests to pin the create verifier failure contract and the updated state-file command forms. 
+ +### Files +- `source/src/story_automator/commands/orchestrator.py` +- `source/tests/test_success_verifiers.py` +- `payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md` +- `payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md` +- `scripts/smoke-test.sh` +- `docs/changelog/260413.md` + +### QA Notes +- `PYTHONPATH=source/src python3 -m unittest source.tests.test_success_verifiers` +- `bash scripts/smoke-test.sh` + ## 260413-08:05:51 - Wire policy-backed success verifiers ### Summary diff --git a/payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md b/payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md index 530a613..cfa8441 100644 --- a/payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md +++ b/payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md @@ -24,22 +24,24 @@ scripts/ --- -## Standard Workflow: Spawn + Monitor + Parse +## Standard Workflow: Spawn + Monitor + Verify (Create Example) ```bash # STEP 1: Spawn session (use $scripts tmux-wrapper) session_name=$("$scripts" tmux-wrapper spawn create 5 5.3 \ - --command "$("$scripts" tmux-wrapper build-cmd create 5.3)") + --command "$("$scripts" tmux-wrapper build-cmd create 5.3 --state-file "$state_file")") # STEP 2: Monitor until completion (SINGLE API CALL) -result=$("$scripts" monitor-session "$session_name" --verbose --json) +result=$("$scripts" monitor-session "$session_name" \ + --verbose --json \ + --workflow create --story-key 5.3 --state-file "$state_file") -# STEP 3: Parse output with sub-agent -output_file=$(echo "$result" | jq -r '.output_file') -parsed=$("$scripts" orchestrator-helper parse-output "$output_file" create) +# STEP 3: Verify success against the shared create contract +validation=$("$scripts" orchestrator-helper verify-step create 5.3 --state-file "$state_file") +verified=$(echo "$validation" | jq -r '.verified') -# STEP 4: Act on parsed result -next_action=$(echo 
"$parsed" | jq -r '.next_action') +# STEP 4: Act on verifier result +[ "$verified" = "true" ] || echo "retry-or-escalate" # STEP 5: ALWAYS cleanup session (v1.2.0) "$scripts" tmux-wrapper kill "$session_name" @@ -120,7 +122,7 @@ After `$scripts monitor-session` returns: | final_state | Action | |-------------|--------| -| `completed` | Parse output → act on `next_action` | +| `completed` | Run step verifier or parser for the active workflow | | `incomplete` | **(v2.2)** Session idle but workflow NOT verified → Escalate immediately | | `crashed` | Check retry count → retry or escalate | | `stuck` | Get output → investigate → may need restart | diff --git a/payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md b/payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md index 10e6f4b..47452bd 100644 --- a/payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md +++ b/payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md @@ -37,7 +37,7 @@ while [ $attempt -lt $max_attempts ] && [ "$success" = "false" ]; do # Execute workflow step session=$("$scripts" tmux-wrapper spawn {step} {epic} {story_id} \ --agent "$current_agent" \ - --command "$("$scripts" tmux-wrapper build-cmd {step} {story_id} --agent "$current_agent")") + --command "$("$scripts" tmux-wrapper build-cmd {step} {story_id} --agent "$current_agent" --state-file "$state_file")") result=$("$scripts" monitor-session "$session" --json --agent "$current_agent") # Cleanup session diff --git a/scripts/smoke-test.sh b/scripts/smoke-test.sh index 44e44ef..7414554 100755 --- a/scripts/smoke-test.sh +++ b/scripts/smoke-test.sh @@ -247,8 +247,12 @@ verify_common_install() { assert_contains 'parse-output "$output_file" review --state-file "$state_file"' "$story_dir/data/code-review-loop.md" assert_contains 'verify-code-review {story_id} --state-file "$state_file"' "$story_dir/data/code-review-loop.md" assert_contains 
'orchestrator-helper verify-step create {story_id} --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" + assert_contains 'build-cmd create {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" assert_contains 'validation_passed=$(echo "$validation" | jq -r '\''.verified'\'')' "$story_dir/data/retry-fallback-implementation.md" + assert_contains 'build-cmd {step} {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/data/retry-fallback-implementation.md" assert_contains 'orchestrator-helper verify-step create 5.3 --state-file "$state_file"' "$story_dir/data/monitoring-pattern.md" + assert_contains 'workflow create --story-key 5.3 --state-file "$state_file"' "$story_dir/data/monitoring-pattern.md" + assert_not_contains 'parse-output "$output_file" create' "$story_dir/data/monitoring-pattern.md" assert_contains '| `$scripts orchestrator-helper verify-step` | Shared success verifier checks per step |' "$story_dir/data/scripts-reference.md" } diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 694430a..5eb902e 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -416,10 +416,12 @@ def _verify_step(args: list[str]) -> int: output_file=output_file, contract=contract, ) + exit_code = 0 except (FileNotFoundError, PolicyError) as exc: payload = {"verified": False, "step": step, "input": story_key, "reason": "verifier_contract_invalid", "error": str(exc)} + exit_code = 1 print_json(payload) - return 0 if bool(payload.get("verified")) else 1 + return exit_code def _parse_context_int(context: str, key: str) -> int: diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index 0ab5f04..4bee455 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -152,6 +152,15 @@ def 
test_verify_step_create_uses_pinned_snapshot(self) -> None: self.assertTrue(payload["verified"]) self.assertEqual(payload["expectedMatches"], 1) + def test_verify_step_create_returns_json_on_verification_failure(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["verify-step", "create", "1.2"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["verified"]) + self.assertEqual(payload["reason"], "unexpected_story_artifact_count") + def test_create_story_artifact_rejects_invalid_expected_matches(self) -> None: with self.assertRaises(PolicyError): create_story_artifact( From 92360247e154c7bd751d2522325e9d07a40c22d4 Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 09:14:50 -0300 Subject: [PATCH 08/17] fix: enforce snapshot-only resume semantics --- docs/changelog/260413.md | 25 +++++++++++ docs/development.md | 9 ++++ docs/state-and-resume.md | 23 ++++++++++ .../story_automator/commands/orchestrator.py | 34 +++++++++----- .../story_automator/core/runtime_policy.py | 30 ++++++++----- source/tests/test_runtime_policy.py | 44 ++++++++++++++++++- source/tests/test_state_policy_metadata.py | 29 ++++++++++++ 7 files changed, 172 insertions(+), 22 deletions(-) diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 861257f..2437e31 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -247,3 +247,28 @@ Fixed the follow-up review findings around snapshot consumption, policy validati ### QA Notes - N/A + +## 260413-09:13:20 - Enforce snapshot-only resume semantics + +### Summary +Locked resume behavior to pinned snapshots for new state docs while keeping legacy states on bundled defaults only. + +### Fixed +- Fixed legacy state resumes to ignore live project overrides and legacy env knobs so old runs stay on bundled defaults. 
+- Fixed marker and env discovered new-format states to fail validation when the pinned snapshot file is missing instead of silently falling back to live policy. + +### Changed +- Changed `state-summary` to infer `legacyPolicy: true` for old state docs without snapshot metadata. +- Changed tests and operator docs to pin snapshot-only resume rules and the one-release env compatibility window for `MAX_REVIEW_CYCLES` and `MAX_CRASH_RETRIES`. + +### Files +- `source/src/story_automator/core/runtime_policy.py` +- `source/src/story_automator/commands/orchestrator.py` +- `source/tests/test_runtime_policy.py` +- `source/tests/test_state_policy_metadata.py` +- `docs/state-and-resume.md` +- `docs/development.md` +- `docs/changelog/260413.md` + +### QA Notes +- N/A diff --git a/docs/development.md b/docs/development.md index ba9ef4b..ada477a 100644 --- a/docs/development.md +++ b/docs/development.md @@ -68,6 +68,15 @@ python3 -m story_automator with `PYTHONPATH` pointed at `source/src`. +## Legacy Env Compatibility + +For one release cycle, fresh orchestration starts still honor: + +- `MAX_REVIEW_CYCLES` +- `MAX_CRASH_RETRIES` + +Those values are resolved once during snapshot creation. Resume paths read the pinned snapshot, not the current shell env. Prefer `_bmad/bmm/story-automator.policy.json` for new configuration changes. + ## What To Re-Check After Runtime Changes If you change: diff --git a/docs/state-and-resume.md b/docs/state-and-resume.md index b90104d..41da500 100644 --- a/docs/state-and-resume.md +++ b/docs/state-and-resume.md @@ -35,6 +35,10 @@ Important frontmatter fields: - `agentConfig` - `activeSessions` - `completedSessions` +- `policyVersion` +- `policySnapshotFile` +- `policySnapshotHash` +- `legacyPolicy` ### Body Sections @@ -107,6 +111,25 @@ flowchart TD Resume is step-aware. It does not blindly restart from the beginning. 
+### Policy Rules On Resume + +- new-format state docs must load `policySnapshotFile` plus `policySnapshotHash` +- missing or mismatched snapshots are validation failures, not fallback cases +- old state docs without snapshot metadata resume in legacy mode with bundled defaults +- `state-summary` reports `legacyPolicy: true` for those legacy resumes + +### Legacy Env Compatibility + +For one release cycle, `MAX_REVIEW_CYCLES` and `MAX_CRASH_RETRIES` still work at orchestration start. + +They are resolved once, written into the effective policy snapshot, and ignored on resume after that. + +Deprecation path: + +1. keep existing env knobs working for fresh starts +2. prefer JSON policy overrides for new setup +3. remove the env path after the compatibility window closes + ## Validate Flow Validation is a first-class mode, not an ad hoc debug routine. diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 5eb902e..70db01f 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -5,7 +5,13 @@ import re from pathlib import Path -from story_automator.core.frontmatter import extract_last_action, find_frontmatter_value, find_frontmatter_value_case, parse_frontmatter +from story_automator.core.frontmatter import ( + extract_last_action, + find_frontmatter_value, + find_frontmatter_value_case, + parse_frontmatter, + parse_simple_frontmatter, +) from story_automator.core.runtime_policy import PolicyError, crash_max_retries, load_runtime_policy, review_max_cycles from story_automator.core.review_verify import verify_code_review_completion from story_automator.core.success_verifiers import resolve_success_contract, run_success_verifier @@ -239,19 +245,25 @@ def _state_summary(args: list[str]) -> int: if not args or not file_exists(args[0]): print_json({"ok": False, "error": "file_not_found"}) return 1 + fields = 
parse_simple_frontmatter(read_text(args[0])) + snapshot_file = str(fields.get("policySnapshotFile") or "").strip() + snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() + legacy_policy = str(fields.get("legacyPolicy") or "").strip().lower() + if legacy_policy not in {"true", "false"}: + legacy_policy = "true" if not snapshot_file and not snapshot_hash else "false" print_json( { "ok": True, - "epic": find_frontmatter_value(args[0], "epic"), - "epicName": find_frontmatter_value(args[0], "epicName"), - "currentStory": find_frontmatter_value(args[0], "currentStory"), - "currentStep": find_frontmatter_value(args[0], "currentStep"), - "status": find_frontmatter_value(args[0], "status"), - "lastUpdated": find_frontmatter_value(args[0], "lastUpdated"), - "policyVersion": find_frontmatter_value(args[0], "policyVersion"), - "policySnapshotFile": find_frontmatter_value(args[0], "policySnapshotFile"), - "policySnapshotHash": find_frontmatter_value(args[0], "policySnapshotHash"), - "legacyPolicy": find_frontmatter_value(args[0], "legacyPolicy"), + "epic": str(fields.get("epic") or ""), + "epicName": str(fields.get("epicName") or ""), + "currentStory": str(fields.get("currentStory") or ""), + "currentStep": str(fields.get("currentStep") or ""), + "status": str(fields.get("status") or ""), + "lastUpdated": str(fields.get("lastUpdated") or ""), + "policyVersion": str(fields.get("policyVersion") or ""), + "policySnapshotFile": snapshot_file, + "policySnapshotHash": snapshot_hash, + "legacyPolicy": legacy_policy, "lastAction": extract_last_action(args[0]), } ) diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index 93c0c62..d307537 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ b/source/src/story_automator/core/runtime_policy.py @@ -14,23 +14,34 @@ VALID_ASSET_NAMES = {"skill", "workflow", "instructions", "checklist", "template"} +def load_bundled_policy(project_root: str | None = 
None, *, resolve_assets: bool = True) -> dict[str, Any]: + root = Path(project_root or get_project_root()).resolve() + bundle_root = bundled_skill_root(root) + policy = _read_json(bundle_root / "data" / "orchestration-policy.json") + _validate_policy_shape(policy) + if resolve_assets: + _resolve_policy_paths(policy, project_root=root, bundle_root=bundle_root) + else: + _resolve_success_paths(policy, project_root=root, bundle_root=bundle_root) + return policy + + class PolicyError(ValueError): pass def load_effective_policy(project_root: str | None = None, *, resolve_assets: bool = True) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() - bundle_root = bundled_skill_root(root) - bundled = _read_json(bundle_root / "data" / "orchestration-policy.json") + bundled = load_bundled_policy(str(root), resolve_assets=False) override_path = root / "_bmad" / "bmm" / "story-automator.policy.json" override = _read_json(override_path) if override_path.is_file() else {} policy = _deep_merge(bundled, override) _apply_legacy_env(policy) _validate_policy_shape(policy) if resolve_assets: - _resolve_policy_paths(policy, project_root=root, bundle_root=bundle_root) + _resolve_policy_paths(policy, project_root=root, bundle_root=bundled_skill_root(root)) else: - _resolve_success_paths(policy, project_root=root, bundle_root=bundle_root) + _resolve_success_paths(policy, project_root=root, bundle_root=bundled_skill_root(root)) return policy @@ -43,11 +54,10 @@ def load_runtime_policy( root = Path(project_root or get_project_root()).resolve() resolved_state, source = resolve_policy_state_file(root, state_file) if resolved_state: - try: - return load_policy_for_state(resolved_state, project_root=str(root), resolve_assets=resolve_assets) - except (FileNotFoundError, PolicyError): - if source == "explicit": - raise + state_path = Path(resolved_state) + if source != "explicit" and not state_path.is_file(): + return load_effective_policy(str(root), 
resolve_assets=resolve_assets) + return load_policy_for_state(str(state_path), project_root=str(root), resolve_assets=resolve_assets) return load_effective_policy(str(root), resolve_assets=resolve_assets) @@ -117,7 +127,7 @@ def load_policy_for_state( expected_hash=snapshot_hash, resolve_assets=resolve_assets, ) - return load_effective_policy(str(root), resolve_assets=resolve_assets) + return load_bundled_policy(str(root), resolve_assets=resolve_assets) def resolve_policy_state_file(project_root: str | Path | None = None, state_file: str | Path | None = None) -> tuple[str, str]: diff --git a/source/tests/test_runtime_policy.py b/source/tests/test_runtime_policy.py index 3979f54..2548fc5 100644 --- a/source/tests/test_runtime_policy.py +++ b/source/tests/test_runtime_policy.py @@ -5,8 +5,15 @@ import tempfile import unittest from pathlib import Path +from unittest.mock import patch -from story_automator.core.runtime_policy import PolicyError, load_effective_policy, load_runtime_policy, snapshot_effective_policy +from story_automator.core.runtime_policy import ( + PolicyError, + load_effective_policy, + load_policy_snapshot, + load_runtime_policy, + snapshot_effective_policy, +) REPO_ROOT = Path(__file__).resolve().parents[2] @@ -58,6 +65,18 @@ def test_snapshot_hash_stable(self) -> None: second = snapshot_effective_policy(str(self.project_root)) self.assertEqual(first["policySnapshotHash"], second["policySnapshotHash"]) + def test_snapshot_bakes_legacy_env_values_for_resume(self) -> None: + with patch.dict("os.environ", {"MAX_REVIEW_CYCLES": "2", "MAX_CRASH_RETRIES": "4"}, clear=False): + snapshot = snapshot_effective_policy(str(self.project_root)) + with patch.dict("os.environ", {"MAX_REVIEW_CYCLES": "9", "MAX_CRASH_RETRIES": "9"}, clear=False): + policy = load_policy_snapshot( + snapshot["policySnapshotFile"], + project_root=str(self.project_root), + expected_hash=snapshot["policySnapshotHash"], + ) + 
self.assertEqual(policy["workflow"]["repeat"]["review"]["maxCycles"], 2) + self.assertEqual(policy["workflow"]["crash"]["maxRetries"], 4) + def test_malformed_override_json_raises_policy_error(self) -> None: override_dir = self.project_root / "_bmad" / "bmm" override_dir.mkdir(parents=True, exist_ok=True) @@ -102,6 +121,29 @@ def test_malformed_marker_falls_back_to_effective_policy(self) -> None: policy = load_runtime_policy(str(self.project_root)) self.assertEqual(policy["workflow"]["repeat"]["review"]["maxCycles"], 5) + def test_legacy_state_uses_bundled_defaults_without_override_or_env(self) -> None: + self._write_override({"workflow": {"repeat": {"review": {"maxCycles": 1}}}}) + legacy_state = self.project_root / "legacy.md" + legacy_state.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\n---\n", + encoding="utf-8", + ) + with patch.dict("os.environ", {"MAX_REVIEW_CYCLES": "2"}, clear=False): + policy = load_runtime_policy(str(self.project_root), state_file=str(legacy_state)) + self.assertEqual(policy["workflow"]["repeat"]["review"]["maxCycles"], 5) + + def test_marker_resume_with_missing_snapshot_raises_policy_error(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\npolicySnapshotFile: \"_bmad-output/story-automator/snapshots/missing.json\"\npolicySnapshotHash: \"deadbeef\"\n---\n", + encoding="utf-8", + ) + marker = self.project_root / ".claude" / ".story-automator-active" + marker.parent.mkdir(parents=True, exist_ok=True) + marker.write_text(json.dumps({"stateFile": str(state_file.relative_to(self.project_root))}), encoding="utf-8") + with self.assertRaises(PolicyError): + load_runtime_policy(str(self.project_root)) + def _install_bundle(self) -> None: source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" source_review = REPO_ROOT / "payload" / ".claude" / "skills" / 
"bmad-story-automator-review" diff --git a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index 72301ca..ceccd8f 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -69,6 +69,35 @@ def test_legacy_state_without_policy_metadata_remains_valid(self) -> None: self.assertEqual(code, 0) self.assertEqual(json.loads(stdout.getvalue())["structure"], "ok") + def test_summary_infers_legacy_policy_for_old_state(self) -> None: + legacy = self.project_root / "legacy.md" + legacy.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with redirect_stdout(stdout): + code = cmd_orchestrator_helper(["state-summary", str(legacy)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["policySnapshotFile"], "") + self.assertEqual(payload["policySnapshotHash"], "") + self.assertEqual(payload["legacyPolicy"], "true") + + def test_validate_state_rejects_new_state_with_missing_snapshot(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\npolicySnapshotFile: \"_bmad-output/story-automator/snapshots/missing.json\"\npolicySnapshotHash: \"deadbeef\"\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_state(["--state", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["structure"], "issues") + self.assertTrue(any("policy snapshot missing" in issue for issue in payload["issues"])) + def test_escalate_uses_pinned_snapshot_when_state_file_provided(self) -> None: 
state_file = self._build_state() override_dir = self.project_root / "_bmad" / "bmm" From 3b5d55f6d5099a10eb7cf312443c274360d965db Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 09:26:44 -0300 Subject: [PATCH 09/17] fix: tighten state policy compatibility helpers --- docs/changelog/260413.md | 26 ++++++ docs/cli-reference.md | 8 +- .../story_automator/commands/orchestrator.py | 3 +- .../commands/validate_story_creation.py | 88 +++++++++++++------ .../story_automator/core/runtime_policy.py | 23 +++-- source/tests/test_runtime_policy.py | 9 ++ source/tests/test_state_policy_metadata.py | 27 ++++++ source/tests/test_success_verifiers.py | 25 ++++++ 8 files changed, 177 insertions(+), 32 deletions(-) diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 2437e31..3f814fe 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -48,6 +48,32 @@ Moved review completion checks onto the JSON policy contract and routed monitor ### QA Notes - N/A +## 260413-09:26:29 - Tighten state policy compatibility helpers + +### Summary +Closed the remaining state-policy fallback hole and routed the legacy create validator through the shared verifier. + +### Changed +- Changed `validate-story-creation check` into a compatibility wrapper around the policy-backed create success verifier and updated the CLI docs to point new callers at `orchestrator-helper verify-step create`. +- Changed regression coverage to pin the legacy create wrapper against the shared verifier and pinned state snapshots. + +### Fixed +- Fixed new-format state docs with policy-era metadata but missing snapshot metadata to fail instead of slipping into legacy defaults. +- Fixed `state-summary` to avoid inferring `legacyPolicy: true` for malformed new-format state docs. 
+ +### Files +- `source/src/story_automator/commands/validate_story_creation.py` +- `source/src/story_automator/core/runtime_policy.py` +- `source/src/story_automator/commands/orchestrator.py` +- `source/tests/test_success_verifiers.py` +- `source/tests/test_runtime_policy.py` +- `source/tests/test_state_policy_metadata.py` +- `docs/cli-reference.md` +- `docs/changelog/260413.md` + +### QA Notes +- N/A + ## 260413-08:39:42 - Route create validation through shared verifier ### Summary diff --git a/docs/cli-reference.md b/docs/cli-reference.md index 6b83009..2cf3592 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -87,7 +87,7 @@ These support saved presets and generated agent plans. - `stop-hook` - `list-sessions` - `commit-story` -- `validate-story-creation` +- `validate-story-creation` (legacy compatibility wrapper; prefer `orchestrator-helper verify-step create`) ## Typical Patterns @@ -117,6 +117,12 @@ session="$("$scripts" tmux-wrapper spawn review 1 1.2 --agent claude --command " "$scripts" orchestrator-helper verify-step create 1.2 --state-file "$state_file" ``` +Legacy compatibility: + +```bash +"$scripts" validate-story-creation check 1.2 --state-file "$state_file" +``` + ## Read Next - [Agents And Monitoring](./agents-and-monitoring.md) diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 70db01f..d54fd4a 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -248,9 +248,10 @@ def _state_summary(args: list[str]) -> int: fields = parse_simple_frontmatter(read_text(args[0])) snapshot_file = str(fields.get("policySnapshotFile") or "").strip() snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() + policy_version = str(fields.get("policyVersion") or "").strip() legacy_policy = str(fields.get("legacyPolicy") or "").strip().lower() if legacy_policy not in {"true", "false"}: - legacy_policy = 
"true" if not snapshot_file and not snapshot_hash else "false" + legacy_policy = "true" if not snapshot_file and not snapshot_hash and not policy_version else "false" print_json( { "ok": True, diff --git a/source/src/story_automator/commands/validate_story_creation.py b/source/src/story_automator/commands/validate_story_creation.py index feda1b0..948a2cc 100644 --- a/source/src/story_automator/commands/validate_story_creation.py +++ b/source/src/story_automator/commands/validate_story_creation.py @@ -1,8 +1,12 @@ from __future__ import annotations +import json import os from pathlib import Path +from story_automator.core.runtime_policy import PolicyError +from story_automator.core.success_verifiers import create_story_artifact, resolve_success_contract + def cmd_validate_story_creation(args: list[str]) -> int: action = args[0] if args else "" @@ -16,6 +20,43 @@ def story_prefix(story_id: str) -> str: def count_files(story_id: str, folder: Path) -> int: return len(list(folder.glob(f"{story_prefix(story_id)}-*.md"))) + def check_usage() -> int: + print( + "Usage: validate-story-creation check [--state-file PATH] [--before N --after N]", + file=os.sys.stderr, + ) + return 1 + + def create_check_payload(story_id: str, state_file: str) -> dict[str, object]: + contract = resolve_success_contract(project_root, "create", state_file=state_file or None) + payload = create_story_artifact(project_root=project_root, story_key=story_id, contract=contract) + expected = int(payload.get("expectedMatches", 1) or 1) + actual = int(payload.get("actualMatches", 0) or 0) + valid = bool(payload.get("verified")) + if valid: + reason = "Exactly 1 story file created as expected" if expected == 1 else f"Exactly {expected} story files created as expected" + elif actual == 0: + reason = "No story file created - session may have failed" + elif actual > expected: + reason = f"RUNAWAY CREATION: {actual} files created instead of {expected}" + else: + reason = f"Unexpected story artifact count: 
{actual} files instead of {expected}" + response: dict[str, object] = { + "valid": valid, + "verified": valid, + "created_count": actual, + "expected": expected, + "prefix": story_prefix(story_id), + "action": "proceed" if valid else "escalate", + "reason": reason, + "source": payload.get("source", ""), + "pattern": payload.get("pattern", ""), + "matches": payload.get("matches", []), + } + if payload.get("story"): + response["story"] = payload["story"] + return response + if action == "count": if not rest: print("Usage: validate-story-creation count ", file=os.sys.stderr) @@ -29,45 +70,42 @@ def count_files(story_id: str, folder: Path) -> int: if action == "check": if not rest: - print("Usage: validate-story-creation check --before N --after N", file=os.sys.stderr) - return 1 + return check_usage() story_id = rest[0] - before = after = None + state_file = "" + before = after = "" idx = 1 while idx < len(rest): if rest[idx] == "--before" and idx + 1 < len(rest): - before = int(rest[idx + 1]) + before = rest[idx + 1] idx += 2 continue if rest[idx] == "--after" and idx + 1 < len(rest): - after = int(rest[idx + 1]) + after = rest[idx + 1] idx += 2 continue if rest[idx] == "--artifacts-dir" and idx + 1 < len(rest): artifacts_dir = Path(rest[idx + 1]) idx += 2 continue + if rest[idx] == "--state-file" and idx + 1 < len(rest): + state_file = rest[idx + 1] + idx += 2 + continue idx += 1 - if before is None or after is None: - print("Usage: validate-story-creation check --before N --after N", file=os.sys.stderr) + if artifacts_dir != Path(project_root) / "_bmad-output" / "implementation-artifacts": + print("validate-story-creation check no longer supports --artifacts-dir overrides; use count/list for custom folders", file=os.sys.stderr) return 1 - created = after - before - valid = created == 1 - reason = ( - "Exactly 1 story file created as expected" - if created == 1 - else "No story file created - session may have failed" - if created == 0 - else f"Story files decreased 
({created}) - unexpected deletion" - if created < 0 - else f"RUNAWAY CREATION: {created} files created instead of 1" - ) - action_name = "proceed" if valid else "escalate" - print( - f'{{"valid":{str(valid).lower()},"created_count":{created},"expected":1,' - f'"before":{before},"after":{after},"prefix":"{story_prefix(story_id)}",' - f'"action":"{action_name}","reason":"{reason}"}}' - ) + try: + payload = create_check_payload(story_id, state_file) + except (PolicyError, ValueError) as exc: + print(json.dumps({"valid": False, "verified": False, "action": "escalate", "reason": str(exc)}, separators=(",", ":"))) + return 1 + if before: + payload["before"] = before + if after: + payload["after"] = after + print(json.dumps(payload, separators=(",", ":"))) return 0 if action == "list": @@ -98,7 +136,7 @@ def count_files(story_id: str, folder: Path) -> int: print("", file=os.sys.stderr) print("Actions:", file=os.sys.stderr) print(" count - Count current story files", file=os.sys.stderr) - print(" check --before N --after N - Validate creation", file=os.sys.stderr) + print(" check [--state-file PATH] - Compatibility wrapper for create verifier", file=os.sys.stderr) print(" list - List matching files", file=os.sys.stderr) print(" prefix - Convert story ID to file prefix", file=os.sys.stderr) return 1 diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index d307537..1fcbae9 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ b/source/src/story_automator/core/runtime_policy.py @@ -116,11 +116,8 @@ def load_policy_for_state( ) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() fields = parse_simple_frontmatter(read_text(state_file)) - snapshot_file = str(fields.get("policySnapshotFile") or "").strip() - snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() - if snapshot_file or snapshot_hash: - if not snapshot_file or not snapshot_hash: - raise 
PolicyError("state policy metadata incomplete") + snapshot_file, snapshot_hash, legacy_mode = _state_policy_mode(fields) + if not legacy_mode: return load_policy_snapshot( snapshot_file, project_root=str(root), @@ -364,6 +361,22 @@ def _resolve_state_path(project_root: Path, path: Path) -> Path: return path if path.is_absolute() else project_root / path +def _state_policy_mode(fields: dict[str, Any]) -> tuple[str, str, bool]: + snapshot_file = str(fields.get("policySnapshotFile") or "").strip() + snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() + policy_version = str(fields.get("policyVersion") or "").strip() + legacy_policy = str(fields.get("legacyPolicy") or "").strip().lower() + if snapshot_file or snapshot_hash: + if not snapshot_file or not snapshot_hash: + raise PolicyError("state policy metadata incomplete") + return snapshot_file, snapshot_hash, False + if legacy_policy == "true": + return "", "", True + if legacy_policy == "false" or policy_version: + raise PolicyError("state policy snapshot missing") + return "", "", True + + def _expect_optional_dict(payload: dict[str, Any], key: str) -> dict[str, Any]: value = payload.get(key) if value is None: diff --git a/source/tests/test_runtime_policy.py b/source/tests/test_runtime_policy.py index 2548fc5..b1fa2b2 100644 --- a/source/tests/test_runtime_policy.py +++ b/source/tests/test_runtime_policy.py @@ -144,6 +144,15 @@ def test_marker_resume_with_missing_snapshot_raises_policy_error(self) -> None: with self.assertRaises(PolicyError): load_runtime_policy(str(self.project_root)) + def test_new_state_without_snapshot_metadata_is_rejected(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\npolicyVersion: 1\nlegacyPolicy: false\n---\n", + encoding="utf-8", + ) + with self.assertRaisesRegex(PolicyError, "state 
policy snapshot missing"): + load_runtime_policy(str(self.project_root), state_file=str(state_file)) + def _install_bundle(self) -> None: source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" diff --git a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index ceccd8f..286a16d 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -98,6 +98,33 @@ def test_validate_state_rejects_new_state_with_missing_snapshot(self) -> None: self.assertEqual(payload["structure"], "issues") self.assertTrue(any("policy snapshot missing" in issue for issue in payload["issues"])) + def test_validate_state_rejects_new_state_missing_snapshot_metadata(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\npolicyVersion: 1\nlegacyPolicy: false\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_state(["--state", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["structure"], "issues") + self.assertTrue(any("state policy snapshot missing" in issue for issue in payload["issues"])) + + def test_summary_does_not_infer_legacy_for_new_state_missing_snapshot_metadata(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\npolicyVersion: 1\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with redirect_stdout(stdout): + code = 
cmd_orchestrator_helper(["state-summary", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["legacyPolicy"], "false") + def test_escalate_uses_pinned_snapshot_when_state_file_provided(self) -> None: state_file = self._build_state() override_dir = self.project_root / "_bmad" / "bmm" diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index 4bee455..6fde4fd 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -11,6 +11,7 @@ from story_automator.commands.orchestrator import cmd_orchestrator_helper from story_automator.commands.state import cmd_build_state_doc from story_automator.commands.tmux import _verify_monitor_completion +from story_automator.commands.validate_story_creation import cmd_validate_story_creation from story_automator.core.review_verify import verify_code_review_completion from story_automator.core.runtime_policy import PolicyError from story_automator.core.success_verifiers import create_story_artifact, epic_complete, review_completion @@ -161,6 +162,30 @@ def test_verify_step_create_returns_json_on_verification_failure(self) -> None: self.assertFalse(payload["verified"]) self.assertEqual(payload["reason"], "unexpected_story_artifact_count") + def test_validate_story_creation_check_uses_shared_verifier(self) -> None: + self._write_story("1-2-example", status="draft") + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["valid"]) + self.assertTrue(payload["verified"]) + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["expected"], 1) + + def test_validate_story_creation_check_uses_pinned_snapshot(self) -> None: + self._write_story("1-2-example", status="draft") + state_file = self._build_state() 
+ self._write_override({"steps": {"create": {"success": {"config": {"expectedMatches": 2}}}}}) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--state-file", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["valid"]) + self.assertEqual(payload["expected"], 1) + def test_create_story_artifact_rejects_invalid_expected_matches(self) -> None: with self.assertRaises(PolicyError): create_story_artifact( From 54ffd3fb736325bebe28b37863d853205b7d9b9e Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 11:01:52 -0300 Subject: [PATCH 10/17] fix: harden parser runtime and validator compatibility --- docs/changelog/260413.md | 28 ++++ .../story_automator/commands/orchestrator.py | 15 +- .../commands/orchestrator_parse.py | 15 +- source/src/story_automator/commands/tmux.py | 10 +- .../commands/validate_story_creation.py | 158 +++++++++++++----- .../story_automator/core/runtime_policy.py | 34 +++- source/tests/test_orchestrator_parse.py | 17 ++ source/tests/test_runtime_policy.py | 14 ++ source/tests/test_state_policy_metadata.py | 22 ++- source/tests/test_success_verifiers.py | 145 ++++++++++++++++ 10 files changed, 390 insertions(+), 68 deletions(-) diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 3f814fe..6327858 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -298,3 +298,31 @@ Locked resume behavior to pinned snapshots for new state docs while keeping lega ### QA Notes - N/A + +## 260413-11:00:47 - Harden parser runtime and validator compatibility + +### Summary +Hardened the policy-backed parser/runtime wiring and closed the remaining legacy validator compatibility gaps. 
+ +### Changed +- Changed parser dispatch to read provider, model, and timeout settings from `runtime.parser` policy data instead of hard-coded CLI defaults. +- Changed `tmux-wrapper build-cmd` to reject unknown steps through policy contract lookup instead of a local step allowlist. + +### Fixed +- Fixed `validate-story-creation` compatibility mode to preserve `before`/`after` delta semantics, forward trailing flags like `--state-file`, preserve zero-expected contracts, and return one JSON envelope across malformed flag, positional, and policy-error paths. +- Fixed policy-state classification so contradictory `policyVersion` and `legacyPolicy: true` metadata fails closed and `state-summary` reports the same boundary as the runtime loader. + +### Files +- `source/src/story_automator/commands/orchestrator.py` +- `source/src/story_automator/commands/orchestrator_parse.py` +- `source/src/story_automator/commands/tmux.py` +- `source/src/story_automator/commands/validate_story_creation.py` +- `source/src/story_automator/core/runtime_policy.py` +- `source/tests/test_orchestrator_parse.py` +- `source/tests/test_runtime_policy.py` +- `source/tests/test_state_policy_metadata.py` +- `source/tests/test_success_verifiers.py` +- `docs/changelog/260413.md` + +### QA Notes +- N/A diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index d54fd4a..c8e115d 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -12,7 +12,13 @@ parse_frontmatter, parse_simple_frontmatter, ) -from story_automator.core.runtime_policy import PolicyError, crash_max_retries, load_runtime_policy, review_max_cycles +from story_automator.core.runtime_policy import ( + PolicyError, + crash_max_retries, + load_runtime_policy, + review_max_cycles, + summarize_state_policy_fields, +) from story_automator.core.review_verify import verify_code_review_completion from 
story_automator.core.success_verifiers import resolve_success_contract, run_success_verifier from story_automator.core.sprint import sprint_status_epic, sprint_status_get @@ -246,12 +252,7 @@ def _state_summary(args: list[str]) -> int: print_json({"ok": False, "error": "file_not_found"}) return 1 fields = parse_simple_frontmatter(read_text(args[0])) - snapshot_file = str(fields.get("policySnapshotFile") or "").strip() - snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() - policy_version = str(fields.get("policyVersion") or "").strip() - legacy_policy = str(fields.get("legacyPolicy") or "").strip().lower() - if legacy_policy not in {"true", "false"}: - legacy_policy = "true" if not snapshot_file and not snapshot_hash and not policy_version else "false" + snapshot_file, snapshot_hash, policy_version, legacy_policy = summarize_state_policy_fields(fields) print_json( { "ok": True, diff --git a/source/src/story_automator/commands/orchestrator_parse.py b/source/src/story_automator/commands/orchestrator_parse.py index c809cfd..3965f79 100644 --- a/source/src/story_automator/commands/orchestrator_parse.py +++ b/source/src/story_automator/commands/orchestrator_parse.py @@ -2,13 +2,10 @@ import json -from story_automator.core.runtime_policy import PolicyError, load_runtime_policy, step_contract +from story_automator.core.runtime_policy import PolicyError, load_runtime_policy, parser_runtime_config, step_contract from story_automator.core.utils import COMMAND_TIMEOUT_EXIT, extract_json_line, print_json, read_text, run_cmd, trim_lines -PARSE_OUTPUT_TIMEOUT = 120 - - def parse_output_action(args: list[str]) -> int: if len(args) < 2: print('{"status":"error","reason":"output file not found or empty"}') @@ -32,20 +29,22 @@ def parse_output_action(args: list[str]) -> int: return 1 lines = trim_lines(content)[:150] try: - contract = step_contract(load_runtime_policy(state_file=state_file), step) + policy = load_runtime_policy(state_file=state_file) + contract = 
step_contract(policy, step) parse_contract = _load_parse_contract(contract) + parser_cfg = parser_runtime_config(policy) except (FileNotFoundError, json.JSONDecodeError, ValueError, PolicyError): print_json({"status": "error", "reason": "parse_contract_invalid"}) return 1 prompt = _build_parse_prompt(contract, parse_contract, "\n".join(lines)) result = run_cmd( - "claude", + str(parser_cfg["provider"]), "-p", "--model", - "haiku", + str(parser_cfg["model"]), prompt, env={"STORY_AUTOMATOR_CHILD": "true", "CLAUDECODE": ""}, - timeout=PARSE_OUTPUT_TIMEOUT, + timeout=int(parser_cfg["timeoutSeconds"]), ) if result.exit_code != 0: reason = "sub-agent call timed out" if result.exit_code == COMMAND_TIMEOUT_EXIT else "sub-agent call failed" diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py index c6ddbeb..6b0bb70 100644 --- a/source/src/story_automator/commands/tmux.py +++ b/source/src/story_automator/commands/tmux.py @@ -190,10 +190,12 @@ def _build_cmd(args: list[str]) -> int: agent = agent or agent_type() story_prefix = story_id.replace(".", "-") root = get_project_root() - if step not in {"create", "dev", "auto", "review", "retro"}: - print(f"Unknown step type: {step}", file=__import__("sys").stderr) + try: + policy = load_runtime_policy(root, state_file=state_file) + contract = step_contract(policy, step) + except (FileNotFoundError, PolicyError) as exc: + print(str(exc), file=__import__("sys").stderr) return 1 - policy = load_runtime_policy(root, state_file=state_file) ai_command = os.environ.get("AI_COMMAND") if ai_command and not os.environ.get("AI_AGENT"): cli = ai_command @@ -201,7 +203,7 @@ def _build_cmd(args: list[str]) -> int: cli = agent_cli(agent) else: cli = "codex exec" - prompt = _render_step_prompt(step_contract(policy, step), story_id, story_prefix, extra) + prompt = _render_step_prompt(contract, story_id, story_prefix, extra) escaped = prompt.replace("\\", "\\\\").replace('"', '\\"') if agent == 
"codex" and not ai_command: codex_home = f"/tmp/sa-codex-home-{project_hash(root)}" diff --git a/source/src/story_automator/commands/validate_story_creation.py b/source/src/story_automator/commands/validate_story_creation.py index 948a2cc..746b019 100644 --- a/source/src/story_automator/commands/validate_story_creation.py +++ b/source/src/story_automator/commands/validate_story_creation.py @@ -12,7 +12,8 @@ def cmd_validate_story_creation(args: list[str]) -> int: action = args[0] if args else "" rest = args[1:] if args else [] project_root = os.environ.get("PROJECT_ROOT", os.getcwd()) - artifacts_dir = Path(project_root) / "_bmad-output" / "implementation-artifacts" + default_artifacts_dir = Path(project_root) / "_bmad-output" / "implementation-artifacts" + artifacts_dir = default_artifacts_dir def story_prefix(story_id: str) -> str: return story_id.replace(".", "-") @@ -20,43 +21,84 @@ def story_prefix(story_id: str) -> str: def count_files(story_id: str, folder: Path) -> int: return len(list(folder.glob(f"{story_prefix(story_id)}-*.md"))) - def check_usage() -> int: - print( - "Usage: validate-story-creation check [--state-file PATH] [--before N --after N]", - file=os.sys.stderr, - ) - return 1 - def create_check_payload(story_id: str, state_file: str) -> dict[str, object]: contract = resolve_success_contract(project_root, "create", state_file=state_file or None) - payload = create_story_artifact(project_root=project_root, story_key=story_id, contract=contract) - expected = int(payload.get("expectedMatches", 1) or 1) - actual = int(payload.get("actualMatches", 0) or 0) - valid = bool(payload.get("verified")) - if valid: - reason = "Exactly 1 story file created as expected" if expected == 1 else f"Exactly {expected} story files created as expected" - elif actual == 0: - reason = "No story file created - session may have failed" - elif actual > expected: - reason = f"RUNAWAY CREATION: {actual} files created instead of {expected}" - else: - reason = f"Unexpected 
story artifact count: {actual} files instead of {expected}" + return create_story_artifact(project_root=project_root, story_key=story_id, contract=contract) + + def expected_matches(payload: dict[str, object] | None) -> int: + if payload is None: + return 1 + return int(payload.get("expectedMatches", 1)) + + def count_reason(created: int, expected: int) -> str: + if created == expected: + return "Exactly 1 story file created as expected" if expected == 1 else f"Exactly {expected} story files created as expected" + if created == 0: + return "No story file created - session may have failed" + if created < 0: + return f"Story files decreased ({created}) - unexpected deletion" + if created > expected: + return f"RUNAWAY CREATION: {created} files created instead of {expected}" + return f"Unexpected story artifact count: {created} files instead of {expected}" + + def build_check_response( + story_id: str, + payload: dict[str, object] | None, + *, + before_count: int | None = None, + after_count: int | None = None, + valid_override: bool | None = None, + reason_override: str | None = None, + ) -> dict[str, object]: + expected = expected_matches(payload) + created = int(payload.get("actualMatches", 0)) if payload is not None else 0 + valid = bool(payload.get("verified")) if payload is not None else False + reason = count_reason(created, expected) + if before_count is not None and after_count is not None: + created = after_count - before_count + valid = created == expected + reason = count_reason(created, expected) + if valid_override is not None: + valid = valid_override + if reason_override is not None: + reason = reason_override response: dict[str, object] = { "valid": valid, "verified": valid, - "created_count": actual, + "created_count": created, "expected": expected, "prefix": story_prefix(story_id), "action": "proceed" if valid else "escalate", "reason": reason, - "source": payload.get("source", ""), - "pattern": payload.get("pattern", ""), - "matches": 
payload.get("matches", []), + "source": payload.get("source", "") if payload is not None else "", + "pattern": payload.get("pattern", "") if payload is not None else "", + "matches": payload.get("matches", []) if payload is not None else [], } - if payload.get("story"): + if before_count is not None and after_count is not None: + response["before"] = before_count + response["after"] = after_count + if payload is not None and payload.get("story"): response["story"] = payload["story"] return response + def print_check_error( + story_id: str, + *, + reason: str, + before_count: int | None = None, + after_count: int | None = None, + ) -> int: + response = build_check_response( + story_id, + None, + before_count=before_count, + after_count=after_count, + valid_override=False, + reason_override=reason, + ) + print(json.dumps(response, separators=(",", ":"))) + return 1 + if action == "count": if not rest: print("Usage: validate-story-creation count ", file=os.sys.stderr) @@ -70,42 +112,62 @@ def create_check_payload(story_id: str, state_file: str) -> dict[str, object]: if action == "check": if not rest: - return check_usage() + return print_check_error("", reason="story_id required") story_id = rest[0] state_file = "" - before = after = "" + before_value = after_value = None + before_seen = after_seen = False idx = 1 while idx < len(rest): - if rest[idx] == "--before" and idx + 1 < len(rest): - before = rest[idx + 1] - idx += 2 + if rest[idx] == "--before": + before_seen = True + if idx + 1 < len(rest): + before_value = rest[idx + 1] + idx += 2 + else: + return print_check_error(story_id, reason="--before requires a value") continue - if rest[idx] == "--after" and idx + 1 < len(rest): - after = rest[idx + 1] - idx += 2 + if rest[idx] == "--after": + after_seen = True + if idx + 1 < len(rest): + after_value = rest[idx + 1] + idx += 2 + else: + return print_check_error(story_id, reason="--after requires a value") continue if rest[idx] == "--artifacts-dir" and idx + 1 < 
len(rest): artifacts_dir = Path(rest[idx + 1]) idx += 2 continue + if rest[idx] == "--artifacts-dir": + return print_check_error(story_id, reason="--artifacts-dir requires a value") if rest[idx] == "--state-file" and idx + 1 < len(rest): state_file = rest[idx + 1] idx += 2 continue - idx += 1 - if artifacts_dir != Path(project_root) / "_bmad-output" / "implementation-artifacts": - print("validate-story-creation check no longer supports --artifacts-dir overrides; use count/list for custom folders", file=os.sys.stderr) - return 1 + if rest[idx] == "--state-file": + return print_check_error(story_id, reason="--state-file requires a value") + return print_check_error(story_id, reason=f"unsupported check argument: {rest[idx]}") + if before_seen != after_seen: + return print_check_error(story_id, reason="both --before and --after are required together") + before_count = after_count = None + if before_seen and after_seen: + try: + before_count = int(before_value or "") + after_count = int(after_value or "") + except ValueError: + return print_check_error(story_id, reason="before/after must be integers") + if artifacts_dir != default_artifacts_dir and not (before_seen and after_seen): + return print_check_error( + story_id, + reason="validate-story-creation check no longer supports --artifacts-dir overrides; use count/list for custom folders", + ) try: payload = create_check_payload(story_id, state_file) + response = build_check_response(story_id, payload, before_count=before_count, after_count=after_count) except (PolicyError, ValueError) as exc: - print(json.dumps({"valid": False, "verified": False, "action": "escalate", "reason": str(exc)}, separators=(",", ":"))) - return 1 - if before: - payload["before"] = before - if after: - payload["after"] = after - print(json.dumps(payload, separators=(",", ":"))) + return print_check_error(story_id, reason=str(exc), before_count=before_count, after_count=after_count) + print(json.dumps(response, separators=(",", ":"))) return 0 
if action == "list": @@ -129,8 +191,12 @@ def create_check_payload(story_id: str, state_file: str) -> dict[str, object]: print(story_prefix(rest[0])) return 0 - if action and len(rest) >= 2 and rest[0].isdigit() and rest[1].isdigit(): - return cmd_validate_story_creation(["check", action, "--before", rest[0], "--after", rest[1]]) + if action and action not in {"count", "check", "list", "prefix"}: + if not rest: + return print_check_error(action, reason="both --before and --after are required together") + if len(rest) == 1: + return cmd_validate_story_creation(["check", action, "--before", rest[0]]) + return cmd_validate_story_creation(["check", action, "--before", rest[0], "--after", rest[1], *rest[2:]]) print("Usage: validate-story-creation [args]", file=os.sys.stderr) print("", file=os.sys.stderr) diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index 1fcbae9..73ec450 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ b/source/src/story_automator/core/runtime_policy.py @@ -12,6 +12,7 @@ VALID_STEP_NAMES = {"create", "dev", "auto", "review", "retro"} VALID_VERIFIERS = {"create_story_artifact", "session_exit", "review_completion", "epic_complete"} VALID_ASSET_NAMES = {"skill", "workflow", "instructions", "checklist", "template"} +VALID_PARSER_PROVIDERS = {"claude"} def load_bundled_policy(project_root: str | None = None, *, resolve_assets: bool = True) -> dict[str, Any]: @@ -127,6 +128,17 @@ def load_policy_for_state( return load_bundled_policy(str(root), resolve_assets=resolve_assets) +def summarize_state_policy_fields(fields: dict[str, Any]) -> tuple[str, str, str, str]: + snapshot_file = str(fields.get("policySnapshotFile") or "").strip() + snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() + policy_version = str(fields.get("policyVersion") or "").strip() + try: + _, _, legacy_mode = _state_policy_mode(fields) + except PolicyError: + legacy_mode = False + return 
snapshot_file, snapshot_hash, policy_version, "true" if legacy_mode else "false" + + def resolve_policy_state_file(project_root: str | Path | None = None, state_file: str | Path | None = None) -> tuple[str, str]: root = Path(project_root or get_project_root()).resolve() explicit = Path(state_file).expanduser() if state_file else None @@ -164,6 +176,21 @@ def crash_max_retries(policy: dict[str, Any]) -> int: return int(crash.get("maxRetries", 2)) +def parser_runtime_config(policy: dict[str, Any]) -> dict[str, object]: + runtime = _expect_optional_dict(policy, "runtime") + parser = _expect_optional_nested_dict(runtime, "parser", "runtime") + provider = str(parser.get("provider") or "").strip() + model = str(parser.get("model") or "").strip() + timeout = parser.get("timeoutSeconds") + if provider not in VALID_PARSER_PROVIDERS: + raise PolicyError(f"runtime.parser.provider must be one of: {', '.join(sorted(VALID_PARSER_PROVIDERS))}") + if not model: + raise PolicyError("runtime.parser.model must be a string") + if isinstance(timeout, bool) or not isinstance(timeout, int) or timeout <= 0: + raise PolicyError("runtime.parser.timeoutSeconds must be a positive integer") + return {"provider": provider, "model": model, "timeoutSeconds": timeout} + + def bundled_skill_root(project_root: str | Path | None = None) -> Path: root = Path(project_root or get_project_root()).resolve() installed = root / ".claude" / "skills" / "bmad-story-automator" @@ -213,6 +240,9 @@ def _validate_policy_shape(policy: dict[str, Any]) -> None: snapshot = _expect_optional_dict(policy, "snapshot") if "snapshot" in policy and "relativeDir" in snapshot and not isinstance(snapshot.get("relativeDir"), str): raise PolicyError("snapshot.relativeDir must be a string") + runtime = _expect_optional_dict(policy, "runtime") + _expect_optional_nested_dict(runtime, "merge", "runtime") + parser_runtime_config(policy) workflow = _expect_optional_dict(policy, "workflow") repeat = 
_expect_optional_nested_dict(workflow, "repeat", "workflow") review = _expect_optional_nested_dict(repeat, "review", "workflow.repeat") @@ -370,10 +400,10 @@ def _state_policy_mode(fields: dict[str, Any]) -> tuple[str, str, bool]: if not snapshot_file or not snapshot_hash: raise PolicyError("state policy metadata incomplete") return snapshot_file, snapshot_hash, False - if legacy_policy == "true": - return "", "", True if legacy_policy == "false" or policy_version: raise PolicyError("state policy snapshot missing") + if legacy_policy == "true": + return "", "", True return "", "", True diff --git a/source/tests/test_orchestrator_parse.py b/source/tests/test_orchestrator_parse.py index 99abaf5..110b004 100644 --- a/source/tests/test_orchestrator_parse.py +++ b/source/tests/test_orchestrator_parse.py @@ -92,6 +92,23 @@ def test_state_file_keeps_pinned_parse_contract_after_override_changes(self) -> self.assertEqual(code, 0) self.assertTrue(json.loads(stdout.getvalue())["story_created"]) + def test_parser_runtime_uses_policy_settings(self) -> None: + override_dir = self.project_root / "_bmad" / "bmm" + override_dir.mkdir(parents=True, exist_ok=True) + (override_dir / "story-automator.policy.json").write_text( + json.dumps({"runtime": {"parser": {"provider": "claude", "model": "sonnet", "timeoutSeconds": 33}}}), + encoding="utf-8", + ) + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), patch( + "story_automator.commands.orchestrator_parse.run_cmd", + return_value=CommandResult('{"status":"SUCCESS","story_created":true,"story_file":"x","summary":"ok","next_action":"proceed"}', 0), + ) as mock_run, redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "create"]) + self.assertEqual(code, 0) + self.assertEqual(mock_run.call_args.args[:4], ("claude", "-p", "--model", "sonnet")) + self.assertEqual(mock_run.call_args.kwargs["timeout"], 33) + def _install_bundle(self) -> None: source_skill = REPO_ROOT / 
"payload" / ".claude" / "skills" / "bmad-story-automator" source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" diff --git a/source/tests/test_runtime_policy.py b/source/tests/test_runtime_policy.py index b1fa2b2..7c080ca 100644 --- a/source/tests/test_runtime_policy.py +++ b/source/tests/test_runtime_policy.py @@ -99,6 +99,11 @@ def test_invalid_nested_workflow_types_rejected(self) -> None: with self.assertRaises(PolicyError): load_effective_policy(str(self.project_root)) + def test_invalid_parser_runtime_rejected(self) -> None: + self._write_override({"runtime": {"parser": {"provider": "bad", "model": "haiku", "timeoutSeconds": 120}}}) + with self.assertRaisesRegex(PolicyError, "runtime.parser.provider"): + load_effective_policy(str(self.project_root)) + def test_snapshot_reload_re_resolves_paths_for_new_root(self) -> None: snapshot = snapshot_effective_policy(str(self.project_root)) copied_root = Path(self.tmp.name) / "copied" @@ -153,6 +158,15 @@ def test_new_state_without_snapshot_metadata_is_rejected(self) -> None: with self.assertRaisesRegex(PolicyError, "state policy snapshot missing"): load_runtime_policy(str(self.project_root), state_file=str(state_file)) + def test_contradictory_legacy_flag_with_policy_version_is_rejected(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\npolicyVersion: 1\nlegacyPolicy: true\n---\n", + encoding="utf-8", + ) + with self.assertRaisesRegex(PolicyError, "state policy snapshot missing"): + load_runtime_policy(str(self.project_root), state_file=str(state_file)) + def _install_bundle(self) -> None: source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" diff --git 
a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index 286a16d..c43ed17 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -5,7 +5,7 @@ import shutil import tempfile import unittest -from contextlib import redirect_stdout +from contextlib import redirect_stderr, redirect_stdout from pathlib import Path from story_automator.commands.orchestrator import cmd_orchestrator_helper @@ -125,6 +125,19 @@ def test_summary_does_not_infer_legacy_for_new_state_missing_snapshot_metadata(s payload = json.loads(stdout.getvalue()) self.assertEqual(payload["legacyPolicy"], "false") + def test_summary_does_not_mark_contradictory_legacy_flag_as_legacy(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\npolicyVersion: 1\nlegacyPolicy: true\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with redirect_stdout(stdout): + code = cmd_orchestrator_helper(["state-summary", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["legacyPolicy"], "false") + def test_escalate_uses_pinned_snapshot_when_state_file_provided(self) -> None: state_file = self._build_state() override_dir = self.project_root / "_bmad" / "bmm" @@ -148,6 +161,13 @@ def test_build_cmd_does_not_treat_state_file_flag_as_prompt_text(self) -> None: rendered = stdout.getvalue() self.assertNotIn("--state-file", rendered) + def test_build_cmd_rejects_unknown_step_via_policy(self) -> None: + stderr = io.StringIO() + with patch_env(self.project_root), redirect_stderr(stderr): + code = _build_cmd(["ship", "1.1"]) + self.assertEqual(code, 1) + self.assertIn("unknown step: ship", stderr.getvalue()) + def _build_state(self) -> Path: stdout = io.StringIO() template = self.project_root 
/ ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index 6fde4fd..95a1510 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -186,6 +186,151 @@ def test_validate_story_creation_check_uses_pinned_snapshot(self) -> None: self.assertTrue(payload["valid"]) self.assertEqual(payload["expected"], 1) + def test_validate_story_creation_check_uses_before_after_delta(self) -> None: + self._write_story("1-2-existing", status="draft") + self._write_story("1-2-new", status="draft") + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--before", "1", "--after", "2"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["valid"]) + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["before"], 1) + self.assertEqual(payload["after"], 2) + + def test_validate_story_creation_positional_mode_forwards_state_file(self) -> None: + self._write_story("1-2-example", status="draft") + state_file = self._build_state() + self._write_override({"steps": {"create": {"success": {"config": {"expectedMatches": 2}}}}}) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2", "0", "1", "--state-file", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["valid"]) + self.assertEqual(payload["expected"], 1) + self.assertEqual(payload["created_count"], 1) + + def test_validate_story_creation_check_returns_compat_schema_on_policy_error(self) -> None: + self._write_override({"steps": {"create": {"success": {"config": {"expectedMatches": "abc"}}}}}) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = 
cmd_validate_story_creation(["check", "1.2"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["expected"], 1) + self.assertEqual(payload["created_count"], 0) + self.assertEqual(payload["prefix"], "1-2") + self.assertEqual(payload["source"], "") + self.assertEqual(payload["pattern"], "") + self.assertEqual(payload["matches"], []) + + def test_validate_story_creation_check_returns_compat_schema_on_bad_counts(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--before", "x", "--after", "1"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "before/after must be integers") + self.assertEqual(payload["expected"], 1) + self.assertEqual(payload["created_count"], 0) + + def test_validate_story_creation_check_returns_compat_schema_on_partial_counts(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--before", "1"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "both --before and --after are required together") + self.assertEqual(payload["prefix"], "1-2") + + def test_validate_story_creation_check_returns_compat_schema_on_trailing_before_flag(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--before"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "--before requires a value") + + def test_validate_story_creation_check_returns_compat_schema_on_empty_counts(self) -> None: + stdout = 
io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--before", "", "--after", ""]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "before/after must be integers") + + def test_validate_story_creation_check_returns_compat_schema_on_unsupported_artifacts_dir(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--artifacts-dir", str(self.project_root / "tmp")]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertIn("no longer supports --artifacts-dir overrides", payload["reason"]) + + def test_validate_story_creation_positional_mode_returns_compat_schema_on_bad_counts(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2", "x", "1"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "before/after must be integers") + + def test_validate_story_creation_positional_mode_returns_compat_schema_on_missing_after(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2", "0"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "both --before and --after are required together") + + def test_validate_story_creation_positional_mode_returns_compat_schema_on_missing_counts(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2"]) + self.assertEqual(code, 1) + payload = 
json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "both --before and --after are required together") + + def test_validate_story_creation_positional_mode_returns_compat_schema_on_extra_token(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2", "0", "1", "junk"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "unsupported check argument: junk") + + def test_validate_story_creation_positional_mode_returns_compat_schema_on_incomplete_state_file(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2", "0", "1", "--state-file"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "--state-file requires a value") + + def test_validate_story_creation_check_preserves_zero_expected_matches(self) -> None: + self._write_override({"steps": {"create": {"success": {"config": {"expectedMatches": 0}}}}}) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["valid"]) + self.assertEqual(payload["expected"], 0) + self.assertEqual(payload["created_count"], 0) + def test_create_story_artifact_rejects_invalid_expected_matches(self) -> None: with self.assertRaises(PolicyError): create_story_artifact( From 2dbf6f3e5bfb56d50afd87bea8044ecae7f8e36d Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:55:18 -0300 Subject: [PATCH 11/17] fix: close state-summary and validator gaps --- docs/changelog/260413.md | 22 +++++ 
.../story_automator/commands/orchestrator.py | 36 +++---- .../commands/validate_story_creation.py | 27 ++++-- .../story_automator/core/runtime_policy.py | 21 +++-- source/tests/test_runtime_policy.py | 10 ++ source/tests/test_state_policy_metadata.py | 93 ++++++++++++++++++- source/tests/test_success_verifiers.py | 86 +++++++++++++++++ 7 files changed, 262 insertions(+), 33 deletions(-) diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 6327858..99d17cc 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -326,3 +326,25 @@ Hardened the policy-backed parser/runtime wiring and closed the remaining legacy ### QA Notes - N/A + +## 260413-21:53:12 - Close state-summary and validator compatibility gaps + +### Summary +Fixed remaining review-loop gaps in policy snapshot reporting and legacy create-validator error payloads. + +### Changed +- Changed `state-summary` to validate snapshot metadata against the runtime project root and surface `policyError` when state policy metadata is contradictory, incomplete, missing, or hash-mismatched. +- Changed `validate-story-creation check` and the positional compatibility shim to preserve parsed delta metadata across malformed trailing argument paths and reject all `--artifacts-dir` overrides consistently. +- Changed regression coverage to pin the new state-summary error reporting and compatibility-payload branches found during the clean review loop. 
+ +### Files +- `source/src/story_automator/core/runtime_policy.py` +- `source/src/story_automator/commands/orchestrator.py` +- `source/src/story_automator/commands/validate_story_creation.py` +- `source/tests/test_runtime_policy.py` +- `source/tests/test_state_policy_metadata.py` +- `source/tests/test_success_verifiers.py` +- `docs/changelog/260413.md` + +### QA Notes +- N/A diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index c8e115d..986a317 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -252,23 +252,27 @@ def _state_summary(args: list[str]) -> int: print_json({"ok": False, "error": "file_not_found"}) return 1 fields = parse_simple_frontmatter(read_text(args[0])) - snapshot_file, snapshot_hash, policy_version, legacy_policy = summarize_state_policy_fields(fields) - print_json( - { - "ok": True, - "epic": str(fields.get("epic") or ""), - "epicName": str(fields.get("epicName") or ""), - "currentStory": str(fields.get("currentStory") or ""), - "currentStep": str(fields.get("currentStep") or ""), - "status": str(fields.get("status") or ""), - "lastUpdated": str(fields.get("lastUpdated") or ""), - "policyVersion": str(fields.get("policyVersion") or ""), - "policySnapshotFile": snapshot_file, - "policySnapshotHash": snapshot_hash, - "legacyPolicy": legacy_policy, - "lastAction": extract_last_action(args[0]), - } + snapshot_file, snapshot_hash, policy_version, legacy_policy, policy_error = summarize_state_policy_fields( + fields, + project_root=get_project_root(), ) + payload = { + "ok": True, + "epic": str(fields.get("epic") or ""), + "epicName": str(fields.get("epicName") or ""), + "currentStory": str(fields.get("currentStory") or ""), + "currentStep": str(fields.get("currentStep") or ""), + "status": str(fields.get("status") or ""), + "lastUpdated": str(fields.get("lastUpdated") or ""), + "policyVersion": 
policy_version, + "policySnapshotFile": snapshot_file, + "policySnapshotHash": snapshot_hash, + "legacyPolicy": legacy_policy, + "lastAction": extract_last_action(args[0]), + } + if policy_error: + payload["policyError"] = policy_error + print_json(payload) return 0 diff --git a/source/src/story_automator/commands/validate_story_creation.py b/source/src/story_automator/commands/validate_story_creation.py index 746b019..cb12f5f 100644 --- a/source/src/story_automator/commands/validate_story_creation.py +++ b/source/src/story_automator/commands/validate_story_creation.py @@ -99,6 +99,14 @@ def print_check_error( print(json.dumps(response, separators=(",", ":"))) return 1 + def parsed_delta_counts(before_value: str | None, after_value: str | None) -> tuple[int | None, int | None]: + if before_value is None or after_value is None: + return None, None + try: + return int(before_value or ""), int(after_value or "") + except ValueError: + return None, None + if action == "count": if not rest: print("Usage: validate-story-creation count ", file=os.sys.stderr) @@ -125,7 +133,8 @@ def print_check_error( before_value = rest[idx + 1] idx += 2 else: - return print_check_error(story_id, reason="--before requires a value") + before_count, after_count = parsed_delta_counts(before_value, after_value) + return print_check_error(story_id, reason="--before requires a value", before_count=before_count, after_count=after_count) continue if rest[idx] == "--after": after_seen = True @@ -133,21 +142,25 @@ def print_check_error( after_value = rest[idx + 1] idx += 2 else: - return print_check_error(story_id, reason="--after requires a value") + before_count, after_count = parsed_delta_counts(before_value, after_value) + return print_check_error(story_id, reason="--after requires a value", before_count=before_count, after_count=after_count) continue if rest[idx] == "--artifacts-dir" and idx + 1 < len(rest): artifacts_dir = Path(rest[idx + 1]) idx += 2 continue if rest[idx] == 
"--artifacts-dir": - return print_check_error(story_id, reason="--artifacts-dir requires a value") + before_count, after_count = parsed_delta_counts(before_value, after_value) + return print_check_error(story_id, reason="--artifacts-dir requires a value", before_count=before_count, after_count=after_count) if rest[idx] == "--state-file" and idx + 1 < len(rest): state_file = rest[idx + 1] idx += 2 continue if rest[idx] == "--state-file": - return print_check_error(story_id, reason="--state-file requires a value") - return print_check_error(story_id, reason=f"unsupported check argument: {rest[idx]}") + before_count, after_count = parsed_delta_counts(before_value, after_value) + return print_check_error(story_id, reason="--state-file requires a value", before_count=before_count, after_count=after_count) + before_count, after_count = parsed_delta_counts(before_value, after_value) + return print_check_error(story_id, reason=f"unsupported check argument: {rest[idx]}", before_count=before_count, after_count=after_count) if before_seen != after_seen: return print_check_error(story_id, reason="both --before and --after are required together") before_count = after_count = None @@ -157,10 +170,12 @@ def print_check_error( after_count = int(after_value or "") except ValueError: return print_check_error(story_id, reason="before/after must be integers") - if artifacts_dir != default_artifacts_dir and not (before_seen and after_seen): + if artifacts_dir != default_artifacts_dir: return print_check_error( story_id, reason="validate-story-creation check no longer supports --artifacts-dir overrides; use count/list for custom folders", + before_count=before_count, + after_count=after_count, ) try: payload = create_check_payload(story_id, state_file) diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index 73ec450..d440ce6 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ 
b/source/src/story_automator/core/runtime_policy.py @@ -128,15 +128,20 @@ def load_policy_for_state( return load_bundled_policy(str(root), resolve_assets=resolve_assets) -def summarize_state_policy_fields(fields: dict[str, Any]) -> tuple[str, str, str, str]: - snapshot_file = str(fields.get("policySnapshotFile") or "").strip() - snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() +def summarize_state_policy_fields(fields: dict[str, Any], *, project_root: str | Path | None = None) -> tuple[str, str, str, str, str]: policy_version = str(fields.get("policyVersion") or "").strip() try: - _, _, legacy_mode = _state_policy_mode(fields) - except PolicyError: - legacy_mode = False - return snapshot_file, snapshot_hash, policy_version, "true" if legacy_mode else "false" + snapshot_file, snapshot_hash, legacy_mode = _state_policy_mode(fields) + if snapshot_file and snapshot_hash: + load_policy_snapshot( + snapshot_file, + project_root=str(Path(project_root or get_project_root()).resolve()), + expected_hash=snapshot_hash, + resolve_assets=False, + ) + except PolicyError as exc: + return "", "", policy_version, "false", str(exc) + return snapshot_file, snapshot_hash, policy_version, "true" if legacy_mode else "false", "" def resolve_policy_state_file(project_root: str | Path | None = None, state_file: str | Path | None = None) -> tuple[str, str]: @@ -399,6 +404,8 @@ def _state_policy_mode(fields: dict[str, Any]) -> tuple[str, str, bool]: if snapshot_file or snapshot_hash: if not snapshot_file or not snapshot_hash: raise PolicyError("state policy metadata incomplete") + if legacy_policy == "true": + raise PolicyError("state policy metadata contradictory") return snapshot_file, snapshot_hash, False if legacy_policy == "false" or policy_version: raise PolicyError("state policy snapshot missing") diff --git a/source/tests/test_runtime_policy.py b/source/tests/test_runtime_policy.py index 7c080ca..f5bf559 100644 --- a/source/tests/test_runtime_policy.py +++ 
b/source/tests/test_runtime_policy.py @@ -167,6 +167,16 @@ def test_contradictory_legacy_flag_with_policy_version_is_rejected(self) -> None with self.assertRaisesRegex(PolicyError, "state policy snapshot missing"): load_runtime_policy(str(self.project_root), state_file=str(state_file)) + def test_snapshot_metadata_with_legacy_flag_is_rejected(self) -> None: + snapshot = snapshot_effective_policy(str(self.project_root)) + state_file = self.project_root / "orchestration.md" + state_file.write_text( + f"---\npolicySnapshotFile: \"{snapshot['policySnapshotFile']}\"\npolicySnapshotHash: \"{snapshot['policySnapshotHash']}\"\nlegacyPolicy: true\n---\n", + encoding="utf-8", + ) + with self.assertRaisesRegex(PolicyError, "state policy metadata contradictory"): + load_runtime_policy(str(self.project_root), state_file=str(state_file)) + def _install_bundle(self) -> None: source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" diff --git a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index c43ed17..7af8365 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -50,7 +50,7 @@ def test_state_doc_writes_policy_metadata(self) -> None: def test_summary_surfaces_policy_metadata(self) -> None: state_file = self._build_state() stdout = io.StringIO() - with redirect_stdout(stdout): + with patch_env(self.project_root), redirect_stdout(stdout): code = cmd_orchestrator_helper(["state-summary", str(state_file)]) self.assertEqual(code, 0) payload = json.loads(stdout.getvalue()) @@ -76,7 +76,7 @@ def test_summary_infers_legacy_policy_for_old_state(self) -> None: encoding="utf-8", ) stdout = io.StringIO() - with redirect_stdout(stdout): + with patch_env(self.project_root), redirect_stdout(stdout): code = cmd_orchestrator_helper(["state-summary", str(legacy)]) 
self.assertEqual(code, 0) payload = json.loads(stdout.getvalue()) @@ -119,7 +119,7 @@ def test_summary_does_not_infer_legacy_for_new_state_missing_snapshot_metadata(s encoding="utf-8", ) stdout = io.StringIO() - with redirect_stdout(stdout): + with patch_env(self.project_root), redirect_stdout(stdout): code = cmd_orchestrator_helper(["state-summary", str(state_file)]) self.assertEqual(code, 0) payload = json.loads(stdout.getvalue()) @@ -132,11 +132,96 @@ def test_summary_does_not_mark_contradictory_legacy_flag_as_legacy(self) -> None encoding="utf-8", ) stdout = io.StringIO() - with redirect_stdout(stdout): + with patch_env(self.project_root), redirect_stdout(stdout): code = cmd_orchestrator_helper(["state-summary", str(state_file)]) self.assertEqual(code, 0) payload = json.loads(stdout.getvalue()) self.assertEqual(payload["legacyPolicy"], "false") + self.assertEqual(payload["policyError"], "state policy snapshot missing") + + def test_summary_clears_contradictory_snapshot_metadata(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\npolicySnapshotFile: \"snap.json\"\npolicySnapshotHash: \"deadbeef\"\nlegacyPolicy: true\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["state-summary", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["policySnapshotFile"], "") + self.assertEqual(payload["policySnapshotHash"], "") + self.assertEqual(payload["legacyPolicy"], "false") + self.assertEqual(payload["policyError"], "state policy metadata contradictory") + + def test_summary_clears_incomplete_snapshot_metadata(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\npolicySnapshotFile: \"snap.json\"\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), 
redirect_stdout(stdout): + code = cmd_orchestrator_helper(["state-summary", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["policySnapshotFile"], "") + self.assertEqual(payload["policySnapshotHash"], "") + self.assertEqual(payload["legacyPolicy"], "false") + self.assertEqual(payload["policyError"], "state policy metadata incomplete") + + def test_summary_reports_missing_snapshot_reference(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\npolicySnapshotFile: \"missing.json\"\npolicySnapshotHash: \"deadbeef\"\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["state-summary", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["policySnapshotFile"], "") + self.assertEqual(payload["policySnapshotHash"], "") + self.assertIn("policy snapshot missing", payload["policyError"]) + + def test_summary_reports_snapshot_hash_mismatch(self) -> None: + state_file = self._build_state() + lines = [] + for line in state_file.read_text(encoding="utf-8").splitlines(): + if line.startswith("policySnapshotHash: "): + lines.append('policySnapshotHash: "deadbeef"') + else: + lines.append(line) + state_file.write_text("\n".join(lines) + "\n", encoding="utf-8") + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["state-summary", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["policySnapshotFile"], "") + self.assertEqual(payload["policySnapshotHash"], "") + self.assertIn("policy snapshot hash mismatch", payload["policyError"]) + + def test_summary_uses_runtime_root_for_relative_snapshot_validation(self) -> None: + outside = self.project_root.parent / "outside-state" + 
outside.mkdir(parents=True, exist_ok=True) + shadow = outside / "snap.json" + shadow.write_text("{}", encoding="utf-8") + state_file = outside / "orchestration.md" + state_file.write_text( + "---\npolicySnapshotFile: \"snap.json\"\npolicySnapshotHash: \"99999999\"\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["state-summary", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["policySnapshotFile"], "") + self.assertEqual(payload["policySnapshotHash"], "") + self.assertIn("policy snapshot missing", payload["policyError"]) def test_escalate_uses_pinned_snapshot_when_state_file_provided(self) -> None: state_file = self._build_state() diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index 95a1510..91264d6 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -275,6 +275,30 @@ def test_validate_story_creation_check_returns_compat_schema_on_unsupported_arti self.assertFalse(payload["valid"]) self.assertIn("no longer supports --artifacts-dir overrides", payload["reason"]) + def test_validate_story_creation_check_rejects_artifacts_dir_in_delta_mode(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--before", "0", "--after", "1", "--artifacts-dir", str(self.project_root / "tmp")]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertIn("no longer supports --artifacts-dir overrides", payload["reason"]) + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["before"], 0) + self.assertEqual(payload["after"], 1) + + def test_validate_story_creation_positional_mode_rejects_artifacts_dir_with_delta_fields(self) -> None: + stdout = 
io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2", "0", "1", "--artifacts-dir", str(self.project_root / "tmp")]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertIn("no longer supports --artifacts-dir overrides", payload["reason"]) + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["before"], 0) + self.assertEqual(payload["after"], 1) + def test_validate_story_creation_positional_mode_returns_compat_schema_on_bad_counts(self) -> None: stdout = io.StringIO() with patch_env(self.project_root), redirect_stdout(stdout): @@ -310,6 +334,9 @@ def test_validate_story_creation_positional_mode_returns_compat_schema_on_extra_ payload = json.loads(stdout.getvalue()) self.assertFalse(payload["valid"]) self.assertEqual(payload["reason"], "unsupported check argument: junk") + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["before"], 0) + self.assertEqual(payload["after"], 1) def test_validate_story_creation_positional_mode_returns_compat_schema_on_incomplete_state_file(self) -> None: stdout = io.StringIO() @@ -319,6 +346,65 @@ def test_validate_story_creation_positional_mode_returns_compat_schema_on_incomp payload = json.loads(stdout.getvalue()) self.assertFalse(payload["valid"]) self.assertEqual(payload["reason"], "--state-file requires a value") + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["before"], 0) + self.assertEqual(payload["after"], 1) + + def test_validate_story_creation_check_preserves_delta_on_incomplete_state_file(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--before", "0", "--after", "1", "--state-file"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + 
self.assertEqual(payload["reason"], "--state-file requires a value") + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["before"], 0) + self.assertEqual(payload["after"], 1) + + def test_validate_story_creation_check_preserves_delta_on_trailing_before_flag(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--before", "0", "--after", "1", "--before"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "--before requires a value") + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["before"], 0) + self.assertEqual(payload["after"], 1) + + def test_validate_story_creation_positional_mode_preserves_delta_on_trailing_before_flag(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2", "0", "1", "--before"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "--before requires a value") + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["before"], 0) + self.assertEqual(payload["after"], 1) + + def test_validate_story_creation_check_returns_compat_failure_without_exception(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["created_count"], 0) + self.assertEqual(payload["reason"], "No story file created - session may have failed") + + def test_validate_story_creation_positional_mode_returns_delta_failure_without_exception(self) -> None: + stdout = io.StringIO() + with 
patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2", "1", "3"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["created_count"], 2) + self.assertEqual(payload["reason"], "RUNAWAY CREATION: 2 files created instead of 1") def test_validate_story_creation_check_preserves_zero_expected_matches(self) -> None: self._write_override({"steps": {"create": {"success": {"config": {"expectedMatches": 0}}}}}) From 667b4d528cc5986bb1382a0b7d479396bf43e208 Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Tue, 14 Apr 2026 00:44:42 -0300 Subject: [PATCH 12/17] docs: update json settings todo status --- docs/plans/json-settings/TODO.md | 44 ++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/docs/plans/json-settings/TODO.md b/docs/plans/json-settings/TODO.md index 9270b2f..cbf7163 100644 --- a/docs/plans/json-settings/TODO.md +++ b/docs/plans/json-settings/TODO.md @@ -2,6 +2,12 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. +Status backfill: checked against shipped code and `npm run verify` on 2026-04-13. + +Notes: +- Item 1 remains open because the original pre-edit baseline notes were not preserved in-repo. +- Item 14 remains open because the review payload still relies on the extra instruction `auto-fix all issues without prompting` instead of encoding autonomous fix behavior directly in `instructions.xml`. + ## Phase 0: Baseline 1. [ ] Capture current behavior baselines. @@ -14,7 +20,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. - baseline commands are saved in working notes - current default behavior is explicit before edits start -2. [ ] Freeze the target JSON settings shape. +2. [x] Freeze the target JSON settings shape. 
Depends on: 1 Files: `docs/plans/json-settings/02-policy-model.md` Actions: @@ -26,7 +32,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. ## Phase 1: Policy Loader And Default Policy -3. [ ] Add bundled default policy JSON and data directories. +3. [x] Add bundled default policy JSON and data directories. Depends on: 2 Files: - `payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json` @@ -38,7 +44,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - payload contains complete default machine contract -4. [ ] Implement `runtime_policy.py`. +4. [x] Implement `runtime_policy.py`. Depends on: 3 Files: - `source/src/story_automator/core/runtime_policy.py` @@ -52,7 +58,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - one call can return effective policy plus snapshot metadata -5. [ ] Refactor required/optional asset resolution behind policy. +5. [x] Refactor required/optional asset resolution behind policy. Depends on: 4 Files: - `source/src/story_automator/core/workflow_paths.py` @@ -64,7 +70,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - required assets never silently resolve to non-existent placeholders -6. [ ] Add state metadata for policy snapshots. +6. [x] Add state metadata for policy snapshots. Depends on: 4 Files: - `source/src/story_automator/commands/state.py` @@ -79,7 +85,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. ## Phase 2: Prompt And Parse Externalization -7. [ ] Replace hard-coded tmux prompts with template rendering. +7. [x] Replace hard-coded tmux prompts with template rendering. Depends on: 4, 5, 6 Files: - `source/src/story_automator/commands/tmux.py` @@ -91,7 +97,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. 
Done when: - `build-cmd` no longer uses the hard-coded prompt map -8. [ ] Replace hard-coded parse schema switch with policy-backed contracts. +8. [x] Replace hard-coded parse schema switch with policy-backed contracts. Depends on: 4 Files: - `source/src/story_automator/commands/orchestrator_parse.py` @@ -102,7 +108,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - parser behavior comes from data files, not `if step == ...` -9. [ ] Move retry budgets into policy-backed reads. +9. [x] Move retry budgets into policy-backed reads. Depends on: 4 Files: - `source/src/story_automator/commands/orchestrator.py` @@ -115,7 +121,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. ## Phase 3: Success Verifiers -10. [ ] Add verifier registry and concrete implementations. +10. [x] Add verifier registry and concrete implementations. Depends on: 4 Files: - `source/src/story_automator/core/success_verifiers.py` @@ -129,7 +135,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - verifiers are selected by name and tested independently -11. [ ] Wire `monitor-session` to policy-backed verifier dispatch. +11. [x] Wire `monitor-session` to policy-backed verifier dispatch. Depends on: 7, 10 Files: - `source/src/story_automator/commands/tmux.py` @@ -140,7 +146,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - completion logic is step-driven, not `workflow == "review"` driven -12. [ ] Fold create story validation into `create_story_artifact`. +12. [x] Fold create story validation into `create_story_artifact`. Depends on: 10, 11 Files: - `source/src/story_automator/commands/orchestrator.py` @@ -153,7 +159,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. ## Phase 4: Review Payload Alignment -13. [ ] Add structured review contract file. +13. [x] Add structured review contract file. 
Depends on: 3 Files: - `payload/.claude/skills/bmad-story-automator-review/contract.json` @@ -174,7 +180,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - review payload no longer contradicts runtime prompt defaults -15. [ ] Update main workflow prose to reference runtime policy. +15. [x] Update main workflow prose to reference runtime policy. Depends on: 3 Files: - `payload/.claude/skills/bmad-story-automator/workflow.md` @@ -187,7 +193,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. ## Phase 5: Testing -16. [ ] Add Python unit tests for policy and verifiers. +16. [x] Add Python unit tests for policy and verifiers. Depends on: 4, 8, 10 Files: - `source/tests/test_runtime_policy.py` @@ -200,7 +206,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - policy-specific behavior has direct automated coverage -17. [ ] Update smoke tests for installed policy assets and defaults. +17. [x] Update smoke tests for installed policy assets and defaults. Depends on: 7, 8, 11, 13, 14, 15 Files: - `scripts/smoke-test.sh` @@ -211,7 +217,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - installer/integration behavior remains covered end to end -18. [ ] Update local verify flow. +18. [x] Update local verify flow. Depends on: 16, 17 Files: - `package.json` @@ -225,7 +231,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. ## Phase 6: Compatibility And Cleanup -19. [ ] Implement legacy resume behavior and strict new-state validation. +19. [x] Implement legacy resume behavior and strict new-state validation. Depends on: 6, 10, 11 Files: - `source/src/story_automator/commands/state.py` @@ -237,7 +243,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - resume is deterministic and explicit in both modes -20. 
[ ] Preserve env compatibility for one release cycle. +20. [x] Preserve env compatibility for one release cycle. Depends on: 9 Files: - `source/src/story_automator/core/runtime_policy.py` @@ -249,7 +255,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - old env knobs still work without mutating resumed runs -21. [ ] Remove or shrink obsolete hard-coded helpers. +21. [x] Remove or shrink obsolete hard-coded helpers. Depends on: 7, 8, 9, 10, 11 Files: - `source/src/story_automator/commands/tmux.py` From 55b2fd4350dd3b64f358024cf316c485cbd3bd30 Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Tue, 14 Apr 2026 17:16:16 -0300 Subject: [PATCH 13/17] fix: address PR review follow-ups --- .../story_automator/commands/orchestrator.py | 6 +++++- source/src/story_automator/commands/tmux.py | 2 ++ .../story_automator/core/runtime_policy.py | 17 +++++++++++++-- source/tests/test_runtime_policy.py | 5 +++++ source/tests/test_state_policy_metadata.py | 14 +++++++++++++ source/tests/test_success_verifiers.py | 21 +++++++++++++++++++ 6 files changed, 62 insertions(+), 3 deletions(-) diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 986a317..0fe139a 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -312,7 +312,11 @@ def _escalate(args: list[str]) -> int: idx += 2 continue idx += 1 - policy = load_runtime_policy(get_project_root(), state_file=state_file) + try: + policy = load_runtime_policy(get_project_root(), state_file=state_file) + except (FileNotFoundError, PolicyError) as exc: + print_json({"escalate": True, "reason": str(exc)}) + return 0 if trigger == "review-loop": cycles = _parse_context_int(context, "cycles") limit = review_max_cycles(policy) diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py 
index 6b0bb70..d61cea8 100644 --- a/source/src/story_automator/commands/tmux.py +++ b/source/src/story_automator/commands/tmux.py @@ -784,6 +784,8 @@ def _verify_monitor_completion( verifier_name = str(contract.get("verifier") or "").strip() if not verifier_name: return None + if verifier_name in {"create_story_artifact", "review_completion", "epic_complete"} and not story_key.strip(): + return None try: result = run_success_verifier( verifier_name, diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index d440ce6..d3eb8cc 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ b/source/src/story_automator/core/runtime_policy.py @@ -233,9 +233,22 @@ def _apply_legacy_env(policy: dict[str, Any]) -> None: review_cycles = os.environ.get("MAX_REVIEW_CYCLES") crash_retries = os.environ.get("MAX_CRASH_RETRIES") if review_cycles: - policy.setdefault("workflow", {}).setdefault("repeat", {}).setdefault("review", {})["maxCycles"] = int(review_cycles) + policy.setdefault("workflow", {}).setdefault("repeat", {}).setdefault("review", {})["maxCycles"] = _legacy_env_int( + "MAX_REVIEW_CYCLES", + review_cycles, + ) if crash_retries: - policy.setdefault("workflow", {}).setdefault("crash", {})["maxRetries"] = int(crash_retries) + policy.setdefault("workflow", {}).setdefault("crash", {})["maxRetries"] = _legacy_env_int( + "MAX_CRASH_RETRIES", + crash_retries, + ) + + +def _legacy_env_int(name: str, raw: str) -> int: + try: + return int(raw) + except ValueError as exc: + raise PolicyError(f"{name} must be an integer") from exc def _validate_policy_shape(policy: dict[str, Any]) -> None: diff --git a/source/tests/test_runtime_policy.py b/source/tests/test_runtime_policy.py index f5bf559..87b204b 100644 --- a/source/tests/test_runtime_policy.py +++ b/source/tests/test_runtime_policy.py @@ -77,6 +77,11 @@ def test_snapshot_bakes_legacy_env_values_for_resume(self) -> None: 
self.assertEqual(policy["workflow"]["repeat"]["review"]["maxCycles"], 2) self.assertEqual(policy["workflow"]["crash"]["maxRetries"], 4) + def test_invalid_legacy_env_value_raises_policy_error(self) -> None: + with patch.dict("os.environ", {"MAX_REVIEW_CYCLES": "nope"}, clear=False): + with self.assertRaisesRegex(PolicyError, "MAX_REVIEW_CYCLES must be an integer"): + load_effective_policy(str(self.project_root)) + def test_malformed_override_json_raises_policy_error(self) -> None: override_dir = self.project_root / "_bmad" / "bmm" override_dir.mkdir(parents=True, exist_ok=True) diff --git a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index 7af8365..a6cb3e0 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -237,6 +237,20 @@ def test_escalate_uses_pinned_snapshot_when_state_file_provided(self) -> None: self.assertEqual(code, 0) self.assertFalse(json.loads(stdout.getvalue())["escalate"]) + def test_escalate_returns_json_when_state_snapshot_is_invalid(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\npolicySnapshotFile: \"missing.json\"\npolicySnapshotHash: \"deadbeef\"\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["escalate", "review-loop", "cycles=1", "--state-file", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["escalate"]) + self.assertIn("policy snapshot missing", payload["reason"]) + def test_build_cmd_does_not_treat_state_file_flag_as_prompt_text(self) -> None: state_file = self._build_state() stdout = io.StringIO() diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index 91264d6..92725cc 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -131,6 
+131,27 @@ def test_monitor_dispatch_uses_review_verifier_from_contract(self) -> None: self.assertEqual(verifier, "review_completion") self.assertTrue(payload["verified"]) + def test_monitor_dispatch_skips_story_keyed_verifier_without_story_key(self) -> None: + result = _verify_monitor_completion( + "review", + project_root=str(self.project_root), + story_key="", + output_file="/tmp/session.txt", + ) + self.assertIsNone(result) + + def test_monitor_dispatch_allows_session_exit_without_story_key(self) -> None: + result = _verify_monitor_completion( + "dev", + project_root=str(self.project_root), + story_key="", + output_file="/tmp/session.txt", + ) + self.assertIsNotNone(result) + payload, verifier = result or ({}, "") + self.assertEqual(verifier, "session_exit") + self.assertTrue(payload["verified"]) + def test_verify_step_create_uses_shared_verifier(self) -> None: self._write_story("1-2-example", status="draft") stdout = io.StringIO() From fdb4d710055abdb887874397864b6a1ed1d1287a Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Tue, 14 Apr 2026 22:01:52 -0300 Subject: [PATCH 14/17] fix: address CodeRabbit review findings --- docs/changelog/260414.md | 40 +++++++++ docs/development.md | 4 +- docs/plans/json-settings/02-policy-model.md | 20 +---- docs/plans/json-settings/README.md | 7 +- .../steps-c/step-03-execute.md | 2 +- scripts/smoke-test.sh | 1 + .../story_automator/commands/orchestrator.py | 20 +++-- .../commands/orchestrator_parse.py | 13 ++- source/src/story_automator/commands/state.py | 6 +- source/src/story_automator/commands/tmux.py | 9 +- .../commands/validate_story_creation.py | 2 +- .../src/story_automator/core/review_verify.py | 5 +- .../story_automator/core/runtime_policy.py | 88 +++++++++++++++++-- source/tests/test_orchestrator_parse.py | 16 ++++ source/tests/test_runtime_policy.py | 21 +++++ source/tests/test_state_policy_metadata.py | 26 ++++++ source/tests/test_success_verifiers.py | 52 ++++++++++- 17 files 
changed, 285 insertions(+), 47 deletions(-) create mode 100644 docs/changelog/260414.md diff --git a/docs/changelog/260414.md b/docs/changelog/260414.md new file mode 100644 index 0000000..9579976 --- /dev/null +++ b/docs/changelog/260414.md @@ -0,0 +1,40 @@ +# Changelog - 260414 + +## 260414-21:51:35 - Harden snapshot and verifier review fixes + +### Summary +Closed the latest CodeRabbit review pass by aligning docs/examples with shipped behavior and hardening snapshot, parser, and verifier paths. + +### Changed +- Changed the development and JSON-settings docs to match the shipped verify order, supported workflow keys, and full state-policy metadata contract. +- Changed the execute-step example and smoke coverage so dev `build-cmd` keeps the pinned `--state-file` handoff. +- Changed regression coverage to pin the new parser, snapshot, verifier, and compatibility-wrapper failure contracts. + +### Fixed +- Fixed `tmux-wrapper build-cmd` prompt escaping to quote shell input safely for both Claude and Codex child sessions. +- Fixed `parse-output` and `verify-step` flag parsing to reject incomplete `--state-file` and `--output-file` arguments instead of silently falling back. +- Fixed state-doc creation, review verification, and legacy create validation to normalize missing snapshot/contract failures into structured JSON responses. +- Fixed monitor verification to surface `story_key_required` when a story-bound verifier is invoked without a story key. +- Fixed runtime policy snapshot loading to hash prompt/schema/contract files and reject snapshot or asset paths that escape allowed roots. 
+ +### Files +- `docs/development.md` +- `docs/plans/json-settings/02-policy-model.md` +- `docs/plans/json-settings/README.md` +- `payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md` +- `scripts/smoke-test.sh` +- `source/src/story_automator/commands/orchestrator.py` +- `source/src/story_automator/commands/orchestrator_parse.py` +- `source/src/story_automator/commands/state.py` +- `source/src/story_automator/commands/tmux.py` +- `source/src/story_automator/commands/validate_story_creation.py` +- `source/src/story_automator/core/review_verify.py` +- `source/src/story_automator/core/runtime_policy.py` +- `source/tests/test_orchestrator_parse.py` +- `source/tests/test_runtime_policy.py` +- `source/tests/test_state_policy_metadata.py` +- `source/tests/test_success_verifiers.py` +- `docs/changelog/260414.md` + +### QA Notes +- N/A diff --git a/docs/development.md b/docs/development.md index ada477a..ea31fe9 100644 --- a/docs/development.md +++ b/docs/development.md @@ -34,8 +34,8 @@ The smoke suite validates: ```mermaid flowchart TD A["Edit installer, payload, or runtime"] --> B["Run npm run test:python"] - B --> C["Run npm run test:smoke"] - C --> D["Run npm run pack:dry-run"] + B --> C["Run npm run pack:dry-run"] + C --> D["Run npm run test:smoke"] D --> E["Run npm run verify"] ``` diff --git a/docs/plans/json-settings/02-policy-model.md b/docs/plans/json-settings/02-policy-model.md index 56acdb6..3a168d9 100644 --- a/docs/plans/json-settings/02-policy-model.md +++ b/docs/plans/json-settings/02-policy-model.md @@ -130,12 +130,7 @@ High-level example: } }, "workflow": { - "sequence": ["create", "dev", "auto", "review"], - "optional": { - "auto": { - "skipWhenOverride": "skipAutomate" - } - }, + "sequence": ["create", "dev", "auto", "review", "retro"], "repeat": { "review": { "maxCycles": 5, @@ -147,17 +142,7 @@ High-level example: "crash": { "maxRetries": 2, "onExhausted": "escalate" - }, - "triggers": [ - { - "name": 
"retrospective_on_epic_complete", - "after": "review", - "verifier": "epic_complete", - "run": "retro", - "blocking": false, - "forceAgent": "claude" - } - ] + } }, "steps": { "create": { @@ -270,4 +255,3 @@ Without a pinned snapshot, these changes become unsafe: - verifier threshold change after preflight The snapshot prevents those mutations from changing the behavior of a resumed orchestration. - diff --git a/docs/plans/json-settings/README.md b/docs/plans/json-settings/README.md index 6490892..e9cf39a 100644 --- a/docs/plans/json-settings/README.md +++ b/docs/plans/json-settings/README.md @@ -59,7 +59,11 @@ bundled default policy + optional project override = effective runtime policy -> pinned snapshot at orchestration start - -> state doc stores pointer + hash + -> state doc stores: + - policySnapshotFile (string snapshot pointer) + - policySnapshotHash (string snapshot hash) + - policyVersion (string/integer runtime policy version) + - legacyPolicy (boolean legacy-state marker) -> all resume/replay uses snapshot only ``` @@ -82,4 +86,3 @@ This plan does not try to deliver: - custom Python or shell expressions in config - a general workflow interpreter - rich nested policy blobs embedded in frontmatter - diff --git a/payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md b/payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md index d7e60fa..7faa3fc 100644 --- a/payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md +++ b/payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md @@ -154,7 +154,7 @@ validation=$("$scripts" orchestrator-helper verify-step create {story_id} --stat # Retry loop with agent alternation: see {retryStrategy} session=$("$scripts" tmux-wrapper spawn dev {epic} {story_id} \ --agent "$current_agent" \ - --command "$("$scripts" tmux-wrapper build-cmd dev {story_id} --agent "$current_agent")") + --command "$("$scripts" tmux-wrapper build-cmd dev {story_id} --agent "$current_agent" 
--state-file "$state_file")") result=$("$scripts" monitor-session "$session" --json --agent "$current_agent") "$scripts" tmux-wrapper kill "$session" ``` diff --git a/scripts/smoke-test.sh b/scripts/smoke-test.sh index 7414554..d64bf14 100755 --- a/scripts/smoke-test.sh +++ b/scripts/smoke-test.sh @@ -248,6 +248,7 @@ verify_common_install() { assert_contains 'verify-code-review {story_id} --state-file "$state_file"' "$story_dir/data/code-review-loop.md" assert_contains 'orchestrator-helper verify-step create {story_id} --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" assert_contains 'build-cmd create {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" + assert_contains 'build-cmd dev {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" assert_contains 'validation_passed=$(echo "$validation" | jq -r '\''.verified'\'')' "$story_dir/data/retry-fallback-implementation.md" assert_contains 'build-cmd {step} {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/data/retry-fallback-implementation.md" assert_contains 'orchestrator-helper verify-step create 5.3 --state-file "$state_file"' "$story_dir/data/monitoring-pattern.md" diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 0fe139a..5723fd2 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -421,12 +421,20 @@ def _verify_step(args: list[str]) -> int: state_file = "" output_file = "" tail = args[2:] - for idx, arg in enumerate(tail): - if arg == "--state-file" and idx + 1 < len(tail): - state_file = tail[idx + 1] - elif arg == "--output-file" and idx + 1 < len(tail): - output_file = tail[idx + 1] try: + idx = 0 + while idx < len(tail): + arg = tail[idx] + if arg in {"--state-file", "--output-file"}: + if idx + 1 >= len(tail) or not 
tail[idx + 1].strip() or tail[idx + 1].startswith("--"): + raise PolicyError(f"{arg} requires a value") + if arg == "--state-file": + state_file = tail[idx + 1] + else: + output_file = tail[idx + 1] + idx += 2 + continue + idx += 1 contract = resolve_success_contract(get_project_root(), step, state_file=state_file or None) verifier = str(contract.get("verifier") or "").strip() if not verifier: @@ -439,7 +447,7 @@ def _verify_step(args: list[str]) -> int: contract=contract, ) exit_code = 0 - except (FileNotFoundError, PolicyError) as exc: + except (FileNotFoundError, PolicyError, ValueError) as exc: payload = {"verified": False, "step": step, "input": story_key, "reason": "verifier_contract_invalid", "error": str(exc)} exit_code = 1 print_json(payload) diff --git a/source/src/story_automator/commands/orchestrator_parse.py b/source/src/story_automator/commands/orchestrator_parse.py index 3965f79..714c577 100644 --- a/source/src/story_automator/commands/orchestrator_parse.py +++ b/source/src/story_automator/commands/orchestrator_parse.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +from typing import Any from story_automator.core.runtime_policy import PolicyError, load_runtime_policy, parser_runtime_config, step_contract from story_automator.core.utils import COMMAND_TIMEOUT_EXIT, extract_json_line, print_json, read_text, run_cmd, trim_lines @@ -14,7 +15,10 @@ def parse_output_action(args: list[str]) -> int: state_file = "" idx = 2 while idx < len(args): - if args[idx] == "--state-file" and idx + 1 < len(args): + if args[idx] == "--state-file": + if idx + 1 >= len(args) or not args[idx + 1].strip() or args[idx + 1].startswith("--"): + print_json({"status": "error", "reason": "parse_contract_invalid"}) + return 1 state_file = args[idx + 1] idx += 2 continue @@ -71,7 +75,10 @@ def _load_parse_contract(contract: dict[str, object]) -> dict[str, object]: payload = json.loads(read_text(str(parse.get("schemaPath") or ""))) if not isinstance(payload, dict): 
raise ValueError("invalid parse schema") - if not isinstance(payload.get("requiredKeys"), list): + required_keys = payload.get("requiredKeys") + if not isinstance(required_keys, list): + raise ValueError("invalid parse schema") + if any(not isinstance(key, str) or not key.strip() for key in required_keys): raise ValueError("invalid parse schema") if not isinstance(payload.get("schema"), dict): raise ValueError("invalid parse schema") @@ -84,7 +91,7 @@ def _build_parse_prompt(contract: dict[str, object], parse_contract: dict[str, o return f"Analyze this {label} session output. Return JSON only:\n{schema}\n\nSession output:\n---\n{content}\n---" -def _has_required_keys(payload: object, required_keys: list[object]) -> bool: +def _has_required_keys(payload: object, required_keys: list[Any]) -> bool: if not isinstance(payload, dict): return False return all(isinstance(key, str) and key in payload for key in required_keys) diff --git a/source/src/story_automator/commands/state.py b/source/src/story_automator/commands/state.py index ede9786..89b1c8a 100644 --- a/source/src/story_automator/commands/state.py +++ b/source/src/story_automator/commands/state.py @@ -43,7 +43,11 @@ def cmd_build_state_doc(args: list[str]) -> int: epic = str(config.get("epic") or "epic") safe_epic = re.sub(r"[^a-zA-Z0-9]+", "-", epic).strip("-") or "epic" output_path = Path(output_folder) / f"orchestration-{safe_epic}-{stamp}.md" - snapshot = snapshot_effective_policy(get_project_root()) + try: + snapshot = snapshot_effective_policy(get_project_root()) + except (FileNotFoundError, PolicyError, ValueError) as exc: + write_json({"ok": False, "error": "policy_snapshot_failed", "reason": str(exc)}) + return 1 text = read_text(template) replacements: dict[str, Any] = { "epic": config.get("epic", ""), diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py index d61cea8..1fe0c75 100644 --- a/source/src/story_automator/commands/tmux.py +++ 
b/source/src/story_automator/commands/tmux.py @@ -3,6 +3,7 @@ import json import os import re +import shlex import time from pathlib import Path @@ -204,7 +205,7 @@ def _build_cmd(args: list[str]) -> int: else: cli = "codex exec" prompt = _render_step_prompt(contract, story_id, story_prefix, extra) - escaped = prompt.replace("\\", "\\\\").replace('"', '\\"') + quoted_prompt = shlex.quote(prompt) if agent == "codex" and not ai_command: codex_home = f"/tmp/sa-codex-home-{project_hash(root)}" auth_src = os.path.expanduser("~/.codex/auth.json") @@ -213,10 +214,10 @@ def _build_cmd(args: list[str]) -> int: + f' && if [ -f "{auth_src}" ]; then ln -sf "{auth_src}" "{codex_home}/auth.json"; fi' + f' && CODEX_HOME="{codex_home}" codex exec -s workspace-write -c \'approval_policy="never"\'' + f' -c \'model_reasoning_effort="high"\'' - + f' --disable plugins --disable sqlite --disable shell_snapshot "{escaped}"' + + f" --disable plugins --disable sqlite --disable shell_snapshot {quoted_prompt}" ) else: - print(f'unset CLAUDECODE && {cli} "{escaped}"') + print(f"unset CLAUDECODE && {cli} {quoted_prompt}") return 0 @@ -785,7 +786,7 @@ def _verify_monitor_completion( if not verifier_name: return None if verifier_name in {"create_story_artifact", "review_completion", "epic_complete"} and not story_key.strip(): - return None + return ({"verified": False, "reason": "story_key_required", "verifier": verifier_name}, verifier_name) try: result = run_success_verifier( verifier_name, diff --git a/source/src/story_automator/commands/validate_story_creation.py b/source/src/story_automator/commands/validate_story_creation.py index cb12f5f..b8e1d0e 100644 --- a/source/src/story_automator/commands/validate_story_creation.py +++ b/source/src/story_automator/commands/validate_story_creation.py @@ -180,7 +180,7 @@ def parsed_delta_counts(before_value: str | None, after_value: str | None) -> tu try: payload = create_check_payload(story_id, state_file) response = build_check_response(story_id, 
payload, before_count=before_count, after_count=after_count) - except (PolicyError, ValueError) as exc: + except (FileNotFoundError, PolicyError, ValueError) as exc: return print_check_error(story_id, reason=str(exc), before_count=before_count, after_count=after_count) print(json.dumps(response, separators=(",", ":"))) return 0 diff --git a/source/src/story_automator/core/review_verify.py b/source/src/story_automator/core/review_verify.py index 5975c69..029c67a 100644 --- a/source/src/story_automator/core/review_verify.py +++ b/source/src/story_automator/core/review_verify.py @@ -3,6 +3,7 @@ from pathlib import Path from typing import Any +from .runtime_policy import PolicyError from .success_verifiers import resolve_success_contract, review_completion @@ -14,7 +15,7 @@ def verify_code_review_completion( state_file: str | Path | None = None, ) -> dict[str, object]: try: - contract = success_contract or resolve_success_contract(project_root, "review", state_file=state_file) + contract = resolve_success_contract(project_root, "review", state_file=state_file) if success_contract is None else success_contract return review_completion(project_root=project_root, story_key=story_key, contract=contract) - except (FileNotFoundError, ValueError) as exc: + except (FileNotFoundError, ValueError, PolicyError) as exc: return {"verified": False, "reason": "review_contract_invalid", "input": story_key, "error": str(exc)} diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index d3eb8cc..812d5e2 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ b/source/src/story_automator/core/runtime_policy.py @@ -39,6 +39,7 @@ def load_effective_policy(project_root: str | None = None, *, resolve_assets: bo policy = _deep_merge(bundled, override) _apply_legacy_env(policy) _validate_policy_shape(policy) + _clear_resolved_fields(policy) if resolve_assets: _resolve_policy_paths(policy, project_root=root, 
bundle_root=bundled_skill_root(root)) else: @@ -65,7 +66,7 @@ def load_runtime_policy( def snapshot_effective_policy(project_root: str | None = None) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() policy = load_effective_policy(str(root)) - snapshot_dir = root / _snapshot_relative_dir(policy) + snapshot_dir = _resolve_snapshot_dir(policy, root) ensure_dir(snapshot_dir) stable_json = _stable_policy_json(policy) snapshot_hash = md5_hex8(stable_json) @@ -229,6 +230,27 @@ def _deep_merge(base: Any, override: Any) -> Any: return override +def _clear_resolved_fields(policy: dict[str, Any]) -> None: + for contract in (policy.get("steps") or {}).values(): + if not isinstance(contract, dict): + continue + assets = contract.get("assets") + if isinstance(assets, dict): + assets.pop("files", None) + prompt = contract.get("prompt") + if isinstance(prompt, dict): + prompt.pop("templatePath", None) + prompt.pop("templateHash", None) + parse = contract.get("parse") + if isinstance(parse, dict): + parse.pop("schemaPath", None) + parse.pop("schemaHash", None) + success = contract.get("success") + if isinstance(success, dict): + success.pop("contractPath", None) + success.pop("contractHash", None) + + def _apply_legacy_env(policy: dict[str, Any]) -> None: review_cycles = os.environ.get("MAX_REVIEW_CYCLES") crash_retries = os.environ.get("MAX_CRASH_RETRIES") @@ -305,15 +327,18 @@ def _resolve_policy_paths(policy: dict[str, Any], *, project_root: Path, bundle_ if not template_file: raise PolicyError(f"missing prompt template for {name}") prompt["templatePath"] = _resolve_data_path(template_file, project_root=project_root, bundle_root=bundle_root) + _set_or_verify_hash(prompt, path_key="templatePath", hash_key="templateHash", label="policy template") parse = contract.setdefault("parse", {}) schema_file = str(parse.get("schemaFile") or "").strip() if not schema_file: raise PolicyError(f"missing parse schema for {name}") parse["schemaPath"] = 
_resolve_data_path(schema_file, project_root=project_root, bundle_root=bundle_root) + _set_or_verify_hash(parse, path_key="schemaPath", hash_key="schemaHash", label="policy parse schema") success = contract.setdefault("success", {}) contract_file = str(success.get("contractFile") or "").strip() if contract_file: success["contractPath"] = _resolve_data_path(contract_file, project_root=project_root, bundle_root=bundle_root) + _set_or_verify_hash(success, path_key="contractPath", hash_key="contractHash", label="policy success contract") def _resolve_success_paths(policy: dict[str, Any], *, project_root: Path, bundle_root: Path) -> None: @@ -322,13 +347,15 @@ def _resolve_success_paths(policy: dict[str, Any], *, project_root: Path, bundle contract_file = str(success.get("contractFile") or "").strip() if contract_file: success["contractPath"] = _resolve_data_path(contract_file, project_root=project_root, bundle_root=bundle_root) + _set_or_verify_hash(success, path_key="contractPath", hash_key="contractHash", label="policy success contract") def _resolve_step_assets(step: str, assets: dict[str, Any], project_root: Path) -> dict[str, str]: skill_name = str(assets.get("skillName") or "").strip() if not skill_name: raise PolicyError(f"missing skillName for {step}") - skill_dir = project_root / ".claude" / "skills" / skill_name + skills_root = (project_root / ".claude" / "skills").resolve() + skill_dir = _ensure_within(skills_root / skill_name, skills_root, f"skillName for {step}") required = set(assets.get("required") or []) files = { "skill": _resolve_required_file(skill_dir / "SKILL.md", project_root, required, "skill", step), @@ -364,7 +391,7 @@ def _resolve_candidate_file( for name in candidates: if not isinstance(name, str) or not name: continue - path = skill_dir / name + path = _ensure_within(skill_dir / name, skill_dir, f"{asset} candidate for {step}") if path.is_file(): return _display_path(path, project_root) if asset in required: @@ -375,14 +402,24 @@ def 
_resolve_candidate_file( def _resolve_data_path(path_value: str, *, project_root: Path, bundle_root: Path) -> str: raw = Path(path_value) + allowed_roots = (bundle_root.resolve(), project_root.resolve()) if raw.is_absolute(): - if not raw.is_file(): + resolved = raw.resolve() + if not _is_within_any(resolved, allowed_roots): + raise PolicyError(f"policy data path escapes allowed roots: {path_value}") + if not resolved.is_file(): raise PolicyError(f"policy data file missing: {raw}") - return str(raw) - for base in (bundle_root, project_root): + return str(resolved) + escaped_all = True + for base in allowed_roots: candidate = (base / raw).resolve() + if not _is_within(candidate, base): + continue + escaped_all = False if candidate.is_file(): return str(candidate) + if escaped_all: + raise PolicyError(f"policy data path escapes allowed roots: {path_value}") raise PolicyError(f"policy data file missing: {path_value}") @@ -394,6 +431,12 @@ def _snapshot_relative_dir(policy: dict[str, Any]) -> str: return relative_dir +def _resolve_snapshot_dir(policy: dict[str, Any], project_root: Path) -> Path: + raw = Path(_snapshot_relative_dir(policy)) + candidate = raw if raw.is_absolute() else project_root / raw + return _ensure_within(candidate, project_root.resolve(), "snapshot.relativeDir") + + def _stable_policy_json(policy: dict[str, Any]) -> str: return json.dumps(policy, indent=2, sort_keys=True) + "\n" @@ -409,6 +452,39 @@ def _resolve_state_path(project_root: Path, path: Path) -> Path: return path if path.is_absolute() else project_root / path +def _set_or_verify_hash(payload: dict[str, Any], *, path_key: str, hash_key: str, label: str) -> None: + path = str(payload.get(path_key) or "").strip() + if not path: + return + actual = md5_hex8(read_text(path)) + expected = str(payload.get(hash_key) or "").strip() + if expected and expected != actual: + raise PolicyError(f"{label} hash mismatch: {path}") + payload[hash_key] = actual + + +def _ensure_within(path: Path, root: 
Path, label: str) -> Path: + resolved = path.resolve() + root_resolved = root.resolve() + try: + resolved.relative_to(root_resolved) + except ValueError as exc: + raise PolicyError(f"{label} escapes allowed root: {path}") from exc + return resolved + + +def _is_within(path: Path, root: Path) -> bool: + try: + path.resolve().relative_to(root.resolve()) + except ValueError: + return False + return True + + +def _is_within_any(path: Path, roots: tuple[Path, ...]) -> bool: + return any(_is_within(path, root) for root in roots) + + def _state_policy_mode(fields: dict[str, Any]) -> tuple[str, str, bool]: snapshot_file = str(fields.get("policySnapshotFile") or "").strip() snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() diff --git a/source/tests/test_orchestrator_parse.py b/source/tests/test_orchestrator_parse.py index 110b004..cc451f4 100644 --- a/source/tests/test_orchestrator_parse.py +++ b/source/tests/test_orchestrator_parse.py @@ -53,6 +53,22 @@ def test_invalid_schema_file_rejected(self) -> None: self.assertEqual(code, 1) self.assertEqual(json.loads(stdout.getvalue())["reason"], "parse_contract_invalid") + def test_missing_state_file_flag_value_rejected(self) -> None: + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "create", "--state-file"]) + self.assertEqual(code, 1) + self.assertEqual(json.loads(stdout.getvalue())["reason"], "parse_contract_invalid") + + def test_non_string_required_key_rejected(self) -> None: + schema = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "data" / "parse" / "create.json" + schema.write_text(json.dumps({"requiredKeys": [True], "schema": {}}), encoding="utf-8") + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "create"]) + 
self.assertEqual(code, 1) + self.assertEqual(json.loads(stdout.getvalue())["reason"], "parse_contract_invalid") + def test_invalid_child_json_rejected(self) -> None: stdout = io.StringIO() with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), patch( diff --git a/source/tests/test_runtime_policy.py b/source/tests/test_runtime_policy.py index 87b204b..2136d0f 100644 --- a/source/tests/test_runtime_policy.py +++ b/source/tests/test_runtime_policy.py @@ -117,6 +117,27 @@ def test_snapshot_reload_re_resolves_paths_for_new_root(self) -> None: template_path = policy["steps"]["create"]["prompt"]["templatePath"] self.assertTrue(str(copied_root) in template_path) + def test_snapshot_relative_dir_cannot_escape_project_root(self) -> None: + self._write_override({"snapshot": {"relativeDir": "../outside"}}) + with self.assertRaisesRegex(PolicyError, "snapshot.relativeDir escapes allowed root"): + snapshot_effective_policy(str(self.project_root)) + + def test_data_path_cannot_escape_allowed_roots(self) -> None: + self._write_override({"steps": {"create": {"prompt": {"templateFile": "../outside.md"}}}}) + with self.assertRaisesRegex(PolicyError, "policy data path escapes allowed roots"): + load_effective_policy(str(self.project_root)) + + def test_snapshot_detects_prompt_template_drift(self) -> None: + snapshot = snapshot_effective_policy(str(self.project_root)) + prompt = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "data" / "prompts" / "create.md" + prompt.write_text("# changed\n", encoding="utf-8") + with self.assertRaisesRegex(PolicyError, "policy template hash mismatch"): + load_policy_snapshot( + snapshot["policySnapshotFile"], + project_root=str(self.project_root), + expected_hash=snapshot["policySnapshotHash"], + ) + def test_missing_marker_state_falls_back_to_effective_policy(self) -> None: marker = self.project_root / ".claude" / ".story-automator-active" marker.parent.mkdir(parents=True, exist_ok=True) diff --git 
a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index a6cb3e0..748dd0b 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -259,6 +259,32 @@ def test_build_cmd_does_not_treat_state_file_flag_as_prompt_text(self) -> None: self.assertEqual(code, 0) rendered = stdout.getvalue() self.assertNotIn("--state-file", rendered) + self.assertNotIn(str(state_file), rendered) + + def test_build_state_doc_returns_json_on_policy_snapshot_failure(self) -> None: + override_dir = self.project_root / "_bmad" / "bmm" + override_dir.mkdir(parents=True, exist_ok=True) + (override_dir / "story-automator.policy.json").write_text( + json.dumps({"snapshot": {"relativeDir": "../outside"}}), + encoding="utf-8", + ) + stdout = io.StringIO() + template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_build_state_doc( + [ + "--template", + str(template), + "--output-folder", + str(self.output_dir), + "--config-json", + json.dumps(self._config()), + ] + ) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["ok"]) + self.assertEqual(payload["error"], "policy_snapshot_failed") def test_build_cmd_rejects_unknown_step_via_policy(self) -> None: stderr = io.StringIO() diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index 92725cc..f858d5c 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -138,7 +138,11 @@ def test_monitor_dispatch_skips_story_keyed_verifier_without_story_key(self) -> story_key="", output_file="/tmp/session.txt", ) - self.assertIsNone(result) + self.assertIsNotNone(result) + payload, verifier = result or ({}, "") + self.assertEqual(verifier, "review_completion") + self.assertFalse(payload["verified"]) + 
self.assertEqual(payload["reason"], "story_key_required") def test_monitor_dispatch_allows_session_exit_without_story_key(self) -> None: result = _verify_monitor_completion( @@ -248,6 +252,52 @@ def test_validate_story_creation_check_returns_compat_schema_on_policy_error(sel self.assertEqual(payload["pattern"], "") self.assertEqual(payload["matches"], []) + def test_validate_story_creation_check_returns_compat_schema_on_missing_state_file(self) -> None: + stdout = io.StringIO() + missing = self.project_root / "missing-state.md" + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--state-file", str(missing)]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertIn("missing-state.md", payload["reason"]) + + def test_review_wrapper_honors_empty_injected_contract(self) -> None: + self._write_story("1-2-example", status="done") + self._write_override( + { + "steps": { + "review": { + "success": { + "config": {"doneValues": ["approved"], "sourceOrder": ["story-file"], "syncSprintStatus": False} + } + } + } + } + ) + payload = verify_code_review_completion(str(self.project_root), "1.2", success_contract={}) + self.assertTrue(payload["verified"]) + self.assertEqual(payload["source"], "story-file") + + def test_review_wrapper_normalizes_policy_error(self) -> None: + payload = verify_code_review_completion( + str(self.project_root), + "1.2", + success_contract={"doneValues": [], "sourceOrder": ["story-file"]}, + ) + self.assertFalse(payload["verified"]) + self.assertEqual(payload["reason"], "review_contract_invalid") + + def test_verify_step_rejects_incomplete_state_file_flag(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["verify-step", "create", "1.2", "--state-file"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + 
self.assertFalse(payload["verified"]) + self.assertEqual(payload["reason"], "verifier_contract_invalid") + self.assertEqual(payload["error"], "--state-file requires a value") + def test_validate_story_creation_check_returns_compat_schema_on_bad_counts(self) -> None: stdout = io.StringIO() with patch_env(self.project_root), redirect_stdout(stdout): From e8a8f78fd752350251cc42283d8e493a9975744b Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Wed, 15 Apr 2026 01:20:41 -0300 Subject: [PATCH 15/17] fix: harden policy resume and review parsing --- docs/changelog/260415.md | 31 ++++++++++ docs/plans/json-settings/02-policy-model.md | 5 +- .../03-code-and-payload-changes.md | 7 ++- .../bmad-story-automator-review/workflow.yaml | 1 - .../steps-c/step-03a-execute-review.md | 4 +- scripts/smoke-test.sh | 2 + .../story_automator/commands/orchestrator.py | 36 +++++++++--- .../commands/orchestrator_parse.py | 25 ++++++++ source/src/story_automator/commands/tmux.py | 46 ++++++++++----- .../story_automator/core/runtime_policy.py | 31 +++++++--- .../story_automator/core/success_verifiers.py | 23 +++++++- source/tests/test_orchestrator_parse.py | 20 +++++++ source/tests/test_runtime_policy.py | 58 +++++++++++++++++-- source/tests/test_state_policy_metadata.py | 31 +++++++++- source/tests/test_success_verifiers.py | 49 ++++++++++++++++ 15 files changed, 320 insertions(+), 49 deletions(-) create mode 100644 docs/changelog/260415.md diff --git a/docs/changelog/260415.md b/docs/changelog/260415.md new file mode 100644 index 0000000..c54cdaa --- /dev/null +++ b/docs/changelog/260415.md @@ -0,0 +1,31 @@ +# Changelog - 260415 + +## 260415-01:20:16 - Harden policy resume and review parsing + +### Summary +Closed PR review gaps around state-file handling, snapshot boundaries, parser validation, and policy docs. + +### Changed +- Made `load_runtime_policy()` fail closed for broken active-run markers and missing marker/env state files. 
+- Enforced parse-contract schema shapes for nested review payloads instead of checking required top-level keys only. +- Removed the unused top-level review workflow `contract` pointer and aligned the plan docs with step-level policy ownership. + +### Fixed +- Added strict `--state-file` propagation and help coverage for review execution flow docs and tmux/orchestrator helpers. +- Bounded snapshot, state-file, and artifact-glob resolution to the project roots used by the runtime. +- Expanded regression coverage for prompt/schema/contract snapshot drift, malformed markers, and invalid review parser output. + +### Files +- `source/src/story_automator/core/runtime_policy.py` +- `source/src/story_automator/commands/orchestrator_parse.py` +- `source/src/story_automator/commands/orchestrator.py` +- `source/src/story_automator/commands/tmux.py` +- `source/src/story_automator/core/success_verifiers.py` +- `payload/.claude/skills/bmad-story-automator-review/workflow.yaml` +- `docs/plans/json-settings/02-policy-model.md` +- `docs/plans/json-settings/03-code-and-payload-changes.md` +- `source/tests/test_orchestrator_parse.py` +- `source/tests/test_runtime_policy.py` + +### QA Notes +- N/A diff --git a/docs/plans/json-settings/02-policy-model.md b/docs/plans/json-settings/02-policy-model.md index 3a168d9..50fd8a7 100644 --- a/docs/plans/json-settings/02-policy-model.md +++ b/docs/plans/json-settings/02-policy-model.md @@ -214,8 +214,9 @@ Prompt templates should support simple substitution only: - `{{story_id}}` - `{{story_prefix}}` -- `{{skill_path}}` -- `{{workflow_path}}` +- `{{label}}` +- `{{skill_line}}` +- `{{workflow_line}}` - `{{instructions_line}}` - `{{checklist_line}}` - `{{template_line}}` diff --git a/docs/plans/json-settings/03-code-and-payload-changes.md b/docs/plans/json-settings/03-code-and-payload-changes.md index 85b8b66..f326c59 100644 --- a/docs/plans/json-settings/03-code-and-payload-changes.md +++ b/docs/plans/json-settings/03-code-and-payload-changes.md 
@@ -177,9 +177,11 @@ Changes: ### `payload/.claude/skills/bmad-story-automator-review/workflow.yaml` -Add a machine contract pointer, for example: +Keep this file human-facing only. -- `contract: "./contract.json"` +The machine contract should stay in step policy: + +- `steps.review.success.contractFile = ".claude/skills/bmad-story-automator-review/contract.json"` ### New: `payload/.claude/skills/bmad-story-automator-review/contract.json` @@ -247,4 +249,3 @@ To keep files under roughly 500 LOC: - `orchestrator_parse.py`: parser command plus schema validation If `runtime_policy.py` grows too large, split only after phase 1 lands. - diff --git a/payload/.claude/skills/bmad-story-automator-review/workflow.yaml b/payload/.claude/skills/bmad-story-automator-review/workflow.yaml index f7c9283..05b5347 100644 --- a/payload/.claude/skills/bmad-story-automator-review/workflow.yaml +++ b/payload/.claude/skills/bmad-story-automator-review/workflow.yaml @@ -16,5 +16,4 @@ sprint_status: "{implementation_artifacts}/sprint-status.yaml" # Workflow components instructions: "./instructions.xml" validation: "./checklist.md" -contract: "./contract.json" standalone: true diff --git a/payload/.claude/skills/bmad-story-automator/steps-c/step-03a-execute-review.md b/payload/.claude/skills/bmad-story-automator/steps-c/step-03a-execute-review.md index a88d198..61a06bd 100644 --- a/payload/.claude/skills/bmad-story-automator/steps-c/step-03a-execute-review.md +++ b/payload/.claude/skills/bmad-story-automator/steps-c/step-03a-execute-review.md @@ -35,7 +35,7 @@ Set: `scripts="{scriptsDir}"` # --command required (see Spawn Pattern in step-03) session=$("$scripts" tmux-wrapper spawn auto {epic} {story_id} \ --agent "$current_agent" \ - --command "$("$scripts" tmux-wrapper build-cmd auto {story_id} --agent "$current_agent")") + --command "$("$scripts" tmux-wrapper build-cmd auto {story_id} --agent "$current_agent" --state-file "$state_file")") result=$("$scripts" monitor-session "$session" 
--json --agent "$current_agent") "$scripts" tmux-wrapper kill "$session" ``` @@ -74,7 +74,7 @@ if [ -z "$review_focus" ]; then fi # Compact subprocess-style summary contract for parent flow -review_summary=$("$scripts" orchestrator-helper parse-output "$review_log" review | jq -c ' +review_summary=$("$scripts" orchestrator-helper parse-output "$review_log" review --state-file "$state_file" | jq -c ' { next_action: (.next_action // "retry"), confidence: (.confidence // 0), diff --git a/scripts/smoke-test.sh b/scripts/smoke-test.sh index d64bf14..5e2ac85 100755 --- a/scripts/smoke-test.sh +++ b/scripts/smoke-test.sh @@ -249,6 +249,8 @@ verify_common_install() { assert_contains 'orchestrator-helper verify-step create {story_id} --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" assert_contains 'build-cmd create {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" assert_contains 'build-cmd dev {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" + assert_contains 'build-cmd auto {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/steps-c/step-03a-execute-review.md" + assert_contains 'parse-output "$review_log" review --state-file "$state_file"' "$story_dir/steps-c/step-03a-execute-review.md" assert_contains 'validation_passed=$(echo "$validation" | jq -r '\''.verified'\'')' "$story_dir/data/retry-fallback-implementation.md" assert_contains 'build-cmd {step} {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/data/retry-fallback-implementation.md" assert_contains 'orchestrator-helper verify-step create 5.3 --state-file "$state_file"' "$story_dir/data/monitoring-pattern.md" diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 5723fd2..fddbf83 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ 
b/source/src/story_automator/commands/orchestrator.py @@ -306,12 +306,16 @@ def _escalate(args: list[str]) -> int: context = args[1] if len(args) > 1 else "" state_file = "" idx = 2 - while idx < len(args): - if args[idx] == "--state-file" and idx + 1 < len(args): - state_file = args[idx + 1] - idx += 2 - continue - idx += 1 + try: + while idx < len(args): + if args[idx] == "--state-file": + state_file = _flag_value(args, idx, "--state-file") + idx += 2 + continue + idx += 1 + except PolicyError as exc: + print_json({"escalate": True, "reason": str(exc)}) + return 0 try: policy = load_runtime_policy(get_project_root(), state_file=state_file) except (FileNotFoundError, PolicyError) as exc: @@ -405,9 +409,17 @@ def _verify_code_review(args: list[str]) -> int: return 1 state_file = "" tail = args[1:] - for idx, arg in enumerate(tail): - if arg == "--state-file" and idx + 1 < len(tail): - state_file = tail[idx + 1] + try: + idx = 0 + while idx < len(tail): + if tail[idx] == "--state-file": + state_file = _flag_value(tail, idx, "--state-file") + idx += 2 + continue + idx += 1 + except PolicyError as exc: + print_json({"verified": False, "reason": "review_contract_invalid", "input": args[0], "error": str(exc)}) + return 1 payload = verify_code_review_completion(get_project_root(), args[0], state_file=state_file or None) print_json(payload) return 0 if bool(payload.get("verified")) else 1 @@ -457,3 +469,9 @@ def _verify_step(args: list[str]) -> int: def _parse_context_int(context: str, key: str) -> int: match = re.search(rf"{re.escape(key)}=(\d+)", context) return int(match.group(1)) if match else 0 + + +def _flag_value(args: list[str], idx: int, flag: str) -> str: + if idx + 1 >= len(args) or not args[idx + 1].strip() or args[idx + 1].startswith("--"): + raise PolicyError(f"{flag} requires a value") + return args[idx + 1] diff --git a/source/src/story_automator/commands/orchestrator_parse.py b/source/src/story_automator/commands/orchestrator_parse.py index 
714c577..0f7ea28 100644 --- a/source/src/story_automator/commands/orchestrator_parse.py +++ b/source/src/story_automator/commands/orchestrator_parse.py @@ -66,6 +66,9 @@ def parse_output_action(args: list[str]) -> int: if not _has_required_keys(payload, parse_contract.get("requiredKeys") or []): print_json({"status": "error", "reason": "sub-agent returned invalid json"}) return 1 + if not _matches_schema(payload, parse_contract.get("schema") or {}): + print_json({"status": "error", "reason": "sub-agent returned invalid json"}) + return 1 print(json.dumps(payload, separators=(",", ":"))) return 0 @@ -95,3 +98,25 @@ def _has_required_keys(payload: object, required_keys: list[Any]) -> bool: if not isinstance(payload, dict): return False return all(isinstance(key, str) and key in payload for key in required_keys) + + +def _matches_schema(payload: object, schema: object) -> bool: + if isinstance(schema, dict): + if not isinstance(payload, dict): + return False + for key, child_schema in schema.items(): + if key not in payload or not _matches_schema(payload[key], child_schema): + return False + return True + if not isinstance(schema, str): + return False + rule = schema.strip() + if rule == "integer": + return isinstance(payload, int) and not isinstance(payload, bool) + if rule == "true|false": + return isinstance(payload, bool) + if rule == "path or null": + return payload is None or (isinstance(payload, str) and bool(payload.strip())) + if "|" in rule and " " not in rule: + return isinstance(payload, str) and payload in rule.split("|") + return isinstance(payload, str) and bool(payload.strip()) diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py index 1fe0c75..9208042 100644 --- a/source/src/story_automator/commands/tmux.py +++ b/source/src/story_automator/commands/tmux.py @@ -98,7 +98,7 @@ def _usage(code: int) -> int: print(" kill ", file=target) print(" kill-all [--project-only]", file=target) print(" exists ", 
file=target) - print(" build-cmd [--agent TYPE] [extra_instruction]", file=target) + print(" build-cmd [--agent TYPE] [--state-file PATH] [extra_instruction]", file=target) print(" project-slug", file=target) print(" project-hash", file=target) print(" story-suffix ", file=target) @@ -109,6 +109,8 @@ def _usage(code: int) -> int: def _spawn(args: list[str]) -> int: + if args and args[0] in {"--help", "-h"}: + return _usage(0) if len(args) < 3: return _usage(1) step, epic, story_id = args[:3] @@ -169,6 +171,8 @@ def _spawn(args: list[str]) -> int: def _build_cmd(args: list[str]) -> int: + if args and args[0] in {"--help", "-h"}: + return _usage(0) if len(args) < 2: return _usage(1) step, story_id = args[:2] @@ -177,17 +181,21 @@ def _build_cmd(args: list[str]) -> int: tail = args[2:] idx = 0 state_file = "" - while idx < len(tail): - if tail[idx] == "--agent" and idx + 1 < len(tail): - agent = tail[idx + 1] - idx += 2 - continue - if tail[idx] == "--state-file" and idx + 1 < len(tail): - state_file = tail[idx + 1] - idx += 2 - continue - extra = f"{extra} {tail[idx]}".strip() - idx += 1 + try: + while idx < len(tail): + if tail[idx] == "--agent": + agent = _flag_value(tail, idx, "--agent") + idx += 2 + continue + if tail[idx] == "--state-file": + state_file = _flag_value(tail, idx, "--state-file") + idx += 2 + continue + extra = f"{extra} {tail[idx]}".strip() + idx += 1 + except PolicyError as exc: + print(str(exc), file=__import__("sys").stderr) + return 1 agent = agent or agent_type() story_prefix = story_id.replace(".", "-") root = get_project_root() @@ -696,8 +704,12 @@ def cmd_monitor_session(args: list[str]) -> int: story_key = args[idx + 1] idx += 2 continue - elif arg == "--state-file" and idx + 1 < len(args): - state_file = args[idx + 1] + elif arg == "--state-file": + try: + state_file = _flag_value(args, idx, "--state-file") + except PolicyError as exc: + print(str(exc), file=__import__("sys").stderr) + return 1 idx += 2 continue elif arg == 
"--project-root" and idx + 1 < len(args): @@ -798,3 +810,9 @@ def _verify_monitor_completion( except PolicyError: return ({"verified": False, "reason": "verifier_contract_invalid"}, verifier_name) return (result, verifier_name) + + +def _flag_value(args: list[str], idx: int, flag: str) -> str: + if idx + 1 >= len(args) or not args[idx + 1].strip() or args[idx + 1].startswith("--"): + raise PolicyError(f"{flag} requires a value") + return args[idx + 1] diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index 812d5e2..4fbffc4 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ b/source/src/story_automator/core/runtime_policy.py @@ -57,6 +57,8 @@ def load_runtime_policy( resolved_state, source = resolve_policy_state_file(root, state_file) if resolved_state: state_path = Path(resolved_state) + if source in {"env", "marker"} and not state_path.is_file(): + raise PolicyError(f"{source} state file missing: {state_path}") if source != "explicit" and not state_path.is_file(): return load_effective_policy(str(root), resolve_assets=resolve_assets) return load_policy_for_state(str(state_path), project_root=str(root), resolve_assets=resolve_assets) @@ -92,9 +94,13 @@ def load_policy_snapshot( path = Path(snapshot_file) if not path.is_absolute(): path = root / path + path = _ensure_within(path, root, "policy snapshot") if not path.is_file(): raise PolicyError(f"policy snapshot missing: {path}") - raw = read_text(path) + try: + raw = read_text(path) + except OSError as exc: + raise PolicyError(f"policy snapshot unreadable: {path}") from exc actual_hash = md5_hex8(raw) if expected_hash and actual_hash != expected_hash: raise PolicyError(f"policy snapshot hash mismatch: expected {expected_hash}, got {actual_hash}") @@ -117,7 +123,10 @@ def load_policy_for_state( resolve_assets: bool = True, ) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() - fields = 
parse_simple_frontmatter(read_text(state_file)) + try: + fields = parse_simple_frontmatter(read_text(state_file)) + except OSError as exc: + raise PolicyError(f"state file unreadable: {state_file}") from exc snapshot_file, snapshot_hash, legacy_mode = _state_policy_mode(fields) if not legacy_mode: return load_policy_snapshot( @@ -152,16 +161,17 @@ def resolve_policy_state_file(project_root: str | Path | None = None, state_file return str(_resolve_state_path(root, explicit)), "explicit" env_state = os.environ.get("STORY_AUTOMATOR_STATE_FILE", "").strip() if env_state: - return str(_resolve_state_path(root, Path(env_state).expanduser())), "env" + return str(_resolve_state_path(root, Path(env_state).expanduser(), allow_outside=False, label="env state file")), "env" marker = root / ".claude" / ".story-automator-active" if marker.is_file(): try: payload = _read_json(marker) - except PolicyError: - return "", "" + except PolicyError as exc: + raise PolicyError(f"active-run marker invalid: {exc}") from exc marker_state = str(payload.get("stateFile") or "").strip() - if marker_state: - return str(_resolve_state_path(root, Path(marker_state).expanduser())), "marker" + if not marker_state: + raise PolicyError("active-run marker missing stateFile") + return str(_resolve_state_path(root, Path(marker_state).expanduser(), allow_outside=False, label="marker state file")), "marker" return "", "" @@ -448,8 +458,11 @@ def _display_path(path: Path, project_root: Path) -> str: return str(path.resolve()) -def _resolve_state_path(project_root: Path, path: Path) -> Path: - return path if path.is_absolute() else project_root / path +def _resolve_state_path(project_root: Path, path: Path, *, allow_outside: bool = True, label: str = "state file") -> Path: + candidate = path if path.is_absolute() else project_root / path + if allow_outside: + return candidate.resolve() + return _ensure_within(candidate, project_root.resolve(), label) def _set_or_verify_hash(payload: dict[str, Any], *, 
path_key: str, hash_key: str, label: str) -> None: diff --git a/source/src/story_automator/core/success_verifiers.py b/source/src/story_automator/core/success_verifiers.py index 4a5cf42..40b32e8 100644 --- a/source/src/story_automator/core/success_verifiers.py +++ b/source/src/story_automator/core/success_verifiers.py @@ -73,12 +73,13 @@ def create_story_artifact( raw_glob = str(config.get("glob") or "_bmad-output/implementation-artifacts/{story_prefix}-*.md") expected = _parse_int(config.get("expectedMatches", 1), "success.config.expectedMatches", minimum=0) pattern = _format_story_pattern(raw_glob, norm) - matches = sorted(Path(project_root).glob(pattern)) + root, safe_pattern = _resolve_artifact_glob(project_root, pattern) + matches = sorted(root.glob(safe_pattern)) payload: dict[str, object] = { "verified": len(matches) == expected, "story": norm.key, "source": "artifact_glob", - "pattern": pattern, + "pattern": safe_pattern, "expectedMatches": expected, "actualMatches": len(matches), "matches": [str(match) for match in matches], @@ -176,6 +177,24 @@ def _story_artifact_path(project_root: str, story_prefix: str) -> Path | None: return matches[0] if matches else None +def _resolve_artifact_glob(project_root: str, pattern: str) -> tuple[Path, str]: + root = Path(project_root).resolve() + artifacts_root = (root / "_bmad-output" / "implementation-artifacts").resolve() + raw = Path(pattern) + if raw.is_absolute(): + raise PolicyError("success.config.glob must be relative to _bmad-output/implementation-artifacts") + resolved = (root / raw).resolve() + try: + relative = resolved.relative_to(root) + except ValueError as exc: + raise PolicyError("success.config.glob escapes project root") from exc + try: + resolved.relative_to(artifacts_root) + except ValueError as exc: + raise PolicyError("success.config.glob must stay within _bmad-output/implementation-artifacts") from exc + return root, str(relative) + + def _load_review_contract(project_root: str, contract: 
dict[str, Any]) -> dict[str, Any]: merged = dict(DEFAULT_REVIEW_CONTRACT) contract_path = str(contract.get("contractPath") or "").strip() diff --git a/source/tests/test_orchestrator_parse.py b/source/tests/test_orchestrator_parse.py index cc451f4..b9a2dba 100644 --- a/source/tests/test_orchestrator_parse.py +++ b/source/tests/test_orchestrator_parse.py @@ -91,6 +91,26 @@ def test_output_shape_remains_compatible(self) -> None: self.assertIn("issues_found", payload) self.assertIn("all_fixed", payload) + def test_review_output_rejects_invalid_nested_shape(self) -> None: + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), patch( + "story_automator.commands.orchestrator_parse.run_cmd", + return_value=CommandResult('{"status":"SUCCESS","issues_found":{"critical":"0","high":0,"medium":1,"low":0},"all_fixed":true,"summary":"ok","next_action":"proceed"}', 0), + ), redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "review"]) + self.assertEqual(code, 1) + self.assertEqual(json.loads(stdout.getvalue())["reason"], "sub-agent returned invalid json") + + def test_review_output_rejects_invalid_enum_value(self) -> None: + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), patch( + "story_automator.commands.orchestrator_parse.run_cmd", + return_value=CommandResult('{"status":"BROKEN","issues_found":{"critical":0,"high":0,"medium":1,"low":0},"all_fixed":true,"summary":"ok","next_action":"proceed"}', 0), + ), redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "review"]) + self.assertEqual(code, 1) + self.assertEqual(json.loads(stdout.getvalue())["reason"], "sub-agent returned invalid json") + def test_state_file_keeps_pinned_parse_contract_after_override_changes(self) -> None: state_file = self._build_state() override_dir = self.project_root / "_bmad" / "bmm" diff --git a/source/tests/test_runtime_policy.py 
b/source/tests/test_runtime_policy.py index 2136d0f..e402320 100644 --- a/source/tests/test_runtime_policy.py +++ b/source/tests/test_runtime_policy.py @@ -127,6 +127,14 @@ def test_data_path_cannot_escape_allowed_roots(self) -> None: with self.assertRaisesRegex(PolicyError, "policy data path escapes allowed roots"): load_effective_policy(str(self.project_root)) + def test_snapshot_file_cannot_escape_project_root(self) -> None: + snapshot = snapshot_effective_policy(str(self.project_root)) + source_path = self.project_root / snapshot["policySnapshotFile"] + external = self.project_root.parent / "external-snapshot.json" + external.write_text(source_path.read_text(encoding="utf-8"), encoding="utf-8") + with self.assertRaisesRegex(PolicyError, "policy snapshot escapes allowed root"): + load_policy_snapshot(str(external), project_root=str(self.project_root), expected_hash=snapshot["policySnapshotHash"]) + def test_snapshot_detects_prompt_template_drift(self) -> None: snapshot = snapshot_effective_policy(str(self.project_root)) prompt = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "data" / "prompts" / "create.md" @@ -138,19 +146,53 @@ def test_snapshot_detects_prompt_template_drift(self) -> None: expected_hash=snapshot["policySnapshotHash"], ) - def test_missing_marker_state_falls_back_to_effective_policy(self) -> None: + def test_snapshot_detects_parse_schema_drift(self) -> None: + snapshot = snapshot_effective_policy(str(self.project_root)) + schema = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "data" / "parse" / "create.json" + schema.write_text('{"requiredKeys":["status"],"schema":{"status":"SUCCESS|FAILURE|AMBIGUOUS"}}\n', encoding="utf-8") + with self.assertRaisesRegex(PolicyError, "policy parse schema hash mismatch"): + load_policy_snapshot( + snapshot["policySnapshotFile"], + project_root=str(self.project_root), + expected_hash=snapshot["policySnapshotHash"], + ) + + def 
test_snapshot_detects_success_contract_drift(self) -> None: + snapshot = snapshot_effective_policy(str(self.project_root)) + contract = self.project_root / ".claude" / "skills" / "bmad-story-automator-review" / "contract.json" + contract.write_text('{"doneValues":["approved"],"sourceOrder":["story-file"],"syncSprintStatus":false}\n', encoding="utf-8") + with self.assertRaisesRegex(PolicyError, "policy success contract hash mismatch"): + load_policy_snapshot( + snapshot["policySnapshotFile"], + project_root=str(self.project_root), + expected_hash=snapshot["policySnapshotHash"], + ) + + def test_missing_marker_state_raises_policy_error(self) -> None: marker = self.project_root / ".claude" / ".story-automator-active" marker.parent.mkdir(parents=True, exist_ok=True) marker.write_text(json.dumps({"stateFile": "missing.md"}), encoding="utf-8") - policy = load_runtime_policy(str(self.project_root)) - self.assertEqual(policy["workflow"]["repeat"]["review"]["maxCycles"], 5) + with self.assertRaisesRegex(PolicyError, "marker state file missing"): + load_runtime_policy(str(self.project_root)) - def test_malformed_marker_falls_back_to_effective_policy(self) -> None: + def test_marker_state_cannot_escape_project_root(self) -> None: + marker = self.project_root / ".claude" / ".story-automator-active" + marker.parent.mkdir(parents=True, exist_ok=True) + marker.write_text(json.dumps({"stateFile": "../outside.md"}), encoding="utf-8") + with self.assertRaisesRegex(PolicyError, "marker state file escapes allowed root"): + load_runtime_policy(str(self.project_root)) + + def test_malformed_marker_raises_policy_error(self) -> None: marker = self.project_root / ".claude" / ".story-automator-active" marker.parent.mkdir(parents=True, exist_ok=True) marker.write_text("{bad json", encoding="utf-8") - policy = load_runtime_policy(str(self.project_root)) - self.assertEqual(policy["workflow"]["repeat"]["review"]["maxCycles"], 5) + with self.assertRaisesRegex(PolicyError, "active-run marker 
invalid"): + load_runtime_policy(str(self.project_root)) + + def test_env_state_cannot_escape_project_root(self) -> None: + with patch.dict("os.environ", {"STORY_AUTOMATOR_STATE_FILE": "../outside.md"}, clear=False): + with self.assertRaisesRegex(PolicyError, "env state file escapes allowed root"): + load_runtime_policy(str(self.project_root)) def test_legacy_state_uses_bundled_defaults_without_override_or_env(self) -> None: self._write_override({"workflow": {"repeat": {"review": {"maxCycles": 1}}}}) @@ -203,6 +245,10 @@ def test_snapshot_metadata_with_legacy_flag_is_rejected(self) -> None: with self.assertRaisesRegex(PolicyError, "state policy metadata contradictory"): load_runtime_policy(str(self.project_root), state_file=str(state_file)) + def test_explicit_directory_state_file_raises_policy_error(self) -> None: + with self.assertRaisesRegex(PolicyError, "state file unreadable"): + load_runtime_policy(str(self.project_root), state_file=str(self.project_root)) + def _install_bundle(self) -> None: source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" diff --git a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index 748dd0b..04afb23 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -10,7 +10,7 @@ from story_automator.commands.orchestrator import cmd_orchestrator_helper from story_automator.commands.state import cmd_build_state_doc, cmd_validate_state -from story_automator.commands.tmux import _build_cmd +from story_automator.commands.tmux import _build_cmd, cmd_tmux_wrapper REPO_ROOT = Path(__file__).resolve().parents[2] @@ -261,6 +261,26 @@ def test_build_cmd_does_not_treat_state_file_flag_as_prompt_text(self) -> None: self.assertNotIn("--state-file", rendered) self.assertNotIn(str(state_file), rendered) + def 
test_build_cmd_rejects_incomplete_state_file_flag(self) -> None: + stderr = io.StringIO() + with patch_env(self.project_root), redirect_stderr(stderr): + code = _build_cmd(["review", "1.1", "--state-file"]) + self.assertEqual(code, 1) + self.assertIn("--state-file requires a value", stderr.getvalue()) + + def test_tmux_subcommand_help_matches_step_preflight_contract(self) -> None: + stdout = io.StringIO() + with redirect_stdout(stdout): + code = cmd_tmux_wrapper(["spawn", "--help"]) + self.assertEqual(code, 0) + self.assertIn("--command", stdout.getvalue()) + + stdout = io.StringIO() + with redirect_stdout(stdout): + code = cmd_tmux_wrapper(["build-cmd", "--help"]) + self.assertEqual(code, 0) + self.assertIn("--state-file", stdout.getvalue()) + def test_build_state_doc_returns_json_on_policy_snapshot_failure(self) -> None: override_dir = self.project_root / "_bmad" / "bmm" override_dir.mkdir(parents=True, exist_ok=True) @@ -293,6 +313,15 @@ def test_build_cmd_rejects_unknown_step_via_policy(self) -> None: self.assertEqual(code, 1) self.assertIn("unknown step: ship", stderr.getvalue()) + def test_escalate_returns_json_on_incomplete_state_file_flag(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["escalate", "review-loop", "cycles=1", "--state-file"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["escalate"]) + self.assertEqual(payload["reason"], "--state-file requires a value") + def _build_state(self) -> Path: stdout = io.StringIO() template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index f858d5c..aec2a4d 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -42,6 +42,30 @@ def test_create_story_artifact_matches_configured_glob(self) 
-> None: self.assertTrue(payload["verified"]) self.assertEqual(payload["actualMatches"], 1) + def test_create_story_artifact_rejects_glob_that_escapes_project_root(self) -> None: + with self.assertRaisesRegex(PolicyError, "success.config.glob escapes project root"): + create_story_artifact( + project_root=str(self.project_root), + story_key="1.2", + contract={"config": {"glob": "../other/{story_prefix}-*.md", "expectedMatches": 1}}, + ) + + def test_create_story_artifact_rejects_glob_outside_artifacts_dir(self) -> None: + with self.assertRaisesRegex(PolicyError, "success.config.glob must stay within _bmad-output/implementation-artifacts"): + create_story_artifact( + project_root=str(self.project_root), + story_key="1.2", + contract={"config": {"glob": "docs/{story_prefix}-*.md", "expectedMatches": 1}}, + ) + + def test_create_story_artifact_rejects_absolute_glob(self) -> None: + with self.assertRaisesRegex(PolicyError, "success.config.glob must be relative to _bmad-output/implementation-artifacts"): + create_story_artifact( + project_root=str(self.project_root), + story_key="1.2", + contract={"config": {"glob": "/tmp/{story_prefix}-*.md", "expectedMatches": 1}}, + ) + def test_review_completion_uses_contract_done_values(self) -> None: self._write_story("1-2-example", status="approved") contract = self._write_review_contract( @@ -262,6 +286,21 @@ def test_validate_story_creation_check_returns_compat_schema_on_missing_state_fi self.assertFalse(payload["valid"]) self.assertIn("missing-state.md", payload["reason"]) + def test_review_wrapper_normalizes_directory_state_file(self) -> None: + payload = verify_code_review_completion(str(self.project_root), "1.2", state_file=self.project_root) + self.assertFalse(payload["verified"]) + self.assertEqual(payload["reason"], "review_contract_invalid") + self.assertIn("state file unreadable", str(payload.get("error"))) + + def test_validate_story_creation_check_returns_compat_schema_on_directory_state_file(self) -> None: + stdout 
= io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--state-file", str(self.project_root)]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertIn("state file unreadable", payload["reason"]) + def test_review_wrapper_honors_empty_injected_contract(self) -> None: self._write_story("1-2-example", status="done") self._write_override( @@ -298,6 +337,16 @@ def test_verify_step_rejects_incomplete_state_file_flag(self) -> None: self.assertEqual(payload["reason"], "verifier_contract_invalid") self.assertEqual(payload["error"], "--state-file requires a value") + def test_verify_code_review_rejects_incomplete_state_file_flag(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["verify-code-review", "1.2", "--state-file"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["verified"]) + self.assertEqual(payload["reason"], "review_contract_invalid") + self.assertEqual(payload["error"], "--state-file requires a value") + def test_validate_story_creation_check_returns_compat_schema_on_bad_counts(self) -> None: stdout = io.StringIO() with patch_env(self.project_root), redirect_stdout(stdout): From d7ffdf5802213dc63a8a94693370f468377b33a5 Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Wed, 15 Apr 2026 06:47:36 -0300 Subject: [PATCH 16/17] fix: harden tmux monitor failure handling --- docs/changelog/260415.md | 18 +++++++++++ source/src/story_automator/commands/tmux.py | 6 ++-- source/tests/test_state_policy_metadata.py | 21 +++++++++++++ source/tests/test_success_verifiers.py | 33 ++++++++++++++++++++- 4 files changed, 74 insertions(+), 4 deletions(-) diff --git a/docs/changelog/260415.md b/docs/changelog/260415.md index c54cdaa..084800a 100644 --- 
a/docs/changelog/260415.md +++ b/docs/changelog/260415.md @@ -29,3 +29,21 @@ Closed PR review gaps around state-file handling, snapshot boundaries, parser va ### QA Notes - N/A + +## 260415-06:47:15 - Harden tmux prompt and monitor contract failures + +### Summary +Kept tmux workflow helpers fail-closed when prompt templates or success verifiers are missing or unreadable. + +### Fixed +- Moved review prompt rendering under the existing `build-cmd` policy/contract error path and broadened it to catch `OSError` template read failures. +- Made monitor verification return `verifier_contract_invalid` when `success.verifier` is blank instead of falling through to `normal_completion`. +- Added regression coverage for missing/directory prompt templates and for the monitor-session caller path when verifier config is empty. + +### Files +- `source/src/story_automator/commands/tmux.py` +- `source/tests/test_state_policy_metadata.py` +- `source/tests/test_success_verifiers.py` + +### QA Notes +- N/A diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py index 9208042..b35a1d1 100644 --- a/source/src/story_automator/commands/tmux.py +++ b/source/src/story_automator/commands/tmux.py @@ -202,7 +202,8 @@ def _build_cmd(args: list[str]) -> int: try: policy = load_runtime_policy(root, state_file=state_file) contract = step_contract(policy, step) - except (FileNotFoundError, PolicyError) as exc: + prompt = _render_step_prompt(contract, story_id, story_prefix, extra) + except (OSError, PolicyError) as exc: print(str(exc), file=__import__("sys").stderr) return 1 ai_command = os.environ.get("AI_COMMAND") @@ -212,7 +213,6 @@ def _build_cmd(args: list[str]) -> int: cli = agent_cli(agent) else: cli = "codex exec" - prompt = _render_step_prompt(contract, story_id, story_prefix, extra) quoted_prompt = shlex.quote(prompt) if agent == "codex" and not ai_command: codex_home = f"/tmp/sa-codex-home-{project_hash(root)}" @@ -796,7 +796,7 @@ def 
_verify_monitor_completion( return ({"verified": False, "reason": "verifier_contract_invalid"}, "") verifier_name = str(contract.get("verifier") or "").strip() if not verifier_name: - return None + return ({"verified": False, "reason": "verifier_contract_invalid"}, "") if verifier_name in {"create_story_artifact", "review_completion", "epic_complete"} and not story_key.strip(): return ({"verified": False, "reason": "story_key_required", "verifier": verifier_name}, verifier_name) try: diff --git a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index 04afb23..a617de0 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -268,6 +268,27 @@ def test_build_cmd_rejects_incomplete_state_file_flag(self) -> None: self.assertEqual(code, 1) self.assertIn("--state-file requires a value", stderr.getvalue()) + def test_build_cmd_returns_exit_code_one_when_prompt_template_is_missing(self) -> None: + state_file = self._build_state() + template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "data" / "prompts" / "review.md" + template.unlink() + stderr = io.StringIO() + with patch_env(self.project_root), redirect_stderr(stderr): + code = _build_cmd(["review", "1.1", "--state-file", str(state_file)]) + self.assertEqual(code, 1) + self.assertIn("review.md", stderr.getvalue()) + + def test_build_cmd_returns_exit_code_one_when_prompt_template_becomes_directory(self) -> None: + state_file = self._build_state() + template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "data" / "prompts" / "review.md" + template.unlink() + template.mkdir() + stderr = io.StringIO() + with patch_env(self.project_root), redirect_stderr(stderr): + code = _build_cmd(["review", "1.1", "--state-file", str(state_file)]) + self.assertEqual(code, 1) + self.assertIn("review.md", stderr.getvalue()) + def test_tmux_subcommand_help_matches_step_preflight_contract(self) -> None: 
stdout = io.StringIO() with redirect_stdout(stdout): diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index aec2a4d..8d277a2 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -7,10 +7,11 @@ import unittest from contextlib import redirect_stdout from pathlib import Path +from unittest.mock import patch from story_automator.commands.orchestrator import cmd_orchestrator_helper from story_automator.commands.state import cmd_build_state_doc -from story_automator.commands.tmux import _verify_monitor_completion +from story_automator.commands.tmux import _verify_monitor_completion, cmd_monitor_session from story_automator.commands.validate_story_creation import cmd_validate_story_creation from story_automator.core.review_verify import verify_code_review_completion from story_automator.core.runtime_policy import PolicyError @@ -168,6 +169,36 @@ def test_monitor_dispatch_skips_story_keyed_verifier_without_story_key(self) -> self.assertFalse(payload["verified"]) self.assertEqual(payload["reason"], "story_key_required") + def test_monitor_dispatch_rejects_missing_verifier_in_contract(self) -> None: + self._write_override({"steps": {"review": {"success": {"verifier": ""}}}}) + result = _verify_monitor_completion( + "review", + project_root=str(self.project_root), + story_key="1.2", + output_file="/tmp/session.txt", + ) + self.assertIsNotNone(result) + payload, verifier = result or ({}, "") + self.assertEqual(verifier, "") + self.assertFalse(payload["verified"]) + self.assertEqual(payload["reason"], "verifier_contract_invalid") + + def test_monitor_session_reports_incomplete_when_verifier_missing(self) -> None: + self._write_override({"steps": {"review": {"success": {"verifier": ""}}}}) + stdout = io.StringIO() + statuses = [ + {"todos_done": 1, "todos_total": 1, "session_state": "completed"}, + {"active_task": "/tmp/session.txt"}, + ] + with patch_env(self.project_root), 
patch("story_automator.commands.tmux.time.sleep"), patch( + "story_automator.commands.tmux.session_status", side_effect=statuses + ), redirect_stdout(stdout): + code = cmd_monitor_session(["fake-session", "--json", "--workflow", "review", "--story-key", "1.2"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["final_state"], "incomplete") + self.assertEqual(payload["exit_reason"], "verifier_contract_invalid") + def test_monitor_dispatch_allows_session_exit_without_story_key(self) -> None: result = _verify_monitor_completion( "dev", From 85f49a405f45d735bd1be77139ef063320fea29f Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Wed, 15 Apr 2026 07:55:10 -0300 Subject: [PATCH 17/17] fix: tighten tmux monitor output verification --- docs/changelog/260415.md | 18 +++++++++ source/src/story_automator/commands/tmux.py | 35 +++++++++++++++-- source/tests/test_success_verifiers.py | 43 +++++++++++++++++++++ 3 files changed, 92 insertions(+), 4 deletions(-) diff --git a/docs/changelog/260415.md b/docs/changelog/260415.md index 084800a..5c6a8aa 100644 --- a/docs/changelog/260415.md +++ b/docs/changelog/260415.md @@ -47,3 +47,21 @@ Kept tmux workflow helpers fail-closed when prompt templates or success verifier ### QA Notes - N/A + +## 260415-07:54:52 - Tighten tmux monitor output verification + +### Summary +Made monitor JSON reflect actual verifier results and narrowed verifier-side file error normalization. + +### Fixed +- Defaulted `output_verified` to `false` when monitor terminal states have no verifier result, instead of deriving it from `output_file` presence. +- Kept verified and incomplete monitor branches explicit about verifier outcome in emitted JSON. +- Narrowed verifier-side error normalization to missing/path-shape file failures plus `PolicyError`, instead of flattening all `OSError`s. +- Added regressions for verifier-side file failures and timeout output remaining unverified. 
+ +### Files +- `source/src/story_automator/commands/tmux.py` +- `source/tests/test_success_verifiers.py` + +### QA Notes +- N/A diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py index b35a1d1..5e839f5 100644 --- a/source/src/story_automator/commands/tmux.py +++ b/source/src/story_automator/commands/tmux.py @@ -744,7 +744,15 @@ def cmd_monitor_session(args: list[str]) -> int: verified, verifier_name = verification if bool(verified.get("verified")): reason = "normal_completion" if verifier_name == "session_exit" else "verified_complete" - return _emit_monitor(json_output, "completed", last_done, last_total, str(output), reason) + return _emit_monitor( + json_output, + "completed", + last_done, + last_total, + str(output), + reason, + output_verified=bool(verified.get("verified")), + ) return _emit_monitor( json_output, "incomplete", @@ -752,6 +760,7 @@ def cmd_monitor_session(args: list[str]) -> int: last_total, str(output), str(verified.get("reason") or "workflow_not_verified"), + output_verified=bool(verified.get("verified")), ) return _emit_monitor(json_output, "completed", last_done, last_total, str(output), "normal_completion") if state == "crashed": @@ -774,9 +783,27 @@ def cmd_monitor_session(args: list[str]) -> int: return _emit_monitor(json_output, "timeout", last_done, last_total, str(output), "max_polls_exceeded") -def _emit_monitor(json_output: bool, state: str, done: int, total: int, output_file: str, reason: str) -> int: +def _emit_monitor( + json_output: bool, + state: str, + done: int, + total: int, + output_file: str, + reason: str, + *, + output_verified: bool | None = None, +) -> int: if json_output: - print_json({"final_state": state, "todos_done": done, "todos_total": total, "output_file": output_file, "exit_reason": reason, "output_verified": bool(output_file)}) + print_json( + { + "final_state": state, + "todos_done": done, + "todos_total": total, + "output_file": output_file, + 
"exit_reason": reason, + "output_verified": False if output_verified is None else output_verified, + } + ) else: print(f"{state},{done},{total},{output_file},{reason}") return 0 @@ -807,7 +834,7 @@ def _verify_monitor_completion( output_file=output_file, contract=contract, ) - except PolicyError: + except (FileNotFoundError, IsADirectoryError, NotADirectoryError, PolicyError): return ({"verified": False, "reason": "verifier_contract_invalid"}, verifier_name) return (result, verifier_name) diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index 8d277a2..db94d9e 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -198,6 +198,49 @@ def test_monitor_session_reports_incomplete_when_verifier_missing(self) -> None: payload = json.loads(stdout.getvalue()) self.assertEqual(payload["final_state"], "incomplete") self.assertEqual(payload["exit_reason"], "verifier_contract_invalid") + self.assertFalse(payload["output_verified"]) + + def test_monitor_dispatch_rejects_verifier_side_file_error(self) -> None: + with patch("story_automator.commands.tmux.run_success_verifier", side_effect=FileNotFoundError("missing.json")): + result = _verify_monitor_completion( + "review", + project_root=str(self.project_root), + story_key="1.2", + output_file="/tmp/session.txt", + ) + self.assertIsNotNone(result) + payload, verifier = result or ({}, "") + self.assertEqual(verifier, "review_completion") + self.assertFalse(payload["verified"]) + self.assertEqual(payload["reason"], "verifier_contract_invalid") + + def test_monitor_session_reports_incomplete_when_verifier_raises_file_error(self) -> None: + stdout = io.StringIO() + statuses = [ + {"todos_done": 1, "todos_total": 1, "session_state": "completed"}, + {"active_task": "/tmp/session.txt"}, + ] + with patch_env(self.project_root), patch("story_automator.commands.tmux.time.sleep"), patch( + "story_automator.commands.tmux.session_status", 
side_effect=statuses + ), patch("story_automator.commands.tmux.run_success_verifier", side_effect=FileNotFoundError("missing.json")), redirect_stdout(stdout): + code = cmd_monitor_session(["fake-session", "--json", "--workflow", "review", "--story-key", "1.2"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["final_state"], "incomplete") + self.assertEqual(payload["exit_reason"], "verifier_contract_invalid") + self.assertFalse(payload["output_verified"]) + + def test_monitor_session_timeout_keeps_output_unverified_without_verifier_result(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), patch( + "story_automator.commands.tmux.session_status", return_value={"active_task": "/tmp/session.txt"} + ), redirect_stdout(stdout): + code = cmd_monitor_session(["fake-session", "--json", "--max-polls", "0"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["final_state"], "timeout") + self.assertEqual(payload["exit_reason"], "max_polls_exceeded") + self.assertFalse(payload["output_verified"]) def test_monitor_dispatch_allows_session_exit_without_story_key(self) -> None: result = _verify_monitor_completion(