From 1fd07b73ce532e34f5f2db71f302e895805edaee Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 06:34:38 -0300 Subject: [PATCH 01/17] docs: add json settings implementation plan --- docs/changelog/260413.md | 20 ++ .../json-settings/01-why-json-settings.md | 132 +++++++++ docs/plans/json-settings/02-policy-model.md | 273 +++++++++++++++++ .../03-code-and-payload-changes.md | 250 ++++++++++++++++ .../04-migration-testing-and-risks.md | 200 +++++++++++++ docs/plans/json-settings/README.md | 85 ++++++ docs/plans/json-settings/TODO.md | 276 ++++++++++++++++++ 7 files changed, 1236 insertions(+) create mode 100644 docs/plans/json-settings/01-why-json-settings.md create mode 100644 docs/plans/json-settings/02-policy-model.md create mode 100644 docs/plans/json-settings/03-code-and-payload-changes.md create mode 100644 docs/plans/json-settings/04-migration-testing-and-risks.md create mode 100644 docs/plans/json-settings/README.md create mode 100644 docs/plans/json-settings/TODO.md diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 454b7c1..2c3ea4f 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -65,3 +65,23 @@ Reworked the package docs into a multi-file operator guide covering flow, runtim ### QA Notes - N/A + +## 260413-06:34:01 - Add JSON settings implementation plan + +### Summary +Added a structured planning packet for moving story-automator contracts to snapshot-backed JSON settings. + +### Added +- Added a dedicated plan set covering goals, architecture, code and payload touchpoints, migration strategy, testing strategy, and risk controls for the JSON settings refactor. +- Added a sequential implementation TODO with dependencies, phase boundaries, and done criteria for executing the refactor in bounded slices. 
+ +### Files +- `docs/plans/json-settings/README.md` +- `docs/plans/json-settings/01-why-json-settings.md` +- `docs/plans/json-settings/02-policy-model.md` +- `docs/plans/json-settings/03-code-and-payload-changes.md` +- `docs/plans/json-settings/04-migration-testing-and-risks.md` +- `docs/plans/json-settings/TODO.md` + +### QA Notes +- N/A diff --git a/docs/plans/json-settings/01-why-json-settings.md b/docs/plans/json-settings/01-why-json-settings.md new file mode 100644 index 0000000..bfdba0b --- /dev/null +++ b/docs/plans/json-settings/01-why-json-settings.md @@ -0,0 +1,132 @@ +# Why JSON Settings + +## Problem + +The current system already behaves like it has a policy layer, but that policy is scattered. + +Examples: + +- step prompts are assembled inline in `source/src/story_automator/commands/tmux.py` +- parse contracts are hard-coded in `source/src/story_automator/commands/orchestrator_parse.py` +- retry limits and escalation budgets are hard-coded in `source/src/story_automator/commands/orchestrator.py` +- review completion logic is fixed in `source/src/story_automator/core/review_verify.py` +- step asset discovery is encoded in `source/src/story_automator/core/workflow_paths.py` +- human-facing loop rules live in payload docs under `payload/.claude/skills/bmad-story-automator/` + +That creates four problems: + +1. the real contract is hard to see in one place +2. docs and runtime can drift +3. customization requires source edits +4. 
resume determinism is fragile if behavior depends on live files + +## Goals + +The implementation should make these customizable: + +- per-step prompt templates +- parse contracts and output schema expectations +- success verifier thresholds and source order +- bounded loop settings such as retry counts and review max cycles +- step asset resolution rules + +It should also preserve: + +- zero-config current behavior +- install layout +- current runtime engine model +- deterministic resume/replay + +## Non-Goals + +This work should not become: + +- a generic DSL +- a plugin system for arbitrary verifier code +- a graph workflow engine +- a rewrite of tmux/session execution + +## Why JSON + +This repo should choose JSON settings instead of YAML for the machine contract. + +Reasons: + +1. No new dependency. + The repo currently has no YAML parser dependency in the Python package. JSON keeps the runtime dependency-free. + +2. Existing code already speaks JSON. + `state.py`, `orchestrator_parse.py`, agent config helpers, and multiple command surfaces already pass JSON around. + +3. Snapshot determinism is simpler. + Stable sorting, hashing, and byte-for-byte snapshots are easier with JSON. + +4. Parse schemas are already a JSON-shaped concept. + Moving step parse contracts into `.json` files is a natural fit. + +5. Settings are machine-facing. + Long prose belongs in markdown/XML files anyway, so readability pressure on the main settings file is lower than it would be for a human-authored workflow language. + +## Why Not YAML First + +YAML would be nicer for comments and long-form hand editing, but it adds cost now: + +- new parsing dependency or hand-rolled parser +- more edge cases around scalars and lists +- more work to normalize and hash deterministically + +If operator ergonomics later require comments, the safer follow-up is JSONC or a small translator layer, not immediate YAML adoption. 
+ +## Existing Repo Fit + +The repo already has a natural home for settings files: + +- `payload/.claude/skills/bmad-story-automator/data/` + +That directory already holds: + +- rules docs +- retry docs +- complexity JSON +- prompt-related docs +- monitoring docs + +Adding JSON policy and parse files there follows the existing layout instead of inventing a new storage pattern. + +## Architectural Principle + +Use: + +```text +declarative contracts ++ imperative engine +``` + +Declarative: + +- prompts +- parse schema +- verifier parameters +- asset path candidates +- loop budgets + +Imperative: + +- tmux spawning +- session capture +- crash/stuck detection +- file reads/writes +- snapshot creation +- verifier execution + +## Success Standard + +This refactor is worth doing only if it makes behavior easier to change without making runtime behavior harder to trust. + +Practical success means: + +- changing prompt text means editing a payload file or override, not Python +- changing review completion thresholds means editing JSON settings, not Python +- changing retry budgets means editing JSON settings, not env-only knobs +- resume always uses the same effective contract as the run start + diff --git a/docs/plans/json-settings/02-policy-model.md b/docs/plans/json-settings/02-policy-model.md new file mode 100644 index 0000000..56acdb6 --- /dev/null +++ b/docs/plans/json-settings/02-policy-model.md @@ -0,0 +1,273 @@ +# Policy Model + +## Target Shape + +Introduce one new concept: the runtime policy. + +It has three layers: + +1. bundled default policy +2. optional project override policy +3. effective snapshot written at orchestration start + +Only the snapshot is allowed to drive an in-flight run. 

## File Locations

### Bundled default policy

```text
payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json
```

### Bundled prompt templates

```text
payload/.claude/skills/bmad-story-automator/data/prompts/
  create.md
  dev.md
  auto.md
  review.md
  retro.md
```

### Bundled parse contracts

```text
payload/.claude/skills/bmad-story-automator/data/parse/
  create.json
  dev.json
  auto.json
  review.json
  retro.json
```

### Project override

```text
_bmad/bmm/story-automator.policy.json
```

### Effective snapshot

```text
_bmad-output/story-automator/policy-snapshots/{story_id}-{policy_hash}.json
```

### Review machine contract

```text
payload/.claude/skills/bmad-story-automator-review/contract.json
```

## Data Flow

```mermaid
flowchart TD
    A["Bundled policy JSON"] --> D["runtime_policy.py"]
    B["Project override JSON"] --> D
    D --> E["Resolve paths + validate + merge"]
    E --> F["Write effective snapshot JSON"]
    F --> G["State doc stores path + hash"]
    F --> H["tmux build-cmd"]
    F --> I["orchestrator_parse"]
    F --> J["success verifiers"]
    F --> K["orchestrator budgets"]
```

## Core Rules

### Rule 1

Bundled defaults must reproduce current behavior exactly.

### Rule 2

Project overrides can customize values, but cannot register new executable code.

### Rule 3

Resume must use the snapshot from state, never live re-merge.

### Rule 4

Prompt text, parse contracts, and verifier thresholds are data.

### Rule 5

tmux lifecycle, monitor logic, file IO, and verifier execution remain Python.

## Merge Rules

Use deterministic merge semantics:

- maps: deep merge
- arrays: replace
- scalars: override
- unknown top-level keys: reject with validation error

This keeps overrides predictable and makes snapshots stable. 
+ +## JSON Schema Shape + +High-level example: + +```json +{ + "version": 1, + "snapshot": { + "relativeDir": "_bmad-output/story-automator/policy-snapshots" + }, + "runtime": { + "parser": { + "provider": "claude", + "model": "haiku", + "timeoutSeconds": 120 + }, + "merge": { + "maps": "deep", + "arrays": "replace" + } + }, + "workflow": { + "sequence": ["create", "dev", "auto", "review"], + "optional": { + "auto": { + "skipWhenOverride": "skipAutomate" + } + }, + "repeat": { + "review": { + "maxCycles": 5, + "successVerifier": "review_completion", + "onIncomplete": "retry", + "onExhausted": "escalate" + } + }, + "crash": { + "maxRetries": 2, + "onExhausted": "escalate" + }, + "triggers": [ + { + "name": "retrospective_on_epic_complete", + "after": "review", + "verifier": "epic_complete", + "run": "retro", + "blocking": false, + "forceAgent": "claude" + } + ] + }, + "steps": { + "create": { + "label": "create-story", + "assets": { + "skillName": "bmad-create-story", + "workflowCandidates": ["workflow.md", "workflow.yaml"], + "instructionsCandidates": ["discover-inputs.md"], + "checklistCandidates": ["checklist.md"], + "templateCandidates": ["template.md"], + "required": ["skill", "workflow"] + }, + "prompt": { + "templateFile": "data/prompts/create.md", + "interactionMode": "autonomous" + }, + "parse": { + "schemaFile": "data/parse/create.json" + }, + "success": { + "verifier": "create_story_artifact", + "config": { + "glob": "_bmad-output/implementation-artifacts/{story_prefix}-*.md", + "expectedMatches": 1 + } + } + }, + "review": { + "label": "code-review", + "assets": { + "skillName": "bmad-story-automator-review", + "workflowCandidates": ["workflow.yaml", "workflow.md"], + "instructionsCandidates": ["instructions.xml"], + "checklistCandidates": ["checklist.md"], + "required": ["skill", "workflow"] + }, + "prompt": { + "templateFile": "data/prompts/review.md", + "interactionMode": "autonomous", + "acceptExtraInstruction": true, + "defaultExtraInstruction": 
"auto-fix all issues without prompting" + }, + "parse": { + "schemaFile": "data/parse/review.json" + }, + "success": { + "verifier": "review_completion", + "contractFile": ".claude/skills/bmad-story-automator-review/contract.json" + } + } + } +} +``` + +## Named Verifiers + +Initial verifier set should stay small: + +- `create_story_artifact` +- `session_exit` +- `review_completion` +- `epic_complete` + +No custom verifier registration in settings. + +## Prompt Template Rules + +Prompt templates should support simple substitution only: + +- `{{story_id}}` +- `{{story_prefix}}` +- `{{skill_path}}` +- `{{workflow_path}}` +- `{{instructions_line}}` +- `{{checklist_line}}` +- `{{template_line}}` +- `{{extra_instruction}}` + +No loops, no conditions beyond small optional-line helpers in Python. + +## Success Contract Rules + +The runtime should use settings to decide: + +- which verifier to run +- which config to pass it +- which sources to trust first +- which statuses count as done or incomplete + +It should not use session output alone as final truth except for explicitly simple verifiers like `session_exit`. + +## State Doc Metadata + +State frontmatter should store only: + +- `policyVersion` +- `policySnapshotFile` +- `policySnapshotHash` +- `legacyPolicy` when needed + +The state file should not store the full merged policy blob. + +## Why The Snapshot Matters + +Without a pinned snapshot, these changes become unsafe: + +- payload update +- project override edit +- prompt tweak during an in-flight run +- verifier threshold change after preflight + +The snapshot prevents those mutations from changing the behavior of a resumed orchestration. 
+ diff --git a/docs/plans/json-settings/03-code-and-payload-changes.md b/docs/plans/json-settings/03-code-and-payload-changes.md new file mode 100644 index 0000000..85b8b66 --- /dev/null +++ b/docs/plans/json-settings/03-code-and-payload-changes.md @@ -0,0 +1,250 @@ +# Code And Payload Changes + +## Implementation Principle + +Keep files under control. Avoid one giant refactor file. + +Recommended new source modules: + +- `source/src/story_automator/core/runtime_policy.py` +- `source/src/story_automator/core/success_verifiers.py` + +Keep prompt rendering small enough to live in existing command modules unless it grows past a reasonable size. + +## Source Changes + +### New: `source/src/story_automator/core/runtime_policy.py` + +Responsibilities: + +- load bundled default policy JSON +- load optional project override JSON +- merge deterministically +- validate structure +- resolve step asset paths +- write effective snapshot JSON +- load policy from snapshot during resume +- expose helpers such as `load_effective_policy()` and `step_contract()` + +Notes: + +- this module is the only policy merge point +- it should normalize relative paths against project root or installed skill root +- it should reject unknown verifier names and invalid step references early + +### New: `source/src/story_automator/core/success_verifiers.py` + +Responsibilities: + +- named verifier registry +- `session_exit` +- `create_story_artifact` +- `review_completion` +- `epic_complete` + +Notes: + +- keep `verify_code_review_completion()` as a backward-compatible wrapper +- verifier config comes from policy, verifier execution stays in Python + +### `source/src/story_automator/commands/tmux.py` + +Replace hard-coded prompt assembly with policy-driven prompt rendering. 
+ +Changes: + +- stop building prompts from inline string map +- load step contract from snapshot or effective policy +- render step prompt from `prompt.templateFile` +- use policy-driven step label instead of `_automate_workflow_label()` +- shrink `_build_retro_prompt()` into data-backed template usage +- make `monitor-session` call the configured verifier, not a permanent review special case + +Keep in Python: + +- Codex/Claude CLI invocation +- `CODEX_HOME` setup +- `tmux` session lifecycle +- heartbeat/status logic + +### `source/src/story_automator/commands/orchestrator_parse.py` + +Replace the `if step == ...` schema tree. + +Changes: + +- read `parse.schemaFile` and optional parser prompt template +- inject `label` and schema into parser call +- validate returned JSON against required keys from schema +- preserve current command output shape + +### `source/src/story_automator/core/review_verify.py` + +Reduce it to a compatibility wrapper. + +Changes: + +- keep current public function +- delegate to `success_verifiers.review_completion` +- allow contract-driven status values and source order + +### `source/src/story_automator/commands/orchestrator.py` + +Move hard-coded budgets into policy. + +Changes: + +- `review-loop` limit comes from `workflow.repeat.review.maxCycles` +- `session-crash` limit comes from `workflow.crash.maxRetries` +- story creation validation becomes part of `create_story_artifact` +- escalation actions stay in Python + +### `source/src/story_automator/commands/state.py` + +Add policy metadata at state document creation. + +Changes: + +- write `policyVersion` +- write `policySnapshotFile` +- write `policySnapshotHash` +- optionally write `legacyPolicy` +- surface these in state summary and validation + +Do not: + +- embed full policy JSON in frontmatter + +### `source/src/story_automator/core/frontmatter.py` + +Keep changes minimal. 
+ +Possible work: + +- teach state readers to return new scalar metadata +- no nested policy parser + +### `source/src/story_automator/core/workflow_paths.py` + +Refactor into policy-backed asset resolution. + +Changes: + +- resolve explicit path or candidate list from policy +- distinguish required vs optional assets +- fail fast for missing required assets +- preserve compatibility wrappers where useful + +Important fix: + +- required assets must no longer silently return the first candidate string when nothing exists + +## Payload Changes + +### New: `payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json` + +This is the default machine contract for the installed skill. + +### New: `payload/.claude/skills/bmad-story-automator/data/prompts/*.md` + +Add prompt templates for: + +- create +- dev +- auto +- review +- retro + +### New: `payload/.claude/skills/bmad-story-automator/data/parse/*.json` + +Add step parse contracts for: + +- create +- dev +- auto +- review +- retro + +### `payload/.claude/skills/bmad-story-automator/workflow.md` + +Keep this human-facing and orchestration-facing. + +Changes: + +- reference the runtime policy file explicitly +- describe the current sequence as the default policy, not the only possible future shape +- align wording with policy terms: prompt contract, parse contract, success verifier, snapshot + +### `payload/.claude/skills/bmad-story-automator-review/workflow.yaml` + +Add a machine contract pointer, for example: + +- `contract: "./contract.json"` + +### New: `payload/.claude/skills/bmad-story-automator-review/contract.json` + +Store structured review completion semantics: + +- blocking severity +- allowed done values +- allowed in-progress values +- source order +- sprint sync expectations + +### `payload/.claude/skills/bmad-story-automator-review/instructions.xml` + +Keep the adversarial review behavior, but align it with autonomous mode. 
+ +Changes: + +- stop relying on prompt folklore to override user-choice branches +- make automatic fix behavior driven by explicit interaction mode +- keep review prose separate from machine completion rules + +## Installer And Packaging Impact + +### `install.sh` + +Likely no logic change needed because the installer already copies the whole payload tree. + +Needed checks: + +- verify new payload files exist after install +- update smoke tests to assert new data files are present + +### `package.json` + +Likely no change needed because `payload/` and `source/` are already in `files`. + +## Verification Surface Changes + +### `scripts/smoke-test.sh` + +Must update smoke coverage for: + +- installed policy JSON presence +- installed prompt template presence +- installed parse JSON presence +- prompt-building behavior still matching default policy +- policy-backed build-cmd output for create/auto/review/retro + +### Suggested new tests under `source/tests/` + +- `test_runtime_policy.py` +- `test_success_verifiers.py` +- `test_orchestrator_parse.py` +- `test_state_policy_metadata.py` + +Use stdlib `unittest` unless a dependency-free alternative is clearly better. + +## Recommended Module Boundaries + +To keep files under roughly 500 LOC: + +- `runtime_policy.py`: load, merge, validate, snapshot, resolve +- `success_verifiers.py`: registry and concrete verifiers +- `tmux.py`: session behavior plus prompt rendering entrypoint only +- `orchestrator_parse.py`: parser command plus schema validation + +If `runtime_policy.py` grows too large, split only after phase 1 lands. 
+ diff --git a/docs/plans/json-settings/04-migration-testing-and-risks.md b/docs/plans/json-settings/04-migration-testing-and-risks.md new file mode 100644 index 0000000..93e4c93 --- /dev/null +++ b/docs/plans/json-settings/04-migration-testing-and-risks.md @@ -0,0 +1,200 @@ +# Migration, Testing, And Risks + +## Compatibility Plan + +### Default behavior + +Bundled default JSON settings must preserve today's behavior exactly. + +That includes: + +- prompt wording +- asset path candidate order +- parser labels and required fields +- review completion fallback to story file status +- review max cycles +- crash retry count +- retrospective forcing Claude + +### Old state docs + +If a state document has no policy metadata: + +- resume in legacy mode +- load bundled defaults +- mark the run summary with `legacyPolicy: true` + +This is the only safe fallback for pre-refactor state docs. + +### New state docs + +If a state document has: + +- `policySnapshotFile` +- `policySnapshotHash` + +then resume must: + +- load the snapshot +- verify the hash +- fail validation if snapshot missing or mismatched + +Do not silently fall back to live defaults for a new-format state doc. + +### Legacy env vars + +For one release cycle, continue to honor: + +- `MAX_REVIEW_CYCLES` +- `MAX_CRASH_RETRIES` + +But resolve them once at orchestration start and bake the effective values into the snapshot. + +That preserves old operator habits without breaking deterministic resume. + +## Test Strategy + +### Principle + +Add focused Python tests for new policy behavior, then keep the smoke suite as the installer/integration safety net. + +### Recommended Test Harness + +Use stdlib `unittest` first. 
+ +Reasons: + +- no new dependency +- enough for merge/validation/path-resolution tests +- enough for verifier tests with temporary directories + +### New Python Test Coverage + +### `test_runtime_policy.py` + +Cases: + +- bundled default loads +- project override deep-merges maps +- arrays replace cleanly +- invalid step name rejected +- invalid verifier name rejected +- required asset missing fails +- snapshot hash stable + +### `test_success_verifiers.py` + +Cases: + +- `create_story_artifact` returns fail for 0 matches +- `create_story_artifact` returns pass for 1 match +- `create_story_artifact` returns fail for runaway multiple matches +- `review_completion` passes on sprint status done +- `review_completion` falls back to story file `Status: done` +- `review_completion` fails on in-progress/unknown +- `epic_complete` respects sprint status values + +### `test_orchestrator_parse.py` + +Cases: + +- parse schema loads from step contract +- invalid schema file rejected +- invalid child JSON rejected +- output shape remains compatible + +### `test_state_policy_metadata.py` + +Cases: + +- state doc writes policy metadata +- summary surfaces policy metadata +- legacy state without policy metadata remains valid + +### Smoke Test Updates + +Extend `scripts/smoke-test.sh` to verify: + +- installed `data/orchestration-policy.json` +- installed prompt template files +- installed parse JSON files +- `tmux-wrapper build-cmd` still emits expected default text +- review prompt still defaults to automatic fixes in autonomous mode + +### Verify Command Updates + +Recommended future command shape: + +```bash +python3 -m unittest discover -s source/tests +npm run test:smoke +npm run pack:dry-run +``` + +Then fold that into `npm run verify`. 
+ +## Risk Register + +| Risk | Why it matters | Mitigation | +|------|----------------|------------| +| Prompt drift changes agent behavior | Equivalent wording is not actually equivalent for model behavior | Golden prompt tests against current defaults | +| Snapshot ignored on resume | Live payload changes mutate in-flight run behavior | Resume path must require snapshot for new-format states | +| Review still asks the user in autonomous mode | Current review workflow prose still has a menu branch | Add explicit interaction-mode contract and payload alignment | +| Required asset silent fallback | Missing workflow may look valid until runtime | Resolver must fail closed for required assets | +| Custom statuses cause false positives | Review completion may pass with wrong values | Contract validation + verifier tests | +| Optional auto skill incomplete | Step contract may claim assets that do not exist | Required/optional separation in resolver | +| Policy file grows too complex | Moderate refactor turns into new engine | Keep bounded primitives only | + +## Rollout Strategy + +### Phase 1 + +Land: + +- policy loader +- bundled default policy +- project override support +- pinned snapshot +- prompt templates +- parse contracts +- policy-backed retry budgets + +Keep: + +- fixed engine shape +- existing review special logic if needed for the first slice + +### Phase 2 + +Land: + +- verifier registry +- policy-backed `monitor-session` verifier dispatch +- `contract.json` for review +- review payload alignment for autonomous mode + +### Phase 3 + +Land: + +- policy-backed bounded loop config +- optional-step and trigger wiring +- cleanup of old hard-coded helpers + +## Phase Exit Criteria + +Phase 1 exit: + +- zero-config build-cmd output matches baseline +- snapshot created and stored in state +- parse schemas load from JSON files + +Phase 2 exit: + +- review completion no longer special-cased in `monitor-session` +- review contract is structured and tested + +Phase 3 
exit: + +- retry/repeat/trigger policy comes from snapshot +- docs and runtime use the same terms diff --git a/docs/plans/json-settings/README.md b/docs/plans/json-settings/README.md new file mode 100644 index 0000000..6490892 --- /dev/null +++ b/docs/plans/json-settings/README.md @@ -0,0 +1,85 @@ +# JSON Settings Plan + +Purpose: move prompt text, parse contracts, verifier thresholds, and bounded loop rules out of scattered Python constants and into deterministic JSON settings, without replacing the existing runtime engine. + +## Summary + +This plan chooses: + +- JSON for machine settings +- markdown/XML for long prompt and workflow prose +- bundled defaults plus optional project override plus pinned snapshot +- named Python verifiers, not arbitrary expressions +- bounded workflow primitives, not user-defined workflow graphs + +That gives most of the configurability value with moderate risk. + +## Why This Exists + +Today the behavior is split across: + +- `source/src/story_automator/commands/tmux.py` +- `source/src/story_automator/commands/orchestrator_parse.py` +- `source/src/story_automator/commands/orchestrator.py` +- `source/src/story_automator/core/review_verify.py` +- `source/src/story_automator/core/workflow_paths.py` +- `payload/.claude/skills/bmad-story-automator/workflow.md` +- `payload/.claude/skills/bmad-story-automator-review/workflow.yaml` +- `payload/.claude/skills/bmad-story-automator-review/instructions.xml` + +That split is the main source of drift risk. This packet defines one implementation path to pull the machine contract into JSON settings while keeping the current engine intact. + +## Doc Map + +- [01-why-json-settings.md](./01-why-json-settings.md) + Problem, goals, non-goals, and why JSON is the right fit for this repo. +- [02-policy-model.md](./02-policy-model.md) + Target architecture, file locations, merge rules, schema shape, and data/runtime boundaries. 
+- [03-code-and-payload-changes.md](./03-code-and-payload-changes.md) + Exact source and payload touchpoints, including new modules and file-by-file changes. +- [04-migration-testing-and-risks.md](./04-migration-testing-and-risks.md) + Compatibility plan, resume semantics, test strategy, and risk controls. +- [TODO.md](./TODO.md) + Sequential execution checklist with dependencies and exit criteria. + +## Read Order + +1. Read [01-why-json-settings.md](./01-why-json-settings.md) +2. Read [02-policy-model.md](./02-policy-model.md) +3. Read [03-code-and-payload-changes.md](./03-code-and-payload-changes.md) +4. Read [04-migration-testing-and-risks.md](./04-migration-testing-and-risks.md) +5. Execute [TODO.md](./TODO.md) top to bottom + +## Core Decision + +The implementation should use this model: + +```text +bundled default policy + + optional project override + = effective runtime policy + -> pinned snapshot at orchestration start + -> state doc stores pointer + hash + -> all resume/replay uses snapshot only +``` + +## Definition Of Done + +This plan is complete when the implementation can: + +- customize step prompts without editing Python +- customize parse schemas without editing Python +- customize verifier thresholds and retry budgets without editing Python +- keep zero-config behavior identical to today +- resume from a pinned snapshot even if payload or override files later change +- reject invalid settings safely + +## Out Of Scope + +This plan does not try to deliver: + +- arbitrary user-defined workflow graphs +- custom Python or shell expressions in config +- a general workflow interpreter +- rich nested policy blobs embedded in frontmatter + diff --git a/docs/plans/json-settings/TODO.md b/docs/plans/json-settings/TODO.md new file mode 100644 index 0000000..9270b2f --- /dev/null +++ b/docs/plans/json-settings/TODO.md @@ -0,0 +1,276 @@ +# TODO + +Execute in order. Do not skip ahead unless the dependency line says it is safe. + +## Phase 0: Baseline + +1. 
[ ] Capture current behavior baselines. + Files: `source/src/story_automator/commands/tmux.py`, `source/src/story_automator/commands/orchestrator_parse.py`, `source/src/story_automator/commands/orchestrator.py`, `source/src/story_automator/core/review_verify.py` + Actions: + - run `npm run verify` + - capture `tmux-wrapper build-cmd` output for `create`, `auto`, `review`, `retro` + - note current review/crash limits and review completion behavior + Done when: + - baseline commands are saved in working notes + - current default behavior is explicit before edits start + +2. [ ] Freeze the target JSON settings shape. + Depends on: 1 + Files: `docs/plans/json-settings/02-policy-model.md` + Actions: + - confirm final top-level keys + - confirm snapshot file path + - confirm verifier names + Done when: + - no open schema ambiguity remains + +## Phase 1: Policy Loader And Default Policy + +3. [ ] Add bundled default policy JSON and data directories. + Depends on: 2 + Files: + - `payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json` + - `payload/.claude/skills/bmad-story-automator/data/prompts/*.md` + - `payload/.claude/skills/bmad-story-automator/data/parse/*.json` + Actions: + - encode current behavior exactly + - keep prompt wording as close to current strings as possible + Done when: + - payload contains complete default machine contract + +4. [ ] Implement `runtime_policy.py`. + Depends on: 3 + Files: + - `source/src/story_automator/core/runtime_policy.py` + Actions: + - load bundled policy + - load optional `_bmad/bmm/story-automator.policy.json` + - deep-merge maps, replace arrays + - validate known keys and verifier names + - resolve relative paths + - write stable snapshot JSON with hash + Done when: + - one call can return effective policy plus snapshot metadata + +5. [ ] Refactor required/optional asset resolution behind policy. 
+ Depends on: 4 + Files: + - `source/src/story_automator/core/workflow_paths.py` + - `source/src/story_automator/core/runtime_policy.py` + Actions: + - move candidate-list resolution behind policy + - fail closed on missing required assets + - preserve compatibility wrappers where helpful + Done when: + - required assets never silently resolve to non-existent placeholders + +6. [ ] Add state metadata for policy snapshots. + Depends on: 4 + Files: + - `source/src/story_automator/commands/state.py` + - `source/src/story_automator/core/frontmatter.py` + Actions: + - write `policyVersion` + - write `policySnapshotFile` + - write `policySnapshotHash` + - add `legacyPolicy` handling + Done when: + - new state docs point at a snapshot instead of embedding policy + +## Phase 2: Prompt And Parse Externalization + +7. [ ] Replace hard-coded tmux prompts with template rendering. + Depends on: 4, 5, 6 + Files: + - `source/src/story_automator/commands/tmux.py` + Actions: + - load step contract from effective policy + - render prompt from template file + - preserve current Codex/Claude boot logic + - preserve current default prompt text behavior + Done when: + - `build-cmd` no longer uses the hard-coded prompt map + +8. [ ] Replace hard-coded parse schema switch with policy-backed contracts. + Depends on: 4 + Files: + - `source/src/story_automator/commands/orchestrator_parse.py` + Actions: + - load step parse schema JSON + - render parser prompt from label + schema + - validate returned JSON against required keys + Done when: + - parser behavior comes from data files, not `if step == ...` + +9. [ ] Move retry budgets into policy-backed reads. + Depends on: 4 + Files: + - `source/src/story_automator/commands/orchestrator.py` + Actions: + - source review max cycles from policy + - source crash retry limit from policy + - remove direct env-default dependence from active run behavior + Done when: + - budgets come from effective snapshot + +## Phase 3: Success Verifiers + +10. 
[ ] Add verifier registry and concrete implementations. + Depends on: 4 + Files: + - `source/src/story_automator/core/success_verifiers.py` + - `source/src/story_automator/core/review_verify.py` + Actions: + - implement `session_exit` + - implement `create_story_artifact` + - implement `review_completion` + - implement `epic_complete` + - keep backward-compatible wrapper for existing review helper + Done when: + - verifiers are selected by name and tested independently + +11. [ ] Wire `monitor-session` to policy-backed verifier dispatch. + Depends on: 7, 10 + Files: + - `source/src/story_automator/commands/tmux.py` + Actions: + - remove permanent review special case + - use step contract `success.verifier` + - pass verifier config and story context + Done when: + - completion logic is step-driven, not `workflow == "review"` driven + +12. [ ] Fold create story validation into `create_story_artifact`. + Depends on: 10, 11 + Files: + - `source/src/story_automator/commands/orchestrator.py` + - `source/src/story_automator/core/success_verifiers.py` + Actions: + - remove duplicated create validation trigger logic + - route create pass/fail through verifier + Done when: + - create success semantics exist in one place only + +## Phase 4: Review Payload Alignment + +13. [ ] Add structured review contract file. + Depends on: 3 + Files: + - `payload/.claude/skills/bmad-story-automator-review/contract.json` + - `payload/.claude/skills/bmad-story-automator-review/workflow.yaml` + Actions: + - move machine completion semantics into JSON + - make workflow point to the contract + Done when: + - review machine truth is no longer hidden inside prose only + +14. [ ] Align review instructions with autonomous mode. 
+ Depends on: 13 + Files: + - `payload/.claude/skills/bmad-story-automator-review/instructions.xml` + Actions: + - remove reliance on prompt folklore for auto-fix behavior + - make automatic fix path explicit for autonomous mode + Done when: + - review payload no longer contradicts runtime prompt defaults + +15. [ ] Update main workflow prose to reference runtime policy. + Depends on: 3 + Files: + - `payload/.claude/skills/bmad-story-automator/workflow.md` + Actions: + - reference `orchestration-policy.json` + - describe fixed loop as default policy + - align terms with runtime policy language + Done when: + - payload docs and runtime use the same contract vocabulary + +## Phase 5: Testing + +16. [ ] Add Python unit tests for policy and verifiers. + Depends on: 4, 8, 10 + Files: + - `source/tests/test_runtime_policy.py` + - `source/tests/test_success_verifiers.py` + - `source/tests/test_orchestrator_parse.py` + - `source/tests/test_state_policy_metadata.py` + Actions: + - use stdlib `unittest` + - cover merge, validation, snapshot, verifier behavior, parser loading + Done when: + - policy-specific behavior has direct automated coverage + +17. [ ] Update smoke tests for installed policy assets and defaults. + Depends on: 7, 8, 11, 13, 14, 15 + Files: + - `scripts/smoke-test.sh` + Actions: + - assert policy JSON exists after install + - assert prompt templates and parse files exist + - assert default prompt output still matches baseline expectations + Done when: + - installer/integration behavior remains covered end to end + +18. [ ] Update local verify flow. + Depends on: 16, 17 + Files: + - `package.json` + - `docs/development.md` + Actions: + - add Python unit test command + - fold it into `npm run verify` + - document new verify sequence + Done when: + - one verify command covers unit + smoke + package dry run + +## Phase 6: Compatibility And Cleanup + +19. [ ] Implement legacy resume behavior and strict new-state validation. 
+ Depends on: 6, 10, 11 + Files: + - `source/src/story_automator/commands/state.py` + - `source/src/story_automator/core/runtime_policy.py` + - any resume path using state metadata + Actions: + - old state without snapshot -> legacy defaults + `legacyPolicy: true` + - new state with missing snapshot -> validation failure + Done when: + - resume is deterministic and explicit in both modes + +20. [ ] Preserve env compatibility for one release cycle. + Depends on: 9 + Files: + - `source/src/story_automator/core/runtime_policy.py` + - docs as needed + Actions: + - read legacy env vars once at orchestration start + - bake effective values into snapshot + - document deprecation path + Done when: + - old env knobs still work without mutating resumed runs + +21. [ ] Remove or shrink obsolete hard-coded helpers. + Depends on: 7, 8, 9, 10, 11 + Files: + - `source/src/story_automator/commands/tmux.py` + - `source/src/story_automator/commands/orchestrator_parse.py` + - `source/src/story_automator/commands/orchestrator.py` + Actions: + - delete dead prompt-schema branches + - remove stale helpers after tests pass + Done when: + - no duplicate machine contract remains in code + +## Final Gate + +22. [ ] Run full verification and review default behavior drift. 
+ Depends on: 1 through 21 + Actions: + - run Python unit tests + - run `npm run verify` + - compare prompt baselines against phase 0 captures + - review installed payload tree manually once + Done when: + - zero-config behavior matches baseline + - customization surfaces work + - resume uses snapshots only From af8c1d6c31939a72d78612faa9b976b7ae980c5c Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 07:29:35 -0300 Subject: [PATCH 02/17] feat: add JSON runtime policy foundation --- docs/changelog/260413.md | 35 +++ docs/development.md | 4 +- package.json | 3 +- .../bmad-story-automator-review/contract.json | 7 + .../bmad-story-automator-review/workflow.yaml | 1 + .../data/orchestration-policy.json | 146 ++++++++++ .../bmad-story-automator/data/parse/auto.json | 10 + .../data/parse/create.json | 10 + .../bmad-story-automator/data/parse/dev.json | 10 + .../data/parse/retro.json | 8 + .../data/parse/review.json | 15 + .../bmad-story-automator/data/prompts/auto.md | 4 + .../data/prompts/create.md | 7 + .../bmad-story-automator/data/prompts/dev.md | 4 + .../data/prompts/retro.md | 33 +++ .../data/prompts/review.md | 4 + .../templates/state-document.md | 4 + .../skills/bmad-story-automator/workflow.md | 2 + scripts/smoke-test.sh | 6 + .../story_automator/commands/orchestrator.py | 10 +- .../commands/orchestrator_parse.py | 53 ++-- source/src/story_automator/commands/state.py | 12 +- source/src/story_automator/commands/tmux.py | 165 ++--------- .../story_automator/core/runtime_policy.py | 265 ++++++++++++++++++ .../story_automator/core/workflow_paths.py | 118 +------- source/tests/test_orchestrator_parse.py | 99 +++++++ source/tests/test_runtime_policy.py | 93 ++++++ source/tests/test_state_policy_metadata.py | 138 +++++++++ 28 files changed, 998 insertions(+), 268 deletions(-) create mode 100644 payload/.claude/skills/bmad-story-automator-review/contract.json create mode 100644 
payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json create mode 100644 payload/.claude/skills/bmad-story-automator/data/parse/auto.json create mode 100644 payload/.claude/skills/bmad-story-automator/data/parse/create.json create mode 100644 payload/.claude/skills/bmad-story-automator/data/parse/dev.json create mode 100644 payload/.claude/skills/bmad-story-automator/data/parse/retro.json create mode 100644 payload/.claude/skills/bmad-story-automator/data/parse/review.json create mode 100644 payload/.claude/skills/bmad-story-automator/data/prompts/auto.md create mode 100644 payload/.claude/skills/bmad-story-automator/data/prompts/create.md create mode 100644 payload/.claude/skills/bmad-story-automator/data/prompts/dev.md create mode 100644 payload/.claude/skills/bmad-story-automator/data/prompts/retro.md create mode 100644 payload/.claude/skills/bmad-story-automator/data/prompts/review.md create mode 100644 source/src/story_automator/core/runtime_policy.py create mode 100644 source/tests/test_orchestrator_parse.py create mode 100644 source/tests/test_runtime_policy.py create mode 100644 source/tests/test_state_policy_metadata.py diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 2c3ea4f..556a30e 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -85,3 +85,38 @@ Added a structured planning packet for moving story-automator contracts to snaps ### QA Notes - N/A + +## 260413-07:29:16 - Add JSON runtime policy foundation + +### Summary +Implemented the first JSON-settings slice with bundled policy data, snapshot-backed state metadata, and policy-driven prompt/parse wiring. + +### Added +- Added a runtime policy loader with deterministic merge, asset resolution, environment compatibility, and snapshot writing. +- Added bundled orchestration policy, prompt templates, parse contracts, and a structured review contract file. 
+- Added Python unit coverage for policy loading, parser contracts, and state snapshot metadata. + +### Changed +- Changed `tmux-wrapper build-cmd` to render prompts from policy-backed templates instead of inline string maps. +- Changed parser contract loading, retry budget reads, smoke coverage, and `npm run verify` to use the new JSON-policy foundation. +- Changed orchestration state documents to persist `policyVersion`, `policySnapshotFile`, and `policySnapshotHash`. + +### Files +- `source/src/story_automator/core/runtime_policy.py` +- `source/src/story_automator/core/workflow_paths.py` +- `source/src/story_automator/commands/tmux.py` +- `source/src/story_automator/commands/orchestrator_parse.py` +- `source/src/story_automator/commands/orchestrator.py` +- `source/src/story_automator/commands/state.py` +- `payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json` +- `payload/.claude/skills/bmad-story-automator/data/prompts/` +- `payload/.claude/skills/bmad-story-automator/data/parse/` +- `payload/.claude/skills/bmad-story-automator-review/contract.json` +- `scripts/smoke-test.sh` +- `package.json` +- `docs/development.md` +- `docs/changelog/260413.md` +- `source/tests/` + +### QA Notes +- N/A diff --git a/docs/development.md b/docs/development.md index 2307d52..ba9ef4b 100644 --- a/docs/development.md +++ b/docs/development.md @@ -13,6 +13,7 @@ PYTHONPATH=source/src python3 -m story_automator --help `npm run verify` expands to: +- `npm run test:python` - `npm run pack:dry-run` - `npm run test:smoke` @@ -25,13 +26,14 @@ The smoke suite validates: - required and optional dependency handling - legacy backup behavior - installed skill layout +- installed runtime policy, prompt templates, and parse contracts - prompt-building behavior for Claude and Codex child sessions ## Repo Verification Flow ```mermaid flowchart TD - A["Edit installer, payload, or runtime"] --> B["Run python helper sanity checks"] + A["Edit installer, payload, or runtime"] --> 
B["Run npm run test:python"] B --> C["Run npm run test:smoke"] C --> D["Run npm run pack:dry-run"] D --> E["Run npm run verify"] diff --git a/package.json b/package.json index 4cac0ac..4ba537f 100644 --- a/package.json +++ b/package.json @@ -17,8 +17,9 @@ ], "scripts": { "pack:dry-run": "npm pack --dry-run", + "test:python": "PYTHONPATH=source/src python3 -m unittest discover -s source/tests", "test:smoke": "bash scripts/smoke-test.sh", - "verify": "npm run pack:dry-run && npm run test:smoke" + "verify": "npm run test:python && npm run pack:dry-run && npm run test:smoke" }, "engines": { "node": ">=18" diff --git a/payload/.claude/skills/bmad-story-automator-review/contract.json b/payload/.claude/skills/bmad-story-automator-review/contract.json new file mode 100644 index 0000000..946bae5 --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator-review/contract.json @@ -0,0 +1,7 @@ +{ + "blockingSeverity": ["critical"], + "doneValues": ["done"], + "inProgressValues": ["in-progress", "in_progress", "review", "qa"], + "sourceOrder": ["sprint-status.yaml", "story-file"], + "syncSprintStatus": true +} diff --git a/payload/.claude/skills/bmad-story-automator-review/workflow.yaml b/payload/.claude/skills/bmad-story-automator-review/workflow.yaml index 05b5347..f7c9283 100644 --- a/payload/.claude/skills/bmad-story-automator-review/workflow.yaml +++ b/payload/.claude/skills/bmad-story-automator-review/workflow.yaml @@ -16,4 +16,5 @@ sprint_status: "{implementation_artifacts}/sprint-status.yaml" # Workflow components instructions: "./instructions.xml" validation: "./checklist.md" +contract: "./contract.json" standalone: true diff --git a/payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json b/payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json new file mode 100644 index 0000000..1699f1b --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json @@ -0,0 +1,146 @@ +{ + "version": 1, + 
"snapshot": { + "relativeDir": "_bmad-output/story-automator/policy-snapshots" + }, + "runtime": { + "parser": { + "provider": "claude", + "model": "haiku", + "timeoutSeconds": 120 + }, + "merge": { + "maps": "deep", + "arrays": "replace" + } + }, + "workflow": { + "sequence": ["create", "dev", "auto", "review", "retro"], + "repeat": { + "review": { + "maxCycles": 5, + "successVerifier": "review_completion", + "onIncomplete": "retry", + "onExhausted": "escalate" + } + }, + "crash": { + "maxRetries": 2, + "onExhausted": "escalate" + } + }, + "steps": { + "create": { + "label": "create-story", + "assets": { + "skillName": "bmad-create-story", + "workflowCandidates": ["workflow.md", "workflow.yaml"], + "instructionsCandidates": ["discover-inputs.md"], + "checklistCandidates": ["checklist.md"], + "templateCandidates": ["template.md"], + "required": ["skill", "workflow"] + }, + "prompt": { + "templateFile": "data/prompts/create.md", + "interactionMode": "autonomous" + }, + "parse": { + "schemaFile": "data/parse/create.json" + }, + "success": { + "verifier": "create_story_artifact", + "config": { + "glob": "_bmad-output/implementation-artifacts/{story_prefix}-*.md", + "expectedMatches": 1 + } + } + }, + "dev": { + "label": "dev-story", + "assets": { + "skillName": "bmad-dev-story", + "workflowCandidates": ["workflow.md", "workflow.yaml"], + "instructionsCandidates": [], + "checklistCandidates": ["checklist.md"], + "templateCandidates": [], + "required": ["skill", "workflow"] + }, + "prompt": { + "templateFile": "data/prompts/dev.md", + "interactionMode": "autonomous" + }, + "parse": { + "schemaFile": "data/parse/dev.json" + }, + "success": { + "verifier": "session_exit" + } + }, + "auto": { + "label": "qa-generate-e2e-tests", + "assets": { + "skillName": "bmad-qa-generate-e2e-tests", + "workflowCandidates": ["workflow.md", "workflow.yaml"], + "instructionsCandidates": [], + "checklistCandidates": ["checklist.md"], + "templateCandidates": [], + "required": [] + }, + 
"prompt": { + "templateFile": "data/prompts/auto.md", + "interactionMode": "autonomous" + }, + "parse": { + "schemaFile": "data/parse/auto.json" + }, + "success": { + "verifier": "session_exit" + } + }, + "review": { + "label": "code-review", + "assets": { + "skillName": "bmad-story-automator-review", + "workflowCandidates": ["workflow.yaml", "workflow.md"], + "instructionsCandidates": ["instructions.xml"], + "checklistCandidates": ["checklist.md"], + "templateCandidates": [], + "required": ["skill", "workflow"] + }, + "prompt": { + "templateFile": "data/prompts/review.md", + "interactionMode": "autonomous", + "acceptExtraInstruction": true, + "defaultExtraInstruction": "auto-fix all issues without prompting" + }, + "parse": { + "schemaFile": "data/parse/review.json" + }, + "success": { + "verifier": "review_completion", + "contractFile": ".claude/skills/bmad-story-automator-review/contract.json" + } + }, + "retro": { + "label": "retrospective", + "assets": { + "skillName": "bmad-retrospective", + "workflowCandidates": ["workflow.md", "workflow.yaml"], + "instructionsCandidates": [], + "checklistCandidates": [], + "templateCandidates": [], + "required": ["skill", "workflow"] + }, + "prompt": { + "templateFile": "data/prompts/retro.md", + "interactionMode": "autonomous" + }, + "parse": { + "schemaFile": "data/parse/retro.json" + }, + "success": { + "verifier": "epic_complete" + } + } + } +} diff --git a/payload/.claude/skills/bmad-story-automator/data/parse/auto.json b/payload/.claude/skills/bmad-story-automator/data/parse/auto.json new file mode 100644 index 0000000..ba9a61e --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/parse/auto.json @@ -0,0 +1,10 @@ +{ + "requiredKeys": ["status", "tests_added", "coverage_improved", "summary", "next_action"], + "schema": { + "status": "SUCCESS|FAILURE|AMBIGUOUS", + "tests_added": "integer", + "coverage_improved": "true|false", + "summary": "brief description", + "next_action": "proceed|retry|escalate" + } 
+} diff --git a/payload/.claude/skills/bmad-story-automator/data/parse/create.json b/payload/.claude/skills/bmad-story-automator/data/parse/create.json new file mode 100644 index 0000000..9c420f6 --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/parse/create.json @@ -0,0 +1,10 @@ +{ + "requiredKeys": ["status", "story_created", "story_file", "summary", "next_action"], + "schema": { + "status": "SUCCESS|FAILURE|AMBIGUOUS", + "story_created": "true|false", + "story_file": "path or null", + "summary": "brief description", + "next_action": "proceed|retry|escalate" + } +} diff --git a/payload/.claude/skills/bmad-story-automator/data/parse/dev.json b/payload/.claude/skills/bmad-story-automator/data/parse/dev.json new file mode 100644 index 0000000..3d02f30 --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/parse/dev.json @@ -0,0 +1,10 @@ +{ + "requiredKeys": ["status", "tests_passed", "build_passed", "summary", "next_action"], + "schema": { + "status": "SUCCESS|FAILURE|AMBIGUOUS", + "tests_passed": "true|false", + "build_passed": "true|false", + "summary": "brief description", + "next_action": "proceed|retry|escalate" + } +} diff --git a/payload/.claude/skills/bmad-story-automator/data/parse/retro.json b/payload/.claude/skills/bmad-story-automator/data/parse/retro.json new file mode 100644 index 0000000..3b9ed5a --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/parse/retro.json @@ -0,0 +1,8 @@ +{ + "requiredKeys": ["status", "summary", "next_action"], + "schema": { + "status": "SUCCESS|FAILURE|AMBIGUOUS", + "summary": "brief description", + "next_action": "proceed|retry|escalate" + } +} diff --git a/payload/.claude/skills/bmad-story-automator/data/parse/review.json b/payload/.claude/skills/bmad-story-automator/data/parse/review.json new file mode 100644 index 0000000..cfa86cd --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/parse/review.json @@ -0,0 +1,15 @@ +{ + "requiredKeys": ["status", 
"issues_found", "all_fixed", "summary", "next_action"], + "schema": { + "status": "SUCCESS|FAILURE|AMBIGUOUS", + "issues_found": { + "critical": "integer", + "high": "integer", + "medium": "integer", + "low": "integer" + }, + "all_fixed": "true|false", + "summary": "brief description", + "next_action": "proceed|retry|escalate" + } +} diff --git a/payload/.claude/skills/bmad-story-automator/data/prompts/auto.md b/payload/.claude/skills/bmad-story-automator/data/prompts/auto.md new file mode 100644 index 0000000..28911dd --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/prompts/auto.md @@ -0,0 +1,4 @@ +Execute the BMAD {{label}} workflow for story {{story_id}}. + +{{skill_line}}{{workflow_line}}{{instructions_line}}{{checklist_line}}Story file: _bmad-output/implementation-artifacts/{{story_prefix}}-*.md +Auto-apply all discovered gaps in tests. diff --git a/payload/.claude/skills/bmad-story-automator/data/prompts/create.md b/payload/.claude/skills/bmad-story-automator/data/prompts/create.md new file mode 100644 index 0000000..cf9b745 --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/prompts/create.md @@ -0,0 +1,7 @@ +Execute the BMAD create-story workflow for story {{story_id}}. + +{{skill_line}}{{workflow_line}}{{instructions_line}}{{template_line}}{{checklist_line}}Create story file at: _bmad-output/implementation-artifacts/{{story_prefix}}-*.md + +Story ID: {{story_id}} + +#YOLO - Do NOT wait for user input. diff --git a/payload/.claude/skills/bmad-story-automator/data/prompts/dev.md b/payload/.claude/skills/bmad-story-automator/data/prompts/dev.md new file mode 100644 index 0000000..a9eaa27 --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/prompts/dev.md @@ -0,0 +1,4 @@ +Execute the BMAD dev-story workflow for story {{story_id}}. + +{{skill_line}}{{workflow_line}}{{instructions_line}}{{checklist_line}}Story file: _bmad-output/implementation-artifacts/{{story_prefix}}-*.md +Implement all tasks marked [ ]. 
Run tests. Update checkboxes. diff --git a/payload/.claude/skills/bmad-story-automator/data/prompts/retro.md b/payload/.claude/skills/bmad-story-automator/data/prompts/retro.md new file mode 100644 index 0000000..82724af --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/prompts/retro.md @@ -0,0 +1,33 @@ +Execute the BMAD retrospective workflow for epic {{story_id}}. + +{{skill_line}}{{workflow_line}}{{instructions_line}}Run the retrospective in #YOLO mode. +Assume the user will NOT provide any input to the retrospective directly. +For ALL prompts that expect user input, make reasonable autonomous decisions based on: +- Sprint status data +- Story files and their dev notes +- Previous retrospective if available +- Architecture and PRD documents + +Key behaviors: +- When asked to confirm epic number: auto-confirm based on sprint-status +- When asked for observations: synthesize from story analysis +- When asked for decisions: make data-driven choices +- When presented menus: select the most appropriate option based on context +- Skip all "WAIT for user" instructions - continue autonomously + +After the retrospective has run and created documents, you MUST: +1. Create a list of documentation that may need updates based on implementation learnings +2. For each doc in the list, verify whether updates are actually needed by: + - Reading the current doc content + - Comparing against actual implementation code + - Checking for discrepancies between doc and code +3. Update docs that have verified discrepancies +4. Discard proposed updates where code matches docs + +Focus on these doc types: +- Architecture decisions that changed during implementation +- API documentation that diverged from specs +- README files with outdated instructions +- Configuration documentation + +EVERYTHING SHOULD BE AUTOMATED. THIS IS NOT A SESSION WHERE YOU SHOULD BE EXPECTING USER INPUT. 
diff --git a/payload/.claude/skills/bmad-story-automator/data/prompts/review.md b/payload/.claude/skills/bmad-story-automator/data/prompts/review.md new file mode 100644 index 0000000..960d18f --- /dev/null +++ b/payload/.claude/skills/bmad-story-automator/data/prompts/review.md @@ -0,0 +1,4 @@ +Execute the story-automator review workflow for story {{story_id}}. + +{{skill_line}}{{workflow_line}}{{instructions_line}}{{checklist_line}}Story file: _bmad-output/implementation-artifacts/{{story_prefix}}-*.md +Review implementation, find issues, fix them automatically. {{extra_instruction}} diff --git a/payload/.claude/skills/bmad-story-automator/templates/state-document.md b/payload/.claude/skills/bmad-story-automator/templates/state-document.md index 80f17ea..50657d8 100644 --- a/payload/.claude/skills/bmad-story-automator/templates/state-document.md +++ b/payload/.claude/skills/bmad-story-automator/templates/state-document.md @@ -18,6 +18,10 @@ overrides: customInstructions: "" # User-provided instructions for orchestration agentsFile: "" # Deterministic per-story agent selections complexityFile: "" # Persisted story complexity data +policyVersion: 0 +policySnapshotFile: "" +policySnapshotHash: "" +legacyPolicy: false # Agent Configuration (v3.0.0) agentConfig: diff --git a/payload/.claude/skills/bmad-story-automator/workflow.md b/payload/.claude/skills/bmad-story-automator/workflow.md index baa9b2d..6d430ec 100644 --- a/payload/.claude/skills/bmad-story-automator/workflow.md +++ b/payload/.claude/skills/bmad-story-automator/workflow.md @@ -20,6 +20,8 @@ outputFolder: '{output_folder}/story-automator' **Meta-Context:** This orchestrator spawns and monitors other workflows (create-story, dev-story, automate, code-review, retrospective) in isolated T-Mux sessions. It tracks state for full resumability and escalates to the user only when autonomous decisions cannot be made. +**Runtime Policy:** Machine settings live in `data/orchestration-policy.json`. 
Prompt contracts, parse contracts, retry budgets, and verifier selection should follow the pinned policy snapshot written at orchestration start. + --- ## MULTI-EPIC SUPPORT diff --git a/scripts/smoke-test.sh b/scripts/smoke-test.sh index 9af6456..0e0f0fb 100755 --- a/scripts/smoke-test.sh +++ b/scripts/smoke-test.sh @@ -207,10 +207,16 @@ verify_common_install() { assert_file "$story_dir/workflow.md" assert_file "$story_dir/scripts/story-automator" assert_file "$story_dir/src/story_automator/cli.py" + assert_file "$story_dir/data/orchestration-policy.json" + assert_file "$story_dir/data/prompts/create.md" + assert_file "$story_dir/data/prompts/review.md" + assert_file "$story_dir/data/parse/create.json" + assert_file "$story_dir/data/parse/review.json" assert_file "$story_dir/pyproject.toml" assert_file "$story_dir/README.md" assert_file "$review_dir/SKILL.md" assert_file "$review_dir/instructions.xml" + assert_file "$review_dir/contract.json" assert_contains "name: bmad-story-automator" "$story_dir/SKILL.md" assert_contains "Follow the instructions in ./workflow.md." 
"$story_dir/SKILL.md" diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 0639746..11d2bb1 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -6,6 +6,7 @@ from pathlib import Path from story_automator.core.frontmatter import extract_last_action, find_frontmatter_value, find_frontmatter_value_case, parse_frontmatter +from story_automator.core.runtime_policy import crash_max_retries, load_effective_policy, review_max_cycles from story_automator.core.review_verify import verify_code_review_completion from story_automator.core.sprint import sprint_status_epic, sprint_status_get from story_automator.core.story_keys import normalize_story_key, sprint_status_file @@ -244,6 +245,10 @@ def _state_summary(args: list[str]) -> int: "currentStep": find_frontmatter_value(args[0], "currentStep"), "status": find_frontmatter_value(args[0], "status"), "lastUpdated": find_frontmatter_value(args[0], "lastUpdated"), + "policyVersion": find_frontmatter_value(args[0], "policyVersion"), + "policySnapshotFile": find_frontmatter_value(args[0], "policySnapshotFile"), + "policySnapshotHash": find_frontmatter_value(args[0], "policySnapshotHash"), + "legacyPolicy": find_frontmatter_value(args[0], "legacyPolicy"), "lastAction": extract_last_action(args[0]), } ) @@ -278,9 +283,10 @@ def _state_update(args: list[str]) -> int: def _escalate(args: list[str]) -> int: trigger = args[0] if args else "" context = args[1] if len(args) > 1 else "" + policy = load_effective_policy(get_project_root()) if trigger == "review-loop": cycles = _parse_context_int(context, "cycles") - limit = int(os.environ.get("MAX_REVIEW_CYCLES", "5")) + limit = review_max_cycles(policy) if cycles >= limit: print_json({"escalate": True, "reason": f"Review loop exceeded max cycles ({cycles}/{limit})"}) else: @@ -288,7 +294,7 @@ def _escalate(args: list[str]) -> int: return 0 if 
trigger == "session-crash": retries = _parse_context_int(context, "retries") - limit = int(os.environ.get("MAX_CRASH_RETRIES", "2")) + limit = crash_max_retries(policy) if retries >= limit: print_json({"escalate": True, "reason": f"Session crashed after {retries} retries"}) else: diff --git a/source/src/story_automator/commands/orchestrator_parse.py b/source/src/story_automator/commands/orchestrator_parse.py index 72f89e9..6a95226 100644 --- a/source/src/story_automator/commands/orchestrator_parse.py +++ b/source/src/story_automator/commands/orchestrator_parse.py @@ -2,6 +2,7 @@ import json +from story_automator.core.runtime_policy import load_effective_policy, step_contract from story_automator.core.utils import COMMAND_TIMEOUT_EXIT, extract_json_line, print_json, read_text, run_cmd, trim_lines @@ -22,7 +23,13 @@ def parse_output_action(args: list[str]) -> int: print('{"status":"error","reason":"output file not found or empty"}') return 1 lines = trim_lines(content)[:150] - prompt = _build_parse_prompt(step, "\n".join(lines)) + try: + contract = step_contract(load_effective_policy(), step) + parse_contract = _load_parse_contract(contract) + except (FileNotFoundError, json.JSONDecodeError, ValueError): + print_json({"status": "error", "reason": "parse_contract_invalid"}) + return 1 + prompt = _build_parse_prompt(contract, parse_contract, "\n".join(lines)) result = run_cmd( "claude", "-p", @@ -41,28 +48,36 @@ def parse_output_action(args: list[str]) -> int: print_json({"status": "error", "reason": "sub-agent returned invalid json"}) return 1 try: - json.loads(json_line) + payload = json.loads(json_line) except json.JSONDecodeError: print_json({"status": "error", "reason": "sub-agent returned invalid json"}) return 1 - print(json_line) + if not _has_required_keys(payload, parse_contract.get("requiredKeys") or []): + print_json({"status": "error", "reason": "sub-agent returned invalid json"}) + return 1 + print(json.dumps(payload, separators=(",", ":"))) return 0 -def 
_build_parse_prompt(step: str, content: str) -> str: - if step == "create": - schema = '{"status":"SUCCESS|FAILURE|AMBIGUOUS","story_created":true/false,"story_file":"path or null","summary":"brief description","next_action":"proceed|retry|escalate"}' - label = "create-story" - elif step == "dev": - schema = '{"status":"SUCCESS|FAILURE|AMBIGUOUS","tests_passed":true/false,"build_passed":true/false,"summary":"brief description","next_action":"proceed|retry|escalate"}' - label = "dev-story" - elif step == "auto": - schema = '{"status":"SUCCESS|FAILURE|AMBIGUOUS","tests_added":N,"coverage_improved":true/false,"summary":"brief description","next_action":"proceed|retry|escalate"}' - label = "automate-tests" - elif step == "review": - schema = '{"status":"SUCCESS|FAILURE|AMBIGUOUS","issues_found":{"critical":N,"high":N,"medium":N,"low":N},"all_fixed":true/false,"summary":"brief description","next_action":"proceed|retry|escalate"}' - label = "code-review" - else: - schema = '{"status":"SUCCESS|FAILURE|AMBIGUOUS","summary":"brief description","next_action":"proceed|retry|escalate"}' - label = "session" +def _load_parse_contract(contract: dict[str, object]) -> dict[str, object]: + parse = contract.get("parse") or {} + payload = json.loads(read_text(str(parse.get("schemaPath") or ""))) + if not isinstance(payload, dict): + raise ValueError("invalid parse schema") + if not isinstance(payload.get("requiredKeys"), list): + raise ValueError("invalid parse schema") + if not isinstance(payload.get("schema"), dict): + raise ValueError("invalid parse schema") + return payload + + +def _build_parse_prompt(contract: dict[str, object], parse_contract: dict[str, object], content: str) -> str: + label = str(contract.get("label") or "session") + schema = json.dumps(parse_contract.get("schema") or {}, separators=(",", ":")) return f"Analyze this {label} session output. 
Return JSON only:\n{schema}\n\nSession output:\n---\n{content}\n---" + + +def _has_required_keys(payload: object, required_keys: list[object]) -> bool: + if not isinstance(payload, dict): + return False + return all(isinstance(key, str) and key in payload for key in required_keys) diff --git a/source/src/story_automator/commands/state.py b/source/src/story_automator/commands/state.py index feb17c9..ede9786 100644 --- a/source/src/story_automator/commands/state.py +++ b/source/src/story_automator/commands/state.py @@ -6,7 +6,8 @@ from typing import Any from ..core.frontmatter import extract_frontmatter, parse_simple_frontmatter -from ..core.utils import count_matches, ensure_dir, file_exists, now_utc, now_utc_z, read_text, write_json +from ..core.runtime_policy import PolicyError, load_policy_for_state, snapshot_effective_policy +from ..core.utils import count_matches, ensure_dir, file_exists, get_project_root, now_utc, now_utc_z, read_text, write_json def cmd_build_state_doc(args: list[str]) -> int: @@ -42,6 +43,7 @@ def cmd_build_state_doc(args: list[str]) -> int: epic = str(config.get("epic") or "epic") safe_epic = re.sub(r"[^a-zA-Z0-9]+", "-", epic).strip("-") or "epic" output_path = Path(output_folder) / f"orchestration-{safe_epic}-{stamp}.md" + snapshot = snapshot_effective_policy(get_project_root()) text = read_text(template) replacements: dict[str, Any] = { "epic": config.get("epic", ""), @@ -56,6 +58,10 @@ def cmd_build_state_doc(args: list[str]) -> int: "aiCommand": config.get("aiCommand", ""), "agentsFile": config.get("agentsFile", ""), "complexityFile": config.get("complexityFile", ""), + "policyVersion": snapshot["policyVersion"], + "policySnapshotFile": snapshot["policySnapshotFile"], + "policySnapshotHash": snapshot["policySnapshotHash"], + "legacyPolicy": False, } overrides = config.get("overrides", {}) if isinstance(config.get("overrides"), dict) else {} text = re.sub( @@ -228,5 +234,9 @@ def required(key: str, validator: Any = None) -> None: 
required("status", lambda value: isinstance(value, str) and value in allowed) required("lastUpdated", lambda value: isinstance(value, str) and re.search(r"\d{4}-\d{2}-\d{2}T", value)) required("aiCommand") + try: + load_policy_for_state(state) + except PolicyError as exc: + issues.append(str(exc)) write_json({"ok": True, "structure": "issues" if issues else "ok", "issues": issues}) return 0 diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py index 0daa637..00e24c8 100644 --- a/source/src/story_automator/commands/tmux.py +++ b/source/src/story_automator/commands/tmux.py @@ -6,6 +6,7 @@ import time from pathlib import Path +from story_automator.core.runtime_policy import load_effective_policy, step_contract from story_automator.core.review_verify import verify_code_review_completion from story_automator.core.utils import ( atomic_write, @@ -21,13 +22,6 @@ read_text, run_cmd, ) -from story_automator.core.workflow_paths import ( - create_story_workflow_paths, - dev_story_workflow_paths, - retrospective_workflow_paths, - review_workflow_paths, - testarch_automate_workflow_paths, -) def cmd_tmux_wrapper(args: list[str]) -> int: @@ -191,12 +185,10 @@ def _build_cmd(args: list[str]) -> int: agent = agent or agent_type() story_prefix = story_id.replace(".", "-") root = get_project_root() - create_paths = create_story_workflow_paths(root) - dev_paths = dev_story_workflow_paths(root) - auto_paths = testarch_automate_workflow_paths(root) - review_paths = review_workflow_paths(root) - retro_paths = retrospective_workflow_paths(root) - auto_label = _automate_workflow_label(auto_paths.workflow) + if step not in {"create", "dev", "auto", "review", "retro"}: + print(f"Unknown step type: {step}", file=__import__("sys").stderr) + return 1 + policy = load_effective_policy(root) ai_command = os.environ.get("AI_COMMAND") if ai_command and not os.environ.get("AI_AGENT"): cli = ai_command @@ -204,92 +196,7 @@ def _build_cmd(args: 
list[str]) -> int: cli = agent_cli(agent) else: cli = "codex exec" - if step not in {"create", "dev", "auto", "review", "retro"}: - print(f"Unknown step type: {step}", file=__import__("sys").stderr) - return 1 - create_extra = "" - if create_paths.instructions: - create_extra += f"Then read: {create_paths.instructions}\n" - if create_paths.template: - create_extra += f"Use template: {create_paths.template}\n" - if create_paths.checklist: - create_extra += f"Validate with: {create_paths.checklist}\n" - - dev_extra = "" - if dev_paths.instructions: - dev_extra += f"Then read: {dev_paths.instructions}\n" - if dev_paths.checklist: - dev_extra += f"Validate with: {dev_paths.checklist}\n" - - auto_extra = "" - if auto_paths.skill: - auto_extra += f"READ this skill first: {auto_paths.skill}\n" - if auto_paths.workflow: - auto_extra += f"READ this workflow file next: {auto_paths.workflow}\n" - if auto_paths.instructions: - auto_extra += f"Then read: {auto_paths.instructions}\n" - if auto_paths.checklist: - auto_extra += f"Validate with: {auto_paths.checklist}\n" - - review_extra = "" - if review_paths.instructions: - review_extra += f"Then read: {review_paths.instructions}\n" - if review_paths.checklist: - review_extra += f"Validate with: {review_paths.checklist}\n" - - retro_extra = "" - if retro_paths.instructions: - retro_extra += f"Then read: {retro_paths.instructions}\n" - - prompt = { - "create": ( - ( - f"Execute the BMAD create-story workflow for story {story_id}.\n\n" - f"READ this skill first: {create_paths.skill}\n" - f"READ this workflow file next: {create_paths.workflow}\n" - ) - + create_extra - + ( - f"Create story file at: _bmad-output/implementation-artifacts/{story_prefix}-*.md\n\n" - f"Story ID: {story_id}\n\n#YOLO - Do NOT wait for user input." 
- ) - ), - "dev": ( - ( - f"Execute the BMAD dev-story workflow for story {story_id}.\n\n" - f"READ this skill first: {dev_paths.skill}\n" - f"READ this workflow file next: {dev_paths.workflow}\n" - ) - + dev_extra - + ( - f"Story file: _bmad-output/implementation-artifacts/{story_prefix}-*.md\n" - "Implement all tasks marked [ ]. Run tests. Update checkboxes." - ) - ), - "auto": ( - ( - f"Execute the BMAD {auto_label} workflow for story {story_id}.\n\n" - ) - + auto_extra - + ( - f"Story file: _bmad-output/implementation-artifacts/{story_prefix}-*.md\n" - "Auto-apply all discovered gaps in tests." - ) - ), - "review": ( - ( - f"Execute the story-automator review workflow for story {story_id}.\n\n" - f"READ this skill first: {review_paths.skill}\n" - f"READ this workflow file next: {review_paths.workflow}\n" - ) - + review_extra - + ( - f"Story file: _bmad-output/implementation-artifacts/{story_prefix}-*.md\n" - f"Review implementation, find issues, fix them automatically. {extra or 'auto-fix all issues without prompting'}" - ) - ), - "retro": _build_retro_prompt(story_id, retro_paths, retro_extra), - }[step] + prompt = _render_step_prompt(step_contract(policy, step), story_id, story_prefix, extra) escaped = prompt.replace("\\", "\\\\").replace('"', '\\"') if agent == "codex" and not ai_command: codex_home = f"/tmp/sa-codex-home-{project_hash(root)}" @@ -318,48 +225,28 @@ def skill_prefix(agent: str) -> str: return "none" if agent == "codex" else "bmad-" -def _build_retro_prompt(epic_number: str, retro_paths, retro_extra: str) -> str: - return ( - ( - f"Execute the BMAD retrospective workflow for epic {epic_number}.\n\n" - f"READ this skill first: {retro_paths.skill}\n" - f"READ this workflow file next: {retro_paths.workflow}\n" - ) - + retro_extra - + ( - "Run the retrospective in #YOLO mode.\n" - "Assume the user will NOT provide any input to the retrospective directly.\n" - "For ALL prompts that expect user input, make reasonable autonomous decisions based 
on:\n" - "- Sprint status data\n" - "- Story files and their dev notes\n" - "- Previous retrospective if available\n" - "- Architecture and PRD documents\n\n" - "Key behaviors:\n" - "- When asked to confirm epic number: auto-confirm based on sprint-status\n" - "- When asked for observations: synthesize from story analysis\n" - "- When asked for decisions: make data-driven choices\n" - "- When presented menus: select the most appropriate option based on context\n" - '- Skip all "WAIT for user" instructions - continue autonomously\n\n' - "After the retrospective has run and created documents, you MUST:\n" - "1. Create a list of documentation that may need updates based on implementation learnings\n" - "2. For each doc in the list, verify whether updates are actually needed by:\n" - " - Reading the current doc content\n" - " - Comparing against actual implementation code\n" - " - Checking for discrepancies between doc and code\n" - "3. Update docs that have verified discrepancies\n" - "4. Discard proposed updates where code matches docs\n\n" - "Focus on these doc types:\n" - "- Architecture decisions that changed during implementation\n" - "- API documentation that diverged from specs\n" - "- README files with outdated instructions\n" - "- Configuration documentation\n\n" - "EVERYTHING SHOULD BE AUTOMATED. THIS IS NOT A SESSION WHERE YOU SHOULD BE EXPECTING USER INPUT." 
- ) - ) +def _render_step_prompt(contract: dict[str, object], story_id: str, story_prefix: str, extra_instruction: str) -> str: + prompt_cfg = contract.get("prompt") or {} + assets = (contract.get("assets") or {}).get("files") or {} + template = read_text(str(prompt_cfg.get("templatePath") or "")) + replacements = { + "{{story_id}}": story_id, + "{{story_prefix}}": story_prefix, + "{{label}}": str(contract.get("label") or ""), + "{{skill_line}}": _prompt_line("READ this skill first", str(assets.get("skill") or "")), + "{{workflow_line}}": _prompt_line("READ this workflow file next", str(assets.get("workflow") or "")), + "{{instructions_line}}": _prompt_line("Then read", str(assets.get("instructions") or "")), + "{{checklist_line}}": _prompt_line("Validate with", str(assets.get("checklist") or "")), + "{{template_line}}": _prompt_line("Use template", str(assets.get("template") or "")), + "{{extra_instruction}}": extra_instruction.strip() or str(prompt_cfg.get("defaultExtraInstruction") or ""), + } + for key, value in replacements.items(): + template = template.replace(key, value) + return template -def _automate_workflow_label(workflow_path: str) -> str: - return "qa-generate-e2e-tests" if "qa-generate-e2e-tests" in workflow_path else "qa-generate-e2e-tests" +def _prompt_line(prefix: str, value: str) -> str: + return f"{prefix}: {value}\n" if value else "" def generate_session_name(step: str, epic: str, story_id: str, cycle: str = "") -> str: diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py new file mode 100644 index 0000000..ca472a0 --- /dev/null +++ b/source/src/story_automator/core/runtime_policy.py @@ -0,0 +1,265 @@ +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import Any + +from .frontmatter import parse_simple_frontmatter +from .utils import ensure_dir, get_project_root, iso_now, md5_hex8, read_text, write_atomic + +VALID_TOP_LEVEL_KEYS = 
{"version", "snapshot", "runtime", "workflow", "steps"} +VALID_STEP_NAMES = {"create", "dev", "auto", "review", "retro"} +VALID_VERIFIERS = {"create_story_artifact", "session_exit", "review_completion", "epic_complete"} +VALID_ASSET_NAMES = {"skill", "workflow", "instructions", "checklist", "template"} + + +class PolicyError(ValueError): + pass + + +def load_effective_policy(project_root: str | None = None) -> dict[str, Any]: + root = Path(project_root or get_project_root()).resolve() + bundle_root = bundled_skill_root(root) + bundled = _read_json(bundle_root / "data" / "orchestration-policy.json") + override_path = root / "_bmad" / "bmm" / "story-automator.policy.json" + override = _read_json(override_path) if override_path.is_file() else {} + policy = _deep_merge(bundled, override) + _apply_legacy_env(policy) + _validate_policy_shape(policy) + _resolve_policy_paths(policy, project_root=root, bundle_root=bundle_root) + return policy + + +def snapshot_effective_policy(project_root: str | None = None) -> dict[str, Any]: + root = Path(project_root or get_project_root()).resolve() + policy = load_effective_policy(str(root)) + snapshot_dir = root / _snapshot_relative_dir(policy) + ensure_dir(snapshot_dir) + stable_json = _stable_policy_json(policy) + snapshot_hash = md5_hex8(stable_json) + stamp = iso_now().replace("-", "").replace(":", "").replace("T", "-").replace("Z", "") + snapshot_path = snapshot_dir / f"{stamp}-{snapshot_hash}.json" + write_atomic(snapshot_path, stable_json) + return { + "policy": policy, + "policyVersion": policy.get("version", 1), + "policySnapshotHash": snapshot_hash, + "policySnapshotFile": _display_path(snapshot_path, root), + } + + +def load_policy_snapshot( + snapshot_file: str, + *, + project_root: str | None = None, + expected_hash: str = "", +) -> dict[str, Any]: + root = Path(project_root or get_project_root()).resolve() + path = Path(snapshot_file) + if not path.is_absolute(): + path = root / path + if not path.is_file(): + raise 
PolicyError(f"policy snapshot missing: {path}") + raw = read_text(path) + actual_hash = md5_hex8(raw) + if expected_hash and actual_hash != expected_hash: + raise PolicyError(f"policy snapshot hash mismatch: expected {expected_hash}, got {actual_hash}") + policy = json.loads(raw) + _validate_policy_shape(policy) + return policy + + +def load_policy_for_state(state_file: str | Path, project_root: str | None = None) -> dict[str, Any]: + root = Path(project_root or get_project_root()).resolve() + fields = parse_simple_frontmatter(read_text(state_file)) + snapshot_file = str(fields.get("policySnapshotFile") or "").strip() + snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() + if snapshot_file or snapshot_hash: + if not snapshot_file or not snapshot_hash: + raise PolicyError("state policy metadata incomplete") + return load_policy_snapshot(snapshot_file, project_root=str(root), expected_hash=snapshot_hash) + return load_effective_policy(str(root)) + + +def step_contract(policy: dict[str, Any], step: str) -> dict[str, Any]: + contract = (policy.get("steps") or {}).get(step) + if not isinstance(contract, dict): + raise PolicyError(f"unknown step: {step}") + return contract + + +def review_max_cycles(policy: dict[str, Any]) -> int: + repeat = ((policy.get("workflow") or {}).get("repeat") or {}).get("review") or {} + return int(repeat.get("maxCycles", 5)) + + +def crash_max_retries(policy: dict[str, Any]) -> int: + crash = ((policy.get("workflow") or {}).get("crash")) or {} + return int(crash.get("maxRetries", 2)) + + +def bundled_skill_root(project_root: str | Path | None = None) -> Path: + root = Path(project_root or get_project_root()).resolve() + installed = root / ".claude" / "skills" / "bmad-story-automator" + if (installed / "data" / "orchestration-policy.json").is_file(): + return installed + for parent in Path(__file__).resolve().parents: + candidate = parent / "payload" / ".claude" / "skills" / "bmad-story-automator" + if (candidate / "data" / 
"orchestration-policy.json").is_file(): + return candidate + raise PolicyError("bundled policy not found") + + +def _read_json(path: str | Path) -> dict[str, Any]: + payload = json.loads(read_text(path)) + if not isinstance(payload, dict): + raise PolicyError(f"policy json must be an object: {path}") + return payload + + +def _deep_merge(base: Any, override: Any) -> Any: + if isinstance(base, dict) and isinstance(override, dict): + merged = dict(base) + for key, value in override.items(): + merged[key] = _deep_merge(merged[key], value) if key in merged else value + return merged + if isinstance(override, list): + return list(override) + return override + + +def _apply_legacy_env(policy: dict[str, Any]) -> None: + review_cycles = os.environ.get("MAX_REVIEW_CYCLES") + crash_retries = os.environ.get("MAX_CRASH_RETRIES") + if review_cycles: + policy.setdefault("workflow", {}).setdefault("repeat", {}).setdefault("review", {})["maxCycles"] = int(review_cycles) + if crash_retries: + policy.setdefault("workflow", {}).setdefault("crash", {})["maxRetries"] = int(crash_retries) + + +def _validate_policy_shape(policy: dict[str, Any]) -> None: + unknown_keys = sorted(set(policy) - VALID_TOP_LEVEL_KEYS) + if unknown_keys: + raise PolicyError(f"unknown top-level policy keys: {', '.join(unknown_keys)}") + steps = policy.get("steps") + if not isinstance(steps, dict): + raise PolicyError("steps must be an object") + unknown_steps = sorted(set(steps) - VALID_STEP_NAMES) + if unknown_steps: + raise PolicyError(f"unknown step names: {', '.join(unknown_steps)}") + sequence = ((policy.get("workflow") or {}).get("sequence")) or [] + if not isinstance(sequence, list) or not all(isinstance(item, str) for item in sequence): + raise PolicyError("workflow.sequence must be a string array") + for step in sequence: + if step not in steps: + raise PolicyError(f"workflow.sequence references missing step: {step}") + for name, contract in steps.items(): + if not isinstance(contract, dict): + raise 
PolicyError(f"step contract must be an object: {name}") + verifier = str(((contract.get("success") or {}).get("verifier")) or "") + if verifier not in VALID_VERIFIERS: + raise PolicyError(f"invalid verifier for {name}: {verifier}") + required = ((contract.get("assets") or {}).get("required")) or [] + if not isinstance(required, list) or any(item not in VALID_ASSET_NAMES for item in required): + raise PolicyError(f"invalid required assets for {name}") + + +def _resolve_policy_paths(policy: dict[str, Any], *, project_root: Path, bundle_root: Path) -> None: + for name, contract in (policy.get("steps") or {}).items(): + assets = contract.setdefault("assets", {}) + assets["files"] = _resolve_step_assets(name, assets, project_root) + prompt = contract.setdefault("prompt", {}) + template_file = str(prompt.get("templateFile") or "").strip() + if not template_file: + raise PolicyError(f"missing prompt template for {name}") + prompt["templatePath"] = _resolve_data_path(template_file, project_root=project_root, bundle_root=bundle_root) + parse = contract.setdefault("parse", {}) + schema_file = str(parse.get("schemaFile") or "").strip() + if not schema_file: + raise PolicyError(f"missing parse schema for {name}") + parse["schemaPath"] = _resolve_data_path(schema_file, project_root=project_root, bundle_root=bundle_root) + success = contract.setdefault("success", {}) + contract_file = str(success.get("contractFile") or "").strip() + if contract_file: + success["contractPath"] = _resolve_data_path(contract_file, project_root=project_root, bundle_root=bundle_root) + + +def _resolve_step_assets(step: str, assets: dict[str, Any], project_root: Path) -> dict[str, str]: + skill_name = str(assets.get("skillName") or "").strip() + if not skill_name: + raise PolicyError(f"missing skillName for {step}") + skill_dir = project_root / ".claude" / "skills" / skill_name + required = set(assets.get("required") or []) + files = { + "skill": _resolve_required_file(skill_dir / "SKILL.md", 
project_root, required, "skill", step), + "workflow": _resolve_candidate_file(skill_dir, assets.get("workflowCandidates"), project_root, required, "workflow", step), + "instructions": _resolve_candidate_file(skill_dir, assets.get("instructionsCandidates"), project_root, required, "instructions", step), + "checklist": _resolve_candidate_file(skill_dir, assets.get("checklistCandidates"), project_root, required, "checklist", step), + "template": _resolve_candidate_file(skill_dir, assets.get("templateCandidates"), project_root, required, "template", step), + } + if ("skill" not in required and "workflow" not in required) and bool(files["skill"]) != bool(files["workflow"]): + files["skill"] = "" + files["workflow"] = "" + return files + + +def _resolve_required_file(path: Path, project_root: Path, required: set[str], asset: str, step: str) -> str: + if path.is_file(): + return _display_path(path, project_root) + if asset in required: + raise PolicyError(f"missing required {asset} asset for {step}: {path}") + return "" + + +def _resolve_candidate_file( + skill_dir: Path, + candidates: Any, + project_root: Path, + required: set[str], + asset: str, + step: str, +) -> str: + if not isinstance(candidates, list): + candidates = [] + for name in candidates: + if not isinstance(name, str) or not name: + continue + path = skill_dir / name + if path.is_file(): + return _display_path(path, project_root) + if asset in required: + searched = ", ".join(str(skill_dir / str(name)) for name in candidates if isinstance(name, str) and name) + raise PolicyError(f"missing required {asset} asset for {step}: {searched}") + return "" + + +def _resolve_data_path(path_value: str, *, project_root: Path, bundle_root: Path) -> str: + raw = Path(path_value) + if raw.is_absolute(): + if not raw.is_file(): + raise PolicyError(f"policy data file missing: {raw}") + return str(raw) + for base in (bundle_root, project_root): + candidate = (base / raw).resolve() + if candidate.is_file(): + return 
str(candidate) + raise PolicyError(f"policy data file missing: {path_value}") + + +def _snapshot_relative_dir(policy: dict[str, Any]) -> str: + relative_dir = str((policy.get("snapshot") or {}).get("relativeDir") or "").strip() + if not relative_dir: + raise PolicyError("snapshot.relativeDir missing") + return relative_dir + + +def _stable_policy_json(policy: dict[str, Any]) -> str: + return json.dumps(policy, indent=2, sort_keys=True) + "\n" + + +def _display_path(path: Path, project_root: Path) -> str: + try: + return str(path.resolve().relative_to(project_root.resolve())) + except ValueError: + return str(path.resolve()) diff --git a/source/src/story_automator/core/workflow_paths.py b/source/src/story_automator/core/workflow_paths.py index 13bb279..3fa47c6 100644 --- a/source/src/story_automator/core/workflow_paths.py +++ b/source/src/story_automator/core/workflow_paths.py @@ -1,9 +1,8 @@ from __future__ import annotations from dataclasses import dataclass -from pathlib import Path -from story_automator.core.utils import get_project_root +from story_automator.core.runtime_policy import load_effective_policy, step_contract @dataclass(frozen=True) @@ -15,123 +14,32 @@ class WorkflowPaths: template: str = "" -def _first_existing_relative_path(*candidates: str, project_root: str | None = None) -> str: - root = Path(project_root or get_project_root()) - for rel in candidates: - if rel and (root / rel).exists(): - return rel - for rel in candidates: - if rel: - return rel - return "" - - -def _existing_relative_path_or_empty(*candidates: str, project_root: str | None = None) -> str: - root = Path(project_root or get_project_root()) - for rel in candidates: - if rel and (root / rel).exists(): - return rel - return "" - - -def _skill_file(skill_name: str) -> str: - return f".claude/skills/{skill_name}/SKILL.md" - - -def _workflow_file(skill_name: str, *names: str, project_root: str | None = None) -> str: - return _first_existing_relative_path( - 
*(f".claude/skills/{skill_name}/{name}" for name in names), - project_root=project_root, - ) - - -def _optional_file(skill_name: str, *names: str, project_root: str | None = None) -> str: - return _existing_relative_path_or_empty( - *(f".claude/skills/{skill_name}/{name}" for name in names), - project_root=project_root, - ) - - -def _paired_optional_workflow_paths( - skill_name: str, - *, - workflow_names: tuple[str, ...], - checklist_names: tuple[str, ...] = (), - project_root: str | None = None, -) -> WorkflowPaths: - skill = _existing_relative_path_or_empty(_skill_file(skill_name), project_root=project_root) - workflow = _existing_relative_path_or_empty( - *(f".claude/skills/{skill_name}/{name}" for name in workflow_names), - project_root=project_root, - ) - if not skill or not workflow: - return WorkflowPaths() +def _paths_for_step(step: str, project_root: str | None = None) -> WorkflowPaths: + files = (step_contract(load_effective_policy(project_root), step).get("assets") or {}).get("files") or {} return WorkflowPaths( - skill=skill, - workflow=workflow, - checklist=_existing_relative_path_or_empty( - *(f".claude/skills/{skill_name}/{name}" for name in checklist_names), - project_root=project_root, - ), + skill=str(files.get("skill") or ""), + workflow=str(files.get("workflow") or ""), + instructions=str(files.get("instructions") or ""), + checklist=str(files.get("checklist") or ""), + template=str(files.get("template") or ""), ) def create_story_workflow_paths(project_root: str | None = None) -> WorkflowPaths: - return WorkflowPaths( - skill=_first_existing_relative_path(_skill_file("bmad-create-story"), project_root=project_root), - workflow=_workflow_file("bmad-create-story", "workflow.md", "workflow.yaml", project_root=project_root), - instructions=_optional_file("bmad-create-story", "discover-inputs.md", project_root=project_root), - checklist=_optional_file("bmad-create-story", "checklist.md", project_root=project_root), - 
template=_optional_file("bmad-create-story", "template.md", project_root=project_root), - ) + return _paths_for_step("create", project_root) def dev_story_workflow_paths(project_root: str | None = None) -> WorkflowPaths: - return WorkflowPaths( - skill=_first_existing_relative_path(_skill_file("bmad-dev-story"), project_root=project_root), - workflow=_workflow_file("bmad-dev-story", "workflow.md", "workflow.yaml", project_root=project_root), - instructions="", - checklist=_optional_file("bmad-dev-story", "checklist.md", project_root=project_root), - ) + return _paths_for_step("dev", project_root) def retrospective_workflow_paths(project_root: str | None = None) -> WorkflowPaths: - return WorkflowPaths( - skill=_first_existing_relative_path(_skill_file("bmad-retrospective"), project_root=project_root), - workflow=_workflow_file("bmad-retrospective", "workflow.md", "workflow.yaml", project_root=project_root), - instructions="", - ) + return _paths_for_step("retro", project_root) def review_workflow_paths(project_root: str | None = None) -> WorkflowPaths: - return WorkflowPaths( - skill=_first_existing_relative_path( - _skill_file("bmad-story-automator-review"), - project_root=project_root, - ), - workflow=_workflow_file( - "bmad-story-automator-review", - "workflow.yaml", - "workflow.md", - project_root=project_root, - ), - instructions=_optional_file( - "bmad-story-automator-review", - "instructions.xml", - project_root=project_root, - ), - checklist=_optional_file( - "bmad-story-automator-review", - "checklist.md", - project_root=project_root, - ), - ) + return _paths_for_step("review", project_root) def testarch_automate_workflow_paths(project_root: str | None = None) -> WorkflowPaths: - return _paired_optional_workflow_paths( - "bmad-qa-generate-e2e-tests", - workflow_names=("workflow.md", "workflow.yaml"), - checklist_names=("checklist.md",), - project_root=project_root, - ) + return _paths_for_step("auto", project_root) diff --git 
a/source/tests/test_orchestrator_parse.py b/source/tests/test_orchestrator_parse.py new file mode 100644 index 0000000..0d5c1e3 --- /dev/null +++ b/source/tests/test_orchestrator_parse.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +import io +import json +import shutil +import tempfile +import unittest +from contextlib import redirect_stdout +from pathlib import Path +from unittest.mock import patch + +from story_automator.commands.orchestrator_parse import parse_output_action +from story_automator.core.utils import CommandResult + + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +class OrchestratorParseTests(unittest.TestCase): + def setUp(self) -> None: + self.tmp = tempfile.TemporaryDirectory() + self.project_root = Path(self.tmp.name) + self._install_bundle() + self._install_required_skills() + self.output_file = self.project_root / "session.txt" + self.output_file.write_text("session output\n", encoding="utf-8") + + def tearDown(self) -> None: + self.tmp.cleanup() + + def test_parse_schema_loads_from_step_contract(self) -> None: + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), patch( + "story_automator.commands.orchestrator_parse.run_cmd", + return_value=CommandResult('{"status":"SUCCESS","story_created":true,"story_file":"x","summary":"ok","next_action":"proceed"}', 0), + ), redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "create"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["story_created"]) + + def test_invalid_schema_file_rejected(self) -> None: + override_dir = self.project_root / "_bmad" / "bmm" + override_dir.mkdir(parents=True) + (override_dir / "story-automator.policy.json").write_text( + json.dumps({"steps": {"create": {"parse": {"schemaFile": "missing.json"}}}}), + encoding="utf-8", + ) + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), 
redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "create"]) + self.assertEqual(code, 1) + self.assertEqual(json.loads(stdout.getvalue())["reason"], "parse_contract_invalid") + + def test_invalid_child_json_rejected(self) -> None: + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), patch( + "story_automator.commands.orchestrator_parse.run_cmd", + return_value=CommandResult("not json", 0), + ), redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "create"]) + self.assertEqual(code, 1) + self.assertEqual(json.loads(stdout.getvalue())["reason"], "sub-agent returned invalid json") + + def test_output_shape_remains_compatible(self) -> None: + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), patch( + "story_automator.commands.orchestrator_parse.run_cmd", + return_value=CommandResult('{"status":"SUCCESS","issues_found":{"critical":0,"high":0,"medium":1,"low":0},"all_fixed":true,"summary":"ok","next_action":"proceed"}', 0), + ), redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "review"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertIn("issues_found", payload) + self.assertIn("all_fixed", payload) + + def _install_bundle(self) -> None: + source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" + source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" + target_root = self.project_root / ".claude" / "skills" + target_root.mkdir(parents=True, exist_ok=True) + shutil.copytree(source_skill, target_root / "bmad-story-automator") + shutil.copytree(source_review, target_root / "bmad-story-automator-review") + + def _install_required_skills(self) -> None: + for name in ("bmad-create-story", "bmad-dev-story", "bmad-retrospective", "bmad-qa-generate-e2e-tests"): + skill_dir = self.project_root / ".claude" / 
"skills" / name + skill_dir.mkdir(parents=True, exist_ok=True) + (skill_dir / "SKILL.md").write_text(f"# {name}\n", encoding="utf-8") + (skill_dir / "workflow.md").write_text(f"# {name}\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-create-story" / "discover-inputs.md").write_text("# discover\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-create-story" / "checklist.md").write_text("# checklist\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-create-story" / "template.md").write_text("# template\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-dev-story" / "checklist.md").write_text("# checklist\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-qa-generate-e2e-tests" / "checklist.md").write_text("# checklist\n", encoding="utf-8") + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/test_runtime_policy.py b/source/tests/test_runtime_policy.py new file mode 100644 index 0000000..8b64f3b --- /dev/null +++ b/source/tests/test_runtime_policy.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +import json +import shutil +import tempfile +import unittest +from pathlib import Path + +from story_automator.core.runtime_policy import PolicyError, load_effective_policy, snapshot_effective_policy + + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +class RuntimePolicyTests(unittest.TestCase): + def setUp(self) -> None: + self.tmp = tempfile.TemporaryDirectory() + self.project_root = Path(self.tmp.name) + self._install_bundle() + self._install_required_skills() + + def tearDown(self) -> None: + self.tmp.cleanup() + + def test_bundled_default_loads(self) -> None: + policy = load_effective_policy(str(self.project_root)) + self.assertEqual(policy["version"], 1) + self.assertEqual(policy["steps"]["review"]["success"]["verifier"], "review_completion") + + def test_project_override_deep_merges_and_arrays_replace(self) -> 
None: + self._write_override( + { + "workflow": {"sequence": ["create", "review"]}, + "steps": {"review": {"prompt": {"defaultExtraInstruction": "fix critical issues only"}}}, + } + ) + policy = load_effective_policy(str(self.project_root)) + self.assertEqual(policy["workflow"]["sequence"], ["create", "review"]) + self.assertEqual(policy["steps"]["review"]["prompt"]["defaultExtraInstruction"], "fix critical issues only") + + def test_invalid_step_name_rejected(self) -> None: + self._write_override({"steps": {"ship": {"success": {"verifier": "session_exit"}}}}) + with self.assertRaises(PolicyError): + load_effective_policy(str(self.project_root)) + + def test_invalid_verifier_name_rejected(self) -> None: + self._write_override({"steps": {"review": {"success": {"verifier": "nope"}}}}) + with self.assertRaises(PolicyError): + load_effective_policy(str(self.project_root)) + + def test_required_asset_missing_fails(self) -> None: + shutil.rmtree(self.project_root / ".claude" / "skills" / "bmad-create-story") + with self.assertRaises(PolicyError): + load_effective_policy(str(self.project_root)) + + def test_snapshot_hash_stable(self) -> None: + first = snapshot_effective_policy(str(self.project_root)) + second = snapshot_effective_policy(str(self.project_root)) + self.assertEqual(first["policySnapshotHash"], second["policySnapshotHash"]) + + def _install_bundle(self) -> None: + source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" + source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" + target_root = self.project_root / ".claude" / "skills" + target_root.mkdir(parents=True, exist_ok=True) + shutil.copytree(source_skill, target_root / "bmad-story-automator") + shutil.copytree(source_review, target_root / "bmad-story-automator-review") + + def _install_required_skills(self) -> None: + self._make_skill( + "bmad-create-story", + extras={"discover-inputs.md": "# discover\n", "checklist.md": "# checklist\n", 
"template.md": "# template\n"}, + ) + self._make_skill("bmad-dev-story", extras={"checklist.md": "# checklist\n"}) + self._make_skill("bmad-retrospective") + self._make_skill("bmad-qa-generate-e2e-tests", extras={"checklist.md": "# checklist\n"}) + + def _make_skill(self, name: str, *, extras: dict[str, str] | None = None) -> None: + skill_dir = self.project_root / ".claude" / "skills" / name + skill_dir.mkdir(parents=True, exist_ok=True) + (skill_dir / "SKILL.md").write_text(f"# {name}\n", encoding="utf-8") + (skill_dir / "workflow.md").write_text(f"# {name}\n", encoding="utf-8") + for rel, content in (extras or {}).items(): + (skill_dir / rel).write_text(content, encoding="utf-8") + + def _write_override(self, payload: dict[str, object]) -> None: + override_dir = self.project_root / "_bmad" / "bmm" + override_dir.mkdir(parents=True, exist_ok=True) + (override_dir / "story-automator.policy.json").write_text(json.dumps(payload), encoding="utf-8") + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py new file mode 100644 index 0000000..5f4528b --- /dev/null +++ b/source/tests/test_state_policy_metadata.py @@ -0,0 +1,138 @@ +from __future__ import annotations + +import io +import json +import shutil +import tempfile +import unittest +from contextlib import redirect_stdout +from pathlib import Path + +from story_automator.commands.orchestrator import cmd_orchestrator_helper +from story_automator.commands.state import cmd_build_state_doc, cmd_validate_state + + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +class StatePolicyMetadataTests(unittest.TestCase): + def setUp(self) -> None: + self.tmp = tempfile.TemporaryDirectory() + self.project_root = Path(self.tmp.name) + self.output_dir = self.project_root / "_bmad-output" / "story-automator" + self._install_bundle() + self._install_required_skills() + + def tearDown(self) -> None: + self.tmp.cleanup() + + def 
test_state_doc_writes_policy_metadata(self) -> None: + stdout = io.StringIO() + template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_build_state_doc( + [ + "--template", + str(template), + "--output-folder", + str(self.output_dir), + "--config-json", + json.dumps(self._config()), + ] + ) + self.assertEqual(code, 0) + state_file = Path(json.loads(stdout.getvalue())["path"]) + text = state_file.read_text(encoding="utf-8") + self.assertIn("policySnapshotFile:", text) + self.assertIn("policySnapshotHash:", text) + + def test_summary_surfaces_policy_metadata(self) -> None: + state_file = self._build_state() + stdout = io.StringIO() + with redirect_stdout(stdout): + code = cmd_orchestrator_helper(["state-summary", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["policySnapshotFile"]) + self.assertTrue(payload["policySnapshotHash"]) + + def test_legacy_state_without_policy_metadata_remains_valid(self) -> None: + legacy = self.project_root / "legacy.md" + legacy.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_state(["--state", str(legacy)]) + self.assertEqual(code, 0) + self.assertEqual(json.loads(stdout.getvalue())["structure"], "ok") + + def _build_state(self) -> Path: + stdout = io.StringIO() + template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" + with patch_env(self.project_root), redirect_stdout(stdout): + cmd_build_state_doc( + [ + "--template", + str(template), + "--output-folder", + str(self.output_dir), + "--config-json", + json.dumps(self._config()), + ] + ) 
+ return Path(json.loads(stdout.getvalue())["path"]) + + def _config(self) -> dict[str, object]: + return { + "epic": "1", + "epicName": "Epic 1", + "storyRange": ["1.1"], + "status": "READY", + "aiCommand": "claude --dangerously-skip-permissions", + } + + def _install_bundle(self) -> None: + source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" + source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" + target_root = self.project_root / ".claude" / "skills" + target_root.mkdir(parents=True, exist_ok=True) + shutil.copytree(source_skill, target_root / "bmad-story-automator") + shutil.copytree(source_review, target_root / "bmad-story-automator-review") + + def _install_required_skills(self) -> None: + for name in ("bmad-create-story", "bmad-dev-story", "bmad-retrospective", "bmad-qa-generate-e2e-tests"): + skill_dir = self.project_root / ".claude" / "skills" / name + skill_dir.mkdir(parents=True, exist_ok=True) + (skill_dir / "SKILL.md").write_text(f"# {name}\n", encoding="utf-8") + (skill_dir / "workflow.md").write_text(f"# {name}\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-create-story" / "discover-inputs.md").write_text("# discover\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-create-story" / "checklist.md").write_text("# checklist\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-create-story" / "template.md").write_text("# template\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-dev-story" / "checklist.md").write_text("# checklist\n", encoding="utf-8") + (self.project_root / ".claude" / "skills" / "bmad-qa-generate-e2e-tests" / "checklist.md").write_text("# checklist\n", encoding="utf-8") + + +class patch_env: + def __init__(self, project_root: Path) -> None: + self.project_root = str(project_root) + self.previous = None + + def __enter__(self) -> None: + import os + + self.previous = 
os.environ.get("PROJECT_ROOT") + os.environ["PROJECT_ROOT"] = self.project_root + + def __exit__(self, exc_type, exc, tb) -> None: + import os + + if self.previous is None: + os.environ.pop("PROJECT_ROOT", None) + else: + os.environ["PROJECT_ROOT"] = self.previous + + +if __name__ == "__main__": + unittest.main() From 01a3e1cc308c170c0dc982f88eea15bef5760fa5 Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 07:55:40 -0300 Subject: [PATCH 03/17] fix: harden runtime policy snapshot handling --- docs/changelog/260413.md | 27 ++++++ .../story_automator/commands/orchestrator.py | 12 ++- .../commands/orchestrator_parse.py | 14 ++- source/src/story_automator/commands/tmux.py | 9 +- .../story_automator/core/runtime_policy.py | 96 ++++++++++++++++++- source/tests/test_orchestrator_parse.py | 44 +++++++++ source/tests/test_runtime_policy.py | 46 ++++++++- source/tests/test_state_policy_metadata.py | 24 +++++ 8 files changed, 259 insertions(+), 13 deletions(-) diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 556a30e..56731d9 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -120,3 +120,30 @@ Implemented the first JSON-settings slice with bundled policy data, snapshot-bac ### QA Notes - N/A + +## 260413-07:55:28 - Harden runtime policy snapshot handling + +### Summary +Fixed the follow-up review findings around snapshot consumption, policy validation, and stale marker fallback behavior. + +### Fixed +- Fixed runtime policy consumers to honor pinned state snapshots for prompt building, parser contract loading, and escalation budgets when a state file is available. +- Fixed malformed override JSON and invalid nested policy shapes to fail through controlled validation paths instead of crashing later with raw exceptions. +- Fixed implicit marker/env state lookup to fall back safely when the referenced state file is missing or the marker payload is malformed. 
+- Fixed `tmux-wrapper build-cmd` to strip `--state-file` from prompt text instead of leaking the flag into child instructions. + +### Changed +- Added regression tests covering snapshot reuse after override changes, invalid nested workflow shapes, malformed marker files, and state-aware prompt/build behavior. + +### Files +- `source/src/story_automator/core/runtime_policy.py` +- `source/src/story_automator/commands/tmux.py` +- `source/src/story_automator/commands/orchestrator.py` +- `source/src/story_automator/commands/orchestrator_parse.py` +- `source/tests/test_runtime_policy.py` +- `source/tests/test_orchestrator_parse.py` +- `source/tests/test_state_policy_metadata.py` +- `docs/changelog/260413.md` + +### QA Notes +- N/A diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 11d2bb1..31ac9db 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -6,7 +6,7 @@ from pathlib import Path from story_automator.core.frontmatter import extract_last_action, find_frontmatter_value, find_frontmatter_value_case, parse_frontmatter -from story_automator.core.runtime_policy import crash_max_retries, load_effective_policy, review_max_cycles +from story_automator.core.runtime_policy import crash_max_retries, load_runtime_policy, review_max_cycles from story_automator.core.review_verify import verify_code_review_completion from story_automator.core.sprint import sprint_status_epic, sprint_status_get from story_automator.core.story_keys import normalize_story_key, sprint_status_file @@ -283,7 +283,15 @@ def _state_update(args: list[str]) -> int: def _escalate(args: list[str]) -> int: trigger = args[0] if args else "" context = args[1] if len(args) > 1 else "" - policy = load_effective_policy(get_project_root()) + state_file = "" + idx = 2 + while idx < len(args): + if args[idx] == "--state-file" and idx + 1 < len(args): + state_file = 
args[idx + 1] + idx += 2 + continue + idx += 1 + policy = load_runtime_policy(get_project_root(), state_file=state_file) if trigger == "review-loop": cycles = _parse_context_int(context, "cycles") limit = review_max_cycles(policy) diff --git a/source/src/story_automator/commands/orchestrator_parse.py b/source/src/story_automator/commands/orchestrator_parse.py index 6a95226..c809cfd 100644 --- a/source/src/story_automator/commands/orchestrator_parse.py +++ b/source/src/story_automator/commands/orchestrator_parse.py @@ -2,7 +2,7 @@ import json -from story_automator.core.runtime_policy import load_effective_policy, step_contract +from story_automator.core.runtime_policy import PolicyError, load_runtime_policy, step_contract from story_automator.core.utils import COMMAND_TIMEOUT_EXIT, extract_json_line, print_json, read_text, run_cmd, trim_lines @@ -14,6 +14,14 @@ def parse_output_action(args: list[str]) -> int: print('{"status":"error","reason":"output file not found or empty"}') return 1 output_file, step = args[:2] + state_file = "" + idx = 2 + while idx < len(args): + if args[idx] == "--state-file" and idx + 1 < len(args): + state_file = args[idx + 1] + idx += 2 + continue + idx += 1 try: content = read_text(output_file) except FileNotFoundError: @@ -24,9 +32,9 @@ def parse_output_action(args: list[str]) -> int: return 1 lines = trim_lines(content)[:150] try: - contract = step_contract(load_effective_policy(), step) + contract = step_contract(load_runtime_policy(state_file=state_file), step) parse_contract = _load_parse_contract(contract) - except (FileNotFoundError, json.JSONDecodeError, ValueError): + except (FileNotFoundError, json.JSONDecodeError, ValueError, PolicyError): print_json({"status": "error", "reason": "parse_contract_invalid"}) return 1 prompt = _build_parse_prompt(contract, parse_contract, "\n".join(lines)) diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py index 00e24c8..329014d 100644 --- 
a/source/src/story_automator/commands/tmux.py +++ b/source/src/story_automator/commands/tmux.py @@ -6,7 +6,7 @@ import time from pathlib import Path -from story_automator.core.runtime_policy import load_effective_policy, step_contract +from story_automator.core.runtime_policy import load_runtime_policy, step_contract from story_automator.core.review_verify import verify_code_review_completion from story_automator.core.utils import ( atomic_write, @@ -175,11 +175,16 @@ def _build_cmd(args: list[str]) -> int: extra = "" tail = args[2:] idx = 0 + state_file = "" while idx < len(tail): if tail[idx] == "--agent" and idx + 1 < len(tail): agent = tail[idx + 1] idx += 2 continue + if tail[idx] == "--state-file" and idx + 1 < len(tail): + state_file = tail[idx + 1] + idx += 2 + continue extra = f"{extra} {tail[idx]}".strip() idx += 1 agent = agent or agent_type() @@ -188,7 +193,7 @@ def _build_cmd(args: list[str]) -> int: if step not in {"create", "dev", "auto", "review", "retro"}: print(f"Unknown step type: {step}", file=__import__("sys").stderr) return 1 - policy = load_effective_policy(root) + policy = load_runtime_policy(root, state_file=state_file) ai_command = os.environ.get("AI_COMMAND") if ai_command and not os.environ.get("AI_AGENT"): cli = ai_command diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index ca472a0..ea3a536 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ b/source/src/story_automator/core/runtime_policy.py @@ -31,6 +31,18 @@ def load_effective_policy(project_root: str | None = None) -> dict[str, Any]: return policy +def load_runtime_policy(project_root: str | None = None, state_file: str | Path | None = None) -> dict[str, Any]: + root = Path(project_root or get_project_root()).resolve() + resolved_state, source = resolve_policy_state_file(root, state_file) + if resolved_state: + try: + return load_policy_for_state(resolved_state, project_root=str(root)) + except 
(FileNotFoundError, PolicyError): + if source == "explicit": + raise + return load_effective_policy(str(root)) + + def snapshot_effective_policy(project_root: str | None = None) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() policy = load_effective_policy(str(root)) @@ -65,8 +77,12 @@ def load_policy_snapshot( actual_hash = md5_hex8(raw) if expected_hash and actual_hash != expected_hash: raise PolicyError(f"policy snapshot hash mismatch: expected {expected_hash}, got {actual_hash}") - policy = json.loads(raw) + try: + policy = json.loads(raw) + except json.JSONDecodeError as exc: + raise PolicyError(f"policy json invalid: {path}") from exc _validate_policy_shape(policy) + _resolve_policy_paths(policy, project_root=root, bundle_root=bundled_skill_root(root)) return policy @@ -82,6 +98,26 @@ def load_policy_for_state(state_file: str | Path, project_root: str | None = Non return load_effective_policy(str(root)) +def resolve_policy_state_file(project_root: str | Path | None = None, state_file: str | Path | None = None) -> tuple[str, str]: + root = Path(project_root or get_project_root()).resolve() + explicit = Path(state_file).expanduser() if state_file else None + if explicit: + return str(_resolve_state_path(root, explicit)), "explicit" + env_state = os.environ.get("STORY_AUTOMATOR_STATE_FILE", "").strip() + if env_state: + return str(_resolve_state_path(root, Path(env_state).expanduser())), "env" + marker = root / ".claude" / ".story-automator-active" + if marker.is_file(): + try: + payload = _read_json(marker) + except PolicyError: + return "", "" + marker_state = str(payload.get("stateFile") or "").strip() + if marker_state: + return str(_resolve_state_path(root, Path(marker_state).expanduser())), "marker" + return "", "" + + def step_contract(policy: dict[str, Any], step: str) -> dict[str, Any]: contract = (policy.get("steps") or {}).get(step) if not isinstance(contract, dict): @@ -112,7 +148,10 @@ def 
bundled_skill_root(project_root: str | Path | None = None) -> Path: def _read_json(path: str | Path) -> dict[str, Any]: - payload = json.loads(read_text(path)) + try: + payload = json.loads(read_text(path)) + except json.JSONDecodeError as exc: + raise PolicyError(f"policy json invalid: {path}") from exc if not isinstance(payload, dict): raise PolicyError(f"policy json must be an object: {path}") return payload @@ -142,25 +181,40 @@ def _validate_policy_shape(policy: dict[str, Any]) -> None: unknown_keys = sorted(set(policy) - VALID_TOP_LEVEL_KEYS) if unknown_keys: raise PolicyError(f"unknown top-level policy keys: {', '.join(unknown_keys)}") + snapshot = _expect_optional_dict(policy, "snapshot") + if "snapshot" in policy and "relativeDir" in snapshot and not isinstance(snapshot.get("relativeDir"), str): + raise PolicyError("snapshot.relativeDir must be a string") + workflow = _expect_optional_dict(policy, "workflow") + repeat = _expect_optional_nested_dict(workflow, "repeat", "workflow") + review = _expect_optional_nested_dict(repeat, "review", "workflow.repeat") + crash = _expect_optional_nested_dict(workflow, "crash", "workflow") steps = policy.get("steps") if not isinstance(steps, dict): raise PolicyError("steps must be an object") unknown_steps = sorted(set(steps) - VALID_STEP_NAMES) if unknown_steps: raise PolicyError(f"unknown step names: {', '.join(unknown_steps)}") - sequence = ((policy.get("workflow") or {}).get("sequence")) or [] + sequence = (workflow.get("sequence")) or [] if not isinstance(sequence, list) or not all(isinstance(item, str) for item in sequence): raise PolicyError("workflow.sequence must be a string array") + if "maxCycles" in review and not isinstance(review.get("maxCycles"), int): + raise PolicyError("workflow.repeat.review.maxCycles must be an integer") + if "maxRetries" in crash and not isinstance(crash.get("maxRetries"), int): + raise PolicyError("workflow.crash.maxRetries must be an integer") for step in sequence: if step not in 
steps: raise PolicyError(f"workflow.sequence references missing step: {step}") for name, contract in steps.items(): if not isinstance(contract, dict): raise PolicyError(f"step contract must be an object: {name}") + assets = _expect_step_dict(contract, "assets", name) + _expect_step_dict(contract, "prompt", name) + _expect_step_dict(contract, "parse", name) + _expect_step_dict(contract, "success", name) verifier = str(((contract.get("success") or {}).get("verifier")) or "") if verifier not in VALID_VERIFIERS: raise PolicyError(f"invalid verifier for {name}: {verifier}") - required = ((contract.get("assets") or {}).get("required")) or [] + required = (assets.get("required")) or [] if not isinstance(required, list) or any(item not in VALID_ASSET_NAMES for item in required): raise PolicyError(f"invalid required assets for {name}") @@ -248,7 +302,8 @@ def _resolve_data_path(path_value: str, *, project_root: Path, bundle_root: Path def _snapshot_relative_dir(policy: dict[str, Any]) -> str: - relative_dir = str((policy.get("snapshot") or {}).get("relativeDir") or "").strip() + snapshot = _expect_optional_dict(policy, "snapshot") + relative_dir = str(snapshot.get("relativeDir") or "").strip() if not relative_dir: raise PolicyError("snapshot.relativeDir missing") return relative_dir @@ -263,3 +318,34 @@ def _display_path(path: Path, project_root: Path) -> str: return str(path.resolve().relative_to(project_root.resolve())) except ValueError: return str(path.resolve()) + + +def _resolve_state_path(project_root: Path, path: Path) -> Path: + return path if path.is_absolute() else project_root / path + + +def _expect_optional_dict(payload: dict[str, Any], key: str) -> dict[str, Any]: + value = payload.get(key) + if value is None: + return {} + if not isinstance(value, dict): + raise PolicyError(f"{key} must be an object") + return value + + +def _expect_step_dict(contract: dict[str, Any], key: str, step: str) -> dict[str, Any]: + value = contract.get(key) + if value is None: + 
return {} + if not isinstance(value, dict): + raise PolicyError(f"{step}.{key} must be an object") + return value + + +def _expect_optional_nested_dict(payload: dict[str, Any], key: str, label: str) -> dict[str, Any]: + value = payload.get(key) + if value is None: + return {} + if not isinstance(value, dict): + raise PolicyError(f"{label}.{key} must be an object") + return value diff --git a/source/tests/test_orchestrator_parse.py b/source/tests/test_orchestrator_parse.py index 0d5c1e3..99abaf5 100644 --- a/source/tests/test_orchestrator_parse.py +++ b/source/tests/test_orchestrator_parse.py @@ -9,6 +9,7 @@ from pathlib import Path from unittest.mock import patch +from story_automator.commands.state import cmd_build_state_doc from story_automator.commands.orchestrator_parse import parse_output_action from story_automator.core.utils import CommandResult @@ -74,6 +75,23 @@ def test_output_shape_remains_compatible(self) -> None: self.assertIn("issues_found", payload) self.assertIn("all_fixed", payload) + def test_state_file_keeps_pinned_parse_contract_after_override_changes(self) -> None: + state_file = self._build_state() + override_dir = self.project_root / "_bmad" / "bmm" + override_dir.mkdir(parents=True, exist_ok=True) + (override_dir / "story-automator.policy.json").write_text( + json.dumps({"steps": {"create": {"parse": {"schemaFile": "missing.json"}}}}), + encoding="utf-8", + ) + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), patch( + "story_automator.commands.orchestrator_parse.run_cmd", + return_value=CommandResult('{"status":"SUCCESS","story_created":true,"story_file":"x","summary":"ok","next_action":"proceed"}', 0), + ), redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "create", "--state-file", str(state_file)]) + self.assertEqual(code, 0) + self.assertTrue(json.loads(stdout.getvalue())["story_created"]) + def _install_bundle(self) -> None: source_skill = REPO_ROOT / "payload" 
/ ".claude" / "skills" / "bmad-story-automator" source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" @@ -94,6 +112,32 @@ def _install_required_skills(self) -> None: (self.project_root / ".claude" / "skills" / "bmad-dev-story" / "checklist.md").write_text("# checklist\n", encoding="utf-8") (self.project_root / ".claude" / "skills" / "bmad-qa-generate-e2e-tests" / "checklist.md").write_text("# checklist\n", encoding="utf-8") + def _build_state(self) -> Path: + output_dir = self.project_root / "_bmad-output" / "story-automator" + output_dir.mkdir(parents=True, exist_ok=True) + stdout = io.StringIO() + template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), redirect_stdout(stdout): + cmd_build_state_doc( + [ + "--template", + str(template), + "--output-folder", + str(output_dir), + "--config-json", + json.dumps( + { + "epic": "1", + "epicName": "Epic 1", + "storyRange": ["1.1"], + "status": "READY", + "aiCommand": "claude --dangerously-skip-permissions", + } + ), + ] + ) + return Path(json.loads(stdout.getvalue())["path"]) + if __name__ == "__main__": unittest.main() diff --git a/source/tests/test_runtime_policy.py b/source/tests/test_runtime_policy.py index 8b64f3b..3979f54 100644 --- a/source/tests/test_runtime_policy.py +++ b/source/tests/test_runtime_policy.py @@ -6,7 +6,7 @@ import unittest from pathlib import Path -from story_automator.core.runtime_policy import PolicyError, load_effective_policy, snapshot_effective_policy +from story_automator.core.runtime_policy import PolicyError, load_effective_policy, load_runtime_policy, snapshot_effective_policy REPO_ROOT = Path(__file__).resolve().parents[2] @@ -58,6 +58,50 @@ def test_snapshot_hash_stable(self) -> None: second = snapshot_effective_policy(str(self.project_root)) self.assertEqual(first["policySnapshotHash"], second["policySnapshotHash"]) 
+ def test_malformed_override_json_raises_policy_error(self) -> None: + override_dir = self.project_root / "_bmad" / "bmm" + override_dir.mkdir(parents=True, exist_ok=True) + (override_dir / "story-automator.policy.json").write_text("{bad json", encoding="utf-8") + with self.assertRaises(PolicyError): + load_effective_policy(str(self.project_root)) + + def test_invalid_assets_type_rejected(self) -> None: + self._write_override({"steps": {"review": {"assets": []}}}) + with self.assertRaises(PolicyError): + load_effective_policy(str(self.project_root)) + + def test_invalid_workflow_and_snapshot_types_rejected(self) -> None: + self._write_override({"workflow": [], "snapshot": []}) + with self.assertRaises(PolicyError): + load_effective_policy(str(self.project_root)) + + def test_invalid_nested_workflow_types_rejected(self) -> None: + self._write_override({"workflow": {"repeat": [1], "crash": [2]}}) + with self.assertRaises(PolicyError): + load_effective_policy(str(self.project_root)) + + def test_snapshot_reload_re_resolves_paths_for_new_root(self) -> None: + snapshot = snapshot_effective_policy(str(self.project_root)) + copied_root = Path(self.tmp.name) / "copied" + shutil.copytree(self.project_root, copied_root) + policy = load_runtime_policy(str(copied_root), state_file=str(copied_root / snapshot["policySnapshotFile"])) + template_path = policy["steps"]["create"]["prompt"]["templatePath"] + self.assertTrue(str(copied_root) in template_path) + + def test_missing_marker_state_falls_back_to_effective_policy(self) -> None: + marker = self.project_root / ".claude" / ".story-automator-active" + marker.parent.mkdir(parents=True, exist_ok=True) + marker.write_text(json.dumps({"stateFile": "missing.md"}), encoding="utf-8") + policy = load_runtime_policy(str(self.project_root)) + self.assertEqual(policy["workflow"]["repeat"]["review"]["maxCycles"], 5) + + def test_malformed_marker_falls_back_to_effective_policy(self) -> None: + marker = self.project_root / ".claude" / 
".story-automator-active" + marker.parent.mkdir(parents=True, exist_ok=True) + marker.write_text("{bad json", encoding="utf-8") + policy = load_runtime_policy(str(self.project_root)) + self.assertEqual(policy["workflow"]["repeat"]["review"]["maxCycles"], 5) + def _install_bundle(self) -> None: source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" diff --git a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index 5f4528b..72301ca 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -10,6 +10,7 @@ from story_automator.commands.orchestrator import cmd_orchestrator_helper from story_automator.commands.state import cmd_build_state_doc, cmd_validate_state +from story_automator.commands.tmux import _build_cmd REPO_ROOT = Path(__file__).resolve().parents[2] @@ -68,6 +69,29 @@ def test_legacy_state_without_policy_metadata_remains_valid(self) -> None: self.assertEqual(code, 0) self.assertEqual(json.loads(stdout.getvalue())["structure"], "ok") + def test_escalate_uses_pinned_snapshot_when_state_file_provided(self) -> None: + state_file = self._build_state() + override_dir = self.project_root / "_bmad" / "bmm" + override_dir.mkdir(parents=True, exist_ok=True) + (override_dir / "story-automator.policy.json").write_text( + json.dumps({"workflow": {"repeat": {"review": {"maxCycles": 1}}}}), + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["escalate", "review-loop", "cycles=2", "--state-file", str(state_file)]) + self.assertEqual(code, 0) + self.assertFalse(json.loads(stdout.getvalue())["escalate"]) + + def test_build_cmd_does_not_treat_state_file_flag_as_prompt_text(self) -> None: + state_file = self._build_state() + stdout = io.StringIO() + with 
patch_env(self.project_root), redirect_stdout(stdout): + code = _build_cmd(["review", "1.1", "--state-file", str(state_file)]) + self.assertEqual(code, 0) + rendered = stdout.getvalue() + self.assertNotIn("--state-file", rendered) + def _build_state(self) -> Path: stdout = io.StringIO() template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" From e84e4b612a25dee59855ff49436054bf557cf6dc Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 08:06:04 -0300 Subject: [PATCH 04/17] feat: wire policy-backed success verifiers --- docs/changelog/260413.md | 24 ++ .../story_automator/commands/orchestrator.py | 7 +- source/src/story_automator/commands/tmux.py | 62 ++++- .../src/story_automator/core/review_verify.py | 42 +-- .../story_automator/core/success_verifiers.py | 240 ++++++++++++++++++ source/tests/test_success_verifiers.py | 194 ++++++++++++++ 6 files changed, 533 insertions(+), 36 deletions(-) create mode 100644 source/src/story_automator/core/success_verifiers.py create mode 100644 source/tests/test_success_verifiers.py diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 56731d9..faaca3e 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -1,5 +1,29 @@ # Changelog - 260413 +## 260413-08:05:51 - Wire policy-backed success verifiers + +### Summary +Moved review completion checks onto the JSON policy contract and routed monitor verification through the named verifier registry. + +### Added +- Added a shared success verifier registry covering session exit, story artifact creation, review completion, and epic completion. +- Added unit coverage for contract-driven review verification, create artifact matching, epic completion, pinned snapshot reuse, and monitor dispatch. 
+ +### Changed +- Changed `monitor-session` to resolve the active step's `success.verifier` from policy, accept `--state-file`, and verify completion through the configured verifier instead of a hard-coded review branch. +- Changed `verify-code-review` to resolve review completion from the pinned state snapshot when provided, so review verification stays aligned with the active runtime policy. + +### Files +- `source/src/story_automator/core/success_verifiers.py` +- `source/src/story_automator/core/review_verify.py` +- `source/src/story_automator/commands/tmux.py` +- `source/src/story_automator/commands/orchestrator.py` +- `source/tests/test_success_verifiers.py` +- `docs/changelog/260413.md` + +### QA Notes +- N/A + ## 260413-11:35:00 - Verify packed npx install path ### Summary diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 31ac9db..4e69b22 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -378,7 +378,12 @@ def _verify_code_review(args: list[str]) -> int: if not args: print_json({"verified": False, "reason": "story_key_required"}) return 1 - payload = verify_code_review_completion(get_project_root(), args[0]) + state_file = "" + tail = args[1:] + for idx, arg in enumerate(tail): + if arg == "--state-file" and idx + 1 < len(tail): + state_file = tail[idx + 1] + payload = verify_code_review_completion(get_project_root(), args[0], state_file=state_file or None) print_json(payload) return 0 if bool(payload.get("verified")) else 1 diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py index 329014d..c6ddbeb 100644 --- a/source/src/story_automator/commands/tmux.py +++ b/source/src/story_automator/commands/tmux.py @@ -6,8 +6,8 @@ import time from pathlib import Path -from story_automator.core.runtime_policy import load_runtime_policy, step_contract -from 
story_automator.core.review_verify import verify_code_review_completion +from story_automator.core.runtime_policy import PolicyError, load_runtime_policy, step_contract +from story_automator.core.success_verifiers import resolve_success_contract, run_success_verifier from story_automator.core.utils import ( atomic_write, command_exists, @@ -652,7 +652,7 @@ def cmd_monitor_session(args: list[str]) -> int: return 1 if args[0] in {"--help", "-h"}: print("Usage: monitor-session [options]") - print("Options: --max-polls N --initial-wait N --project-root PATH --timeout MIN --verbose --json --agent TYPE --workflow TYPE --story-key KEY") + print("Options: --max-polls N --initial-wait N --project-root PATH --timeout MIN --verbose --json --agent TYPE --workflow TYPE --story-key KEY --state-file PATH") return 0 session = args[0] max_polls = 30 @@ -662,6 +662,7 @@ def cmd_monitor_session(args: list[str]) -> int: agent = os.environ.get("AI_AGENT", "claude") workflow = "dev" story_key = "" + state_file = "" project_root = get_project_root() idx = 1 while idx < len(args): @@ -692,6 +693,10 @@ def cmd_monitor_session(args: list[str]) -> int: story_key = args[idx + 1] idx += 2 continue + elif arg == "--state-file" and idx + 1 < len(args): + state_file = args[idx + 1] + idx += 2 + continue elif arg == "--project-root" and idx + 1 < len(args): project_root = args[idx + 1] idx += 2 @@ -713,11 +718,26 @@ def cmd_monitor_session(args: list[str]) -> int: state = str(status["session_state"]) if state == "completed": output = session_status(session, full=True, codex=agent == "codex", project_root=project_root)["active_task"] - if workflow == "review" and story_key: - verified = verify_code_review_completion(project_root, story_key) + verification = _verify_monitor_completion( + workflow, + project_root=project_root, + story_key=story_key, + output_file=str(output), + state_file=state_file or None, + ) + if verification is not None: + verified, verifier_name = verification if 
bool(verified.get("verified")): - return _emit_monitor(json_output, "completed", last_done, last_total, str(output), "verified_complete") - return _emit_monitor(json_output, "incomplete", last_done, last_total, str(output), "workflow_not_verified") + reason = "normal_completion" if verifier_name == "session_exit" else "verified_complete" + return _emit_monitor(json_output, "completed", last_done, last_total, str(output), reason) + return _emit_monitor( + json_output, + "incomplete", + last_done, + last_total, + str(output), + str(verified.get("reason") or "workflow_not_verified"), + ) return _emit_monitor(json_output, "completed", last_done, last_total, str(output), "normal_completion") if state == "crashed": crashed = session_status(session, full=True, codex=agent == "codex", project_root=project_root) @@ -745,3 +765,31 @@ def _emit_monitor(json_output: bool, state: str, done: int, total: int, output_f else: print(f"{state},{done},{total},{output_file},{reason}") return 0 + + +def _verify_monitor_completion( + workflow: str, + *, + project_root: str, + story_key: str, + output_file: str, + state_file: str | Path | None = None, +) -> tuple[dict[str, object], str] | None: + try: + contract = resolve_success_contract(project_root, workflow, state_file=state_file) + except (FileNotFoundError, PolicyError): + return ({"verified": False, "reason": "verifier_contract_invalid"}, "") + verifier_name = str(contract.get("verifier") or "").strip() + if not verifier_name: + return None + try: + result = run_success_verifier( + verifier_name, + project_root=project_root, + story_key=story_key, + output_file=output_file, + contract=contract, + ) + except PolicyError: + return ({"verified": False, "reason": "verifier_contract_invalid"}, verifier_name) + return (result, verifier_name) diff --git a/source/src/story_automator/core/review_verify.py b/source/src/story_automator/core/review_verify.py index d321bcb..5975c69 100644 --- a/source/src/story_automator/core/review_verify.py 
+++ b/source/src/story_automator/core/review_verify.py @@ -1,34 +1,20 @@ from __future__ import annotations from pathlib import Path +from typing import Any -from .frontmatter import find_frontmatter_value_case -from .sprint import sprint_status_get -from .story_keys import normalize_story_key +from .success_verifiers import resolve_success_contract, review_completion -def verify_code_review_completion(project_root: str, story_key: str) -> dict[str, object]: - norm = normalize_story_key(project_root, story_key) - if norm is None: - return {"verified": False, "reason": "could_not_normalize_key", "input": story_key} - status = sprint_status_get(project_root, norm.id) - if status.done: - return {"verified": True, "story": norm.key, "sprint_status": "done", "source": "sprint-status.yaml"} - matches = sorted((Path(project_root) / "_bmad-output" / "implementation-artifacts").glob(f"{norm.prefix}-*.md")) - story_status = find_frontmatter_value_case(matches[0], "Status") if matches else "" - if story_status == "done": - return { - "verified": True, - "story": norm.key, - "sprint_status": status.status, - "story_file_status": "done", - "source": "story-file", - "note": "sprint_status_not_updated", - } - return { - "verified": False, - "story": norm.key, - "sprint_status": status.status, - "story_file_status": story_status or "unknown", - "reason": "workflow_not_complete", - } +def verify_code_review_completion( + project_root: str, + story_key: str, + *, + success_contract: dict[str, Any] | None = None, + state_file: str | Path | None = None, +) -> dict[str, object]: + try: + contract = success_contract or resolve_success_contract(project_root, "review", state_file=state_file) + return review_completion(project_root=project_root, story_key=story_key, contract=contract) + except (FileNotFoundError, ValueError) as exc: + return {"verified": False, "reason": "review_contract_invalid", "input": story_key, "error": str(exc)} diff --git 
a/source/src/story_automator/core/success_verifiers.py b/source/src/story_automator/core/success_verifiers.py new file mode 100644 index 0000000..0d596f8 --- /dev/null +++ b/source/src/story_automator/core/success_verifiers.py @@ -0,0 +1,240 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Callable + +from .frontmatter import find_frontmatter_value_case +from .runtime_policy import PolicyError, load_runtime_policy, step_contract +from .sprint import sprint_status_epic, sprint_status_get +from .story_keys import normalize_story_key +from .utils import read_text + +ALLOWED_REVIEW_CONTRACT_KEYS = {"blockingSeverity", "doneValues", "inProgressValues", "sourceOrder", "syncSprintStatus"} +ALLOWED_REVIEW_SOURCES = {"sprint-status.yaml", "story-file"} +DEFAULT_REVIEW_CONTRACT = { + "blockingSeverity": ["critical"], + "doneValues": ["done"], + "inProgressValues": ["in-progress", "in_progress", "review", "qa"], + "sourceOrder": ["sprint-status.yaml", "story-file"], + "syncSprintStatus": True, +} + + +def resolve_success_contract(project_root: str, step: str, *, state_file: str | Path | None = None) -> dict[str, Any]: + policy = load_runtime_policy(project_root, state_file=state_file) + success = step_contract(policy, step).get("success") or {} + if not isinstance(success, dict): + raise PolicyError(f"invalid success contract for {step}") + return success + + +def run_success_verifier( + name: str, + *, + project_root: str, + story_key: str = "", + output_file: str = "", + contract: dict[str, Any] | None = None, +) -> dict[str, object]: + verifier = VERIFIERS.get(name) + if verifier is None: + raise PolicyError(f"unknown success verifier: {name}") + return verifier(project_root=project_root, story_key=story_key, output_file=output_file, contract=contract or {}) + + +def session_exit( + *, + project_root: str, + story_key: str = "", + output_file: str = "", + contract: dict[str, Any] | None = None, +) -> dict[str, 
object]: + payload: dict[str, object] = {"verified": True, "source": "session_exit"} + if story_key: + payload["story"] = story_key + if output_file: + payload["outputFile"] = output_file + return payload + + +def create_story_artifact( + *, + project_root: str, + story_key: str, + output_file: str = "", + contract: dict[str, Any] | None = None, +) -> dict[str, object]: + norm = normalize_story_key(project_root, story_key) + if norm is None: + return {"verified": False, "reason": "could_not_normalize_key", "input": story_key} + config = _success_config(contract) + raw_glob = str(config.get("glob") or "_bmad-output/implementation-artifacts/{story_prefix}-*.md") + expected = int(config.get("expectedMatches", 1)) + pattern = _format_story_pattern(raw_glob, norm) + matches = sorted(Path(project_root).glob(pattern)) + payload: dict[str, object] = { + "verified": len(matches) == expected, + "story": norm.key, + "source": "artifact_glob", + "pattern": pattern, + "expectedMatches": expected, + "actualMatches": len(matches), + "matches": [str(match) for match in matches], + } + if not bool(payload["verified"]): + payload["reason"] = "unexpected_story_artifact_count" + return payload + + +def review_completion( + *, + project_root: str, + story_key: str, + output_file: str = "", + contract: dict[str, Any] | None = None, +) -> dict[str, object]: + norm = normalize_story_key(project_root, story_key) + if norm is None: + return {"verified": False, "reason": "could_not_normalize_key", "input": story_key} + review_contract = _load_review_contract(project_root, contract or {}) + done_values = {value.lower() for value in review_contract["doneValues"]} + sprint = sprint_status_get(project_root, norm.id) + story_file = _story_artifact_path(project_root, norm.prefix) + story_status = find_frontmatter_value_case(story_file, "Status") if story_file else "" + for source in review_contract["sourceOrder"]: + if source == "sprint-status.yaml" and sprint.status.lower() in done_values: + 
return { + "verified": True, + "story": norm.key, + "sprint_status": sprint.status, + "story_file_status": story_status or "unknown", + "source": "sprint-status.yaml", + } + if source == "story-file" and story_status.lower() in done_values: + payload: dict[str, object] = { + "verified": True, + "story": norm.key, + "sprint_status": sprint.status, + "story_file_status": story_status, + "source": "story-file", + } + if review_contract["syncSprintStatus"] and not sprint.done: + payload["note"] = "sprint_status_not_updated" + return payload + return { + "verified": False, + "story": norm.key, + "sprint_status": sprint.status, + "story_file_status": story_status or "unknown", + "reason": "workflow_not_complete", + } + + +def epic_complete( + *, + project_root: str, + story_key: str, + output_file: str = "", + contract: dict[str, Any] | None = None, +) -> dict[str, object]: + norm = normalize_story_key(project_root, story_key) + if norm is None: + return {"verified": False, "reason": "could_not_normalize_key", "input": story_key} + epic = norm.id.split(".", 1)[0] + stories, done = sprint_status_epic(project_root, epic) + if not stories: + return {"verified": False, "epic": epic, "reason": "no_stories_found", "source": "sprint-status.yaml"} + return { + "verified": done == len(stories), + "epic": epic, + "story": norm.key, + "totalStories": len(stories), + "doneStories": done, + "source": "sprint-status.yaml", + **({} if done == len(stories) else {"reason": "epic_incomplete"}), + } + + +def _success_config(contract: dict[str, Any] | None) -> dict[str, Any]: + config = (contract or {}).get("config") or {} + if not isinstance(config, dict): + raise PolicyError("success.config must be an object") + return config + + +def _format_story_pattern(pattern: str, story) -> str: + return ( + pattern.replace("{story_prefix}", story.prefix) + .replace("{story_id}", story.id) + .replace("{story_key}", story.key) + ) + + +def _story_artifact_path(project_root: str, story_prefix: str) -> 
Path | None: + matches = sorted((Path(project_root) / "_bmad-output" / "implementation-artifacts").glob(f"{story_prefix}-*.md")) + return matches[0] if matches else None + + +def _load_review_contract(project_root: str, contract: dict[str, Any]) -> dict[str, Any]: + merged = dict(DEFAULT_REVIEW_CONTRACT) + contract_path = str(contract.get("contractPath") or "").strip() + if contract_path: + path = Path(contract_path) + if not path.is_absolute(): + path = Path(project_root) / path + try: + payload = json.loads(read_text(path)) + except json.JSONDecodeError as exc: + raise PolicyError(f"review contract json invalid: {path}") from exc + if not isinstance(payload, dict): + raise PolicyError(f"review contract must be an object: {path}") + merged.update(payload) + inline = _inline_review_contract(contract) + merged.update(inline) + _validate_review_contract(merged) + return { + "blockingSeverity": [str(value).strip() for value in merged["blockingSeverity"] if str(value).strip()], + "doneValues": [str(value).strip() for value in merged["doneValues"] if str(value).strip()], + "inProgressValues": [str(value).strip() for value in merged["inProgressValues"] if str(value).strip()], + "sourceOrder": [str(value).strip() for value in merged["sourceOrder"] if str(value).strip()], + "syncSprintStatus": bool(merged["syncSprintStatus"]), + } + + +def _inline_review_contract(contract: dict[str, Any]) -> dict[str, Any]: + inline: dict[str, Any] = {} + config = contract.get("config") + if isinstance(config, dict): + for key in ALLOWED_REVIEW_CONTRACT_KEYS: + if key in config: + inline[key] = config[key] + for key in ALLOWED_REVIEW_CONTRACT_KEYS: + if key in contract: + inline[key] = contract[key] + return inline + + +def _validate_review_contract(contract: dict[str, Any]) -> None: + unknown_keys = sorted(set(contract) - ALLOWED_REVIEW_CONTRACT_KEYS) + if unknown_keys: + raise PolicyError(f"unknown review contract keys: {', '.join(unknown_keys)}") + for key in ("blockingSeverity", 
"doneValues", "inProgressValues", "sourceOrder"): + values = contract.get(key) + if not isinstance(values, list) or not all(isinstance(value, str) for value in values): + raise PolicyError(f"review contract {key} must be a string array") + if not isinstance(contract.get("syncSprintStatus"), bool): + raise PolicyError("review contract syncSprintStatus must be a boolean") + invalid_sources = sorted({value for value in contract["sourceOrder"] if value not in ALLOWED_REVIEW_SOURCES}) + if invalid_sources: + raise PolicyError(f"review contract sourceOrder contains unknown sources: {', '.join(invalid_sources)}") + + +VerifierFn = Callable[..., dict[str, object]] + +VERIFIERS: dict[str, VerifierFn] = { + "create_story_artifact": create_story_artifact, + "session_exit": session_exit, + "review_completion": review_completion, + "epic_complete": epic_complete, +} diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py new file mode 100644 index 0000000..9071bdb --- /dev/null +++ b/source/tests/test_success_verifiers.py @@ -0,0 +1,194 @@ +from __future__ import annotations + +import io +import json +import shutil +import tempfile +import unittest +from contextlib import redirect_stdout +from pathlib import Path + +from story_automator.commands.state import cmd_build_state_doc +from story_automator.commands.tmux import _verify_monitor_completion +from story_automator.core.review_verify import verify_code_review_completion +from story_automator.core.runtime_policy import PolicyError +from story_automator.core.success_verifiers import create_story_artifact, epic_complete, review_completion + + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +class SuccessVerifierTests(unittest.TestCase): + def setUp(self) -> None: + self.tmp = tempfile.TemporaryDirectory() + self.project_root = Path(self.tmp.name) + self.output_dir = self.project_root / "_bmad-output" / "story-automator" + self.artifacts_dir = self.project_root / "_bmad-output" / 
"implementation-artifacts" + self._install_bundle() + self._install_required_skills() + + def tearDown(self) -> None: + self.tmp.cleanup() + + def test_create_story_artifact_matches_configured_glob(self) -> None: + self._write_story("1-2-example", status="draft") + payload = create_story_artifact( + project_root=str(self.project_root), + story_key="1.2", + contract={"config": {"glob": "_bmad-output/implementation-artifacts/{story_prefix}-*.md", "expectedMatches": 1}}, + ) + self.assertTrue(payload["verified"]) + self.assertEqual(payload["actualMatches"], 1) + + def test_review_completion_uses_contract_done_values(self) -> None: + self._write_story("1-2-example", status="approved") + contract = self._write_review_contract( + {"doneValues": ["approved"], "sourceOrder": ["story-file"], "syncSprintStatus": False} + ) + payload = review_completion( + project_root=str(self.project_root), + story_key="1.2", + contract={"contractPath": str(contract)}, + ) + self.assertTrue(payload["verified"]) + self.assertEqual(payload["source"], "story-file") + self.assertNotIn("note", payload) + + def test_review_completion_rejects_invalid_contract(self) -> None: + contract = self._write_review_contract({"sourceOrder": ["bad-source"]}) + with self.assertRaises(PolicyError): + review_completion( + project_root=str(self.project_root), + story_key="1.2", + contract={"contractPath": str(contract)}, + ) + + def test_epic_complete_checks_sprint_status(self) -> None: + self._write_sprint_status("1-1-story-one: done\n1-2-story-two: done\n") + payload = epic_complete(project_root=str(self.project_root), story_key="1.2") + self.assertTrue(payload["verified"]) + self.assertEqual(payload["doneStories"], 2) + + def test_review_wrapper_uses_pinned_state_snapshot(self) -> None: + self._write_story("1-2-example", status="approved") + state_file = self._build_state() + self._write_override( + { + "steps": { + "review": { + "success": { + "config": {"doneValues": ["approved"], "sourceOrder": 
["story-file"], "syncSprintStatus": False} + } + } + } + } + ) + payload = verify_code_review_completion(str(self.project_root), "1.2", state_file=state_file) + self.assertFalse(payload["verified"]) + self.assertEqual(payload["reason"], "workflow_not_complete") + + def test_monitor_dispatch_uses_review_verifier_from_contract(self) -> None: + self._write_story("1-2-example", status="done") + result = _verify_monitor_completion( + "review", + project_root=str(self.project_root), + story_key="1.2", + output_file="/tmp/session.txt", + ) + self.assertIsNotNone(result) + payload, verifier = result or ({}, "") + self.assertEqual(verifier, "review_completion") + self.assertTrue(payload["verified"]) + + def _build_state(self) -> Path: + stdout = io.StringIO() + template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" + with patch_env(self.project_root), redirect_stdout(stdout): + cmd_build_state_doc( + [ + "--template", + str(template), + "--output-folder", + str(self.output_dir), + "--config-json", + json.dumps( + { + "epic": "1", + "epicName": "Epic 1", + "storyRange": ["1.2"], + "status": "READY", + "aiCommand": "claude --dangerously-skip-permissions", + } + ), + ] + ) + return Path(json.loads(stdout.getvalue())["path"]) + + def _install_bundle(self) -> None: + source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" + source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" + target_root = self.project_root / ".claude" / "skills" + target_root.mkdir(parents=True, exist_ok=True) + shutil.copytree(source_skill, target_root / "bmad-story-automator") + shutil.copytree(source_review, target_root / "bmad-story-automator-review") + + def _install_required_skills(self) -> None: + self._make_skill( + "bmad-create-story", + extras={"discover-inputs.md": "# discover\n", "checklist.md": "# checklist\n", "template.md": "# template\n"}, + ) + 
self._make_skill("bmad-dev-story", extras={"checklist.md": "# checklist\n"}) + self._make_skill("bmad-retrospective") + self._make_skill("bmad-qa-generate-e2e-tests", extras={"checklist.md": "# checklist\n"}) + + def _make_skill(self, name: str, *, extras: dict[str, str] | None = None) -> None: + skill_dir = self.project_root / ".claude" / "skills" / name + skill_dir.mkdir(parents=True, exist_ok=True) + (skill_dir / "SKILL.md").write_text(f"# {name}\n", encoding="utf-8") + (skill_dir / "workflow.md").write_text(f"# {name}\n", encoding="utf-8") + for rel, content in (extras or {}).items(): + (skill_dir / rel).write_text(content, encoding="utf-8") + + def _write_story(self, stem: str, *, status: str) -> Path: + self.artifacts_dir.mkdir(parents=True, exist_ok=True) + path = self.artifacts_dir / f"{stem}.md" + path.write_text(f"---\nStatus: {status}\nTitle: Story\n---\n", encoding="utf-8") + return path + + def _write_sprint_status(self, content: str) -> None: + self.artifacts_dir.mkdir(parents=True, exist_ok=True) + (self.artifacts_dir / "sprint-status.yaml").write_text(content, encoding="utf-8") + + def _write_review_contract(self, payload: dict[str, object]) -> Path: + path = self.project_root / "review-contract.json" + path.write_text(json.dumps(payload), encoding="utf-8") + return path + + def _write_override(self, payload: dict[str, object]) -> None: + override_dir = self.project_root / "_bmad" / "bmm" + override_dir.mkdir(parents=True, exist_ok=True) + (override_dir / "story-automator.policy.json").write_text(json.dumps(payload), encoding="utf-8") + + +class patch_env: + def __init__(self, project_root: Path) -> None: + self.project_root = str(project_root) + self.previous = None + + def __enter__(self) -> None: + import os + + self.previous = os.environ.get("PROJECT_ROOT") + os.environ["PROJECT_ROOT"] = self.project_root + + def __exit__(self, exc_type, exc, tb) -> None: + import os + + if self.previous is None: + os.environ.pop("PROJECT_ROOT", None) + else: + 
os.environ["PROJECT_ROOT"] = self.previous + + +if __name__ == "__main__": + unittest.main() From f12226a7464ac45bb96b1b7008276a0acee6132e Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 08:35:30 -0300 Subject: [PATCH 05/17] fix: harden success verifier review repairs --- docs/changelog/260413.md | 25 ++++++++ .../data/code-review-loop.md | 8 +-- scripts/smoke-test.sh | 5 ++ .../story_automator/core/runtime_policy.py | 48 +++++++++++--- .../story_automator/core/success_verifiers.py | 62 ++++++++++++++----- source/tests/test_success_verifiers.py | 45 ++++++++++++++ 6 files changed, 166 insertions(+), 27 deletions(-) diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index faaca3e..232a993 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -24,6 +24,31 @@ Moved review completion checks onto the JSON policy contract and routed monitor ### QA Notes - N/A +## 260413-08:34:25 - Harden success verifier review fixes + +### Summary +Closed the review-loop findings around contract-safe verifier loading, snapshot handoff coverage, and verifier config failure modes. + +### Fixed +- Fixed review verification to load only step-local success contract paths so unrelated missing skill assets no longer turn completed reviews into `review_contract_invalid`. +- Fixed review contract validation to reject empty or whitespace-only completion arrays and fixed verifier numeric parsing to reject malformed or boolean `expectedMatches`. +- Fixed retrospective completion checks to accept the real bare epic identifier used by the retro step. + +### Changed +- Changed the shipped code-review loop to pass `--state-file` through create/build, monitor, parse, and verify commands so pinned policy snapshots stay consistent end to end. +- Changed smoke coverage and unit tests to pin each state-file handoff and the new verifier hardening paths. 
+ +### Files +- `source/src/story_automator/core/runtime_policy.py` +- `source/src/story_automator/core/success_verifiers.py` +- `source/tests/test_success_verifiers.py` +- `payload/.claude/skills/bmad-story-automator/data/code-review-loop.md` +- `scripts/smoke-test.sh` +- `docs/changelog/260413.md` + +### QA Notes +- `npm run verify` + ## 260413-11:35:00 - Verify packed npx install path ### Summary diff --git a/payload/.claude/skills/bmad-story-automator/data/code-review-loop.md b/payload/.claude/skills/bmad-story-automator/data/code-review-loop.md index e20bfe2..0723fae 100644 --- a/payload/.claude/skills/bmad-story-automator/data/code-review-loop.md +++ b/payload/.claude/skills/bmad-story-automator/data/code-review-loop.md @@ -56,7 +56,7 @@ scripts="$(printf "%s" "{project_root}/.claude/skills/bmad-story-automator/scrip session_name=$("$scripts" tmux-wrapper spawn review {epic} {story_id} \ --agent "$review_agent" \ --cycle $reviewCycle \ - --command "$("$scripts" tmux-wrapper build-cmd review {story_id} --agent "$review_agent")") + --command "$("$scripts" tmux-wrapper build-cmd review {story_id} --agent "$review_agent" --state-file "$state_file")") ``` ### 2. 
Monitor Session with Verification (v2.2) @@ -66,7 +66,7 @@ session_name=$("$scripts" tmux-wrapper spawn review {epic} {story_id} \ # Pass --workflow and --story-key for completion verification result=$("$scripts" monitor-session "$session_name" --json --verbose \ --agent "$review_agent" \ - --workflow review --story-key {story_id}) + --workflow review --story-key {story_id} --state-file "$state_file") final_state=$(echo "$result" | jq -r '.final_state') output_file=$(echo "$result" | jq -r '.output_file') ``` @@ -77,7 +77,7 @@ output_file=$(echo "$result" | jq -r '.output_file') ```bash # Sub-agent parsing (haiku, 99% cheaper than main context) -parsed=$("$scripts" orchestrator-helper parse-output "$output_file" review) +parsed=$("$scripts" orchestrator-helper parse-output "$output_file" review --state-file "$state_file") ``` ### 4. Verify Sprint Status @@ -159,6 +159,6 @@ file_status=$("$scripts" orchestrator-helper story-file-status {story_id}) Check if code-review actually completed: ```bash -"$scripts" orchestrator-helper verify-code-review {story_id} +"$scripts" orchestrator-helper verify-code-review {story_id} --state-file "$state_file" # Returns: {"verified":true/false, "sprint_status":"...", ...} ``` diff --git a/scripts/smoke-test.sh b/scripts/smoke-test.sh index 0e0f0fb..ee74970 100755 --- a/scripts/smoke-test.sh +++ b/scripts/smoke-test.sh @@ -241,6 +241,11 @@ verify_common_install() { assert_contains "outside .claude/skills/" "$review_dir/instructions.xml" assert_contains 'installed helper at `scripts/story-automator`' "$story_dir/data/scripts-reference.md" assert_not_contains "bin/" "$story_dir/data/monitoring-pattern.md" + assert_contains 'state-file "$state_file"' "$story_dir/data/code-review-loop.md" + assert_contains 'build-cmd review {story_id} --agent "$review_agent" --state-file "$state_file"' "$story_dir/data/code-review-loop.md" + assert_contains 'workflow review --story-key {story_id} --state-file "$state_file"' 
"$story_dir/data/code-review-loop.md" + assert_contains 'parse-output "$output_file" review --state-file "$state_file"' "$story_dir/data/code-review-loop.md" + assert_contains 'verify-code-review {story_id} --state-file "$state_file"' "$story_dir/data/code-review-loop.md" } verify_qa_prompts() { diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index ea3a536..93c0c62 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ b/source/src/story_automator/core/runtime_policy.py @@ -18,7 +18,7 @@ class PolicyError(ValueError): pass -def load_effective_policy(project_root: str | None = None) -> dict[str, Any]: +def load_effective_policy(project_root: str | None = None, *, resolve_assets: bool = True) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() bundle_root = bundled_skill_root(root) bundled = _read_json(bundle_root / "data" / "orchestration-policy.json") @@ -27,20 +27,28 @@ def load_effective_policy(project_root: str | None = None) -> dict[str, Any]: policy = _deep_merge(bundled, override) _apply_legacy_env(policy) _validate_policy_shape(policy) - _resolve_policy_paths(policy, project_root=root, bundle_root=bundle_root) + if resolve_assets: + _resolve_policy_paths(policy, project_root=root, bundle_root=bundle_root) + else: + _resolve_success_paths(policy, project_root=root, bundle_root=bundle_root) return policy -def load_runtime_policy(project_root: str | None = None, state_file: str | Path | None = None) -> dict[str, Any]: +def load_runtime_policy( + project_root: str | None = None, + state_file: str | Path | None = None, + *, + resolve_assets: bool = True, +) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() resolved_state, source = resolve_policy_state_file(root, state_file) if resolved_state: try: - return load_policy_for_state(resolved_state, project_root=str(root)) + return load_policy_for_state(resolved_state, 
project_root=str(root), resolve_assets=resolve_assets) except (FileNotFoundError, PolicyError): if source == "explicit": raise - return load_effective_policy(str(root)) + return load_effective_policy(str(root), resolve_assets=resolve_assets) def snapshot_effective_policy(project_root: str | None = None) -> dict[str, Any]: @@ -66,6 +74,7 @@ def load_policy_snapshot( *, project_root: str | None = None, expected_hash: str = "", + resolve_assets: bool = True, ) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() path = Path(snapshot_file) @@ -82,11 +91,19 @@ def load_policy_snapshot( except json.JSONDecodeError as exc: raise PolicyError(f"policy json invalid: {path}") from exc _validate_policy_shape(policy) - _resolve_policy_paths(policy, project_root=root, bundle_root=bundled_skill_root(root)) + if resolve_assets: + _resolve_policy_paths(policy, project_root=root, bundle_root=bundled_skill_root(root)) + else: + _resolve_success_paths(policy, project_root=root, bundle_root=bundled_skill_root(root)) return policy -def load_policy_for_state(state_file: str | Path, project_root: str | None = None) -> dict[str, Any]: +def load_policy_for_state( + state_file: str | Path, + project_root: str | None = None, + *, + resolve_assets: bool = True, +) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() fields = parse_simple_frontmatter(read_text(state_file)) snapshot_file = str(fields.get("policySnapshotFile") or "").strip() @@ -94,8 +111,13 @@ def load_policy_for_state(state_file: str | Path, project_root: str | None = Non if snapshot_file or snapshot_hash: if not snapshot_file or not snapshot_hash: raise PolicyError("state policy metadata incomplete") - return load_policy_snapshot(snapshot_file, project_root=str(root), expected_hash=snapshot_hash) - return load_effective_policy(str(root)) + return load_policy_snapshot( + snapshot_file, + project_root=str(root), + expected_hash=snapshot_hash, + resolve_assets=resolve_assets, + ) + 
return load_effective_policy(str(root), resolve_assets=resolve_assets) def resolve_policy_state_file(project_root: str | Path | None = None, state_file: str | Path | None = None) -> tuple[str, str]: @@ -239,6 +261,14 @@ def _resolve_policy_paths(policy: dict[str, Any], *, project_root: Path, bundle_ success["contractPath"] = _resolve_data_path(contract_file, project_root=project_root, bundle_root=bundle_root) +def _resolve_success_paths(policy: dict[str, Any], *, project_root: Path, bundle_root: Path) -> None: + for contract in (policy.get("steps") or {}).values(): + success = contract.setdefault("success", {}) + contract_file = str(success.get("contractFile") or "").strip() + if contract_file: + success["contractPath"] = _resolve_data_path(contract_file, project_root=project_root, bundle_root=bundle_root) + + def _resolve_step_assets(step: str, assets: dict[str, Any], project_root: Path) -> dict[str, str]: skill_name = str(assets.get("skillName") or "").strip() if not skill_name: diff --git a/source/src/story_automator/core/success_verifiers.py b/source/src/story_automator/core/success_verifiers.py index 0d596f8..4a5cf42 100644 --- a/source/src/story_automator/core/success_verifiers.py +++ b/source/src/story_automator/core/success_verifiers.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import re from pathlib import Path from typing import Any, Callable @@ -22,7 +23,7 @@ def resolve_success_contract(project_root: str, step: str, *, state_file: str | Path | None = None) -> dict[str, Any]: - policy = load_runtime_policy(project_root, state_file=state_file) + policy = load_runtime_policy(project_root, state_file=state_file, resolve_assets=False) success = step_contract(policy, step).get("success") or {} if not isinstance(success, dict): raise PolicyError(f"invalid success contract for {step}") @@ -70,7 +71,7 @@ def create_story_artifact( return {"verified": False, "reason": "could_not_normalize_key", "input": story_key} config = 
_success_config(contract) raw_glob = str(config.get("glob") or "_bmad-output/implementation-artifacts/{story_prefix}-*.md") - expected = int(config.get("expectedMatches", 1)) + expected = _parse_int(config.get("expectedMatches", 1), "success.config.expectedMatches", minimum=0) pattern = _format_story_pattern(raw_glob, norm) matches = sorted(Path(project_root).glob(pattern)) payload: dict[str, object] = { @@ -138,17 +139,16 @@ def epic_complete( output_file: str = "", contract: dict[str, Any] | None = None, ) -> dict[str, object]: - norm = normalize_story_key(project_root, story_key) - if norm is None: + epic = _epic_identifier(project_root, story_key) + if not epic: return {"verified": False, "reason": "could_not_normalize_key", "input": story_key} - epic = norm.id.split(".", 1)[0] stories, done = sprint_status_epic(project_root, epic) if not stories: return {"verified": False, "epic": epic, "reason": "no_stories_found", "source": "sprint-status.yaml"} return { "verified": done == len(stories), "epic": epic, - "story": norm.key, + "story": story_key, "totalStories": len(stories), "doneStories": done, "source": "sprint-status.yaml", @@ -193,13 +193,7 @@ def _load_review_contract(project_root: str, contract: dict[str, Any]) -> dict[s inline = _inline_review_contract(contract) merged.update(inline) _validate_review_contract(merged) - return { - "blockingSeverity": [str(value).strip() for value in merged["blockingSeverity"] if str(value).strip()], - "doneValues": [str(value).strip() for value in merged["doneValues"] if str(value).strip()], - "inProgressValues": [str(value).strip() for value in merged["inProgressValues"] if str(value).strip()], - "sourceOrder": [str(value).strip() for value in merged["sourceOrder"] if str(value).strip()], - "syncSprintStatus": bool(merged["syncSprintStatus"]), - } + return _sanitize_review_contract(merged) def _inline_review_contract(contract: dict[str, Any]) -> dict[str, Any]: @@ -225,11 +219,51 @@ def 
_validate_review_contract(contract: dict[str, Any]) -> None: raise PolicyError(f"review contract {key} must be a string array") if not isinstance(contract.get("syncSprintStatus"), bool): raise PolicyError("review contract syncSprintStatus must be a boolean") - invalid_sources = sorted({value for value in contract["sourceOrder"] if value not in ALLOWED_REVIEW_SOURCES}) + if not _sanitize_string_list(contract["doneValues"]): + raise PolicyError("review contract doneValues must not be empty") + source_order = _sanitize_string_list(contract["sourceOrder"]) + if not source_order: + raise PolicyError("review contract sourceOrder must not be empty") + invalid_sources = sorted({value for value in source_order if value not in ALLOWED_REVIEW_SOURCES}) if invalid_sources: raise PolicyError(f"review contract sourceOrder contains unknown sources: {', '.join(invalid_sources)}") +def _parse_int(value: Any, field: str, *, minimum: int | None = None) -> int: + if isinstance(value, bool): + raise PolicyError(f"{field} must be an integer") + try: + parsed = int(value) + except (TypeError, ValueError) as exc: + raise PolicyError(f"{field} must be an integer") from exc + if minimum is not None and parsed < minimum: + raise PolicyError(f"{field} must be >= {minimum}") + return parsed + + +def _epic_identifier(project_root: str, story_key: str) -> str: + if re.fullmatch(r"\d+", story_key): + return story_key + norm = normalize_story_key(project_root, story_key) + if norm is None: + return "" + return norm.id.split(".", 1)[0] + + +def _sanitize_review_contract(contract: dict[str, Any]) -> dict[str, Any]: + return { + "blockingSeverity": _sanitize_string_list(contract["blockingSeverity"]), + "doneValues": _sanitize_string_list(contract["doneValues"]), + "inProgressValues": _sanitize_string_list(contract["inProgressValues"]), + "sourceOrder": _sanitize_string_list(contract["sourceOrder"]), + "syncSprintStatus": contract["syncSprintStatus"], + } + + +def _sanitize_string_list(values: 
list[str]) -> list[str]: + return [value.strip() for value in values if value.strip()] + + VerifierFn = Callable[..., dict[str, object]] VERIFIERS: dict[str, VerifierFn] = { diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index 9071bdb..a007dde 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -63,12 +63,34 @@ def test_review_completion_rejects_invalid_contract(self) -> None: contract={"contractPath": str(contract)}, ) + def test_review_completion_rejects_empty_contract_lists(self) -> None: + with self.assertRaises(PolicyError): + review_completion( + project_root=str(self.project_root), + story_key="1.2", + contract={"doneValues": [], "sourceOrder": []}, + ) + + def test_review_completion_rejects_whitespace_only_done_values(self) -> None: + with self.assertRaises(PolicyError): + review_completion( + project_root=str(self.project_root), + story_key="1.2", + contract={"doneValues": [" "], "sourceOrder": ["story-file"]}, + ) + def test_epic_complete_checks_sprint_status(self) -> None: self._write_sprint_status("1-1-story-one: done\n1-2-story-two: done\n") payload = epic_complete(project_root=str(self.project_root), story_key="1.2") self.assertTrue(payload["verified"]) self.assertEqual(payload["doneStories"], 2) + def test_epic_complete_accepts_bare_epic_id(self) -> None: + self._write_sprint_status("1-1-story-one: done\n1-2-story-two: done\n") + payload = epic_complete(project_root=str(self.project_root), story_key="1") + self.assertTrue(payload["verified"]) + self.assertEqual(payload["epic"], "1") + def test_review_wrapper_uses_pinned_state_snapshot(self) -> None: self._write_story("1-2-example", status="approved") state_file = self._build_state() @@ -87,6 +109,13 @@ def test_review_wrapper_uses_pinned_state_snapshot(self) -> None: self.assertFalse(payload["verified"]) self.assertEqual(payload["reason"], "workflow_not_complete") + def 
test_review_wrapper_ignores_unrelated_missing_assets(self) -> None: + shutil.rmtree(self.project_root / ".claude" / "skills" / "bmad-create-story") + self._write_story("1-2-example", status="done") + payload = verify_code_review_completion(str(self.project_root), "1.2") + self.assertTrue(payload["verified"]) + self.assertEqual(payload["source"], "story-file") + def test_monitor_dispatch_uses_review_verifier_from_contract(self) -> None: self._write_story("1-2-example", status="done") result = _verify_monitor_completion( @@ -100,6 +129,22 @@ def test_monitor_dispatch_uses_review_verifier_from_contract(self) -> None: self.assertEqual(verifier, "review_completion") self.assertTrue(payload["verified"]) + def test_create_story_artifact_rejects_invalid_expected_matches(self) -> None: + with self.assertRaises(PolicyError): + create_story_artifact( + project_root=str(self.project_root), + story_key="1.2", + contract={"config": {"expectedMatches": "abc"}}, + ) + + def test_create_story_artifact_rejects_boolean_expected_matches(self) -> None: + with self.assertRaises(PolicyError): + create_story_artifact( + project_root=str(self.project_root), + story_key="1.2", + contract={"config": {"expectedMatches": False}}, + ) + def _build_state(self) -> Path: stdout = io.StringIO() template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" From 5ea85409861fb052c63eec3f760844f9d49061da Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 08:39:52 -0300 Subject: [PATCH 06/17] refactor: route create validation through verifier --- docs/changelog/260413.md | 27 ++++++++++++++ docs/cli-reference.md | 7 ++++ .../data/monitoring-pattern.md | 12 ++----- .../data/retry-fallback-implementation.md | 5 ++- .../data/scripts-reference.md | 3 +- .../steps-c/step-03-execute.md | 12 +++---- scripts/smoke-test.sh | 4 +++ .../story_automator/commands/orchestrator.py | 36 ++++++++++++++++++- 
source/tests/test_success_verifiers.py | 23 ++++++++++++ 9 files changed, 108 insertions(+), 21 deletions(-) diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 232a993..c915f58 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -24,6 +24,33 @@ Moved review completion checks onto the JSON policy contract and routed monitor ### QA Notes - N/A +## 260413-08:39:42 - Route create validation through shared verifier + +### Summary +Removed the duplicate create-story file counting path and exposed the shared verifier registry as the public success-check interface. + +### Added +- Added `orchestrator-helper verify-step` so workflow steps can run the configured success verifier directly, with optional pinned `--state-file` and `--output-file` inputs. + +### Changed +- Changed the shipped create workflow, retry guidance, monitoring pattern, and CLI docs to use `verify-step create ... --state-file "$state_file"` instead of `validate-story-creation check`. +- Changed docs and smoke coverage to pin the new create verifier command forms and their state-file handoff. +- Changed regression tests to cover `verify-step create` and pinned create-policy snapshot reuse. 
+ +### Files +- `source/src/story_automator/commands/orchestrator.py` +- `source/tests/test_success_verifiers.py` +- `payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md` +- `payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md` +- `payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md` +- `payload/.claude/skills/bmad-story-automator/data/scripts-reference.md` +- `docs/cli-reference.md` +- `scripts/smoke-test.sh` +- `docs/changelog/260413.md` + +### QA Notes +- `npm run verify` + ## 260413-08:34:25 - Harden success verifier review fixes ### Summary diff --git a/docs/cli-reference.md b/docs/cli-reference.md index 8e24b5a..6b83009 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -62,6 +62,7 @@ Critical rule: - `orchestrator-helper state-summary` - `orchestrator-helper state-update` - `orchestrator-helper marker create|remove|check|heartbeat` +- `orchestrator-helper verify-step` - `orchestrator-helper verify-code-review` - `orchestrator-helper get-epic-stories` - `orchestrator-helper check-epic-complete` @@ -110,6 +111,12 @@ session="$("$scripts" tmux-wrapper spawn review 1 1.2 --agent claude --command " "$scripts" orchestrator-helper agents-resolve --state-file "$state_file" --story 1.2 --task review ``` +### Verify Create Success + +```bash +"$scripts" orchestrator-helper verify-step create 1.2 --state-file "$state_file" +``` + ## Read Next - [Agents And Monitoring](./agents-and-monitoring.md) diff --git a/payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md b/payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md index 6d13f4d..530a613 100644 --- a/payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md +++ b/payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md @@ -105,17 +105,11 @@ next_action=$(echo "$parsed" | jq -r '.next_action') "$scripts" orchestrator-helper escalate ``` -### $scripts validate-story-creation 
+### $scripts orchestrator-helper verify-step ```bash -# Count before session -before=$("$scripts" validate-story-creation count 5.3) - -# ... run create-story session ... - -# Count after and validate -after=$("$scripts" validate-story-creation count 5.3) -"$scripts" validate-story-creation check 5.3 --before $before --after $after +# Validate create-story via the shared success verifier +"$scripts" orchestrator-helper verify-step create 5.3 --state-file "$state_file" ``` --- diff --git a/payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md b/payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md index 982505f..10e6f4b 100644 --- a/payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md +++ b/payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md @@ -75,9 +75,8 @@ fi ### Create Story ```bash -after=$("$scripts" validate-story-creation count {story_id}) -validation=$("$scripts" validate-story-creation check {story_id} --before $before --after $after) -validation_passed=$(echo "$validation" | jq -r '.valid') +validation=$("$scripts" orchestrator-helper verify-step create {story_id} --state-file "$state_file") +validation_passed=$(echo "$validation" | jq -r '.verified') ``` ### Dev Story diff --git a/payload/.claude/skills/bmad-story-automator/data/scripts-reference.md b/payload/.claude/skills/bmad-story-automator/data/scripts-reference.md index 7e5dc24..bbc2ec6 100644 --- a/payload/.claude/skills/bmad-story-automator/data/scripts-reference.md +++ b/payload/.claude/skills/bmad-story-automator/data/scripts-reference.md @@ -12,9 +12,10 @@ All operations use the installed helper at `scripts/story-automator` (usually vi | `$scripts codex-status-check` | Codex-specific status with heartbeat (v2.4.0) | | `$scripts heartbeat-check` | CPU-based process heartbeat detection | | `$scripts orchestrator-helper` | Sprint-status, parsing, markers | +| `$scripts 
orchestrator-helper verify-step` | Shared success verifier checks per step | | `$scripts orchestrator-helper agents-build` | Deterministic agents file generation | | `$scripts orchestrator-helper agents-resolve` | Agent lookup per story/task via state file or direct agents file | -| `$scripts validate-story-creation` | Story file count validation | +| `$scripts validate-story-creation` | Legacy story file count validation | | `$scripts commit-story` | Deterministic git commit with JSON output | ## Usage Pattern diff --git a/payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md b/payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md index 1d11781..d7e60fa 100644 --- a/payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md +++ b/payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md @@ -127,26 +127,24 @@ If multiple logs exist, run one grep/regex pass across all log files and forward **Apply retry/fallback pattern from `{retryStrategy}`:** Up to 5 attempts, alternating agents, network-aware delays. 
```bash -before=$("$scripts" validate-story-creation count {story_id}) # Retry loop: see {retryStrategy} session=$("$scripts" tmux-wrapper spawn create {epic} {story_id} \ --agent "$current_agent" \ - --command "$("$scripts" tmux-wrapper build-cmd create {story_id} --agent "$current_agent")") + --command "$("$scripts" tmux-wrapper build-cmd create {story_id} --agent "$current_agent" --state-file "$state_file")") result=$("$scripts" monitor-session "$session" --json --agent "$current_agent") "$scripts" tmux-wrapper kill "$session" -after=$("$scripts" validate-story-creation count {story_id}) -validation=$("$scripts" validate-story-creation check {story_id} --before $before --after $after) +validation=$("$scripts" orchestrator-helper verify-step create {story_id} --state-file "$state_file") ``` -- If `validation.valid == true`: +- If `validation.verified == true`: ```bash # Update Story Progress: mark create-story done tmp_state=$(mktemp) sed "s/^| ${story_id} |.*$/| ${story_id} | done | - | - | - | - | in-progress |/" "$state_file" > "$tmp_state" && mv "$tmp_state" "$state_file" ``` → proceed to B -- If `validation.valid == false` AND attempts < 5 → retry with next agent (see `{retryStrategy}`) -- If `validation.valid == false` AND attempts == 5 → escalate (all retries exhausted) +- If `validation.verified == false` AND attempts < 5 → retry with next agent (see `{retryStrategy}`) +- If `validation.verified == false` AND attempts == 5 → escalate (all retries exhausted) ### B. 
Dev Story diff --git a/scripts/smoke-test.sh b/scripts/smoke-test.sh index ee74970..44e44ef 100755 --- a/scripts/smoke-test.sh +++ b/scripts/smoke-test.sh @@ -246,6 +246,10 @@ verify_common_install() { assert_contains 'workflow review --story-key {story_id} --state-file "$state_file"' "$story_dir/data/code-review-loop.md" assert_contains 'parse-output "$output_file" review --state-file "$state_file"' "$story_dir/data/code-review-loop.md" assert_contains 'verify-code-review {story_id} --state-file "$state_file"' "$story_dir/data/code-review-loop.md" + assert_contains 'orchestrator-helper verify-step create {story_id} --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" + assert_contains 'validation_passed=$(echo "$validation" | jq -r '\''.verified'\'')' "$story_dir/data/retry-fallback-implementation.md" + assert_contains 'orchestrator-helper verify-step create 5.3 --state-file "$state_file"' "$story_dir/data/monitoring-pattern.md" + assert_contains '| `$scripts orchestrator-helper verify-step` | Shared success verifier checks per step |' "$story_dir/data/scripts-reference.md" } verify_qa_prompts() { diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 4e69b22..694430a 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -6,8 +6,9 @@ from pathlib import Path from story_automator.core.frontmatter import extract_last_action, find_frontmatter_value, find_frontmatter_value_case, parse_frontmatter -from story_automator.core.runtime_policy import crash_max_retries, load_runtime_policy, review_max_cycles +from story_automator.core.runtime_policy import PolicyError, crash_max_retries, load_runtime_policy, review_max_cycles from story_automator.core.review_verify import verify_code_review_completion +from story_automator.core.success_verifiers import resolve_success_contract, run_success_verifier from 
story_automator.core.sprint import sprint_status_epic, sprint_status_get from story_automator.core.story_keys import normalize_story_key, sprint_status_file from story_automator.core.utils import ( @@ -51,6 +52,7 @@ def cmd_orchestrator_helper(args: list[str]) -> int: "commit-ready": _commit_ready, "normalize-key": _normalize_key, "story-file-status": _story_file_status, + "verify-step": _verify_step, "verify-code-review": _verify_code_review, "check-epic-complete": check_epic_complete_action, "get-epic-stories": get_epic_stories_action, @@ -86,6 +88,7 @@ def _usage(code: int) -> int: print(" commit-ready ", file=target) print(" normalize-key [--to id|key|prefix|json]", file=target) print(" story-file-status ", file=target) + print(" verify-step [--state-file path] [--output-file path]", file=target) print(" verify-code-review ", file=target) print(" check-epic-complete [--state-file path]", file=target) print(" get-epic-stories [--state-file path]", file=target) @@ -388,6 +391,37 @@ def _verify_code_review(args: list[str]) -> int: return 0 if bool(payload.get("verified")) else 1 +def _verify_step(args: list[str]) -> int: + if len(args) < 2: + print_json({"verified": False, "reason": "step_and_story_required"}) + return 1 + step, story_key = args[:2] + state_file = "" + output_file = "" + tail = args[2:] + for idx, arg in enumerate(tail): + if arg == "--state-file" and idx + 1 < len(tail): + state_file = tail[idx + 1] + elif arg == "--output-file" and idx + 1 < len(tail): + output_file = tail[idx + 1] + try: + contract = resolve_success_contract(get_project_root(), step, state_file=state_file or None) + verifier = str(contract.get("verifier") or "").strip() + if not verifier: + raise PolicyError(f"missing success verifier for {step}") + payload = run_success_verifier( + verifier, + project_root=get_project_root(), + story_key=story_key, + output_file=output_file, + contract=contract, + ) + except (FileNotFoundError, PolicyError) as exc: + payload = {"verified": 
False, "step": step, "input": story_key, "reason": "verifier_contract_invalid", "error": str(exc)} + print_json(payload) + return 0 if bool(payload.get("verified")) else 1 + + def _parse_context_int(context: str, key: str) -> int: match = re.search(rf"{re.escape(key)}=(\d+)", context) return int(match.group(1)) if match else 0 diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index a007dde..0ab5f04 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -8,6 +8,7 @@ from contextlib import redirect_stdout from pathlib import Path +from story_automator.commands.orchestrator import cmd_orchestrator_helper from story_automator.commands.state import cmd_build_state_doc from story_automator.commands.tmux import _verify_monitor_completion from story_automator.core.review_verify import verify_code_review_completion @@ -129,6 +130,28 @@ def test_monitor_dispatch_uses_review_verifier_from_contract(self) -> None: self.assertEqual(verifier, "review_completion") self.assertTrue(payload["verified"]) + def test_verify_step_create_uses_shared_verifier(self) -> None: + self._write_story("1-2-example", status="draft") + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["verify-step", "create", "1.2"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["verified"]) + self.assertEqual(payload["source"], "artifact_glob") + + def test_verify_step_create_uses_pinned_snapshot(self) -> None: + self._write_story("1-2-example", status="draft") + state_file = self._build_state() + self._write_override({"steps": {"create": {"success": {"config": {"expectedMatches": 2}}}}}) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["verify-step", "create", "1.2", "--state-file", str(state_file)]) + self.assertEqual(code, 0) + payload = 
json.loads(stdout.getvalue()) + self.assertTrue(payload["verified"]) + self.assertEqual(payload["expectedMatches"], 1) + def test_create_story_artifact_rejects_invalid_expected_matches(self) -> None: with self.assertRaises(PolicyError): create_story_artifact( From 1b8e2d03dc390f1051916c1e7a7e2902f8ccabdb Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 08:49:08 -0300 Subject: [PATCH 07/17] fix: restore verify-step retry contract --- docs/changelog/260413.md | 24 +++++++++++++++++++ .../data/monitoring-pattern.md | 20 +++++++++------- .../data/retry-fallback-implementation.md | 2 +- scripts/smoke-test.sh | 4 ++++ .../story_automator/commands/orchestrator.py | 4 +++- source/tests/test_success_verifiers.py | 9 +++++++ 6 files changed, 52 insertions(+), 11 deletions(-) diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index c915f58..861257f 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -1,5 +1,29 @@ # Changelog - 260413 +## 260413-09:14:32 - Restore verify-step retry contract + +### Summary +Restored the shared verifier CLI contract so create-step retries can inspect JSON failures without aborting, and aligned the shipped create examples with the pinned verifier path. + +### Fixed +- Fixed `orchestrator-helper verify-step` to return JSON with exit code `0` for ordinary verification failures, preserving retry loops that branch on `.verified` instead of shell exit status. +- Fixed the create monitoring quick reference and retry wrapper examples to keep `--state-file` attached through build and monitor handoff. + +### Changed +- Changed smoke coverage and unit tests to pin the create verifier failure contract and the updated state-file command forms. 
+ +### Files +- `source/src/story_automator/commands/orchestrator.py` +- `source/tests/test_success_verifiers.py` +- `payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md` +- `payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md` +- `scripts/smoke-test.sh` +- `docs/changelog/260413.md` + +### QA Notes +- `PYTHONPATH=source/src python3 -m unittest source.tests.test_success_verifiers` +- `bash scripts/smoke-test.sh` + ## 260413-08:05:51 - Wire policy-backed success verifiers ### Summary diff --git a/payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md b/payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md index 530a613..cfa8441 100644 --- a/payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md +++ b/payload/.claude/skills/bmad-story-automator/data/monitoring-pattern.md @@ -24,22 +24,24 @@ scripts/ --- -## Standard Workflow: Spawn + Monitor + Parse +## Standard Workflow: Spawn + Monitor + Verify (Create Example) ```bash # STEP 1: Spawn session (use $scripts tmux-wrapper) session_name=$("$scripts" tmux-wrapper spawn create 5 5.3 \ - --command "$("$scripts" tmux-wrapper build-cmd create 5.3)") + --command "$("$scripts" tmux-wrapper build-cmd create 5.3 --state-file "$state_file")") # STEP 2: Monitor until completion (SINGLE API CALL) -result=$("$scripts" monitor-session "$session_name" --verbose --json) +result=$("$scripts" monitor-session "$session_name" \ + --verbose --json \ + --workflow create --story-key 5.3 --state-file "$state_file") -# STEP 3: Parse output with sub-agent -output_file=$(echo "$result" | jq -r '.output_file') -parsed=$("$scripts" orchestrator-helper parse-output "$output_file" create) +# STEP 3: Verify success against the shared create contract +validation=$("$scripts" orchestrator-helper verify-step create 5.3 --state-file "$state_file") +verified=$(echo "$validation" | jq -r '.verified') -# STEP 4: Act on parsed result -next_action=$(echo 
"$parsed" | jq -r '.next_action') +# STEP 4: Act on verifier result +[ "$verified" = "true" ] || echo "retry-or-escalate" # STEP 5: ALWAYS cleanup session (v1.2.0) "$scripts" tmux-wrapper kill "$session_name" @@ -120,7 +122,7 @@ After `$scripts monitor-session` returns: | final_state | Action | |-------------|--------| -| `completed` | Parse output → act on `next_action` | +| `completed` | Run step verifier or parser for the active workflow | | `incomplete` | **(v2.2)** Session idle but workflow NOT verified → Escalate immediately | | `crashed` | Check retry count → retry or escalate | | `stuck` | Get output → investigate → may need restart | diff --git a/payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md b/payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md index 10e6f4b..47452bd 100644 --- a/payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md +++ b/payload/.claude/skills/bmad-story-automator/data/retry-fallback-implementation.md @@ -37,7 +37,7 @@ while [ $attempt -lt $max_attempts ] && [ "$success" = "false" ]; do # Execute workflow step session=$("$scripts" tmux-wrapper spawn {step} {epic} {story_id} \ --agent "$current_agent" \ - --command "$("$scripts" tmux-wrapper build-cmd {step} {story_id} --agent "$current_agent")") + --command "$("$scripts" tmux-wrapper build-cmd {step} {story_id} --agent "$current_agent" --state-file "$state_file")") result=$("$scripts" monitor-session "$session" --json --agent "$current_agent") # Cleanup session diff --git a/scripts/smoke-test.sh b/scripts/smoke-test.sh index 44e44ef..7414554 100755 --- a/scripts/smoke-test.sh +++ b/scripts/smoke-test.sh @@ -247,8 +247,12 @@ verify_common_install() { assert_contains 'parse-output "$output_file" review --state-file "$state_file"' "$story_dir/data/code-review-loop.md" assert_contains 'verify-code-review {story_id} --state-file "$state_file"' "$story_dir/data/code-review-loop.md" assert_contains 
'orchestrator-helper verify-step create {story_id} --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" + assert_contains 'build-cmd create {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" assert_contains 'validation_passed=$(echo "$validation" | jq -r '\''.verified'\'')' "$story_dir/data/retry-fallback-implementation.md" + assert_contains 'build-cmd {step} {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/data/retry-fallback-implementation.md" assert_contains 'orchestrator-helper verify-step create 5.3 --state-file "$state_file"' "$story_dir/data/monitoring-pattern.md" + assert_contains 'workflow create --story-key 5.3 --state-file "$state_file"' "$story_dir/data/monitoring-pattern.md" + assert_not_contains 'parse-output "$output_file" create' "$story_dir/data/monitoring-pattern.md" assert_contains '| `$scripts orchestrator-helper verify-step` | Shared success verifier checks per step |' "$story_dir/data/scripts-reference.md" } diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 694430a..5eb902e 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -416,10 +416,12 @@ def _verify_step(args: list[str]) -> int: output_file=output_file, contract=contract, ) + exit_code = 0 except (FileNotFoundError, PolicyError) as exc: payload = {"verified": False, "step": step, "input": story_key, "reason": "verifier_contract_invalid", "error": str(exc)} + exit_code = 1 print_json(payload) - return 0 if bool(payload.get("verified")) else 1 + return exit_code def _parse_context_int(context: str, key: str) -> int: diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index 0ab5f04..4bee455 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -152,6 +152,15 @@ def 
test_verify_step_create_uses_pinned_snapshot(self) -> None: self.assertTrue(payload["verified"]) self.assertEqual(payload["expectedMatches"], 1) + def test_verify_step_create_returns_json_on_verification_failure(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["verify-step", "create", "1.2"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["verified"]) + self.assertEqual(payload["reason"], "unexpected_story_artifact_count") + def test_create_story_artifact_rejects_invalid_expected_matches(self) -> None: with self.assertRaises(PolicyError): create_story_artifact( From 92360247e154c7bd751d2522325e9d07a40c22d4 Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 09:14:50 -0300 Subject: [PATCH 08/17] fix: enforce snapshot-only resume semantics --- docs/changelog/260413.md | 25 +++++++++++ docs/development.md | 9 ++++ docs/state-and-resume.md | 23 ++++++++++ .../story_automator/commands/orchestrator.py | 34 +++++++++----- .../story_automator/core/runtime_policy.py | 30 ++++++++----- source/tests/test_runtime_policy.py | 44 ++++++++++++++++++- source/tests/test_state_policy_metadata.py | 29 ++++++++++++ 7 files changed, 172 insertions(+), 22 deletions(-) diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 861257f..2437e31 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -247,3 +247,28 @@ Fixed the follow-up review findings around snapshot consumption, policy validati ### QA Notes - N/A + +## 260413-09:13:20 - Enforce snapshot-only resume semantics + +### Summary +Locked resume behavior to pinned snapshots for new state docs while keeping legacy states on bundled defaults only. + +### Fixed +- Fixed legacy state resumes to ignore live project overrides and legacy env knobs so old runs stay on bundled defaults. 
+- Fixed marker and env discovered new-format states to fail validation when the pinned snapshot file is missing instead of silently falling back to live policy. + +### Changed +- Changed `state-summary` to infer `legacyPolicy: true` for old state docs without snapshot metadata. +- Changed tests and operator docs to pin snapshot-only resume rules and the one-release env compatibility window for `MAX_REVIEW_CYCLES` and `MAX_CRASH_RETRIES`. + +### Files +- `source/src/story_automator/core/runtime_policy.py` +- `source/src/story_automator/commands/orchestrator.py` +- `source/tests/test_runtime_policy.py` +- `source/tests/test_state_policy_metadata.py` +- `docs/state-and-resume.md` +- `docs/development.md` +- `docs/changelog/260413.md` + +### QA Notes +- N/A diff --git a/docs/development.md b/docs/development.md index ba9ef4b..ada477a 100644 --- a/docs/development.md +++ b/docs/development.md @@ -68,6 +68,15 @@ python3 -m story_automator with `PYTHONPATH` pointed at `source/src`. +## Legacy Env Compatibility + +For one release cycle, fresh orchestration starts still honor: + +- `MAX_REVIEW_CYCLES` +- `MAX_CRASH_RETRIES` + +Those values are resolved once during snapshot creation. Resume paths read the pinned snapshot, not the current shell env. Prefer `_bmad/bmm/story-automator.policy.json` for new configuration changes. + ## What To Re-Check After Runtime Changes If you change: diff --git a/docs/state-and-resume.md b/docs/state-and-resume.md index b90104d..41da500 100644 --- a/docs/state-and-resume.md +++ b/docs/state-and-resume.md @@ -35,6 +35,10 @@ Important frontmatter fields: - `agentConfig` - `activeSessions` - `completedSessions` +- `policyVersion` +- `policySnapshotFile` +- `policySnapshotHash` +- `legacyPolicy` ### Body Sections @@ -107,6 +111,25 @@ flowchart TD Resume is step-aware. It does not blindly restart from the beginning. 
+### Policy Rules On Resume + +- new-format state docs must load `policySnapshotFile` plus `policySnapshotHash` +- missing or mismatched snapshots are validation failures, not fallback cases +- old state docs without snapshot metadata resume in legacy mode with bundled defaults +- `state-summary` reports `legacyPolicy: true` for those legacy resumes + +### Legacy Env Compatibility + +For one release cycle, `MAX_REVIEW_CYCLES` and `MAX_CRASH_RETRIES` still work at orchestration start. + +They are resolved once, written into the effective policy snapshot, and ignored on resume after that. + +Deprecation path: + +1. keep existing env knobs working for fresh starts +2. prefer JSON policy overrides for new setup +3. remove the env path after the compatibility window closes + ## Validate Flow Validation is a first-class mode, not an ad hoc debug routine. diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 5eb902e..70db01f 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -5,7 +5,13 @@ import re from pathlib import Path -from story_automator.core.frontmatter import extract_last_action, find_frontmatter_value, find_frontmatter_value_case, parse_frontmatter +from story_automator.core.frontmatter import ( + extract_last_action, + find_frontmatter_value, + find_frontmatter_value_case, + parse_frontmatter, + parse_simple_frontmatter, +) from story_automator.core.runtime_policy import PolicyError, crash_max_retries, load_runtime_policy, review_max_cycles from story_automator.core.review_verify import verify_code_review_completion from story_automator.core.success_verifiers import resolve_success_contract, run_success_verifier @@ -239,19 +245,25 @@ def _state_summary(args: list[str]) -> int: if not args or not file_exists(args[0]): print_json({"ok": False, "error": "file_not_found"}) return 1 + fields = 
parse_simple_frontmatter(read_text(args[0])) + snapshot_file = str(fields.get("policySnapshotFile") or "").strip() + snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() + legacy_policy = str(fields.get("legacyPolicy") or "").strip().lower() + if legacy_policy not in {"true", "false"}: + legacy_policy = "true" if not snapshot_file and not snapshot_hash else "false" print_json( { "ok": True, - "epic": find_frontmatter_value(args[0], "epic"), - "epicName": find_frontmatter_value(args[0], "epicName"), - "currentStory": find_frontmatter_value(args[0], "currentStory"), - "currentStep": find_frontmatter_value(args[0], "currentStep"), - "status": find_frontmatter_value(args[0], "status"), - "lastUpdated": find_frontmatter_value(args[0], "lastUpdated"), - "policyVersion": find_frontmatter_value(args[0], "policyVersion"), - "policySnapshotFile": find_frontmatter_value(args[0], "policySnapshotFile"), - "policySnapshotHash": find_frontmatter_value(args[0], "policySnapshotHash"), - "legacyPolicy": find_frontmatter_value(args[0], "legacyPolicy"), + "epic": str(fields.get("epic") or ""), + "epicName": str(fields.get("epicName") or ""), + "currentStory": str(fields.get("currentStory") or ""), + "currentStep": str(fields.get("currentStep") or ""), + "status": str(fields.get("status") or ""), + "lastUpdated": str(fields.get("lastUpdated") or ""), + "policyVersion": str(fields.get("policyVersion") or ""), + "policySnapshotFile": snapshot_file, + "policySnapshotHash": snapshot_hash, + "legacyPolicy": legacy_policy, "lastAction": extract_last_action(args[0]), } ) diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index 93c0c62..d307537 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ b/source/src/story_automator/core/runtime_policy.py @@ -14,23 +14,34 @@ VALID_ASSET_NAMES = {"skill", "workflow", "instructions", "checklist", "template"} +def load_bundled_policy(project_root: str | None = 
None, *, resolve_assets: bool = True) -> dict[str, Any]: + root = Path(project_root or get_project_root()).resolve() + bundle_root = bundled_skill_root(root) + policy = _read_json(bundle_root / "data" / "orchestration-policy.json") + _validate_policy_shape(policy) + if resolve_assets: + _resolve_policy_paths(policy, project_root=root, bundle_root=bundle_root) + else: + _resolve_success_paths(policy, project_root=root, bundle_root=bundle_root) + return policy + + class PolicyError(ValueError): pass def load_effective_policy(project_root: str | None = None, *, resolve_assets: bool = True) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() - bundle_root = bundled_skill_root(root) - bundled = _read_json(bundle_root / "data" / "orchestration-policy.json") + bundled = load_bundled_policy(str(root), resolve_assets=False) override_path = root / "_bmad" / "bmm" / "story-automator.policy.json" override = _read_json(override_path) if override_path.is_file() else {} policy = _deep_merge(bundled, override) _apply_legacy_env(policy) _validate_policy_shape(policy) if resolve_assets: - _resolve_policy_paths(policy, project_root=root, bundle_root=bundle_root) + _resolve_policy_paths(policy, project_root=root, bundle_root=bundled_skill_root(root)) else: - _resolve_success_paths(policy, project_root=root, bundle_root=bundle_root) + _resolve_success_paths(policy, project_root=root, bundle_root=bundled_skill_root(root)) return policy @@ -43,11 +54,10 @@ def load_runtime_policy( root = Path(project_root or get_project_root()).resolve() resolved_state, source = resolve_policy_state_file(root, state_file) if resolved_state: - try: - return load_policy_for_state(resolved_state, project_root=str(root), resolve_assets=resolve_assets) - except (FileNotFoundError, PolicyError): - if source == "explicit": - raise + state_path = Path(resolved_state) + if source != "explicit" and not state_path.is_file(): + return load_effective_policy(str(root), 
resolve_assets=resolve_assets) + return load_policy_for_state(str(state_path), project_root=str(root), resolve_assets=resolve_assets) return load_effective_policy(str(root), resolve_assets=resolve_assets) @@ -117,7 +127,7 @@ def load_policy_for_state( expected_hash=snapshot_hash, resolve_assets=resolve_assets, ) - return load_effective_policy(str(root), resolve_assets=resolve_assets) + return load_bundled_policy(str(root), resolve_assets=resolve_assets) def resolve_policy_state_file(project_root: str | Path | None = None, state_file: str | Path | None = None) -> tuple[str, str]: diff --git a/source/tests/test_runtime_policy.py b/source/tests/test_runtime_policy.py index 3979f54..2548fc5 100644 --- a/source/tests/test_runtime_policy.py +++ b/source/tests/test_runtime_policy.py @@ -5,8 +5,15 @@ import tempfile import unittest from pathlib import Path +from unittest.mock import patch -from story_automator.core.runtime_policy import PolicyError, load_effective_policy, load_runtime_policy, snapshot_effective_policy +from story_automator.core.runtime_policy import ( + PolicyError, + load_effective_policy, + load_policy_snapshot, + load_runtime_policy, + snapshot_effective_policy, +) REPO_ROOT = Path(__file__).resolve().parents[2] @@ -58,6 +65,18 @@ def test_snapshot_hash_stable(self) -> None: second = snapshot_effective_policy(str(self.project_root)) self.assertEqual(first["policySnapshotHash"], second["policySnapshotHash"]) + def test_snapshot_bakes_legacy_env_values_for_resume(self) -> None: + with patch.dict("os.environ", {"MAX_REVIEW_CYCLES": "2", "MAX_CRASH_RETRIES": "4"}, clear=False): + snapshot = snapshot_effective_policy(str(self.project_root)) + with patch.dict("os.environ", {"MAX_REVIEW_CYCLES": "9", "MAX_CRASH_RETRIES": "9"}, clear=False): + policy = load_policy_snapshot( + snapshot["policySnapshotFile"], + project_root=str(self.project_root), + expected_hash=snapshot["policySnapshotHash"], + ) + 
self.assertEqual(policy["workflow"]["repeat"]["review"]["maxCycles"], 2) + self.assertEqual(policy["workflow"]["crash"]["maxRetries"], 4) + def test_malformed_override_json_raises_policy_error(self) -> None: override_dir = self.project_root / "_bmad" / "bmm" override_dir.mkdir(parents=True, exist_ok=True) @@ -102,6 +121,29 @@ def test_malformed_marker_falls_back_to_effective_policy(self) -> None: policy = load_runtime_policy(str(self.project_root)) self.assertEqual(policy["workflow"]["repeat"]["review"]["maxCycles"], 5) + def test_legacy_state_uses_bundled_defaults_without_override_or_env(self) -> None: + self._write_override({"workflow": {"repeat": {"review": {"maxCycles": 1}}}}) + legacy_state = self.project_root / "legacy.md" + legacy_state.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\n---\n", + encoding="utf-8", + ) + with patch.dict("os.environ", {"MAX_REVIEW_CYCLES": "2"}, clear=False): + policy = load_runtime_policy(str(self.project_root), state_file=str(legacy_state)) + self.assertEqual(policy["workflow"]["repeat"]["review"]["maxCycles"], 5) + + def test_marker_resume_with_missing_snapshot_raises_policy_error(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\npolicySnapshotFile: \"_bmad-output/story-automator/snapshots/missing.json\"\npolicySnapshotHash: \"deadbeef\"\n---\n", + encoding="utf-8", + ) + marker = self.project_root / ".claude" / ".story-automator-active" + marker.parent.mkdir(parents=True, exist_ok=True) + marker.write_text(json.dumps({"stateFile": str(state_file.relative_to(self.project_root))}), encoding="utf-8") + with self.assertRaises(PolicyError): + load_runtime_policy(str(self.project_root)) + def _install_bundle(self) -> None: source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" source_review = REPO_ROOT / "payload" / ".claude" / "skills" / 
"bmad-story-automator-review" diff --git a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index 72301ca..ceccd8f 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -69,6 +69,35 @@ def test_legacy_state_without_policy_metadata_remains_valid(self) -> None: self.assertEqual(code, 0) self.assertEqual(json.loads(stdout.getvalue())["structure"], "ok") + def test_summary_infers_legacy_policy_for_old_state(self) -> None: + legacy = self.project_root / "legacy.md" + legacy.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with redirect_stdout(stdout): + code = cmd_orchestrator_helper(["state-summary", str(legacy)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["policySnapshotFile"], "") + self.assertEqual(payload["policySnapshotHash"], "") + self.assertEqual(payload["legacyPolicy"], "true") + + def test_validate_state_rejects_new_state_with_missing_snapshot(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\npolicySnapshotFile: \"_bmad-output/story-automator/snapshots/missing.json\"\npolicySnapshotHash: \"deadbeef\"\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_state(["--state", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["structure"], "issues") + self.assertTrue(any("policy snapshot missing" in issue for issue in payload["issues"])) + def test_escalate_uses_pinned_snapshot_when_state_file_provided(self) -> None: 
state_file = self._build_state() override_dir = self.project_root / "_bmad" / "bmm" From 3b5d55f6d5099a10eb7cf312443c274360d965db Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 09:26:44 -0300 Subject: [PATCH 09/17] fix: tighten state policy compatibility helpers --- docs/changelog/260413.md | 26 ++++++ docs/cli-reference.md | 8 +- .../story_automator/commands/orchestrator.py | 3 +- .../commands/validate_story_creation.py | 88 +++++++++++++------ .../story_automator/core/runtime_policy.py | 23 +++-- source/tests/test_runtime_policy.py | 9 ++ source/tests/test_state_policy_metadata.py | 27 ++++++ source/tests/test_success_verifiers.py | 25 ++++++ 8 files changed, 177 insertions(+), 32 deletions(-) diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 2437e31..3f814fe 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -48,6 +48,32 @@ Moved review completion checks onto the JSON policy contract and routed monitor ### QA Notes - N/A +## 260413-09:26:29 - Tighten state policy compatibility helpers + +### Summary +Closed the remaining state-policy fallback hole and routed the legacy create validator through the shared verifier. + +### Changed +- Changed `validate-story-creation check` into a compatibility wrapper around the policy-backed create success verifier and updated the CLI docs to point new callers at `orchestrator-helper verify-step create`. +- Changed regression coverage to pin the legacy create wrapper against the shared verifier and pinned state snapshots. + +### Fixed +- Fixed new-format state docs with policy-era metadata but missing snapshot metadata to fail instead of slipping into legacy defaults. +- Fixed `state-summary` to avoid inferring `legacyPolicy: true` for malformed new-format state docs. 
+ +### Files +- `source/src/story_automator/commands/validate_story_creation.py` +- `source/src/story_automator/core/runtime_policy.py` +- `source/src/story_automator/commands/orchestrator.py` +- `source/tests/test_success_verifiers.py` +- `source/tests/test_runtime_policy.py` +- `source/tests/test_state_policy_metadata.py` +- `docs/cli-reference.md` +- `docs/changelog/260413.md` + +### QA Notes +- N/A + ## 260413-08:39:42 - Route create validation through shared verifier ### Summary diff --git a/docs/cli-reference.md b/docs/cli-reference.md index 6b83009..2cf3592 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -87,7 +87,7 @@ These support saved presets and generated agent plans. - `stop-hook` - `list-sessions` - `commit-story` -- `validate-story-creation` +- `validate-story-creation` (legacy compatibility wrapper; prefer `orchestrator-helper verify-step create`) ## Typical Patterns @@ -117,6 +117,12 @@ session="$("$scripts" tmux-wrapper spawn review 1 1.2 --agent claude --command " "$scripts" orchestrator-helper verify-step create 1.2 --state-file "$state_file" ``` +Legacy compatibility: + +```bash +"$scripts" validate-story-creation check 1.2 --state-file "$state_file" +``` + ## Read Next - [Agents And Monitoring](./agents-and-monitoring.md) diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 70db01f..d54fd4a 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -248,9 +248,10 @@ def _state_summary(args: list[str]) -> int: fields = parse_simple_frontmatter(read_text(args[0])) snapshot_file = str(fields.get("policySnapshotFile") or "").strip() snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() + policy_version = str(fields.get("policyVersion") or "").strip() legacy_policy = str(fields.get("legacyPolicy") or "").strip().lower() if legacy_policy not in {"true", "false"}: - legacy_policy = 
"true" if not snapshot_file and not snapshot_hash else "false" + legacy_policy = "true" if not snapshot_file and not snapshot_hash and not policy_version else "false" print_json( { "ok": True, diff --git a/source/src/story_automator/commands/validate_story_creation.py b/source/src/story_automator/commands/validate_story_creation.py index feda1b0..948a2cc 100644 --- a/source/src/story_automator/commands/validate_story_creation.py +++ b/source/src/story_automator/commands/validate_story_creation.py @@ -1,8 +1,12 @@ from __future__ import annotations +import json import os from pathlib import Path +from story_automator.core.runtime_policy import PolicyError +from story_automator.core.success_verifiers import create_story_artifact, resolve_success_contract + def cmd_validate_story_creation(args: list[str]) -> int: action = args[0] if args else "" @@ -16,6 +20,43 @@ def story_prefix(story_id: str) -> str: def count_files(story_id: str, folder: Path) -> int: return len(list(folder.glob(f"{story_prefix(story_id)}-*.md"))) + def check_usage() -> int: + print( + "Usage: validate-story-creation check [--state-file PATH] [--before N --after N]", + file=os.sys.stderr, + ) + return 1 + + def create_check_payload(story_id: str, state_file: str) -> dict[str, object]: + contract = resolve_success_contract(project_root, "create", state_file=state_file or None) + payload = create_story_artifact(project_root=project_root, story_key=story_id, contract=contract) + expected = int(payload.get("expectedMatches", 1) or 1) + actual = int(payload.get("actualMatches", 0) or 0) + valid = bool(payload.get("verified")) + if valid: + reason = "Exactly 1 story file created as expected" if expected == 1 else f"Exactly {expected} story files created as expected" + elif actual == 0: + reason = "No story file created - session may have failed" + elif actual > expected: + reason = f"RUNAWAY CREATION: {actual} files created instead of {expected}" + else: + reason = f"Unexpected story artifact count: 
{actual} files instead of {expected}" + response: dict[str, object] = { + "valid": valid, + "verified": valid, + "created_count": actual, + "expected": expected, + "prefix": story_prefix(story_id), + "action": "proceed" if valid else "escalate", + "reason": reason, + "source": payload.get("source", ""), + "pattern": payload.get("pattern", ""), + "matches": payload.get("matches", []), + } + if payload.get("story"): + response["story"] = payload["story"] + return response + if action == "count": if not rest: print("Usage: validate-story-creation count ", file=os.sys.stderr) @@ -29,45 +70,42 @@ def count_files(story_id: str, folder: Path) -> int: if action == "check": if not rest: - print("Usage: validate-story-creation check --before N --after N", file=os.sys.stderr) - return 1 + return check_usage() story_id = rest[0] - before = after = None + state_file = "" + before = after = "" idx = 1 while idx < len(rest): if rest[idx] == "--before" and idx + 1 < len(rest): - before = int(rest[idx + 1]) + before = rest[idx + 1] idx += 2 continue if rest[idx] == "--after" and idx + 1 < len(rest): - after = int(rest[idx + 1]) + after = rest[idx + 1] idx += 2 continue if rest[idx] == "--artifacts-dir" and idx + 1 < len(rest): artifacts_dir = Path(rest[idx + 1]) idx += 2 continue + if rest[idx] == "--state-file" and idx + 1 < len(rest): + state_file = rest[idx + 1] + idx += 2 + continue idx += 1 - if before is None or after is None: - print("Usage: validate-story-creation check --before N --after N", file=os.sys.stderr) + if artifacts_dir != Path(project_root) / "_bmad-output" / "implementation-artifacts": + print("validate-story-creation check no longer supports --artifacts-dir overrides; use count/list for custom folders", file=os.sys.stderr) return 1 - created = after - before - valid = created == 1 - reason = ( - "Exactly 1 story file created as expected" - if created == 1 - else "No story file created - session may have failed" - if created == 0 - else f"Story files decreased 
({created}) - unexpected deletion" - if created < 0 - else f"RUNAWAY CREATION: {created} files created instead of 1" - ) - action_name = "proceed" if valid else "escalate" - print( - f'{{"valid":{str(valid).lower()},"created_count":{created},"expected":1,' - f'"before":{before},"after":{after},"prefix":"{story_prefix(story_id)}",' - f'"action":"{action_name}","reason":"{reason}"}}' - ) + try: + payload = create_check_payload(story_id, state_file) + except (PolicyError, ValueError) as exc: + print(json.dumps({"valid": False, "verified": False, "action": "escalate", "reason": str(exc)}, separators=(",", ":"))) + return 1 + if before: + payload["before"] = before + if after: + payload["after"] = after + print(json.dumps(payload, separators=(",", ":"))) return 0 if action == "list": @@ -98,7 +136,7 @@ def count_files(story_id: str, folder: Path) -> int: print("", file=os.sys.stderr) print("Actions:", file=os.sys.stderr) print(" count - Count current story files", file=os.sys.stderr) - print(" check --before N --after N - Validate creation", file=os.sys.stderr) + print(" check [--state-file PATH] - Compatibility wrapper for create verifier", file=os.sys.stderr) print(" list - List matching files", file=os.sys.stderr) print(" prefix - Convert story ID to file prefix", file=os.sys.stderr) return 1 diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index d307537..1fcbae9 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ b/source/src/story_automator/core/runtime_policy.py @@ -116,11 +116,8 @@ def load_policy_for_state( ) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() fields = parse_simple_frontmatter(read_text(state_file)) - snapshot_file = str(fields.get("policySnapshotFile") or "").strip() - snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() - if snapshot_file or snapshot_hash: - if not snapshot_file or not snapshot_hash: - raise 
PolicyError("state policy metadata incomplete") + snapshot_file, snapshot_hash, legacy_mode = _state_policy_mode(fields) + if not legacy_mode: return load_policy_snapshot( snapshot_file, project_root=str(root), @@ -364,6 +361,22 @@ def _resolve_state_path(project_root: Path, path: Path) -> Path: return path if path.is_absolute() else project_root / path +def _state_policy_mode(fields: dict[str, Any]) -> tuple[str, str, bool]: + snapshot_file = str(fields.get("policySnapshotFile") or "").strip() + snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() + policy_version = str(fields.get("policyVersion") or "").strip() + legacy_policy = str(fields.get("legacyPolicy") or "").strip().lower() + if snapshot_file or snapshot_hash: + if not snapshot_file or not snapshot_hash: + raise PolicyError("state policy metadata incomplete") + return snapshot_file, snapshot_hash, False + if legacy_policy == "true": + return "", "", True + if legacy_policy == "false" or policy_version: + raise PolicyError("state policy snapshot missing") + return "", "", True + + def _expect_optional_dict(payload: dict[str, Any], key: str) -> dict[str, Any]: value = payload.get(key) if value is None: diff --git a/source/tests/test_runtime_policy.py b/source/tests/test_runtime_policy.py index 2548fc5..b1fa2b2 100644 --- a/source/tests/test_runtime_policy.py +++ b/source/tests/test_runtime_policy.py @@ -144,6 +144,15 @@ def test_marker_resume_with_missing_snapshot_raises_policy_error(self) -> None: with self.assertRaises(PolicyError): load_runtime_policy(str(self.project_root)) + def test_new_state_without_snapshot_metadata_is_rejected(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\npolicyVersion: 1\nlegacyPolicy: false\n---\n", + encoding="utf-8", + ) + with self.assertRaisesRegex(PolicyError, "state 
policy snapshot missing"): + load_runtime_policy(str(self.project_root), state_file=str(state_file)) + def _install_bundle(self) -> None: source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" diff --git a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index ceccd8f..286a16d 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -98,6 +98,33 @@ def test_validate_state_rejects_new_state_with_missing_snapshot(self) -> None: self.assertEqual(payload["structure"], "issues") self.assertTrue(any("policy snapshot missing" in issue for issue in payload["issues"])) + def test_validate_state_rejects_new_state_missing_snapshot_metadata(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\npolicyVersion: 1\nlegacyPolicy: false\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_state(["--state", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["structure"], "issues") + self.assertTrue(any("state policy snapshot missing" in issue for issue in payload["issues"])) + + def test_summary_does_not_infer_legacy_for_new_state_missing_snapshot_metadata(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\npolicyVersion: 1\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with redirect_stdout(stdout): + code = 
cmd_orchestrator_helper(["state-summary", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["legacyPolicy"], "false") + def test_escalate_uses_pinned_snapshot_when_state_file_provided(self) -> None: state_file = self._build_state() override_dir = self.project_root / "_bmad" / "bmm" diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index 4bee455..6fde4fd 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -11,6 +11,7 @@ from story_automator.commands.orchestrator import cmd_orchestrator_helper from story_automator.commands.state import cmd_build_state_doc from story_automator.commands.tmux import _verify_monitor_completion +from story_automator.commands.validate_story_creation import cmd_validate_story_creation from story_automator.core.review_verify import verify_code_review_completion from story_automator.core.runtime_policy import PolicyError from story_automator.core.success_verifiers import create_story_artifact, epic_complete, review_completion @@ -161,6 +162,30 @@ def test_verify_step_create_returns_json_on_verification_failure(self) -> None: self.assertFalse(payload["verified"]) self.assertEqual(payload["reason"], "unexpected_story_artifact_count") + def test_validate_story_creation_check_uses_shared_verifier(self) -> None: + self._write_story("1-2-example", status="draft") + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["valid"]) + self.assertTrue(payload["verified"]) + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["expected"], 1) + + def test_validate_story_creation_check_uses_pinned_snapshot(self) -> None: + self._write_story("1-2-example", status="draft") + state_file = self._build_state() 
+ self._write_override({"steps": {"create": {"success": {"config": {"expectedMatches": 2}}}}}) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--state-file", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["valid"]) + self.assertEqual(payload["expected"], 1) + def test_create_story_artifact_rejects_invalid_expected_matches(self) -> None: with self.assertRaises(PolicyError): create_story_artifact( From 54ffd3fb736325bebe28b37863d853205b7d9b9e Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 11:01:52 -0300 Subject: [PATCH 10/17] fix: harden parser runtime and validator compatibility --- docs/changelog/260413.md | 28 ++++ .../story_automator/commands/orchestrator.py | 15 +- .../commands/orchestrator_parse.py | 15 +- source/src/story_automator/commands/tmux.py | 10 +- .../commands/validate_story_creation.py | 158 +++++++++++++----- .../story_automator/core/runtime_policy.py | 34 +++- source/tests/test_orchestrator_parse.py | 17 ++ source/tests/test_runtime_policy.py | 14 ++ source/tests/test_state_policy_metadata.py | 22 ++- source/tests/test_success_verifiers.py | 145 ++++++++++++++++ 10 files changed, 390 insertions(+), 68 deletions(-) diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 3f814fe..6327858 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -298,3 +298,31 @@ Locked resume behavior to pinned snapshots for new state docs while keeping lega ### QA Notes - N/A + +## 260413-11:00:47 - Harden parser runtime and validator compatibility + +### Summary +Hardened the policy-backed parser/runtime wiring and closed the remaining legacy validator compatibility gaps. 
+ +### Changed +- Changed parser dispatch to read provider, model, and timeout settings from `runtime.parser` policy data instead of hard-coded CLI defaults. +- Changed `tmux-wrapper build-cmd` to reject unknown steps through policy contract lookup instead of a local step allowlist. + +### Fixed +- Fixed `validate-story-creation` compatibility mode to preserve `before`/`after` delta semantics, forward trailing flags like `--state-file`, preserve zero-expected contracts, and return one JSON envelope across malformed flag, positional, and policy-error paths. +- Fixed policy-state classification so contradictory `policyVersion` and `legacyPolicy: true` metadata fails closed and `state-summary` reports the same boundary as the runtime loader. + +### Files +- `source/src/story_automator/commands/orchestrator.py` +- `source/src/story_automator/commands/orchestrator_parse.py` +- `source/src/story_automator/commands/tmux.py` +- `source/src/story_automator/commands/validate_story_creation.py` +- `source/src/story_automator/core/runtime_policy.py` +- `source/tests/test_orchestrator_parse.py` +- `source/tests/test_runtime_policy.py` +- `source/tests/test_state_policy_metadata.py` +- `source/tests/test_success_verifiers.py` +- `docs/changelog/260413.md` + +### QA Notes +- N/A diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index d54fd4a..c8e115d 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -12,7 +12,13 @@ parse_frontmatter, parse_simple_frontmatter, ) -from story_automator.core.runtime_policy import PolicyError, crash_max_retries, load_runtime_policy, review_max_cycles +from story_automator.core.runtime_policy import ( + PolicyError, + crash_max_retries, + load_runtime_policy, + review_max_cycles, + summarize_state_policy_fields, +) from story_automator.core.review_verify import verify_code_review_completion from 
story_automator.core.success_verifiers import resolve_success_contract, run_success_verifier from story_automator.core.sprint import sprint_status_epic, sprint_status_get @@ -246,12 +252,7 @@ def _state_summary(args: list[str]) -> int: print_json({"ok": False, "error": "file_not_found"}) return 1 fields = parse_simple_frontmatter(read_text(args[0])) - snapshot_file = str(fields.get("policySnapshotFile") or "").strip() - snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() - policy_version = str(fields.get("policyVersion") or "").strip() - legacy_policy = str(fields.get("legacyPolicy") or "").strip().lower() - if legacy_policy not in {"true", "false"}: - legacy_policy = "true" if not snapshot_file and not snapshot_hash and not policy_version else "false" + snapshot_file, snapshot_hash, policy_version, legacy_policy = summarize_state_policy_fields(fields) print_json( { "ok": True, diff --git a/source/src/story_automator/commands/orchestrator_parse.py b/source/src/story_automator/commands/orchestrator_parse.py index c809cfd..3965f79 100644 --- a/source/src/story_automator/commands/orchestrator_parse.py +++ b/source/src/story_automator/commands/orchestrator_parse.py @@ -2,13 +2,10 @@ import json -from story_automator.core.runtime_policy import PolicyError, load_runtime_policy, step_contract +from story_automator.core.runtime_policy import PolicyError, load_runtime_policy, parser_runtime_config, step_contract from story_automator.core.utils import COMMAND_TIMEOUT_EXIT, extract_json_line, print_json, read_text, run_cmd, trim_lines -PARSE_OUTPUT_TIMEOUT = 120 - - def parse_output_action(args: list[str]) -> int: if len(args) < 2: print('{"status":"error","reason":"output file not found or empty"}') @@ -32,20 +29,22 @@ def parse_output_action(args: list[str]) -> int: return 1 lines = trim_lines(content)[:150] try: - contract = step_contract(load_runtime_policy(state_file=state_file), step) + policy = load_runtime_policy(state_file=state_file) + contract = 
step_contract(policy, step) parse_contract = _load_parse_contract(contract) + parser_cfg = parser_runtime_config(policy) except (FileNotFoundError, json.JSONDecodeError, ValueError, PolicyError): print_json({"status": "error", "reason": "parse_contract_invalid"}) return 1 prompt = _build_parse_prompt(contract, parse_contract, "\n".join(lines)) result = run_cmd( - "claude", + str(parser_cfg["provider"]), "-p", "--model", - "haiku", + str(parser_cfg["model"]), prompt, env={"STORY_AUTOMATOR_CHILD": "true", "CLAUDECODE": ""}, - timeout=PARSE_OUTPUT_TIMEOUT, + timeout=int(parser_cfg["timeoutSeconds"]), ) if result.exit_code != 0: reason = "sub-agent call timed out" if result.exit_code == COMMAND_TIMEOUT_EXIT else "sub-agent call failed" diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py index c6ddbeb..6b0bb70 100644 --- a/source/src/story_automator/commands/tmux.py +++ b/source/src/story_automator/commands/tmux.py @@ -190,10 +190,12 @@ def _build_cmd(args: list[str]) -> int: agent = agent or agent_type() story_prefix = story_id.replace(".", "-") root = get_project_root() - if step not in {"create", "dev", "auto", "review", "retro"}: - print(f"Unknown step type: {step}", file=__import__("sys").stderr) + try: + policy = load_runtime_policy(root, state_file=state_file) + contract = step_contract(policy, step) + except (FileNotFoundError, PolicyError) as exc: + print(str(exc), file=__import__("sys").stderr) return 1 - policy = load_runtime_policy(root, state_file=state_file) ai_command = os.environ.get("AI_COMMAND") if ai_command and not os.environ.get("AI_AGENT"): cli = ai_command @@ -201,7 +203,7 @@ def _build_cmd(args: list[str]) -> int: cli = agent_cli(agent) else: cli = "codex exec" - prompt = _render_step_prompt(step_contract(policy, step), story_id, story_prefix, extra) + prompt = _render_step_prompt(contract, story_id, story_prefix, extra) escaped = prompt.replace("\\", "\\\\").replace('"', '\\"') if agent == 
"codex" and not ai_command: codex_home = f"/tmp/sa-codex-home-{project_hash(root)}" diff --git a/source/src/story_automator/commands/validate_story_creation.py b/source/src/story_automator/commands/validate_story_creation.py index 948a2cc..746b019 100644 --- a/source/src/story_automator/commands/validate_story_creation.py +++ b/source/src/story_automator/commands/validate_story_creation.py @@ -12,7 +12,8 @@ def cmd_validate_story_creation(args: list[str]) -> int: action = args[0] if args else "" rest = args[1:] if args else [] project_root = os.environ.get("PROJECT_ROOT", os.getcwd()) - artifacts_dir = Path(project_root) / "_bmad-output" / "implementation-artifacts" + default_artifacts_dir = Path(project_root) / "_bmad-output" / "implementation-artifacts" + artifacts_dir = default_artifacts_dir def story_prefix(story_id: str) -> str: return story_id.replace(".", "-") @@ -20,43 +21,84 @@ def story_prefix(story_id: str) -> str: def count_files(story_id: str, folder: Path) -> int: return len(list(folder.glob(f"{story_prefix(story_id)}-*.md"))) - def check_usage() -> int: - print( - "Usage: validate-story-creation check [--state-file PATH] [--before N --after N]", - file=os.sys.stderr, - ) - return 1 - def create_check_payload(story_id: str, state_file: str) -> dict[str, object]: contract = resolve_success_contract(project_root, "create", state_file=state_file or None) - payload = create_story_artifact(project_root=project_root, story_key=story_id, contract=contract) - expected = int(payload.get("expectedMatches", 1) or 1) - actual = int(payload.get("actualMatches", 0) or 0) - valid = bool(payload.get("verified")) - if valid: - reason = "Exactly 1 story file created as expected" if expected == 1 else f"Exactly {expected} story files created as expected" - elif actual == 0: - reason = "No story file created - session may have failed" - elif actual > expected: - reason = f"RUNAWAY CREATION: {actual} files created instead of {expected}" - else: - reason = f"Unexpected 
story artifact count: {actual} files instead of {expected}" + return create_story_artifact(project_root=project_root, story_key=story_id, contract=contract) + + def expected_matches(payload: dict[str, object] | None) -> int: + if payload is None: + return 1 + return int(payload.get("expectedMatches", 1)) + + def count_reason(created: int, expected: int) -> str: + if created == expected: + return "Exactly 1 story file created as expected" if expected == 1 else f"Exactly {expected} story files created as expected" + if created == 0: + return "No story file created - session may have failed" + if created < 0: + return f"Story files decreased ({created}) - unexpected deletion" + if created > expected: + return f"RUNAWAY CREATION: {created} files created instead of {expected}" + return f"Unexpected story artifact count: {created} files instead of {expected}" + + def build_check_response( + story_id: str, + payload: dict[str, object] | None, + *, + before_count: int | None = None, + after_count: int | None = None, + valid_override: bool | None = None, + reason_override: str | None = None, + ) -> dict[str, object]: + expected = expected_matches(payload) + created = int(payload.get("actualMatches", 0)) if payload is not None else 0 + valid = bool(payload.get("verified")) if payload is not None else False + reason = count_reason(created, expected) + if before_count is not None and after_count is not None: + created = after_count - before_count + valid = created == expected + reason = count_reason(created, expected) + if valid_override is not None: + valid = valid_override + if reason_override is not None: + reason = reason_override response: dict[str, object] = { "valid": valid, "verified": valid, - "created_count": actual, + "created_count": created, "expected": expected, "prefix": story_prefix(story_id), "action": "proceed" if valid else "escalate", "reason": reason, - "source": payload.get("source", ""), - "pattern": payload.get("pattern", ""), - "matches": 
payload.get("matches", []), + "source": payload.get("source", "") if payload is not None else "", + "pattern": payload.get("pattern", "") if payload is not None else "", + "matches": payload.get("matches", []) if payload is not None else [], } - if payload.get("story"): + if before_count is not None and after_count is not None: + response["before"] = before_count + response["after"] = after_count + if payload is not None and payload.get("story"): response["story"] = payload["story"] return response + def print_check_error( + story_id: str, + *, + reason: str, + before_count: int | None = None, + after_count: int | None = None, + ) -> int: + response = build_check_response( + story_id, + None, + before_count=before_count, + after_count=after_count, + valid_override=False, + reason_override=reason, + ) + print(json.dumps(response, separators=(",", ":"))) + return 1 + if action == "count": if not rest: print("Usage: validate-story-creation count ", file=os.sys.stderr) @@ -70,42 +112,62 @@ def create_check_payload(story_id: str, state_file: str) -> dict[str, object]: if action == "check": if not rest: - return check_usage() + return print_check_error("", reason="story_id required") story_id = rest[0] state_file = "" - before = after = "" + before_value = after_value = None + before_seen = after_seen = False idx = 1 while idx < len(rest): - if rest[idx] == "--before" and idx + 1 < len(rest): - before = rest[idx + 1] - idx += 2 + if rest[idx] == "--before": + before_seen = True + if idx + 1 < len(rest): + before_value = rest[idx + 1] + idx += 2 + else: + return print_check_error(story_id, reason="--before requires a value") continue - if rest[idx] == "--after" and idx + 1 < len(rest): - after = rest[idx + 1] - idx += 2 + if rest[idx] == "--after": + after_seen = True + if idx + 1 < len(rest): + after_value = rest[idx + 1] + idx += 2 + else: + return print_check_error(story_id, reason="--after requires a value") continue if rest[idx] == "--artifacts-dir" and idx + 1 < 
len(rest): artifacts_dir = Path(rest[idx + 1]) idx += 2 continue + if rest[idx] == "--artifacts-dir": + return print_check_error(story_id, reason="--artifacts-dir requires a value") if rest[idx] == "--state-file" and idx + 1 < len(rest): state_file = rest[idx + 1] idx += 2 continue - idx += 1 - if artifacts_dir != Path(project_root) / "_bmad-output" / "implementation-artifacts": - print("validate-story-creation check no longer supports --artifacts-dir overrides; use count/list for custom folders", file=os.sys.stderr) - return 1 + if rest[idx] == "--state-file": + return print_check_error(story_id, reason="--state-file requires a value") + return print_check_error(story_id, reason=f"unsupported check argument: {rest[idx]}") + if before_seen != after_seen: + return print_check_error(story_id, reason="both --before and --after are required together") + before_count = after_count = None + if before_seen and after_seen: + try: + before_count = int(before_value or "") + after_count = int(after_value or "") + except ValueError: + return print_check_error(story_id, reason="before/after must be integers") + if artifacts_dir != default_artifacts_dir and not (before_seen and after_seen): + return print_check_error( + story_id, + reason="validate-story-creation check no longer supports --artifacts-dir overrides; use count/list for custom folders", + ) try: payload = create_check_payload(story_id, state_file) + response = build_check_response(story_id, payload, before_count=before_count, after_count=after_count) except (PolicyError, ValueError) as exc: - print(json.dumps({"valid": False, "verified": False, "action": "escalate", "reason": str(exc)}, separators=(",", ":"))) - return 1 - if before: - payload["before"] = before - if after: - payload["after"] = after - print(json.dumps(payload, separators=(",", ":"))) + return print_check_error(story_id, reason=str(exc), before_count=before_count, after_count=after_count) + print(json.dumps(response, separators=(",", ":"))) return 0 
if action == "list": @@ -129,8 +191,12 @@ def create_check_payload(story_id: str, state_file: str) -> dict[str, object]: print(story_prefix(rest[0])) return 0 - if action and len(rest) >= 2 and rest[0].isdigit() and rest[1].isdigit(): - return cmd_validate_story_creation(["check", action, "--before", rest[0], "--after", rest[1]]) + if action and action not in {"count", "check", "list", "prefix"}: + if not rest: + return print_check_error(action, reason="both --before and --after are required together") + if len(rest) == 1: + return cmd_validate_story_creation(["check", action, "--before", rest[0]]) + return cmd_validate_story_creation(["check", action, "--before", rest[0], "--after", rest[1], *rest[2:]]) print("Usage: validate-story-creation [args]", file=os.sys.stderr) print("", file=os.sys.stderr) diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index 1fcbae9..73ec450 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ b/source/src/story_automator/core/runtime_policy.py @@ -12,6 +12,7 @@ VALID_STEP_NAMES = {"create", "dev", "auto", "review", "retro"} VALID_VERIFIERS = {"create_story_artifact", "session_exit", "review_completion", "epic_complete"} VALID_ASSET_NAMES = {"skill", "workflow", "instructions", "checklist", "template"} +VALID_PARSER_PROVIDERS = {"claude"} def load_bundled_policy(project_root: str | None = None, *, resolve_assets: bool = True) -> dict[str, Any]: @@ -127,6 +128,17 @@ def load_policy_for_state( return load_bundled_policy(str(root), resolve_assets=resolve_assets) +def summarize_state_policy_fields(fields: dict[str, Any]) -> tuple[str, str, str, str]: + snapshot_file = str(fields.get("policySnapshotFile") or "").strip() + snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() + policy_version = str(fields.get("policyVersion") or "").strip() + try: + _, _, legacy_mode = _state_policy_mode(fields) + except PolicyError: + legacy_mode = False + return 
snapshot_file, snapshot_hash, policy_version, "true" if legacy_mode else "false" + + def resolve_policy_state_file(project_root: str | Path | None = None, state_file: str | Path | None = None) -> tuple[str, str]: root = Path(project_root or get_project_root()).resolve() explicit = Path(state_file).expanduser() if state_file else None @@ -164,6 +176,21 @@ def crash_max_retries(policy: dict[str, Any]) -> int: return int(crash.get("maxRetries", 2)) +def parser_runtime_config(policy: dict[str, Any]) -> dict[str, object]: + runtime = _expect_optional_dict(policy, "runtime") + parser = _expect_optional_nested_dict(runtime, "parser", "runtime") + provider = str(parser.get("provider") or "").strip() + model = str(parser.get("model") or "").strip() + timeout = parser.get("timeoutSeconds") + if provider not in VALID_PARSER_PROVIDERS: + raise PolicyError(f"runtime.parser.provider must be one of: {', '.join(sorted(VALID_PARSER_PROVIDERS))}") + if not model: + raise PolicyError("runtime.parser.model must be a string") + if isinstance(timeout, bool) or not isinstance(timeout, int) or timeout <= 0: + raise PolicyError("runtime.parser.timeoutSeconds must be a positive integer") + return {"provider": provider, "model": model, "timeoutSeconds": timeout} + + def bundled_skill_root(project_root: str | Path | None = None) -> Path: root = Path(project_root or get_project_root()).resolve() installed = root / ".claude" / "skills" / "bmad-story-automator" @@ -213,6 +240,9 @@ def _validate_policy_shape(policy: dict[str, Any]) -> None: snapshot = _expect_optional_dict(policy, "snapshot") if "snapshot" in policy and "relativeDir" in snapshot and not isinstance(snapshot.get("relativeDir"), str): raise PolicyError("snapshot.relativeDir must be a string") + runtime = _expect_optional_dict(policy, "runtime") + _expect_optional_nested_dict(runtime, "merge", "runtime") + parser_runtime_config(policy) workflow = _expect_optional_dict(policy, "workflow") repeat = 
_expect_optional_nested_dict(workflow, "repeat", "workflow") review = _expect_optional_nested_dict(repeat, "review", "workflow.repeat") @@ -370,10 +400,10 @@ def _state_policy_mode(fields: dict[str, Any]) -> tuple[str, str, bool]: if not snapshot_file or not snapshot_hash: raise PolicyError("state policy metadata incomplete") return snapshot_file, snapshot_hash, False - if legacy_policy == "true": - return "", "", True if legacy_policy == "false" or policy_version: raise PolicyError("state policy snapshot missing") + if legacy_policy == "true": + return "", "", True return "", "", True diff --git a/source/tests/test_orchestrator_parse.py b/source/tests/test_orchestrator_parse.py index 99abaf5..110b004 100644 --- a/source/tests/test_orchestrator_parse.py +++ b/source/tests/test_orchestrator_parse.py @@ -92,6 +92,23 @@ def test_state_file_keeps_pinned_parse_contract_after_override_changes(self) -> self.assertEqual(code, 0) self.assertTrue(json.loads(stdout.getvalue())["story_created"]) + def test_parser_runtime_uses_policy_settings(self) -> None: + override_dir = self.project_root / "_bmad" / "bmm" + override_dir.mkdir(parents=True, exist_ok=True) + (override_dir / "story-automator.policy.json").write_text( + json.dumps({"runtime": {"parser": {"provider": "claude", "model": "sonnet", "timeoutSeconds": 33}}}), + encoding="utf-8", + ) + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), patch( + "story_automator.commands.orchestrator_parse.run_cmd", + return_value=CommandResult('{"status":"SUCCESS","story_created":true,"story_file":"x","summary":"ok","next_action":"proceed"}', 0), + ) as mock_run, redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "create"]) + self.assertEqual(code, 0) + self.assertEqual(mock_run.call_args.args[:4], ("claude", "-p", "--model", "sonnet")) + self.assertEqual(mock_run.call_args.kwargs["timeout"], 33) + def _install_bundle(self) -> None: source_skill = REPO_ROOT / 
"payload" / ".claude" / "skills" / "bmad-story-automator" source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" diff --git a/source/tests/test_runtime_policy.py b/source/tests/test_runtime_policy.py index b1fa2b2..7c080ca 100644 --- a/source/tests/test_runtime_policy.py +++ b/source/tests/test_runtime_policy.py @@ -99,6 +99,11 @@ def test_invalid_nested_workflow_types_rejected(self) -> None: with self.assertRaises(PolicyError): load_effective_policy(str(self.project_root)) + def test_invalid_parser_runtime_rejected(self) -> None: + self._write_override({"runtime": {"parser": {"provider": "bad", "model": "haiku", "timeoutSeconds": 120}}}) + with self.assertRaisesRegex(PolicyError, "runtime.parser.provider"): + load_effective_policy(str(self.project_root)) + def test_snapshot_reload_re_resolves_paths_for_new_root(self) -> None: snapshot = snapshot_effective_policy(str(self.project_root)) copied_root = Path(self.tmp.name) / "copied" @@ -153,6 +158,15 @@ def test_new_state_without_snapshot_metadata_is_rejected(self) -> None: with self.assertRaisesRegex(PolicyError, "state policy snapshot missing"): load_runtime_policy(str(self.project_root), state_file=str(state_file)) + def test_contradictory_legacy_flag_with_policy_version_is_rejected(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\npolicyVersion: 1\nlegacyPolicy: true\n---\n", + encoding="utf-8", + ) + with self.assertRaisesRegex(PolicyError, "state policy snapshot missing"): + load_runtime_policy(str(self.project_root), state_file=str(state_file)) + def _install_bundle(self) -> None: source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" diff --git 
a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index 286a16d..c43ed17 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -5,7 +5,7 @@ import shutil import tempfile import unittest -from contextlib import redirect_stdout +from contextlib import redirect_stderr, redirect_stdout from pathlib import Path from story_automator.commands.orchestrator import cmd_orchestrator_helper @@ -125,6 +125,19 @@ def test_summary_does_not_infer_legacy_for_new_state_missing_snapshot_metadata(s payload = json.loads(stdout.getvalue()) self.assertEqual(payload["legacyPolicy"], "false") + def test_summary_does_not_mark_contradictory_legacy_flag_as_legacy(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\nepic: \"1\"\nepicName: \"Epic 1\"\nstoryRange: [\"1.1\"]\nstatus: \"READY\"\nlastUpdated: \"2026-04-13T00:00:00Z\"\naiCommand: \"claude\"\npolicyVersion: 1\nlegacyPolicy: true\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with redirect_stdout(stdout): + code = cmd_orchestrator_helper(["state-summary", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["legacyPolicy"], "false") + def test_escalate_uses_pinned_snapshot_when_state_file_provided(self) -> None: state_file = self._build_state() override_dir = self.project_root / "_bmad" / "bmm" @@ -148,6 +161,13 @@ def test_build_cmd_does_not_treat_state_file_flag_as_prompt_text(self) -> None: rendered = stdout.getvalue() self.assertNotIn("--state-file", rendered) + def test_build_cmd_rejects_unknown_step_via_policy(self) -> None: + stderr = io.StringIO() + with patch_env(self.project_root), redirect_stderr(stderr): + code = _build_cmd(["ship", "1.1"]) + self.assertEqual(code, 1) + self.assertIn("unknown step: ship", stderr.getvalue()) + def _build_state(self) -> Path: stdout = io.StringIO() template = self.project_root 
/ ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index 6fde4fd..95a1510 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -186,6 +186,151 @@ def test_validate_story_creation_check_uses_pinned_snapshot(self) -> None: self.assertTrue(payload["valid"]) self.assertEqual(payload["expected"], 1) + def test_validate_story_creation_check_uses_before_after_delta(self) -> None: + self._write_story("1-2-existing", status="draft") + self._write_story("1-2-new", status="draft") + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--before", "1", "--after", "2"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["valid"]) + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["before"], 1) + self.assertEqual(payload["after"], 2) + + def test_validate_story_creation_positional_mode_forwards_state_file(self) -> None: + self._write_story("1-2-example", status="draft") + state_file = self._build_state() + self._write_override({"steps": {"create": {"success": {"config": {"expectedMatches": 2}}}}}) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2", "0", "1", "--state-file", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["valid"]) + self.assertEqual(payload["expected"], 1) + self.assertEqual(payload["created_count"], 1) + + def test_validate_story_creation_check_returns_compat_schema_on_policy_error(self) -> None: + self._write_override({"steps": {"create": {"success": {"config": {"expectedMatches": "abc"}}}}}) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = 
cmd_validate_story_creation(["check", "1.2"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["expected"], 1) + self.assertEqual(payload["created_count"], 0) + self.assertEqual(payload["prefix"], "1-2") + self.assertEqual(payload["source"], "") + self.assertEqual(payload["pattern"], "") + self.assertEqual(payload["matches"], []) + + def test_validate_story_creation_check_returns_compat_schema_on_bad_counts(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--before", "x", "--after", "1"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "before/after must be integers") + self.assertEqual(payload["expected"], 1) + self.assertEqual(payload["created_count"], 0) + + def test_validate_story_creation_check_returns_compat_schema_on_partial_counts(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--before", "1"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "both --before and --after are required together") + self.assertEqual(payload["prefix"], "1-2") + + def test_validate_story_creation_check_returns_compat_schema_on_trailing_before_flag(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--before"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "--before requires a value") + + def test_validate_story_creation_check_returns_compat_schema_on_empty_counts(self) -> None: + stdout = 
io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--before", "", "--after", ""]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "before/after must be integers") + + def test_validate_story_creation_check_returns_compat_schema_on_unsupported_artifacts_dir(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--artifacts-dir", str(self.project_root / "tmp")]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertIn("no longer supports --artifacts-dir overrides", payload["reason"]) + + def test_validate_story_creation_positional_mode_returns_compat_schema_on_bad_counts(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2", "x", "1"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "before/after must be integers") + + def test_validate_story_creation_positional_mode_returns_compat_schema_on_missing_after(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2", "0"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "both --before and --after are required together") + + def test_validate_story_creation_positional_mode_returns_compat_schema_on_missing_counts(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2"]) + self.assertEqual(code, 1) + payload = 
json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "both --before and --after are required together") + + def test_validate_story_creation_positional_mode_returns_compat_schema_on_extra_token(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2", "0", "1", "junk"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "unsupported check argument: junk") + + def test_validate_story_creation_positional_mode_returns_compat_schema_on_incomplete_state_file(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2", "0", "1", "--state-file"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "--state-file requires a value") + + def test_validate_story_creation_check_preserves_zero_expected_matches(self) -> None: + self._write_override({"steps": {"create": {"success": {"config": {"expectedMatches": 0}}}}}) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["valid"]) + self.assertEqual(payload["expected"], 0) + self.assertEqual(payload["created_count"], 0) + def test_create_story_artifact_rejects_invalid_expected_matches(self) -> None: with self.assertRaises(PolicyError): create_story_artifact( From 2dbf6f3e5bfb56d50afd87bea8044ecae7f8e36d Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:55:18 -0300 Subject: [PATCH 11/17] fix: close state-summary and validator gaps --- docs/changelog/260413.md | 22 +++++ 
.../story_automator/commands/orchestrator.py | 36 +++---- .../commands/validate_story_creation.py | 27 ++++-- .../story_automator/core/runtime_policy.py | 21 +++-- source/tests/test_runtime_policy.py | 10 ++ source/tests/test_state_policy_metadata.py | 93 ++++++++++++++++++- source/tests/test_success_verifiers.py | 86 +++++++++++++++++ 7 files changed, 262 insertions(+), 33 deletions(-) diff --git a/docs/changelog/260413.md b/docs/changelog/260413.md index 6327858..99d17cc 100644 --- a/docs/changelog/260413.md +++ b/docs/changelog/260413.md @@ -326,3 +326,25 @@ Hardened the policy-backed parser/runtime wiring and closed the remaining legacy ### QA Notes - N/A + +## 260413-21:53:12 - Close state-summary and validator compatibility gaps + +### Summary +Fixed remaining review-loop gaps in policy snapshot reporting and legacy create-validator error payloads. + +### Changed +- Changed `state-summary` to validate snapshot metadata against the runtime project root and surface `policyError` when state policy metadata is contradictory, incomplete, missing, or hash-mismatched. +- Changed `validate-story-creation check` and the positional compatibility shim to preserve parsed delta metadata across malformed trailing argument paths and reject all `--artifacts-dir` overrides consistently. +- Changed regression coverage to pin the new state-summary error reporting and compatibility-payload branches found during the clean review loop. 
+ +### Files +- `source/src/story_automator/core/runtime_policy.py` +- `source/src/story_automator/commands/orchestrator.py` +- `source/src/story_automator/commands/validate_story_creation.py` +- `source/tests/test_runtime_policy.py` +- `source/tests/test_state_policy_metadata.py` +- `source/tests/test_success_verifiers.py` +- `docs/changelog/260413.md` + +### QA Notes +- N/A diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index c8e115d..986a317 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -252,23 +252,27 @@ def _state_summary(args: list[str]) -> int: print_json({"ok": False, "error": "file_not_found"}) return 1 fields = parse_simple_frontmatter(read_text(args[0])) - snapshot_file, snapshot_hash, policy_version, legacy_policy = summarize_state_policy_fields(fields) - print_json( - { - "ok": True, - "epic": str(fields.get("epic") or ""), - "epicName": str(fields.get("epicName") or ""), - "currentStory": str(fields.get("currentStory") or ""), - "currentStep": str(fields.get("currentStep") or ""), - "status": str(fields.get("status") or ""), - "lastUpdated": str(fields.get("lastUpdated") or ""), - "policyVersion": str(fields.get("policyVersion") or ""), - "policySnapshotFile": snapshot_file, - "policySnapshotHash": snapshot_hash, - "legacyPolicy": legacy_policy, - "lastAction": extract_last_action(args[0]), - } + snapshot_file, snapshot_hash, policy_version, legacy_policy, policy_error = summarize_state_policy_fields( + fields, + project_root=get_project_root(), ) + payload = { + "ok": True, + "epic": str(fields.get("epic") or ""), + "epicName": str(fields.get("epicName") or ""), + "currentStory": str(fields.get("currentStory") or ""), + "currentStep": str(fields.get("currentStep") or ""), + "status": str(fields.get("status") or ""), + "lastUpdated": str(fields.get("lastUpdated") or ""), + "policyVersion": 
policy_version, + "policySnapshotFile": snapshot_file, + "policySnapshotHash": snapshot_hash, + "legacyPolicy": legacy_policy, + "lastAction": extract_last_action(args[0]), + } + if policy_error: + payload["policyError"] = policy_error + print_json(payload) return 0 diff --git a/source/src/story_automator/commands/validate_story_creation.py b/source/src/story_automator/commands/validate_story_creation.py index 746b019..cb12f5f 100644 --- a/source/src/story_automator/commands/validate_story_creation.py +++ b/source/src/story_automator/commands/validate_story_creation.py @@ -99,6 +99,14 @@ def print_check_error( print(json.dumps(response, separators=(",", ":"))) return 1 + def parsed_delta_counts(before_value: str | None, after_value: str | None) -> tuple[int | None, int | None]: + if before_value is None or after_value is None: + return None, None + try: + return int(before_value or ""), int(after_value or "") + except ValueError: + return None, None + if action == "count": if not rest: print("Usage: validate-story-creation count ", file=os.sys.stderr) @@ -125,7 +133,8 @@ def print_check_error( before_value = rest[idx + 1] idx += 2 else: - return print_check_error(story_id, reason="--before requires a value") + before_count, after_count = parsed_delta_counts(before_value, after_value) + return print_check_error(story_id, reason="--before requires a value", before_count=before_count, after_count=after_count) continue if rest[idx] == "--after": after_seen = True @@ -133,21 +142,25 @@ def print_check_error( after_value = rest[idx + 1] idx += 2 else: - return print_check_error(story_id, reason="--after requires a value") + before_count, after_count = parsed_delta_counts(before_value, after_value) + return print_check_error(story_id, reason="--after requires a value", before_count=before_count, after_count=after_count) continue if rest[idx] == "--artifacts-dir" and idx + 1 < len(rest): artifacts_dir = Path(rest[idx + 1]) idx += 2 continue if rest[idx] == 
"--artifacts-dir": - return print_check_error(story_id, reason="--artifacts-dir requires a value") + before_count, after_count = parsed_delta_counts(before_value, after_value) + return print_check_error(story_id, reason="--artifacts-dir requires a value", before_count=before_count, after_count=after_count) if rest[idx] == "--state-file" and idx + 1 < len(rest): state_file = rest[idx + 1] idx += 2 continue if rest[idx] == "--state-file": - return print_check_error(story_id, reason="--state-file requires a value") - return print_check_error(story_id, reason=f"unsupported check argument: {rest[idx]}") + before_count, after_count = parsed_delta_counts(before_value, after_value) + return print_check_error(story_id, reason="--state-file requires a value", before_count=before_count, after_count=after_count) + before_count, after_count = parsed_delta_counts(before_value, after_value) + return print_check_error(story_id, reason=f"unsupported check argument: {rest[idx]}", before_count=before_count, after_count=after_count) if before_seen != after_seen: return print_check_error(story_id, reason="both --before and --after are required together") before_count = after_count = None @@ -157,10 +170,12 @@ def print_check_error( after_count = int(after_value or "") except ValueError: return print_check_error(story_id, reason="before/after must be integers") - if artifacts_dir != default_artifacts_dir and not (before_seen and after_seen): + if artifacts_dir != default_artifacts_dir: return print_check_error( story_id, reason="validate-story-creation check no longer supports --artifacts-dir overrides; use count/list for custom folders", + before_count=before_count, + after_count=after_count, ) try: payload = create_check_payload(story_id, state_file) diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index 73ec450..d440ce6 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ 
b/source/src/story_automator/core/runtime_policy.py @@ -128,15 +128,20 @@ def load_policy_for_state( return load_bundled_policy(str(root), resolve_assets=resolve_assets) -def summarize_state_policy_fields(fields: dict[str, Any]) -> tuple[str, str, str, str]: - snapshot_file = str(fields.get("policySnapshotFile") or "").strip() - snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() +def summarize_state_policy_fields(fields: dict[str, Any], *, project_root: str | Path | None = None) -> tuple[str, str, str, str, str]: policy_version = str(fields.get("policyVersion") or "").strip() try: - _, _, legacy_mode = _state_policy_mode(fields) - except PolicyError: - legacy_mode = False - return snapshot_file, snapshot_hash, policy_version, "true" if legacy_mode else "false" + snapshot_file, snapshot_hash, legacy_mode = _state_policy_mode(fields) + if snapshot_file and snapshot_hash: + load_policy_snapshot( + snapshot_file, + project_root=str(Path(project_root or get_project_root()).resolve()), + expected_hash=snapshot_hash, + resolve_assets=False, + ) + except PolicyError as exc: + return "", "", policy_version, "false", str(exc) + return snapshot_file, snapshot_hash, policy_version, "true" if legacy_mode else "false", "" def resolve_policy_state_file(project_root: str | Path | None = None, state_file: str | Path | None = None) -> tuple[str, str]: @@ -399,6 +404,8 @@ def _state_policy_mode(fields: dict[str, Any]) -> tuple[str, str, bool]: if snapshot_file or snapshot_hash: if not snapshot_file or not snapshot_hash: raise PolicyError("state policy metadata incomplete") + if legacy_policy == "true": + raise PolicyError("state policy metadata contradictory") return snapshot_file, snapshot_hash, False if legacy_policy == "false" or policy_version: raise PolicyError("state policy snapshot missing") diff --git a/source/tests/test_runtime_policy.py b/source/tests/test_runtime_policy.py index 7c080ca..f5bf559 100644 --- a/source/tests/test_runtime_policy.py +++ 
b/source/tests/test_runtime_policy.py @@ -167,6 +167,16 @@ def test_contradictory_legacy_flag_with_policy_version_is_rejected(self) -> None with self.assertRaisesRegex(PolicyError, "state policy snapshot missing"): load_runtime_policy(str(self.project_root), state_file=str(state_file)) + def test_snapshot_metadata_with_legacy_flag_is_rejected(self) -> None: + snapshot = snapshot_effective_policy(str(self.project_root)) + state_file = self.project_root / "orchestration.md" + state_file.write_text( + f"---\npolicySnapshotFile: \"{snapshot['policySnapshotFile']}\"\npolicySnapshotHash: \"{snapshot['policySnapshotHash']}\"\nlegacyPolicy: true\n---\n", + encoding="utf-8", + ) + with self.assertRaisesRegex(PolicyError, "state policy metadata contradictory"): + load_runtime_policy(str(self.project_root), state_file=str(state_file)) + def _install_bundle(self) -> None: source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" diff --git a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index c43ed17..7af8365 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -50,7 +50,7 @@ def test_state_doc_writes_policy_metadata(self) -> None: def test_summary_surfaces_policy_metadata(self) -> None: state_file = self._build_state() stdout = io.StringIO() - with redirect_stdout(stdout): + with patch_env(self.project_root), redirect_stdout(stdout): code = cmd_orchestrator_helper(["state-summary", str(state_file)]) self.assertEqual(code, 0) payload = json.loads(stdout.getvalue()) @@ -76,7 +76,7 @@ def test_summary_infers_legacy_policy_for_old_state(self) -> None: encoding="utf-8", ) stdout = io.StringIO() - with redirect_stdout(stdout): + with patch_env(self.project_root), redirect_stdout(stdout): code = cmd_orchestrator_helper(["state-summary", str(legacy)]) 
self.assertEqual(code, 0) payload = json.loads(stdout.getvalue()) @@ -119,7 +119,7 @@ def test_summary_does_not_infer_legacy_for_new_state_missing_snapshot_metadata(s encoding="utf-8", ) stdout = io.StringIO() - with redirect_stdout(stdout): + with patch_env(self.project_root), redirect_stdout(stdout): code = cmd_orchestrator_helper(["state-summary", str(state_file)]) self.assertEqual(code, 0) payload = json.loads(stdout.getvalue()) @@ -132,11 +132,96 @@ def test_summary_does_not_mark_contradictory_legacy_flag_as_legacy(self) -> None encoding="utf-8", ) stdout = io.StringIO() - with redirect_stdout(stdout): + with patch_env(self.project_root), redirect_stdout(stdout): code = cmd_orchestrator_helper(["state-summary", str(state_file)]) self.assertEqual(code, 0) payload = json.loads(stdout.getvalue()) self.assertEqual(payload["legacyPolicy"], "false") + self.assertEqual(payload["policyError"], "state policy snapshot missing") + + def test_summary_clears_contradictory_snapshot_metadata(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\npolicySnapshotFile: \"snap.json\"\npolicySnapshotHash: \"deadbeef\"\nlegacyPolicy: true\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["state-summary", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["policySnapshotFile"], "") + self.assertEqual(payload["policySnapshotHash"], "") + self.assertEqual(payload["legacyPolicy"], "false") + self.assertEqual(payload["policyError"], "state policy metadata contradictory") + + def test_summary_clears_incomplete_snapshot_metadata(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\npolicySnapshotFile: \"snap.json\"\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), 
redirect_stdout(stdout): + code = cmd_orchestrator_helper(["state-summary", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["policySnapshotFile"], "") + self.assertEqual(payload["policySnapshotHash"], "") + self.assertEqual(payload["legacyPolicy"], "false") + self.assertEqual(payload["policyError"], "state policy metadata incomplete") + + def test_summary_reports_missing_snapshot_reference(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\npolicySnapshotFile: \"missing.json\"\npolicySnapshotHash: \"deadbeef\"\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["state-summary", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["policySnapshotFile"], "") + self.assertEqual(payload["policySnapshotHash"], "") + self.assertIn("policy snapshot missing", payload["policyError"]) + + def test_summary_reports_snapshot_hash_mismatch(self) -> None: + state_file = self._build_state() + lines = [] + for line in state_file.read_text(encoding="utf-8").splitlines(): + if line.startswith("policySnapshotHash: "): + lines.append('policySnapshotHash: "deadbeef"') + else: + lines.append(line) + state_file.write_text("\n".join(lines) + "\n", encoding="utf-8") + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["state-summary", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["policySnapshotFile"], "") + self.assertEqual(payload["policySnapshotHash"], "") + self.assertIn("policy snapshot hash mismatch", payload["policyError"]) + + def test_summary_uses_runtime_root_for_relative_snapshot_validation(self) -> None: + outside = self.project_root.parent / "outside-state" + 
outside.mkdir(parents=True, exist_ok=True) + shadow = outside / "snap.json" + shadow.write_text("{}", encoding="utf-8") + state_file = outside / "orchestration.md" + state_file.write_text( + "---\npolicySnapshotFile: \"snap.json\"\npolicySnapshotHash: \"99999999\"\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["state-summary", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["policySnapshotFile"], "") + self.assertEqual(payload["policySnapshotHash"], "") + self.assertIn("policy snapshot missing", payload["policyError"]) def test_escalate_uses_pinned_snapshot_when_state_file_provided(self) -> None: state_file = self._build_state() diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index 95a1510..91264d6 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -275,6 +275,30 @@ def test_validate_story_creation_check_returns_compat_schema_on_unsupported_arti self.assertFalse(payload["valid"]) self.assertIn("no longer supports --artifacts-dir overrides", payload["reason"]) + def test_validate_story_creation_check_rejects_artifacts_dir_in_delta_mode(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--before", "0", "--after", "1", "--artifacts-dir", str(self.project_root / "tmp")]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertIn("no longer supports --artifacts-dir overrides", payload["reason"]) + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["before"], 0) + self.assertEqual(payload["after"], 1) + + def test_validate_story_creation_positional_mode_rejects_artifacts_dir_with_delta_fields(self) -> None: + stdout = 
io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2", "0", "1", "--artifacts-dir", str(self.project_root / "tmp")]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertIn("no longer supports --artifacts-dir overrides", payload["reason"]) + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["before"], 0) + self.assertEqual(payload["after"], 1) + def test_validate_story_creation_positional_mode_returns_compat_schema_on_bad_counts(self) -> None: stdout = io.StringIO() with patch_env(self.project_root), redirect_stdout(stdout): @@ -310,6 +334,9 @@ def test_validate_story_creation_positional_mode_returns_compat_schema_on_extra_ payload = json.loads(stdout.getvalue()) self.assertFalse(payload["valid"]) self.assertEqual(payload["reason"], "unsupported check argument: junk") + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["before"], 0) + self.assertEqual(payload["after"], 1) def test_validate_story_creation_positional_mode_returns_compat_schema_on_incomplete_state_file(self) -> None: stdout = io.StringIO() @@ -319,6 +346,65 @@ def test_validate_story_creation_positional_mode_returns_compat_schema_on_incomp payload = json.loads(stdout.getvalue()) self.assertFalse(payload["valid"]) self.assertEqual(payload["reason"], "--state-file requires a value") + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["before"], 0) + self.assertEqual(payload["after"], 1) + + def test_validate_story_creation_check_preserves_delta_on_incomplete_state_file(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--before", "0", "--after", "1", "--state-file"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + 
self.assertEqual(payload["reason"], "--state-file requires a value") + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["before"], 0) + self.assertEqual(payload["after"], 1) + + def test_validate_story_creation_check_preserves_delta_on_trailing_before_flag(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--before", "0", "--after", "1", "--before"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "--before requires a value") + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["before"], 0) + self.assertEqual(payload["after"], 1) + + def test_validate_story_creation_positional_mode_preserves_delta_on_trailing_before_flag(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2", "0", "1", "--before"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["reason"], "--before requires a value") + self.assertEqual(payload["created_count"], 1) + self.assertEqual(payload["before"], 0) + self.assertEqual(payload["after"], 1) + + def test_validate_story_creation_check_returns_compat_failure_without_exception(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["created_count"], 0) + self.assertEqual(payload["reason"], "No story file created - session may have failed") + + def test_validate_story_creation_positional_mode_returns_delta_failure_without_exception(self) -> None: + stdout = io.StringIO() + with 
patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["1.2", "1", "3"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertEqual(payload["created_count"], 2) + self.assertEqual(payload["reason"], "RUNAWAY CREATION: 2 files created instead of 1") def test_validate_story_creation_check_preserves_zero_expected_matches(self) -> None: self._write_override({"steps": {"create": {"success": {"config": {"expectedMatches": 0}}}}}) From 667b4d528cc5986bb1382a0b7d479396bf43e208 Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Tue, 14 Apr 2026 00:44:42 -0300 Subject: [PATCH 12/17] docs: update json settings todo status --- docs/plans/json-settings/TODO.md | 44 ++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/docs/plans/json-settings/TODO.md b/docs/plans/json-settings/TODO.md index 9270b2f..cbf7163 100644 --- a/docs/plans/json-settings/TODO.md +++ b/docs/plans/json-settings/TODO.md @@ -2,6 +2,12 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. +Status backfill: checked against shipped code and `npm run verify` on 2026-04-13. + +Notes: +- Item 1 remains open because the original pre-edit baseline notes were not preserved in-repo. +- Item 14 remains open because the review payload still relies on the extra instruction `auto-fix all issues without prompting` instead of encoding autonomous fix behavior directly in `instructions.xml`. + ## Phase 0: Baseline 1. [ ] Capture current behavior baselines. @@ -14,7 +20,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. - baseline commands are saved in working notes - current default behavior is explicit before edits start -2. [ ] Freeze the target JSON settings shape. +2. [x] Freeze the target JSON settings shape. 
Depends on: 1 Files: `docs/plans/json-settings/02-policy-model.md` Actions: @@ -26,7 +32,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. ## Phase 1: Policy Loader And Default Policy -3. [ ] Add bundled default policy JSON and data directories. +3. [x] Add bundled default policy JSON and data directories. Depends on: 2 Files: - `payload/.claude/skills/bmad-story-automator/data/orchestration-policy.json` @@ -38,7 +44,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - payload contains complete default machine contract -4. [ ] Implement `runtime_policy.py`. +4. [x] Implement `runtime_policy.py`. Depends on: 3 Files: - `source/src/story_automator/core/runtime_policy.py` @@ -52,7 +58,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - one call can return effective policy plus snapshot metadata -5. [ ] Refactor required/optional asset resolution behind policy. +5. [x] Refactor required/optional asset resolution behind policy. Depends on: 4 Files: - `source/src/story_automator/core/workflow_paths.py` @@ -64,7 +70,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - required assets never silently resolve to non-existent placeholders -6. [ ] Add state metadata for policy snapshots. +6. [x] Add state metadata for policy snapshots. Depends on: 4 Files: - `source/src/story_automator/commands/state.py` @@ -79,7 +85,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. ## Phase 2: Prompt And Parse Externalization -7. [ ] Replace hard-coded tmux prompts with template rendering. +7. [x] Replace hard-coded tmux prompts with template rendering. Depends on: 4, 5, 6 Files: - `source/src/story_automator/commands/tmux.py` @@ -91,7 +97,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. 
Done when: - `build-cmd` no longer uses the hard-coded prompt map -8. [ ] Replace hard-coded parse schema switch with policy-backed contracts. +8. [x] Replace hard-coded parse schema switch with policy-backed contracts. Depends on: 4 Files: - `source/src/story_automator/commands/orchestrator_parse.py` @@ -102,7 +108,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - parser behavior comes from data files, not `if step == ...` -9. [ ] Move retry budgets into policy-backed reads. +9. [x] Move retry budgets into policy-backed reads. Depends on: 4 Files: - `source/src/story_automator/commands/orchestrator.py` @@ -115,7 +121,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. ## Phase 3: Success Verifiers -10. [ ] Add verifier registry and concrete implementations. +10. [x] Add verifier registry and concrete implementations. Depends on: 4 Files: - `source/src/story_automator/core/success_verifiers.py` @@ -129,7 +135,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - verifiers are selected by name and tested independently -11. [ ] Wire `monitor-session` to policy-backed verifier dispatch. +11. [x] Wire `monitor-session` to policy-backed verifier dispatch. Depends on: 7, 10 Files: - `source/src/story_automator/commands/tmux.py` @@ -140,7 +146,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - completion logic is step-driven, not `workflow == "review"` driven -12. [ ] Fold create story validation into `create_story_artifact`. +12. [x] Fold create story validation into `create_story_artifact`. Depends on: 10, 11 Files: - `source/src/story_automator/commands/orchestrator.py` @@ -153,7 +159,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. ## Phase 4: Review Payload Alignment -13. [ ] Add structured review contract file. +13. [x] Add structured review contract file. 
Depends on: 3 Files: - `payload/.claude/skills/bmad-story-automator-review/contract.json` @@ -174,7 +180,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - review payload no longer contradicts runtime prompt defaults -15. [ ] Update main workflow prose to reference runtime policy. +15. [x] Update main workflow prose to reference runtime policy. Depends on: 3 Files: - `payload/.claude/skills/bmad-story-automator/workflow.md` @@ -187,7 +193,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. ## Phase 5: Testing -16. [ ] Add Python unit tests for policy and verifiers. +16. [x] Add Python unit tests for policy and verifiers. Depends on: 4, 8, 10 Files: - `source/tests/test_runtime_policy.py` @@ -200,7 +206,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - policy-specific behavior has direct automated coverage -17. [ ] Update smoke tests for installed policy assets and defaults. +17. [x] Update smoke tests for installed policy assets and defaults. Depends on: 7, 8, 11, 13, 14, 15 Files: - `scripts/smoke-test.sh` @@ -211,7 +217,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - installer/integration behavior remains covered end to end -18. [ ] Update local verify flow. +18. [x] Update local verify flow. Depends on: 16, 17 Files: - `package.json` @@ -225,7 +231,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. ## Phase 6: Compatibility And Cleanup -19. [ ] Implement legacy resume behavior and strict new-state validation. +19. [x] Implement legacy resume behavior and strict new-state validation. Depends on: 6, 10, 11 Files: - `source/src/story_automator/commands/state.py` @@ -237,7 +243,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - resume is deterministic and explicit in both modes -20. 
[ ] Preserve env compatibility for one release cycle. +20. [x] Preserve env compatibility for one release cycle. Depends on: 9 Files: - `source/src/story_automator/core/runtime_policy.py` @@ -249,7 +255,7 @@ Execute in order. Do not skip ahead unless the dependency line says it is safe. Done when: - old env knobs still work without mutating resumed runs -21. [ ] Remove or shrink obsolete hard-coded helpers. +21. [x] Remove or shrink obsolete hard-coded helpers. Depends on: 7, 8, 9, 10, 11 Files: - `source/src/story_automator/commands/tmux.py` From 55b2fd4350dd3b64f358024cf316c485cbd3bd30 Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Tue, 14 Apr 2026 17:16:16 -0300 Subject: [PATCH 13/17] fix: address PR review follow-ups --- .../story_automator/commands/orchestrator.py | 6 +++++- source/src/story_automator/commands/tmux.py | 2 ++ .../story_automator/core/runtime_policy.py | 17 +++++++++++++-- source/tests/test_runtime_policy.py | 5 +++++ source/tests/test_state_policy_metadata.py | 14 +++++++++++++ source/tests/test_success_verifiers.py | 21 +++++++++++++++++++ 6 files changed, 62 insertions(+), 3 deletions(-) diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 986a317..0fe139a 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -312,7 +312,11 @@ def _escalate(args: list[str]) -> int: idx += 2 continue idx += 1 - policy = load_runtime_policy(get_project_root(), state_file=state_file) + try: + policy = load_runtime_policy(get_project_root(), state_file=state_file) + except (FileNotFoundError, PolicyError) as exc: + print_json({"escalate": True, "reason": str(exc)}) + return 0 if trigger == "review-loop": cycles = _parse_context_int(context, "cycles") limit = review_max_cycles(policy) diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py 
index 6b0bb70..d61cea8 100644 --- a/source/src/story_automator/commands/tmux.py +++ b/source/src/story_automator/commands/tmux.py @@ -784,6 +784,8 @@ def _verify_monitor_completion( verifier_name = str(contract.get("verifier") or "").strip() if not verifier_name: return None + if verifier_name in {"create_story_artifact", "review_completion", "epic_complete"} and not story_key.strip(): + return None try: result = run_success_verifier( verifier_name, diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index d440ce6..d3eb8cc 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ b/source/src/story_automator/core/runtime_policy.py @@ -233,9 +233,22 @@ def _apply_legacy_env(policy: dict[str, Any]) -> None: review_cycles = os.environ.get("MAX_REVIEW_CYCLES") crash_retries = os.environ.get("MAX_CRASH_RETRIES") if review_cycles: - policy.setdefault("workflow", {}).setdefault("repeat", {}).setdefault("review", {})["maxCycles"] = int(review_cycles) + policy.setdefault("workflow", {}).setdefault("repeat", {}).setdefault("review", {})["maxCycles"] = _legacy_env_int( + "MAX_REVIEW_CYCLES", + review_cycles, + ) if crash_retries: - policy.setdefault("workflow", {}).setdefault("crash", {})["maxRetries"] = int(crash_retries) + policy.setdefault("workflow", {}).setdefault("crash", {})["maxRetries"] = _legacy_env_int( + "MAX_CRASH_RETRIES", + crash_retries, + ) + + +def _legacy_env_int(name: str, raw: str) -> int: + try: + return int(raw) + except ValueError as exc: + raise PolicyError(f"{name} must be an integer") from exc def _validate_policy_shape(policy: dict[str, Any]) -> None: diff --git a/source/tests/test_runtime_policy.py b/source/tests/test_runtime_policy.py index f5bf559..87b204b 100644 --- a/source/tests/test_runtime_policy.py +++ b/source/tests/test_runtime_policy.py @@ -77,6 +77,11 @@ def test_snapshot_bakes_legacy_env_values_for_resume(self) -> None: 
self.assertEqual(policy["workflow"]["repeat"]["review"]["maxCycles"], 2) self.assertEqual(policy["workflow"]["crash"]["maxRetries"], 4) + def test_invalid_legacy_env_value_raises_policy_error(self) -> None: + with patch.dict("os.environ", {"MAX_REVIEW_CYCLES": "nope"}, clear=False): + with self.assertRaisesRegex(PolicyError, "MAX_REVIEW_CYCLES must be an integer"): + load_effective_policy(str(self.project_root)) + def test_malformed_override_json_raises_policy_error(self) -> None: override_dir = self.project_root / "_bmad" / "bmm" override_dir.mkdir(parents=True, exist_ok=True) diff --git a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index 7af8365..a6cb3e0 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -237,6 +237,20 @@ def test_escalate_uses_pinned_snapshot_when_state_file_provided(self) -> None: self.assertEqual(code, 0) self.assertFalse(json.loads(stdout.getvalue())["escalate"]) + def test_escalate_returns_json_when_state_snapshot_is_invalid(self) -> None: + state_file = self.project_root / "orchestration.md" + state_file.write_text( + "---\npolicySnapshotFile: \"missing.json\"\npolicySnapshotHash: \"deadbeef\"\n---\n", + encoding="utf-8", + ) + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["escalate", "review-loop", "cycles=1", "--state-file", str(state_file)]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["escalate"]) + self.assertIn("policy snapshot missing", payload["reason"]) + def test_build_cmd_does_not_treat_state_file_flag_as_prompt_text(self) -> None: state_file = self._build_state() stdout = io.StringIO() diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index 91264d6..92725cc 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -131,6 
+131,27 @@ def test_monitor_dispatch_uses_review_verifier_from_contract(self) -> None: self.assertEqual(verifier, "review_completion") self.assertTrue(payload["verified"]) + def test_monitor_dispatch_skips_story_keyed_verifier_without_story_key(self) -> None: + result = _verify_monitor_completion( + "review", + project_root=str(self.project_root), + story_key="", + output_file="/tmp/session.txt", + ) + self.assertIsNone(result) + + def test_monitor_dispatch_allows_session_exit_without_story_key(self) -> None: + result = _verify_monitor_completion( + "dev", + project_root=str(self.project_root), + story_key="", + output_file="/tmp/session.txt", + ) + self.assertIsNotNone(result) + payload, verifier = result or ({}, "") + self.assertEqual(verifier, "session_exit") + self.assertTrue(payload["verified"]) + def test_verify_step_create_uses_shared_verifier(self) -> None: self._write_story("1-2-example", status="draft") stdout = io.StringIO() From fdb4d710055abdb887874397864b6a1ed1d1287a Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Tue, 14 Apr 2026 22:01:52 -0300 Subject: [PATCH 14/17] fix: address CodeRabbit review findings --- docs/changelog/260414.md | 40 +++++++++ docs/development.md | 4 +- docs/plans/json-settings/02-policy-model.md | 20 +---- docs/plans/json-settings/README.md | 7 +- .../steps-c/step-03-execute.md | 2 +- scripts/smoke-test.sh | 1 + .../story_automator/commands/orchestrator.py | 20 +++-- .../commands/orchestrator_parse.py | 13 ++- source/src/story_automator/commands/state.py | 6 +- source/src/story_automator/commands/tmux.py | 9 +- .../commands/validate_story_creation.py | 2 +- .../src/story_automator/core/review_verify.py | 5 +- .../story_automator/core/runtime_policy.py | 88 +++++++++++++++++-- source/tests/test_orchestrator_parse.py | 16 ++++ source/tests/test_runtime_policy.py | 21 +++++ source/tests/test_state_policy_metadata.py | 26 ++++++ source/tests/test_success_verifiers.py | 52 ++++++++++- 17 files 
changed, 285 insertions(+), 47 deletions(-) create mode 100644 docs/changelog/260414.md diff --git a/docs/changelog/260414.md b/docs/changelog/260414.md new file mode 100644 index 0000000..9579976 --- /dev/null +++ b/docs/changelog/260414.md @@ -0,0 +1,40 @@ +# Changelog - 260414 + +## 260414-21:51:35 - Harden snapshot and verifier review fixes + +### Summary +Closed the latest CodeRabbit review pass by aligning docs/examples with shipped behavior and hardening snapshot, parser, and verifier paths. + +### Changed +- Changed the development and JSON-settings docs to match the shipped verify order, supported workflow keys, and full state-policy metadata contract. +- Changed the execute-step example and smoke coverage so dev `build-cmd` keeps the pinned `--state-file` handoff. +- Changed regression coverage to pin the new parser, snapshot, verifier, and compatibility-wrapper failure contracts. + +### Fixed +- Fixed `tmux-wrapper build-cmd` prompt escaping to quote shell input safely for both Claude and Codex child sessions. +- Fixed `parse-output` and `verify-step` flag parsing to reject incomplete `--state-file` and `--output-file` arguments instead of silently falling back. +- Fixed state-doc creation, review verification, and legacy create validation to normalize missing snapshot/contract failures into structured JSON responses. +- Fixed monitor verification to surface `story_key_required` when a story-bound verifier is invoked without a story key. +- Fixed runtime policy snapshot loading to hash prompt/schema/contract files and reject snapshot or asset paths that escape allowed roots. 
+ +### Files +- `docs/development.md` +- `docs/plans/json-settings/02-policy-model.md` +- `docs/plans/json-settings/README.md` +- `payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md` +- `scripts/smoke-test.sh` +- `source/src/story_automator/commands/orchestrator.py` +- `source/src/story_automator/commands/orchestrator_parse.py` +- `source/src/story_automator/commands/state.py` +- `source/src/story_automator/commands/tmux.py` +- `source/src/story_automator/commands/validate_story_creation.py` +- `source/src/story_automator/core/review_verify.py` +- `source/src/story_automator/core/runtime_policy.py` +- `source/tests/test_orchestrator_parse.py` +- `source/tests/test_runtime_policy.py` +- `source/tests/test_state_policy_metadata.py` +- `source/tests/test_success_verifiers.py` +- `docs/changelog/260414.md` + +### QA Notes +- N/A diff --git a/docs/development.md b/docs/development.md index ada477a..ea31fe9 100644 --- a/docs/development.md +++ b/docs/development.md @@ -34,8 +34,8 @@ The smoke suite validates: ```mermaid flowchart TD A["Edit installer, payload, or runtime"] --> B["Run npm run test:python"] - B --> C["Run npm run test:smoke"] - C --> D["Run npm run pack:dry-run"] + B --> C["Run npm run pack:dry-run"] + C --> D["Run npm run test:smoke"] D --> E["Run npm run verify"] ``` diff --git a/docs/plans/json-settings/02-policy-model.md b/docs/plans/json-settings/02-policy-model.md index 56acdb6..3a168d9 100644 --- a/docs/plans/json-settings/02-policy-model.md +++ b/docs/plans/json-settings/02-policy-model.md @@ -130,12 +130,7 @@ High-level example: } }, "workflow": { - "sequence": ["create", "dev", "auto", "review"], - "optional": { - "auto": { - "skipWhenOverride": "skipAutomate" - } - }, + "sequence": ["create", "dev", "auto", "review", "retro"], "repeat": { "review": { "maxCycles": 5, @@ -147,17 +142,7 @@ High-level example: "crash": { "maxRetries": 2, "onExhausted": "escalate" - }, - "triggers": [ - { - "name": 
"retrospective_on_epic_complete", - "after": "review", - "verifier": "epic_complete", - "run": "retro", - "blocking": false, - "forceAgent": "claude" - } - ] + } }, "steps": { "create": { @@ -270,4 +255,3 @@ Without a pinned snapshot, these changes become unsafe: - verifier threshold change after preflight The snapshot prevents those mutations from changing the behavior of a resumed orchestration. - diff --git a/docs/plans/json-settings/README.md b/docs/plans/json-settings/README.md index 6490892..e9cf39a 100644 --- a/docs/plans/json-settings/README.md +++ b/docs/plans/json-settings/README.md @@ -59,7 +59,11 @@ bundled default policy + optional project override = effective runtime policy -> pinned snapshot at orchestration start - -> state doc stores pointer + hash + -> state doc stores: + - policySnapshotFile (string snapshot pointer) + - policySnapshotHash (string snapshot hash) + - policyVersion (string/integer runtime policy version) + - legacyPolicy (boolean legacy-state marker) -> all resume/replay uses snapshot only ``` @@ -82,4 +86,3 @@ This plan does not try to deliver: - custom Python or shell expressions in config - a general workflow interpreter - rich nested policy blobs embedded in frontmatter - diff --git a/payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md b/payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md index d7e60fa..7faa3fc 100644 --- a/payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md +++ b/payload/.claude/skills/bmad-story-automator/steps-c/step-03-execute.md @@ -154,7 +154,7 @@ validation=$("$scripts" orchestrator-helper verify-step create {story_id} --stat # Retry loop with agent alternation: see {retryStrategy} session=$("$scripts" tmux-wrapper spawn dev {epic} {story_id} \ --agent "$current_agent" \ - --command "$("$scripts" tmux-wrapper build-cmd dev {story_id} --agent "$current_agent")") + --command "$("$scripts" tmux-wrapper build-cmd dev {story_id} --agent "$current_agent" 
--state-file "$state_file")") result=$("$scripts" monitor-session "$session" --json --agent "$current_agent") "$scripts" tmux-wrapper kill "$session" ``` diff --git a/scripts/smoke-test.sh b/scripts/smoke-test.sh index 7414554..d64bf14 100755 --- a/scripts/smoke-test.sh +++ b/scripts/smoke-test.sh @@ -248,6 +248,7 @@ verify_common_install() { assert_contains 'verify-code-review {story_id} --state-file "$state_file"' "$story_dir/data/code-review-loop.md" assert_contains 'orchestrator-helper verify-step create {story_id} --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" assert_contains 'build-cmd create {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" + assert_contains 'build-cmd dev {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" assert_contains 'validation_passed=$(echo "$validation" | jq -r '\''.verified'\'')' "$story_dir/data/retry-fallback-implementation.md" assert_contains 'build-cmd {step} {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/data/retry-fallback-implementation.md" assert_contains 'orchestrator-helper verify-step create 5.3 --state-file "$state_file"' "$story_dir/data/monitoring-pattern.md" diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 0fe139a..5723fd2 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ b/source/src/story_automator/commands/orchestrator.py @@ -421,12 +421,20 @@ def _verify_step(args: list[str]) -> int: state_file = "" output_file = "" tail = args[2:] - for idx, arg in enumerate(tail): - if arg == "--state-file" and idx + 1 < len(tail): - state_file = tail[idx + 1] - elif arg == "--output-file" and idx + 1 < len(tail): - output_file = tail[idx + 1] try: + idx = 0 + while idx < len(tail): + arg = tail[idx] + if arg in {"--state-file", "--output-file"}: + if idx + 1 >= len(tail) or not 
tail[idx + 1].strip() or tail[idx + 1].startswith("--"): + raise PolicyError(f"{arg} requires a value") + if arg == "--state-file": + state_file = tail[idx + 1] + else: + output_file = tail[idx + 1] + idx += 2 + continue + idx += 1 contract = resolve_success_contract(get_project_root(), step, state_file=state_file or None) verifier = str(contract.get("verifier") or "").strip() if not verifier: @@ -439,7 +447,7 @@ def _verify_step(args: list[str]) -> int: contract=contract, ) exit_code = 0 - except (FileNotFoundError, PolicyError) as exc: + except (FileNotFoundError, PolicyError, ValueError) as exc: payload = {"verified": False, "step": step, "input": story_key, "reason": "verifier_contract_invalid", "error": str(exc)} exit_code = 1 print_json(payload) diff --git a/source/src/story_automator/commands/orchestrator_parse.py b/source/src/story_automator/commands/orchestrator_parse.py index 3965f79..714c577 100644 --- a/source/src/story_automator/commands/orchestrator_parse.py +++ b/source/src/story_automator/commands/orchestrator_parse.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +from typing import Any from story_automator.core.runtime_policy import PolicyError, load_runtime_policy, parser_runtime_config, step_contract from story_automator.core.utils import COMMAND_TIMEOUT_EXIT, extract_json_line, print_json, read_text, run_cmd, trim_lines @@ -14,7 +15,10 @@ def parse_output_action(args: list[str]) -> int: state_file = "" idx = 2 while idx < len(args): - if args[idx] == "--state-file" and idx + 1 < len(args): + if args[idx] == "--state-file": + if idx + 1 >= len(args) or not args[idx + 1].strip() or args[idx + 1].startswith("--"): + print_json({"status": "error", "reason": "parse_contract_invalid"}) + return 1 state_file = args[idx + 1] idx += 2 continue @@ -71,7 +75,10 @@ def _load_parse_contract(contract: dict[str, object]) -> dict[str, object]: payload = json.loads(read_text(str(parse.get("schemaPath") or ""))) if not isinstance(payload, dict): 
raise ValueError("invalid parse schema") - if not isinstance(payload.get("requiredKeys"), list): + required_keys = payload.get("requiredKeys") + if not isinstance(required_keys, list): + raise ValueError("invalid parse schema") + if any(not isinstance(key, str) or not key.strip() for key in required_keys): raise ValueError("invalid parse schema") if not isinstance(payload.get("schema"), dict): raise ValueError("invalid parse schema") @@ -84,7 +91,7 @@ def _build_parse_prompt(contract: dict[str, object], parse_contract: dict[str, o return f"Analyze this {label} session output. Return JSON only:\n{schema}\n\nSession output:\n---\n{content}\n---" -def _has_required_keys(payload: object, required_keys: list[object]) -> bool: +def _has_required_keys(payload: object, required_keys: list[Any]) -> bool: if not isinstance(payload, dict): return False return all(isinstance(key, str) and key in payload for key in required_keys) diff --git a/source/src/story_automator/commands/state.py b/source/src/story_automator/commands/state.py index ede9786..89b1c8a 100644 --- a/source/src/story_automator/commands/state.py +++ b/source/src/story_automator/commands/state.py @@ -43,7 +43,11 @@ def cmd_build_state_doc(args: list[str]) -> int: epic = str(config.get("epic") or "epic") safe_epic = re.sub(r"[^a-zA-Z0-9]+", "-", epic).strip("-") or "epic" output_path = Path(output_folder) / f"orchestration-{safe_epic}-{stamp}.md" - snapshot = snapshot_effective_policy(get_project_root()) + try: + snapshot = snapshot_effective_policy(get_project_root()) + except (FileNotFoundError, PolicyError, ValueError) as exc: + write_json({"ok": False, "error": "policy_snapshot_failed", "reason": str(exc)}) + return 1 text = read_text(template) replacements: dict[str, Any] = { "epic": config.get("epic", ""), diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py index d61cea8..1fe0c75 100644 --- a/source/src/story_automator/commands/tmux.py +++ 
b/source/src/story_automator/commands/tmux.py @@ -3,6 +3,7 @@ import json import os import re +import shlex import time from pathlib import Path @@ -204,7 +205,7 @@ def _build_cmd(args: list[str]) -> int: else: cli = "codex exec" prompt = _render_step_prompt(contract, story_id, story_prefix, extra) - escaped = prompt.replace("\\", "\\\\").replace('"', '\\"') + quoted_prompt = shlex.quote(prompt) if agent == "codex" and not ai_command: codex_home = f"/tmp/sa-codex-home-{project_hash(root)}" auth_src = os.path.expanduser("~/.codex/auth.json") @@ -213,10 +214,10 @@ def _build_cmd(args: list[str]) -> int: + f' && if [ -f "{auth_src}" ]; then ln -sf "{auth_src}" "{codex_home}/auth.json"; fi' + f' && CODEX_HOME="{codex_home}" codex exec -s workspace-write -c \'approval_policy="never"\'' + f' -c \'model_reasoning_effort="high"\'' - + f' --disable plugins --disable sqlite --disable shell_snapshot "{escaped}"' + + f" --disable plugins --disable sqlite --disable shell_snapshot {quoted_prompt}" ) else: - print(f'unset CLAUDECODE && {cli} "{escaped}"') + print(f"unset CLAUDECODE && {cli} {quoted_prompt}") return 0 @@ -785,7 +786,7 @@ def _verify_monitor_completion( if not verifier_name: return None if verifier_name in {"create_story_artifact", "review_completion", "epic_complete"} and not story_key.strip(): - return None + return ({"verified": False, "reason": "story_key_required", "verifier": verifier_name}, verifier_name) try: result = run_success_verifier( verifier_name, diff --git a/source/src/story_automator/commands/validate_story_creation.py b/source/src/story_automator/commands/validate_story_creation.py index cb12f5f..b8e1d0e 100644 --- a/source/src/story_automator/commands/validate_story_creation.py +++ b/source/src/story_automator/commands/validate_story_creation.py @@ -180,7 +180,7 @@ def parsed_delta_counts(before_value: str | None, after_value: str | None) -> tu try: payload = create_check_payload(story_id, state_file) response = build_check_response(story_id, 
payload, before_count=before_count, after_count=after_count) - except (PolicyError, ValueError) as exc: + except (FileNotFoundError, PolicyError, ValueError) as exc: return print_check_error(story_id, reason=str(exc), before_count=before_count, after_count=after_count) print(json.dumps(response, separators=(",", ":"))) return 0 diff --git a/source/src/story_automator/core/review_verify.py b/source/src/story_automator/core/review_verify.py index 5975c69..029c67a 100644 --- a/source/src/story_automator/core/review_verify.py +++ b/source/src/story_automator/core/review_verify.py @@ -3,6 +3,7 @@ from pathlib import Path from typing import Any +from .runtime_policy import PolicyError from .success_verifiers import resolve_success_contract, review_completion @@ -14,7 +15,7 @@ def verify_code_review_completion( state_file: str | Path | None = None, ) -> dict[str, object]: try: - contract = success_contract or resolve_success_contract(project_root, "review", state_file=state_file) + contract = resolve_success_contract(project_root, "review", state_file=state_file) if success_contract is None else success_contract return review_completion(project_root=project_root, story_key=story_key, contract=contract) - except (FileNotFoundError, ValueError) as exc: + except (FileNotFoundError, ValueError, PolicyError) as exc: return {"verified": False, "reason": "review_contract_invalid", "input": story_key, "error": str(exc)} diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index d3eb8cc..812d5e2 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ b/source/src/story_automator/core/runtime_policy.py @@ -39,6 +39,7 @@ def load_effective_policy(project_root: str | None = None, *, resolve_assets: bo policy = _deep_merge(bundled, override) _apply_legacy_env(policy) _validate_policy_shape(policy) + _clear_resolved_fields(policy) if resolve_assets: _resolve_policy_paths(policy, project_root=root, 
bundle_root=bundled_skill_root(root)) else: @@ -65,7 +66,7 @@ def load_runtime_policy( def snapshot_effective_policy(project_root: str | None = None) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() policy = load_effective_policy(str(root)) - snapshot_dir = root / _snapshot_relative_dir(policy) + snapshot_dir = _resolve_snapshot_dir(policy, root) ensure_dir(snapshot_dir) stable_json = _stable_policy_json(policy) snapshot_hash = md5_hex8(stable_json) @@ -229,6 +230,27 @@ def _deep_merge(base: Any, override: Any) -> Any: return override +def _clear_resolved_fields(policy: dict[str, Any]) -> None: + for contract in (policy.get("steps") or {}).values(): + if not isinstance(contract, dict): + continue + assets = contract.get("assets") + if isinstance(assets, dict): + assets.pop("files", None) + prompt = contract.get("prompt") + if isinstance(prompt, dict): + prompt.pop("templatePath", None) + prompt.pop("templateHash", None) + parse = contract.get("parse") + if isinstance(parse, dict): + parse.pop("schemaPath", None) + parse.pop("schemaHash", None) + success = contract.get("success") + if isinstance(success, dict): + success.pop("contractPath", None) + success.pop("contractHash", None) + + def _apply_legacy_env(policy: dict[str, Any]) -> None: review_cycles = os.environ.get("MAX_REVIEW_CYCLES") crash_retries = os.environ.get("MAX_CRASH_RETRIES") @@ -305,15 +327,18 @@ def _resolve_policy_paths(policy: dict[str, Any], *, project_root: Path, bundle_ if not template_file: raise PolicyError(f"missing prompt template for {name}") prompt["templatePath"] = _resolve_data_path(template_file, project_root=project_root, bundle_root=bundle_root) + _set_or_verify_hash(prompt, path_key="templatePath", hash_key="templateHash", label="policy template") parse = contract.setdefault("parse", {}) schema_file = str(parse.get("schemaFile") or "").strip() if not schema_file: raise PolicyError(f"missing parse schema for {name}") parse["schemaPath"] = 
_resolve_data_path(schema_file, project_root=project_root, bundle_root=bundle_root) + _set_or_verify_hash(parse, path_key="schemaPath", hash_key="schemaHash", label="policy parse schema") success = contract.setdefault("success", {}) contract_file = str(success.get("contractFile") or "").strip() if contract_file: success["contractPath"] = _resolve_data_path(contract_file, project_root=project_root, bundle_root=bundle_root) + _set_or_verify_hash(success, path_key="contractPath", hash_key="contractHash", label="policy success contract") def _resolve_success_paths(policy: dict[str, Any], *, project_root: Path, bundle_root: Path) -> None: @@ -322,13 +347,15 @@ def _resolve_success_paths(policy: dict[str, Any], *, project_root: Path, bundle contract_file = str(success.get("contractFile") or "").strip() if contract_file: success["contractPath"] = _resolve_data_path(contract_file, project_root=project_root, bundle_root=bundle_root) + _set_or_verify_hash(success, path_key="contractPath", hash_key="contractHash", label="policy success contract") def _resolve_step_assets(step: str, assets: dict[str, Any], project_root: Path) -> dict[str, str]: skill_name = str(assets.get("skillName") or "").strip() if not skill_name: raise PolicyError(f"missing skillName for {step}") - skill_dir = project_root / ".claude" / "skills" / skill_name + skills_root = (project_root / ".claude" / "skills").resolve() + skill_dir = _ensure_within(skills_root / skill_name, skills_root, f"skillName for {step}") required = set(assets.get("required") or []) files = { "skill": _resolve_required_file(skill_dir / "SKILL.md", project_root, required, "skill", step), @@ -364,7 +391,7 @@ def _resolve_candidate_file( for name in candidates: if not isinstance(name, str) or not name: continue - path = skill_dir / name + path = _ensure_within(skill_dir / name, skill_dir, f"{asset} candidate for {step}") if path.is_file(): return _display_path(path, project_root) if asset in required: @@ -375,14 +402,24 @@ def 
_resolve_candidate_file( def _resolve_data_path(path_value: str, *, project_root: Path, bundle_root: Path) -> str: raw = Path(path_value) + allowed_roots = (bundle_root.resolve(), project_root.resolve()) if raw.is_absolute(): - if not raw.is_file(): + resolved = raw.resolve() + if not _is_within_any(resolved, allowed_roots): + raise PolicyError(f"policy data path escapes allowed roots: {path_value}") + if not resolved.is_file(): raise PolicyError(f"policy data file missing: {raw}") - return str(raw) - for base in (bundle_root, project_root): + return str(resolved) + escaped_all = True + for base in allowed_roots: candidate = (base / raw).resolve() + if not _is_within(candidate, base): + continue + escaped_all = False if candidate.is_file(): return str(candidate) + if escaped_all: + raise PolicyError(f"policy data path escapes allowed roots: {path_value}") raise PolicyError(f"policy data file missing: {path_value}") @@ -394,6 +431,12 @@ def _snapshot_relative_dir(policy: dict[str, Any]) -> str: return relative_dir +def _resolve_snapshot_dir(policy: dict[str, Any], project_root: Path) -> Path: + raw = Path(_snapshot_relative_dir(policy)) + candidate = raw if raw.is_absolute() else project_root / raw + return _ensure_within(candidate, project_root.resolve(), "snapshot.relativeDir") + + def _stable_policy_json(policy: dict[str, Any]) -> str: return json.dumps(policy, indent=2, sort_keys=True) + "\n" @@ -409,6 +452,39 @@ def _resolve_state_path(project_root: Path, path: Path) -> Path: return path if path.is_absolute() else project_root / path +def _set_or_verify_hash(payload: dict[str, Any], *, path_key: str, hash_key: str, label: str) -> None: + path = str(payload.get(path_key) or "").strip() + if not path: + return + actual = md5_hex8(read_text(path)) + expected = str(payload.get(hash_key) or "").strip() + if expected and expected != actual: + raise PolicyError(f"{label} hash mismatch: {path}") + payload[hash_key] = actual + + +def _ensure_within(path: Path, root: 
Path, label: str) -> Path: + resolved = path.resolve() + root_resolved = root.resolve() + try: + resolved.relative_to(root_resolved) + except ValueError as exc: + raise PolicyError(f"{label} escapes allowed root: {path}") from exc + return resolved + + +def _is_within(path: Path, root: Path) -> bool: + try: + path.resolve().relative_to(root.resolve()) + except ValueError: + return False + return True + + +def _is_within_any(path: Path, roots: tuple[Path, ...]) -> bool: + return any(_is_within(path, root) for root in roots) + + def _state_policy_mode(fields: dict[str, Any]) -> tuple[str, str, bool]: snapshot_file = str(fields.get("policySnapshotFile") or "").strip() snapshot_hash = str(fields.get("policySnapshotHash") or "").strip() diff --git a/source/tests/test_orchestrator_parse.py b/source/tests/test_orchestrator_parse.py index 110b004..cc451f4 100644 --- a/source/tests/test_orchestrator_parse.py +++ b/source/tests/test_orchestrator_parse.py @@ -53,6 +53,22 @@ def test_invalid_schema_file_rejected(self) -> None: self.assertEqual(code, 1) self.assertEqual(json.loads(stdout.getvalue())["reason"], "parse_contract_invalid") + def test_missing_state_file_flag_value_rejected(self) -> None: + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "create", "--state-file"]) + self.assertEqual(code, 1) + self.assertEqual(json.loads(stdout.getvalue())["reason"], "parse_contract_invalid") + + def test_non_string_required_key_rejected(self) -> None: + schema = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "data" / "parse" / "create.json" + schema.write_text(json.dumps({"requiredKeys": [True], "schema": {}}), encoding="utf-8") + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "create"]) + 
self.assertEqual(code, 1) + self.assertEqual(json.loads(stdout.getvalue())["reason"], "parse_contract_invalid") + def test_invalid_child_json_rejected(self) -> None: stdout = io.StringIO() with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), patch( diff --git a/source/tests/test_runtime_policy.py b/source/tests/test_runtime_policy.py index 87b204b..2136d0f 100644 --- a/source/tests/test_runtime_policy.py +++ b/source/tests/test_runtime_policy.py @@ -117,6 +117,27 @@ def test_snapshot_reload_re_resolves_paths_for_new_root(self) -> None: template_path = policy["steps"]["create"]["prompt"]["templatePath"] self.assertTrue(str(copied_root) in template_path) + def test_snapshot_relative_dir_cannot_escape_project_root(self) -> None: + self._write_override({"snapshot": {"relativeDir": "../outside"}}) + with self.assertRaisesRegex(PolicyError, "snapshot.relativeDir escapes allowed root"): + snapshot_effective_policy(str(self.project_root)) + + def test_data_path_cannot_escape_allowed_roots(self) -> None: + self._write_override({"steps": {"create": {"prompt": {"templateFile": "../outside.md"}}}}) + with self.assertRaisesRegex(PolicyError, "policy data path escapes allowed roots"): + load_effective_policy(str(self.project_root)) + + def test_snapshot_detects_prompt_template_drift(self) -> None: + snapshot = snapshot_effective_policy(str(self.project_root)) + prompt = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "data" / "prompts" / "create.md" + prompt.write_text("# changed\n", encoding="utf-8") + with self.assertRaisesRegex(PolicyError, "policy template hash mismatch"): + load_policy_snapshot( + snapshot["policySnapshotFile"], + project_root=str(self.project_root), + expected_hash=snapshot["policySnapshotHash"], + ) + def test_missing_marker_state_falls_back_to_effective_policy(self) -> None: marker = self.project_root / ".claude" / ".story-automator-active" marker.parent.mkdir(parents=True, exist_ok=True) diff --git 
a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index a6cb3e0..748dd0b 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -259,6 +259,32 @@ def test_build_cmd_does_not_treat_state_file_flag_as_prompt_text(self) -> None: self.assertEqual(code, 0) rendered = stdout.getvalue() self.assertNotIn("--state-file", rendered) + self.assertNotIn(str(state_file), rendered) + + def test_build_state_doc_returns_json_on_policy_snapshot_failure(self) -> None: + override_dir = self.project_root / "_bmad" / "bmm" + override_dir.mkdir(parents=True, exist_ok=True) + (override_dir / "story-automator.policy.json").write_text( + json.dumps({"snapshot": {"relativeDir": "../outside"}}), + encoding="utf-8", + ) + stdout = io.StringIO() + template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_build_state_doc( + [ + "--template", + str(template), + "--output-folder", + str(self.output_dir), + "--config-json", + json.dumps(self._config()), + ] + ) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["ok"]) + self.assertEqual(payload["error"], "policy_snapshot_failed") def test_build_cmd_rejects_unknown_step_via_policy(self) -> None: stderr = io.StringIO() diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index 92725cc..f858d5c 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -138,7 +138,11 @@ def test_monitor_dispatch_skips_story_keyed_verifier_without_story_key(self) -> story_key="", output_file="/tmp/session.txt", ) - self.assertIsNone(result) + self.assertIsNotNone(result) + payload, verifier = result or ({}, "") + self.assertEqual(verifier, "review_completion") + self.assertFalse(payload["verified"]) + 
self.assertEqual(payload["reason"], "story_key_required") def test_monitor_dispatch_allows_session_exit_without_story_key(self) -> None: result = _verify_monitor_completion( @@ -248,6 +252,52 @@ def test_validate_story_creation_check_returns_compat_schema_on_policy_error(sel self.assertEqual(payload["pattern"], "") self.assertEqual(payload["matches"], []) + def test_validate_story_creation_check_returns_compat_schema_on_missing_state_file(self) -> None: + stdout = io.StringIO() + missing = self.project_root / "missing-state.md" + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--state-file", str(missing)]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertIn("missing-state.md", payload["reason"]) + + def test_review_wrapper_honors_empty_injected_contract(self) -> None: + self._write_story("1-2-example", status="done") + self._write_override( + { + "steps": { + "review": { + "success": { + "config": {"doneValues": ["approved"], "sourceOrder": ["story-file"], "syncSprintStatus": False} + } + } + } + } + ) + payload = verify_code_review_completion(str(self.project_root), "1.2", success_contract={}) + self.assertTrue(payload["verified"]) + self.assertEqual(payload["source"], "story-file") + + def test_review_wrapper_normalizes_policy_error(self) -> None: + payload = verify_code_review_completion( + str(self.project_root), + "1.2", + success_contract={"doneValues": [], "sourceOrder": ["story-file"]}, + ) + self.assertFalse(payload["verified"]) + self.assertEqual(payload["reason"], "review_contract_invalid") + + def test_verify_step_rejects_incomplete_state_file_flag(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["verify-step", "create", "1.2", "--state-file"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + 
self.assertFalse(payload["verified"]) + self.assertEqual(payload["reason"], "verifier_contract_invalid") + self.assertEqual(payload["error"], "--state-file requires a value") + def test_validate_story_creation_check_returns_compat_schema_on_bad_counts(self) -> None: stdout = io.StringIO() with patch_env(self.project_root), redirect_stdout(stdout): From e8a8f78fd752350251cc42283d8e493a9975744b Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Wed, 15 Apr 2026 01:20:41 -0300 Subject: [PATCH 15/17] fix: harden policy resume and review parsing --- docs/changelog/260415.md | 31 ++++++++++ docs/plans/json-settings/02-policy-model.md | 5 +- .../03-code-and-payload-changes.md | 7 ++- .../bmad-story-automator-review/workflow.yaml | 1 - .../steps-c/step-03a-execute-review.md | 4 +- scripts/smoke-test.sh | 2 + .../story_automator/commands/orchestrator.py | 36 +++++++++--- .../commands/orchestrator_parse.py | 25 ++++++++ source/src/story_automator/commands/tmux.py | 46 ++++++++++----- .../story_automator/core/runtime_policy.py | 31 +++++++--- .../story_automator/core/success_verifiers.py | 23 +++++++- source/tests/test_orchestrator_parse.py | 20 +++++++ source/tests/test_runtime_policy.py | 58 +++++++++++++++++-- source/tests/test_state_policy_metadata.py | 31 +++++++++- source/tests/test_success_verifiers.py | 49 ++++++++++++++++ 15 files changed, 320 insertions(+), 49 deletions(-) create mode 100644 docs/changelog/260415.md diff --git a/docs/changelog/260415.md b/docs/changelog/260415.md new file mode 100644 index 0000000..c54cdaa --- /dev/null +++ b/docs/changelog/260415.md @@ -0,0 +1,31 @@ +# Changelog - 260415 + +## 260415-01:20:16 - Harden policy resume and review parsing + +### Summary +Closed PR review gaps around state-file handling, snapshot boundaries, parser validation, and policy docs. + +### Changed +- Made `load_runtime_policy()` fail closed for broken active-run markers and missing marker/env state files. 
+- Enforced parse-contract schema shapes for nested review payloads instead of checking required top-level keys only. +- Removed the unused top-level review workflow `contract` pointer and aligned the plan docs with step-level policy ownership. + +### Fixed +- Added strict `--state-file` propagation and help coverage for review execution flow docs and tmux/orchestrator helpers. +- Bounded snapshot, state-file, and artifact-glob resolution to the project roots used by the runtime. +- Expanded regression coverage for prompt/schema/contract snapshot drift, malformed markers, and invalid review parser output. + +### Files +- `source/src/story_automator/core/runtime_policy.py` +- `source/src/story_automator/commands/orchestrator_parse.py` +- `source/src/story_automator/commands/orchestrator.py` +- `source/src/story_automator/commands/tmux.py` +- `source/src/story_automator/core/success_verifiers.py` +- `payload/.claude/skills/bmad-story-automator-review/workflow.yaml` +- `docs/plans/json-settings/02-policy-model.md` +- `docs/plans/json-settings/03-code-and-payload-changes.md` +- `source/tests/test_orchestrator_parse.py` +- `source/tests/test_runtime_policy.py` + +### QA Notes +- N/A diff --git a/docs/plans/json-settings/02-policy-model.md b/docs/plans/json-settings/02-policy-model.md index 3a168d9..50fd8a7 100644 --- a/docs/plans/json-settings/02-policy-model.md +++ b/docs/plans/json-settings/02-policy-model.md @@ -214,8 +214,9 @@ Prompt templates should support simple substitution only: - `{{story_id}}` - `{{story_prefix}}` -- `{{skill_path}}` -- `{{workflow_path}}` +- `{{label}}` +- `{{skill_line}}` +- `{{workflow_line}}` - `{{instructions_line}}` - `{{checklist_line}}` - `{{template_line}}` diff --git a/docs/plans/json-settings/03-code-and-payload-changes.md b/docs/plans/json-settings/03-code-and-payload-changes.md index 85b8b66..f326c59 100644 --- a/docs/plans/json-settings/03-code-and-payload-changes.md +++ b/docs/plans/json-settings/03-code-and-payload-changes.md 
@@ -177,9 +177,11 @@ Changes: ### `payload/.claude/skills/bmad-story-automator-review/workflow.yaml` -Add a machine contract pointer, for example: +Keep this file human-facing only. -- `contract: "./contract.json"` +The machine contract should stay in step policy: + +- `steps.review.success.contractFile = ".claude/skills/bmad-story-automator-review/contract.json"` ### New: `payload/.claude/skills/bmad-story-automator-review/contract.json` @@ -247,4 +249,3 @@ To keep files under roughly 500 LOC: - `orchestrator_parse.py`: parser command plus schema validation If `runtime_policy.py` grows too large, split only after phase 1 lands. - diff --git a/payload/.claude/skills/bmad-story-automator-review/workflow.yaml b/payload/.claude/skills/bmad-story-automator-review/workflow.yaml index f7c9283..05b5347 100644 --- a/payload/.claude/skills/bmad-story-automator-review/workflow.yaml +++ b/payload/.claude/skills/bmad-story-automator-review/workflow.yaml @@ -16,5 +16,4 @@ sprint_status: "{implementation_artifacts}/sprint-status.yaml" # Workflow components instructions: "./instructions.xml" validation: "./checklist.md" -contract: "./contract.json" standalone: true diff --git a/payload/.claude/skills/bmad-story-automator/steps-c/step-03a-execute-review.md b/payload/.claude/skills/bmad-story-automator/steps-c/step-03a-execute-review.md index a88d198..61a06bd 100644 --- a/payload/.claude/skills/bmad-story-automator/steps-c/step-03a-execute-review.md +++ b/payload/.claude/skills/bmad-story-automator/steps-c/step-03a-execute-review.md @@ -35,7 +35,7 @@ Set: `scripts="{scriptsDir}"` # --command required (see Spawn Pattern in step-03) session=$("$scripts" tmux-wrapper spawn auto {epic} {story_id} \ --agent "$current_agent" \ - --command "$("$scripts" tmux-wrapper build-cmd auto {story_id} --agent "$current_agent")") + --command "$("$scripts" tmux-wrapper build-cmd auto {story_id} --agent "$current_agent" --state-file "$state_file")") result=$("$scripts" monitor-session "$session" 
--json --agent "$current_agent") "$scripts" tmux-wrapper kill "$session" ``` @@ -74,7 +74,7 @@ if [ -z "$review_focus" ]; then fi # Compact subprocess-style summary contract for parent flow -review_summary=$("$scripts" orchestrator-helper parse-output "$review_log" review | jq -c ' +review_summary=$("$scripts" orchestrator-helper parse-output "$review_log" review --state-file "$state_file" | jq -c ' { next_action: (.next_action // "retry"), confidence: (.confidence // 0), diff --git a/scripts/smoke-test.sh b/scripts/smoke-test.sh index d64bf14..5e2ac85 100755 --- a/scripts/smoke-test.sh +++ b/scripts/smoke-test.sh @@ -249,6 +249,8 @@ verify_common_install() { assert_contains 'orchestrator-helper verify-step create {story_id} --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" assert_contains 'build-cmd create {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" assert_contains 'build-cmd dev {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/steps-c/step-03-execute.md" + assert_contains 'build-cmd auto {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/steps-c/step-03a-execute-review.md" + assert_contains 'parse-output "$review_log" review --state-file "$state_file"' "$story_dir/steps-c/step-03a-execute-review.md" assert_contains 'validation_passed=$(echo "$validation" | jq -r '\''.verified'\'')' "$story_dir/data/retry-fallback-implementation.md" assert_contains 'build-cmd {step} {story_id} --agent "$current_agent" --state-file "$state_file"' "$story_dir/data/retry-fallback-implementation.md" assert_contains 'orchestrator-helper verify-step create 5.3 --state-file "$state_file"' "$story_dir/data/monitoring-pattern.md" diff --git a/source/src/story_automator/commands/orchestrator.py b/source/src/story_automator/commands/orchestrator.py index 5723fd2..fddbf83 100644 --- a/source/src/story_automator/commands/orchestrator.py +++ 
b/source/src/story_automator/commands/orchestrator.py @@ -306,12 +306,16 @@ def _escalate(args: list[str]) -> int: context = args[1] if len(args) > 1 else "" state_file = "" idx = 2 - while idx < len(args): - if args[idx] == "--state-file" and idx + 1 < len(args): - state_file = args[idx + 1] - idx += 2 - continue - idx += 1 + try: + while idx < len(args): + if args[idx] == "--state-file": + state_file = _flag_value(args, idx, "--state-file") + idx += 2 + continue + idx += 1 + except PolicyError as exc: + print_json({"escalate": True, "reason": str(exc)}) + return 0 try: policy = load_runtime_policy(get_project_root(), state_file=state_file) except (FileNotFoundError, PolicyError) as exc: @@ -405,9 +409,17 @@ def _verify_code_review(args: list[str]) -> int: return 1 state_file = "" tail = args[1:] - for idx, arg in enumerate(tail): - if arg == "--state-file" and idx + 1 < len(tail): - state_file = tail[idx + 1] + try: + idx = 0 + while idx < len(tail): + if tail[idx] == "--state-file": + state_file = _flag_value(tail, idx, "--state-file") + idx += 2 + continue + idx += 1 + except PolicyError as exc: + print_json({"verified": False, "reason": "review_contract_invalid", "input": args[0], "error": str(exc)}) + return 1 payload = verify_code_review_completion(get_project_root(), args[0], state_file=state_file or None) print_json(payload) return 0 if bool(payload.get("verified")) else 1 @@ -457,3 +469,9 @@ def _verify_step(args: list[str]) -> int: def _parse_context_int(context: str, key: str) -> int: match = re.search(rf"{re.escape(key)}=(\d+)", context) return int(match.group(1)) if match else 0 + + +def _flag_value(args: list[str], idx: int, flag: str) -> str: + if idx + 1 >= len(args) or not args[idx + 1].strip() or args[idx + 1].startswith("--"): + raise PolicyError(f"{flag} requires a value") + return args[idx + 1] diff --git a/source/src/story_automator/commands/orchestrator_parse.py b/source/src/story_automator/commands/orchestrator_parse.py index 
714c577..0f7ea28 100644 --- a/source/src/story_automator/commands/orchestrator_parse.py +++ b/source/src/story_automator/commands/orchestrator_parse.py @@ -66,6 +66,9 @@ def parse_output_action(args: list[str]) -> int: if not _has_required_keys(payload, parse_contract.get("requiredKeys") or []): print_json({"status": "error", "reason": "sub-agent returned invalid json"}) return 1 + if not _matches_schema(payload, parse_contract.get("schema") or {}): + print_json({"status": "error", "reason": "sub-agent returned invalid json"}) + return 1 print(json.dumps(payload, separators=(",", ":"))) return 0 @@ -95,3 +98,25 @@ def _has_required_keys(payload: object, required_keys: list[Any]) -> bool: if not isinstance(payload, dict): return False return all(isinstance(key, str) and key in payload for key in required_keys) + + +def _matches_schema(payload: object, schema: object) -> bool: + if isinstance(schema, dict): + if not isinstance(payload, dict): + return False + for key, child_schema in schema.items(): + if key not in payload or not _matches_schema(payload[key], child_schema): + return False + return True + if not isinstance(schema, str): + return False + rule = schema.strip() + if rule == "integer": + return isinstance(payload, int) and not isinstance(payload, bool) + if rule == "true|false": + return isinstance(payload, bool) + if rule == "path or null": + return payload is None or (isinstance(payload, str) and bool(payload.strip())) + if "|" in rule and " " not in rule: + return isinstance(payload, str) and payload in rule.split("|") + return isinstance(payload, str) and bool(payload.strip()) diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py index 1fe0c75..9208042 100644 --- a/source/src/story_automator/commands/tmux.py +++ b/source/src/story_automator/commands/tmux.py @@ -98,7 +98,7 @@ def _usage(code: int) -> int: print(" kill ", file=target) print(" kill-all [--project-only]", file=target) print(" exists ", 
file=target) - print(" build-cmd [--agent TYPE] [extra_instruction]", file=target) + print(" build-cmd [--agent TYPE] [--state-file PATH] [extra_instruction]", file=target) print(" project-slug", file=target) print(" project-hash", file=target) print(" story-suffix ", file=target) @@ -109,6 +109,8 @@ def _usage(code: int) -> int: def _spawn(args: list[str]) -> int: + if args and args[0] in {"--help", "-h"}: + return _usage(0) if len(args) < 3: return _usage(1) step, epic, story_id = args[:3] @@ -169,6 +171,8 @@ def _spawn(args: list[str]) -> int: def _build_cmd(args: list[str]) -> int: + if args and args[0] in {"--help", "-h"}: + return _usage(0) if len(args) < 2: return _usage(1) step, story_id = args[:2] @@ -177,17 +181,21 @@ def _build_cmd(args: list[str]) -> int: tail = args[2:] idx = 0 state_file = "" - while idx < len(tail): - if tail[idx] == "--agent" and idx + 1 < len(tail): - agent = tail[idx + 1] - idx += 2 - continue - if tail[idx] == "--state-file" and idx + 1 < len(tail): - state_file = tail[idx + 1] - idx += 2 - continue - extra = f"{extra} {tail[idx]}".strip() - idx += 1 + try: + while idx < len(tail): + if tail[idx] == "--agent": + agent = _flag_value(tail, idx, "--agent") + idx += 2 + continue + if tail[idx] == "--state-file": + state_file = _flag_value(tail, idx, "--state-file") + idx += 2 + continue + extra = f"{extra} {tail[idx]}".strip() + idx += 1 + except PolicyError as exc: + print(str(exc), file=__import__("sys").stderr) + return 1 agent = agent or agent_type() story_prefix = story_id.replace(".", "-") root = get_project_root() @@ -696,8 +704,12 @@ def cmd_monitor_session(args: list[str]) -> int: story_key = args[idx + 1] idx += 2 continue - elif arg == "--state-file" and idx + 1 < len(args): - state_file = args[idx + 1] + elif arg == "--state-file": + try: + state_file = _flag_value(args, idx, "--state-file") + except PolicyError as exc: + print(str(exc), file=__import__("sys").stderr) + return 1 idx += 2 continue elif arg == 
"--project-root" and idx + 1 < len(args): @@ -798,3 +810,9 @@ def _verify_monitor_completion( except PolicyError: return ({"verified": False, "reason": "verifier_contract_invalid"}, verifier_name) return (result, verifier_name) + + +def _flag_value(args: list[str], idx: int, flag: str) -> str: + if idx + 1 >= len(args) or not args[idx + 1].strip() or args[idx + 1].startswith("--"): + raise PolicyError(f"{flag} requires a value") + return args[idx + 1] diff --git a/source/src/story_automator/core/runtime_policy.py b/source/src/story_automator/core/runtime_policy.py index 812d5e2..4fbffc4 100644 --- a/source/src/story_automator/core/runtime_policy.py +++ b/source/src/story_automator/core/runtime_policy.py @@ -57,6 +57,8 @@ def load_runtime_policy( resolved_state, source = resolve_policy_state_file(root, state_file) if resolved_state: state_path = Path(resolved_state) + if source in {"env", "marker"} and not state_path.is_file(): + raise PolicyError(f"{source} state file missing: {state_path}") if source != "explicit" and not state_path.is_file(): return load_effective_policy(str(root), resolve_assets=resolve_assets) return load_policy_for_state(str(state_path), project_root=str(root), resolve_assets=resolve_assets) @@ -92,9 +94,13 @@ def load_policy_snapshot( path = Path(snapshot_file) if not path.is_absolute(): path = root / path + path = _ensure_within(path, root, "policy snapshot") if not path.is_file(): raise PolicyError(f"policy snapshot missing: {path}") - raw = read_text(path) + try: + raw = read_text(path) + except OSError as exc: + raise PolicyError(f"policy snapshot unreadable: {path}") from exc actual_hash = md5_hex8(raw) if expected_hash and actual_hash != expected_hash: raise PolicyError(f"policy snapshot hash mismatch: expected {expected_hash}, got {actual_hash}") @@ -117,7 +123,10 @@ def load_policy_for_state( resolve_assets: bool = True, ) -> dict[str, Any]: root = Path(project_root or get_project_root()).resolve() - fields = 
parse_simple_frontmatter(read_text(state_file)) + try: + fields = parse_simple_frontmatter(read_text(state_file)) + except OSError as exc: + raise PolicyError(f"state file unreadable: {state_file}") from exc snapshot_file, snapshot_hash, legacy_mode = _state_policy_mode(fields) if not legacy_mode: return load_policy_snapshot( @@ -152,16 +161,17 @@ def resolve_policy_state_file(project_root: str | Path | None = None, state_file return str(_resolve_state_path(root, explicit)), "explicit" env_state = os.environ.get("STORY_AUTOMATOR_STATE_FILE", "").strip() if env_state: - return str(_resolve_state_path(root, Path(env_state).expanduser())), "env" + return str(_resolve_state_path(root, Path(env_state).expanduser(), allow_outside=False, label="env state file")), "env" marker = root / ".claude" / ".story-automator-active" if marker.is_file(): try: payload = _read_json(marker) - except PolicyError: - return "", "" + except PolicyError as exc: + raise PolicyError(f"active-run marker invalid: {exc}") from exc marker_state = str(payload.get("stateFile") or "").strip() - if marker_state: - return str(_resolve_state_path(root, Path(marker_state).expanduser())), "marker" + if not marker_state: + raise PolicyError("active-run marker missing stateFile") + return str(_resolve_state_path(root, Path(marker_state).expanduser(), allow_outside=False, label="marker state file")), "marker" return "", "" @@ -448,8 +458,11 @@ def _display_path(path: Path, project_root: Path) -> str: return str(path.resolve()) -def _resolve_state_path(project_root: Path, path: Path) -> Path: - return path if path.is_absolute() else project_root / path +def _resolve_state_path(project_root: Path, path: Path, *, allow_outside: bool = True, label: str = "state file") -> Path: + candidate = path if path.is_absolute() else project_root / path + if allow_outside: + return candidate.resolve() + return _ensure_within(candidate, project_root.resolve(), label) def _set_or_verify_hash(payload: dict[str, Any], *, 
path_key: str, hash_key: str, label: str) -> None: diff --git a/source/src/story_automator/core/success_verifiers.py b/source/src/story_automator/core/success_verifiers.py index 4a5cf42..40b32e8 100644 --- a/source/src/story_automator/core/success_verifiers.py +++ b/source/src/story_automator/core/success_verifiers.py @@ -73,12 +73,13 @@ def create_story_artifact( raw_glob = str(config.get("glob") or "_bmad-output/implementation-artifacts/{story_prefix}-*.md") expected = _parse_int(config.get("expectedMatches", 1), "success.config.expectedMatches", minimum=0) pattern = _format_story_pattern(raw_glob, norm) - matches = sorted(Path(project_root).glob(pattern)) + root, safe_pattern = _resolve_artifact_glob(project_root, pattern) + matches = sorted(root.glob(safe_pattern)) payload: dict[str, object] = { "verified": len(matches) == expected, "story": norm.key, "source": "artifact_glob", - "pattern": pattern, + "pattern": safe_pattern, "expectedMatches": expected, "actualMatches": len(matches), "matches": [str(match) for match in matches], @@ -176,6 +177,24 @@ def _story_artifact_path(project_root: str, story_prefix: str) -> Path | None: return matches[0] if matches else None +def _resolve_artifact_glob(project_root: str, pattern: str) -> tuple[Path, str]: + root = Path(project_root).resolve() + artifacts_root = (root / "_bmad-output" / "implementation-artifacts").resolve() + raw = Path(pattern) + if raw.is_absolute(): + raise PolicyError("success.config.glob must be relative to _bmad-output/implementation-artifacts") + resolved = (root / raw).resolve() + try: + relative = resolved.relative_to(root) + except ValueError as exc: + raise PolicyError("success.config.glob escapes project root") from exc + try: + resolved.relative_to(artifacts_root) + except ValueError as exc: + raise PolicyError("success.config.glob must stay within _bmad-output/implementation-artifacts") from exc + return root, str(relative) + + def _load_review_contract(project_root: str, contract: 
dict[str, Any]) -> dict[str, Any]: merged = dict(DEFAULT_REVIEW_CONTRACT) contract_path = str(contract.get("contractPath") or "").strip() diff --git a/source/tests/test_orchestrator_parse.py b/source/tests/test_orchestrator_parse.py index cc451f4..b9a2dba 100644 --- a/source/tests/test_orchestrator_parse.py +++ b/source/tests/test_orchestrator_parse.py @@ -91,6 +91,26 @@ def test_output_shape_remains_compatible(self) -> None: self.assertIn("issues_found", payload) self.assertIn("all_fixed", payload) + def test_review_output_rejects_invalid_nested_shape(self) -> None: + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), patch( + "story_automator.commands.orchestrator_parse.run_cmd", + return_value=CommandResult('{"status":"SUCCESS","issues_found":{"critical":"0","high":0,"medium":1,"low":0},"all_fixed":true,"summary":"ok","next_action":"proceed"}', 0), + ), redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "review"]) + self.assertEqual(code, 1) + self.assertEqual(json.loads(stdout.getvalue())["reason"], "sub-agent returned invalid json") + + def test_review_output_rejects_invalid_enum_value(self) -> None: + stdout = io.StringIO() + with patch.dict("os.environ", {"PROJECT_ROOT": str(self.project_root)}), patch( + "story_automator.commands.orchestrator_parse.run_cmd", + return_value=CommandResult('{"status":"BROKEN","issues_found":{"critical":0,"high":0,"medium":1,"low":0},"all_fixed":true,"summary":"ok","next_action":"proceed"}', 0), + ), redirect_stdout(stdout): + code = parse_output_action([str(self.output_file), "review"]) + self.assertEqual(code, 1) + self.assertEqual(json.loads(stdout.getvalue())["reason"], "sub-agent returned invalid json") + def test_state_file_keeps_pinned_parse_contract_after_override_changes(self) -> None: state_file = self._build_state() override_dir = self.project_root / "_bmad" / "bmm" diff --git a/source/tests/test_runtime_policy.py 
b/source/tests/test_runtime_policy.py index 2136d0f..e402320 100644 --- a/source/tests/test_runtime_policy.py +++ b/source/tests/test_runtime_policy.py @@ -127,6 +127,14 @@ def test_data_path_cannot_escape_allowed_roots(self) -> None: with self.assertRaisesRegex(PolicyError, "policy data path escapes allowed roots"): load_effective_policy(str(self.project_root)) + def test_snapshot_file_cannot_escape_project_root(self) -> None: + snapshot = snapshot_effective_policy(str(self.project_root)) + source_path = self.project_root / snapshot["policySnapshotFile"] + external = self.project_root.parent / "external-snapshot.json" + external.write_text(source_path.read_text(encoding="utf-8"), encoding="utf-8") + with self.assertRaisesRegex(PolicyError, "policy snapshot escapes allowed root"): + load_policy_snapshot(str(external), project_root=str(self.project_root), expected_hash=snapshot["policySnapshotHash"]) + def test_snapshot_detects_prompt_template_drift(self) -> None: snapshot = snapshot_effective_policy(str(self.project_root)) prompt = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "data" / "prompts" / "create.md" @@ -138,19 +146,53 @@ def test_snapshot_detects_prompt_template_drift(self) -> None: expected_hash=snapshot["policySnapshotHash"], ) - def test_missing_marker_state_falls_back_to_effective_policy(self) -> None: + def test_snapshot_detects_parse_schema_drift(self) -> None: + snapshot = snapshot_effective_policy(str(self.project_root)) + schema = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "data" / "parse" / "create.json" + schema.write_text('{"requiredKeys":["status"],"schema":{"status":"SUCCESS|FAILURE|AMBIGUOUS"}}\n', encoding="utf-8") + with self.assertRaisesRegex(PolicyError, "policy parse schema hash mismatch"): + load_policy_snapshot( + snapshot["policySnapshotFile"], + project_root=str(self.project_root), + expected_hash=snapshot["policySnapshotHash"], + ) + + def 
test_snapshot_detects_success_contract_drift(self) -> None: + snapshot = snapshot_effective_policy(str(self.project_root)) + contract = self.project_root / ".claude" / "skills" / "bmad-story-automator-review" / "contract.json" + contract.write_text('{"doneValues":["approved"],"sourceOrder":["story-file"],"syncSprintStatus":false}\n', encoding="utf-8") + with self.assertRaisesRegex(PolicyError, "policy success contract hash mismatch"): + load_policy_snapshot( + snapshot["policySnapshotFile"], + project_root=str(self.project_root), + expected_hash=snapshot["policySnapshotHash"], + ) + + def test_missing_marker_state_raises_policy_error(self) -> None: marker = self.project_root / ".claude" / ".story-automator-active" marker.parent.mkdir(parents=True, exist_ok=True) marker.write_text(json.dumps({"stateFile": "missing.md"}), encoding="utf-8") - policy = load_runtime_policy(str(self.project_root)) - self.assertEqual(policy["workflow"]["repeat"]["review"]["maxCycles"], 5) + with self.assertRaisesRegex(PolicyError, "marker state file missing"): + load_runtime_policy(str(self.project_root)) - def test_malformed_marker_falls_back_to_effective_policy(self) -> None: + def test_marker_state_cannot_escape_project_root(self) -> None: + marker = self.project_root / ".claude" / ".story-automator-active" + marker.parent.mkdir(parents=True, exist_ok=True) + marker.write_text(json.dumps({"stateFile": "../outside.md"}), encoding="utf-8") + with self.assertRaisesRegex(PolicyError, "marker state file escapes allowed root"): + load_runtime_policy(str(self.project_root)) + + def test_malformed_marker_raises_policy_error(self) -> None: marker = self.project_root / ".claude" / ".story-automator-active" marker.parent.mkdir(parents=True, exist_ok=True) marker.write_text("{bad json", encoding="utf-8") - policy = load_runtime_policy(str(self.project_root)) - self.assertEqual(policy["workflow"]["repeat"]["review"]["maxCycles"], 5) + with self.assertRaisesRegex(PolicyError, "active-run marker 
invalid"): + load_runtime_policy(str(self.project_root)) + + def test_env_state_cannot_escape_project_root(self) -> None: + with patch.dict("os.environ", {"STORY_AUTOMATOR_STATE_FILE": "../outside.md"}, clear=False): + with self.assertRaisesRegex(PolicyError, "env state file escapes allowed root"): + load_runtime_policy(str(self.project_root)) def test_legacy_state_uses_bundled_defaults_without_override_or_env(self) -> None: self._write_override({"workflow": {"repeat": {"review": {"maxCycles": 1}}}}) @@ -203,6 +245,10 @@ def test_snapshot_metadata_with_legacy_flag_is_rejected(self) -> None: with self.assertRaisesRegex(PolicyError, "state policy metadata contradictory"): load_runtime_policy(str(self.project_root), state_file=str(state_file)) + def test_explicit_directory_state_file_raises_policy_error(self) -> None: + with self.assertRaisesRegex(PolicyError, "state file unreadable"): + load_runtime_policy(str(self.project_root), state_file=str(self.project_root)) + def _install_bundle(self) -> None: source_skill = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator" source_review = REPO_ROOT / "payload" / ".claude" / "skills" / "bmad-story-automator-review" diff --git a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index 748dd0b..04afb23 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -10,7 +10,7 @@ from story_automator.commands.orchestrator import cmd_orchestrator_helper from story_automator.commands.state import cmd_build_state_doc, cmd_validate_state -from story_automator.commands.tmux import _build_cmd +from story_automator.commands.tmux import _build_cmd, cmd_tmux_wrapper REPO_ROOT = Path(__file__).resolve().parents[2] @@ -261,6 +261,26 @@ def test_build_cmd_does_not_treat_state_file_flag_as_prompt_text(self) -> None: self.assertNotIn("--state-file", rendered) self.assertNotIn(str(state_file), rendered) + def 
test_build_cmd_rejects_incomplete_state_file_flag(self) -> None: + stderr = io.StringIO() + with patch_env(self.project_root), redirect_stderr(stderr): + code = _build_cmd(["review", "1.1", "--state-file"]) + self.assertEqual(code, 1) + self.assertIn("--state-file requires a value", stderr.getvalue()) + + def test_tmux_subcommand_help_matches_step_preflight_contract(self) -> None: + stdout = io.StringIO() + with redirect_stdout(stdout): + code = cmd_tmux_wrapper(["spawn", "--help"]) + self.assertEqual(code, 0) + self.assertIn("--command", stdout.getvalue()) + + stdout = io.StringIO() + with redirect_stdout(stdout): + code = cmd_tmux_wrapper(["build-cmd", "--help"]) + self.assertEqual(code, 0) + self.assertIn("--state-file", stdout.getvalue()) + def test_build_state_doc_returns_json_on_policy_snapshot_failure(self) -> None: override_dir = self.project_root / "_bmad" / "bmm" override_dir.mkdir(parents=True, exist_ok=True) @@ -293,6 +313,15 @@ def test_build_cmd_rejects_unknown_step_via_policy(self) -> None: self.assertEqual(code, 1) self.assertIn("unknown step: ship", stderr.getvalue()) + def test_escalate_returns_json_on_incomplete_state_file_flag(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["escalate", "review-loop", "cycles=1", "--state-file"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertTrue(payload["escalate"]) + self.assertEqual(payload["reason"], "--state-file requires a value") + def _build_state(self) -> Path: stdout = io.StringIO() template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "templates" / "state-document.md" diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index f858d5c..aec2a4d 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -42,6 +42,30 @@ def test_create_story_artifact_matches_configured_glob(self) 
-> None: self.assertTrue(payload["verified"]) self.assertEqual(payload["actualMatches"], 1) + def test_create_story_artifact_rejects_glob_that_escapes_project_root(self) -> None: + with self.assertRaisesRegex(PolicyError, "success.config.glob escapes project root"): + create_story_artifact( + project_root=str(self.project_root), + story_key="1.2", + contract={"config": {"glob": "../other/{story_prefix}-*.md", "expectedMatches": 1}}, + ) + + def test_create_story_artifact_rejects_glob_outside_artifacts_dir(self) -> None: + with self.assertRaisesRegex(PolicyError, "success.config.glob must stay within _bmad-output/implementation-artifacts"): + create_story_artifact( + project_root=str(self.project_root), + story_key="1.2", + contract={"config": {"glob": "docs/{story_prefix}-*.md", "expectedMatches": 1}}, + ) + + def test_create_story_artifact_rejects_absolute_glob(self) -> None: + with self.assertRaisesRegex(PolicyError, "success.config.glob must be relative to _bmad-output/implementation-artifacts"): + create_story_artifact( + project_root=str(self.project_root), + story_key="1.2", + contract={"config": {"glob": "/tmp/{story_prefix}-*.md", "expectedMatches": 1}}, + ) + def test_review_completion_uses_contract_done_values(self) -> None: self._write_story("1-2-example", status="approved") contract = self._write_review_contract( @@ -262,6 +286,21 @@ def test_validate_story_creation_check_returns_compat_schema_on_missing_state_fi self.assertFalse(payload["valid"]) self.assertIn("missing-state.md", payload["reason"]) + def test_review_wrapper_normalizes_directory_state_file(self) -> None: + payload = verify_code_review_completion(str(self.project_root), "1.2", state_file=self.project_root) + self.assertFalse(payload["verified"]) + self.assertEqual(payload["reason"], "review_contract_invalid") + self.assertIn("state file unreadable", str(payload.get("error"))) + + def test_validate_story_creation_check_returns_compat_schema_on_directory_state_file(self) -> None: + stdout 
= io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_validate_story_creation(["check", "1.2", "--state-file", str(self.project_root)]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["valid"]) + self.assertIn("state file unreadable", payload["reason"]) + def test_review_wrapper_honors_empty_injected_contract(self) -> None: self._write_story("1-2-example", status="done") self._write_override( @@ -298,6 +337,16 @@ def test_verify_step_rejects_incomplete_state_file_flag(self) -> None: self.assertEqual(payload["reason"], "verifier_contract_invalid") self.assertEqual(payload["error"], "--state-file requires a value") + def test_verify_code_review_rejects_incomplete_state_file_flag(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), redirect_stdout(stdout): + code = cmd_orchestrator_helper(["verify-code-review", "1.2", "--state-file"]) + self.assertEqual(code, 1) + payload = json.loads(stdout.getvalue()) + self.assertFalse(payload["verified"]) + self.assertEqual(payload["reason"], "review_contract_invalid") + self.assertEqual(payload["error"], "--state-file requires a value") + def test_validate_story_creation_check_returns_compat_schema_on_bad_counts(self) -> None: stdout = io.StringIO() with patch_env(self.project_root), redirect_stdout(stdout): From d7ffdf5802213dc63a8a94693370f468377b33a5 Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Wed, 15 Apr 2026 06:47:36 -0300 Subject: [PATCH 16/17] fix: harden tmux monitor failure handling --- docs/changelog/260415.md | 18 +++++++++++ source/src/story_automator/commands/tmux.py | 6 ++-- source/tests/test_state_policy_metadata.py | 21 +++++++++++++ source/tests/test_success_verifiers.py | 33 ++++++++++++++++++++- 4 files changed, 74 insertions(+), 4 deletions(-) diff --git a/docs/changelog/260415.md b/docs/changelog/260415.md index c54cdaa..084800a 100644 --- 
a/docs/changelog/260415.md +++ b/docs/changelog/260415.md @@ -29,3 +29,21 @@ Closed PR review gaps around state-file handling, snapshot boundaries, parser va ### QA Notes - N/A + +## 260415-06:47:15 - Harden tmux prompt and monitor contract failures + +### Summary +Kept tmux workflow helpers fail-closed when prompt templates or success verifiers are missing or unreadable. + +### Fixed +- Moved review prompt rendering under the existing `build-cmd` policy/contract error path and broadened it to catch `OSError` template read failures. +- Made monitor verification return `verifier_contract_invalid` when `success.verifier` is blank instead of falling through to `normal_completion`. +- Added regression coverage for missing/directory prompt templates and for the monitor-session caller path when verifier config is empty. + +### Files +- `source/src/story_automator/commands/tmux.py` +- `source/tests/test_state_policy_metadata.py` +- `source/tests/test_success_verifiers.py` + +### QA Notes +- N/A diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py index 9208042..b35a1d1 100644 --- a/source/src/story_automator/commands/tmux.py +++ b/source/src/story_automator/commands/tmux.py @@ -202,7 +202,8 @@ def _build_cmd(args: list[str]) -> int: try: policy = load_runtime_policy(root, state_file=state_file) contract = step_contract(policy, step) - except (FileNotFoundError, PolicyError) as exc: + prompt = _render_step_prompt(contract, story_id, story_prefix, extra) + except (OSError, PolicyError) as exc: print(str(exc), file=__import__("sys").stderr) return 1 ai_command = os.environ.get("AI_COMMAND") @@ -212,7 +213,6 @@ def _build_cmd(args: list[str]) -> int: cli = agent_cli(agent) else: cli = "codex exec" - prompt = _render_step_prompt(contract, story_id, story_prefix, extra) quoted_prompt = shlex.quote(prompt) if agent == "codex" and not ai_command: codex_home = f"/tmp/sa-codex-home-{project_hash(root)}" @@ -796,7 +796,7 @@ def 
_verify_monitor_completion( return ({"verified": False, "reason": "verifier_contract_invalid"}, "") verifier_name = str(contract.get("verifier") or "").strip() if not verifier_name: - return None + return ({"verified": False, "reason": "verifier_contract_invalid"}, "") if verifier_name in {"create_story_artifact", "review_completion", "epic_complete"} and not story_key.strip(): return ({"verified": False, "reason": "story_key_required", "verifier": verifier_name}, verifier_name) try: diff --git a/source/tests/test_state_policy_metadata.py b/source/tests/test_state_policy_metadata.py index 04afb23..a617de0 100644 --- a/source/tests/test_state_policy_metadata.py +++ b/source/tests/test_state_policy_metadata.py @@ -268,6 +268,27 @@ def test_build_cmd_rejects_incomplete_state_file_flag(self) -> None: self.assertEqual(code, 1) self.assertIn("--state-file requires a value", stderr.getvalue()) + def test_build_cmd_returns_exit_code_one_when_prompt_template_is_missing(self) -> None: + state_file = self._build_state() + template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "data" / "prompts" / "review.md" + template.unlink() + stderr = io.StringIO() + with patch_env(self.project_root), redirect_stderr(stderr): + code = _build_cmd(["review", "1.1", "--state-file", str(state_file)]) + self.assertEqual(code, 1) + self.assertIn("review.md", stderr.getvalue()) + + def test_build_cmd_returns_exit_code_one_when_prompt_template_becomes_directory(self) -> None: + state_file = self._build_state() + template = self.project_root / ".claude" / "skills" / "bmad-story-automator" / "data" / "prompts" / "review.md" + template.unlink() + template.mkdir() + stderr = io.StringIO() + with patch_env(self.project_root), redirect_stderr(stderr): + code = _build_cmd(["review", "1.1", "--state-file", str(state_file)]) + self.assertEqual(code, 1) + self.assertIn("review.md", stderr.getvalue()) + def test_tmux_subcommand_help_matches_step_preflight_contract(self) -> None: 
stdout = io.StringIO() with redirect_stdout(stdout): diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index aec2a4d..8d277a2 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -7,10 +7,11 @@ import unittest from contextlib import redirect_stdout from pathlib import Path +from unittest.mock import patch from story_automator.commands.orchestrator import cmd_orchestrator_helper from story_automator.commands.state import cmd_build_state_doc -from story_automator.commands.tmux import _verify_monitor_completion +from story_automator.commands.tmux import _verify_monitor_completion, cmd_monitor_session from story_automator.commands.validate_story_creation import cmd_validate_story_creation from story_automator.core.review_verify import verify_code_review_completion from story_automator.core.runtime_policy import PolicyError @@ -168,6 +169,36 @@ def test_monitor_dispatch_skips_story_keyed_verifier_without_story_key(self) -> self.assertFalse(payload["verified"]) self.assertEqual(payload["reason"], "story_key_required") + def test_monitor_dispatch_rejects_missing_verifier_in_contract(self) -> None: + self._write_override({"steps": {"review": {"success": {"verifier": ""}}}}) + result = _verify_monitor_completion( + "review", + project_root=str(self.project_root), + story_key="1.2", + output_file="/tmp/session.txt", + ) + self.assertIsNotNone(result) + payload, verifier = result or ({}, "") + self.assertEqual(verifier, "") + self.assertFalse(payload["verified"]) + self.assertEqual(payload["reason"], "verifier_contract_invalid") + + def test_monitor_session_reports_incomplete_when_verifier_missing(self) -> None: + self._write_override({"steps": {"review": {"success": {"verifier": ""}}}}) + stdout = io.StringIO() + statuses = [ + {"todos_done": 1, "todos_total": 1, "session_state": "completed"}, + {"active_task": "/tmp/session.txt"}, + ] + with patch_env(self.project_root), 
patch("story_automator.commands.tmux.time.sleep"), patch( + "story_automator.commands.tmux.session_status", side_effect=statuses + ), redirect_stdout(stdout): + code = cmd_monitor_session(["fake-session", "--json", "--workflow", "review", "--story-key", "1.2"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["final_state"], "incomplete") + self.assertEqual(payload["exit_reason"], "verifier_contract_invalid") + def test_monitor_dispatch_allows_session_exit_without_story_key(self) -> None: result = _verify_monitor_completion( "dev", From 85f49a405f45d735bd1be77139ef063320fea29f Mon Sep 17 00:00:00 2001 From: bmad <236206860+bma-d@users.noreply.github.com> Date: Wed, 15 Apr 2026 07:55:10 -0300 Subject: [PATCH 17/17] fix: tighten tmux monitor output verification --- docs/changelog/260415.md | 18 +++++++++ source/src/story_automator/commands/tmux.py | 35 +++++++++++++++-- source/tests/test_success_verifiers.py | 43 +++++++++++++++++++++ 3 files changed, 92 insertions(+), 4 deletions(-) diff --git a/docs/changelog/260415.md b/docs/changelog/260415.md index 084800a..5c6a8aa 100644 --- a/docs/changelog/260415.md +++ b/docs/changelog/260415.md @@ -47,3 +47,21 @@ Kept tmux workflow helpers fail-closed when prompt templates or success verifier ### QA Notes - N/A + +## 260415-07:54:52 - Tighten tmux monitor output verification + +### Summary +Made monitor JSON reflect actual verifier results and narrowed verifier-side file error normalization. + +### Fixed +- Defaulted `output_verified` to `false` when monitor terminal states have no verifier result, instead of deriving it from `output_file` presence. +- Kept verified and incomplete monitor branches explicit about verifier outcome in emitted JSON. +- Narrowed verifier-side error normalization to missing/path-shape file failures plus `PolicyError`, instead of flattening all `OSError`s. +- Added regressions for verifier-side file failures and timeout output remaining unverified. 
+ +### Files +- `source/src/story_automator/commands/tmux.py` +- `source/tests/test_success_verifiers.py` + +### QA Notes +- N/A diff --git a/source/src/story_automator/commands/tmux.py b/source/src/story_automator/commands/tmux.py index b35a1d1..5e839f5 100644 --- a/source/src/story_automator/commands/tmux.py +++ b/source/src/story_automator/commands/tmux.py @@ -744,7 +744,15 @@ def cmd_monitor_session(args: list[str]) -> int: verified, verifier_name = verification if bool(verified.get("verified")): reason = "normal_completion" if verifier_name == "session_exit" else "verified_complete" - return _emit_monitor(json_output, "completed", last_done, last_total, str(output), reason) + return _emit_monitor( + json_output, + "completed", + last_done, + last_total, + str(output), + reason, + output_verified=bool(verified.get("verified")), + ) return _emit_monitor( json_output, "incomplete", @@ -752,6 +760,7 @@ def cmd_monitor_session(args: list[str]) -> int: last_total, str(output), str(verified.get("reason") or "workflow_not_verified"), + output_verified=bool(verified.get("verified")), ) return _emit_monitor(json_output, "completed", last_done, last_total, str(output), "normal_completion") if state == "crashed": @@ -774,9 +783,27 @@ def cmd_monitor_session(args: list[str]) -> int: return _emit_monitor(json_output, "timeout", last_done, last_total, str(output), "max_polls_exceeded") -def _emit_monitor(json_output: bool, state: str, done: int, total: int, output_file: str, reason: str) -> int: +def _emit_monitor( + json_output: bool, + state: str, + done: int, + total: int, + output_file: str, + reason: str, + *, + output_verified: bool | None = None, +) -> int: if json_output: - print_json({"final_state": state, "todos_done": done, "todos_total": total, "output_file": output_file, "exit_reason": reason, "output_verified": bool(output_file)}) + print_json( + { + "final_state": state, + "todos_done": done, + "todos_total": total, + "output_file": output_file, + 
"exit_reason": reason, + "output_verified": False if output_verified is None else output_verified, + } + ) else: print(f"{state},{done},{total},{output_file},{reason}") return 0 @@ -807,7 +834,7 @@ def _verify_monitor_completion( output_file=output_file, contract=contract, ) - except PolicyError: + except (FileNotFoundError, IsADirectoryError, NotADirectoryError, PolicyError): return ({"verified": False, "reason": "verifier_contract_invalid"}, verifier_name) return (result, verifier_name) diff --git a/source/tests/test_success_verifiers.py b/source/tests/test_success_verifiers.py index 8d277a2..db94d9e 100644 --- a/source/tests/test_success_verifiers.py +++ b/source/tests/test_success_verifiers.py @@ -198,6 +198,49 @@ def test_monitor_session_reports_incomplete_when_verifier_missing(self) -> None: payload = json.loads(stdout.getvalue()) self.assertEqual(payload["final_state"], "incomplete") self.assertEqual(payload["exit_reason"], "verifier_contract_invalid") + self.assertFalse(payload["output_verified"]) + + def test_monitor_dispatch_rejects_verifier_side_file_error(self) -> None: + with patch("story_automator.commands.tmux.run_success_verifier", side_effect=FileNotFoundError("missing.json")): + result = _verify_monitor_completion( + "review", + project_root=str(self.project_root), + story_key="1.2", + output_file="/tmp/session.txt", + ) + self.assertIsNotNone(result) + payload, verifier = result or ({}, "") + self.assertEqual(verifier, "review_completion") + self.assertFalse(payload["verified"]) + self.assertEqual(payload["reason"], "verifier_contract_invalid") + + def test_monitor_session_reports_incomplete_when_verifier_raises_file_error(self) -> None: + stdout = io.StringIO() + statuses = [ + {"todos_done": 1, "todos_total": 1, "session_state": "completed"}, + {"active_task": "/tmp/session.txt"}, + ] + with patch_env(self.project_root), patch("story_automator.commands.tmux.time.sleep"), patch( + "story_automator.commands.tmux.session_status", 
side_effect=statuses + ), patch("story_automator.commands.tmux.run_success_verifier", side_effect=FileNotFoundError("missing.json")), redirect_stdout(stdout): + code = cmd_monitor_session(["fake-session", "--json", "--workflow", "review", "--story-key", "1.2"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["final_state"], "incomplete") + self.assertEqual(payload["exit_reason"], "verifier_contract_invalid") + self.assertFalse(payload["output_verified"]) + + def test_monitor_session_timeout_keeps_output_unverified_without_verifier_result(self) -> None: + stdout = io.StringIO() + with patch_env(self.project_root), patch( + "story_automator.commands.tmux.session_status", return_value={"active_task": "/tmp/session.txt"} + ), redirect_stdout(stdout): + code = cmd_monitor_session(["fake-session", "--json", "--max-polls", "0"]) + self.assertEqual(code, 0) + payload = json.loads(stdout.getvalue()) + self.assertEqual(payload["final_state"], "timeout") + self.assertEqual(payload["exit_reason"], "max_polls_exceeded") + self.assertFalse(payload["output_verified"]) def test_monitor_dispatch_allows_session_exit_without_story_key(self) -> None: result = _verify_monitor_completion(