From 500f89aecb6e9a801ee3661059be4221bebdd3ed Mon Sep 17 00:00:00 2001 From: James Ross Date: Tue, 26 May 2026 01:57:13 -0700 Subject: [PATCH 1/9] docs(holmes): plan weslaw assurance prd slices --- docs/BEARING.md | 37 +- .../holmes-weslaw-assurance-prd-test-plan.md | 394 ++++++++++++++++++ .../prds/README.md | 116 ++++++ docs/design/README.md | 2 + 4 files changed, 543 insertions(+), 6 deletions(-) create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/README.md diff --git a/docs/BEARING.md b/docs/BEARING.md index 3d5e8355..df4f5454 100644 --- a/docs/BEARING.md +++ b/docs/BEARING.md @@ -12,6 +12,7 @@ timeline Phase 3 : Binding Observatory : Module Runtime : Artifact Evidence Phase 4 : Legacy Node Retired : Rust-Native Release : Holmes Assurance Phase 5 : weslaw : Semantic Law IR : Contract Bundle Physics + Phase 6 : Holmes + weslaw : Assurance PRDs : Evidence Judgment ``` ## Active Gravity @@ -399,10 +400,34 @@ deferral, and v1 playback/retrospective closeout evidence. deterministic, and boring. It must not become an expression language, policy engine, or YAML programming language. +### 12. Holmes `weslaw` Assurance Planning + +The next chunk is a 50-slice PRD and test-plan campaign that turns the merged +`weslaw` v1 outputs into Holmes-facing assurance requirements before +implementation begins. + +The active packet is +[0020-holmes-weslaw-assurance-prd-test-plan](./design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md). + +Working budget: **50 slices**. + +Status: **0 / 50 slices closed**. The plan allocates `HLAW-001` through +`HLAW-050` across evidence intake, typed domain contracts, report models, CLI +flows, GitHub publishing, MCP surfaces, policy, QA fixtures, determinism, +performance budgets, migration, release gates, operator docs, and campaign +closeout. 
Each slice must produce a PRD/test-plan artifact with explicit +objectives, scope, user stories, BDD acceptance criteria, and test scenarios. + +The first recommended pull is `HLAW-001` through `HLAW-010`: evidence intake +and typed domain contracts for law diffs, coverage reports, capability +summaries, bundle manifests, artifact location, validation results, semantic +findings, coverage gates, and traceability gates. + ## Next Target -The immediate focus is **PR review and merge for `weslaw` v1**, then Holmes -assurance integration over the new law diff, coverage, and bundle evidence. +The immediate focus is **Holmes `weslaw` assurance planning**: spend the next +50 slices writing implementation-grade PRDs and test plans before the Rust +Holmes assurance integration begins. Current evidence still includes complete v0.0.5 publication proof, Rust L1 fixtures for directive-heavy SDL, schema extensions, nested list type @@ -416,11 +441,11 @@ without pinning Wesley to legacy Node. The `0019` packet names the semantic law architecture that lets Wesley compile meaning alongside shape without smuggling runtime ownership into the base compiler. -The next pull after this PR is: +The next pull is: -1. After merge, start the Holmes-facing assurance pass over `weslaw` outputs: - consume law diffs, coverage reports, capability summaries, and bundle hashes - without giving Holmes ownership of semantic truth. +1. `HLAW-001` through `HLAW-010`: write PRD/test-plan artifacts for the + evidence intake and typed domain contracts that let Holmes consume + `weslaw` outputs without owning semantic law truth. 
## Post-Retirement Freestyle Slice Log diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md new file mode 100644 index 00000000..7a9af1f7 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md @@ -0,0 +1,394 @@ +--- +title: Holmes weslaw Assurance PRD And Test Plan Campaign +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: active +release: v0.0.8 +--- + +# Holmes `weslaw` Assurance PRD And Test Plan Campaign + +## Status + +Active planning packet. + +## Question + +How should Wesley spend the next 50 slices after `weslaw` v1 so Holmes can +consume `weslaw` outputs as assurance evidence without taking ownership of +semantic law truth? + +## Hill + +The next chunk is a product-management and QA campaign, not a code-first +campaign. + +Each slice produces one PRD and test plan for a concrete Holmes-facing +`weslaw` assurance feature. The campaign exists to force the next engineering +work to have measurable behavior, non-goals, BDD acceptance criteria, and +negative/non-functional testing before Rust crates, CLI commands, MCP tools, or +GitHub publishers are built. + +## Product Boundary + +Wesley owns compiler truth and `weslaw` law artifacts. Holmes owns assurance +judgment over evidence. Holmes may ingest `weslaw` outputs and produce +findings, gates, reports, and recommendations. Holmes must not reinterpret +GraphQL shape, mutate law, rebind law, invent semantic diffs, or become the +source of truth for contract bundles. + +## Slice Output Contract + +Every `HLAW` slice creates one Markdown PRD/test-plan artifact under this +packet, using the filename: + +```text +prds/HLAW-XXX-<slug>.md +``` + +Every slice artifact must contain these sections, with these headings: + +1. `Feature Overview & Objectives` +2. `Scope Definition` +3. 
`Detailed User Stories` +4. `Acceptance Criteria (BDD Format)` +5. `Detailed Test Plan` + +The canonical artifact template is +[prds/README.md](./prds/README.md). Future slice artifacts should copy that +structure before adding feature-specific requirements. + +The slice artifact must be written from two roles at once: + +- Expert Technical Product Manager: define user value, scope, metrics, and + acceptance criteria. +- Lead QA Engineer: define deterministic validation, fixture strategy, + failure modes, and non-functional test coverage. + +The artifact must not contain generic implementation filler. Each PRD must name +exact command surfaces, artifact shapes, expected fields, policy decisions, +failure behavior, and test fixtures where known. + +## Campaign KPIs + +| KPI | Target | +| --- | --- | +| PRD completeness | 50 / 50 slice artifacts include all five required sections. | +| Testability | 50 / 50 slice artifacts include happy-path, negative/edge, and non-functional test coverage. | +| Boundary clarity | 50 / 50 slice artifacts explicitly state what Holmes must not own. | +| Implementation readiness | At least 40 / 50 slice artifacts name concrete commands, ports, schemas, fixtures, or reports. | +| Drift control | Drift checks at slices 10, 25, 40, and 50 update this packet and `BEARING`. | + +## Chunking + +| Chunk | Slices | Planned PR Shape | Purpose | +| --- | --- | --- | --- | +| 1 | HLAW-001..HLAW-010 | One PR | Evidence intake and typed domain contracts. | +| 2 | HLAW-011..HLAW-020 | One PR | Report model, CLI operator flows, and local artifacts. | +| 3 | HLAW-021..HLAW-030 | One PR | GitHub and MCP interfaces over the same assurance use cases. | +| 4 | HLAW-031..HLAW-040 | One PR | Policy, QA harnesses, determinism, concurrency, and budgets. | +| 5 | HLAW-041..HLAW-050 | One PR | Migration, release gates, documentation, and campaign closeout. | + +Drift checks happen after HLAW-010, HLAW-025, HLAW-040, and HLAW-050. 
+ +## Slice Checklist + +### Evidence Intake And Typed Domain Contracts + +- [ ] HLAW-001 `HolmesLawEvidenceBundle` PRD and test plan. + - Feature/product: A typed bundle contract that groups `wesley law diff`, + `law coverage`, `law capabilities`, and contract bundle manifest outputs + into one Holmes-readable evidence input. + - Required output: PRD for schema fields, versioning, required/optional + artifact references, unsupported-version diagnostics, and fixture layout. +- [ ] HLAW-002 `LawDiffIngestPort` PRD and test plan. + - Feature/product: A Holmes input port that reads `wesley.law-diff/v1` JSON + and normalizes it into assurance findings without reclassifying semantic + law changes. + - Required output: PRD for event-kind mapping, malformed JSON handling, + duplicate law ids, unknown event kinds, and stable finding ids. +- [ ] HLAW-003 `LawCoverageIngestPort` PRD and test plan. + - Feature/product: A Holmes input port that reads profile/category-aware law + coverage reports and turns missing release-required subjects into gates. + - Required output: PRD for release/local profile behavior, threshold + handling, missing-subject rendering, and coverage fixture matrices. +- [ ] HLAW-004 `LawCapabilityIngestPort` PRD and test plan. + - Feature/product: A Holmes input port that reads report-only footprint + capability summaries and reports boundary posture without claiming runtime + enforcement. + - Required output: PRD for `reportOnly`, `runtimeEnforcement`, reads/writes/ + creates/forbids, empty-footprint behavior, and wording constraints. +- [ ] HLAW-005 `ContractBundleManifestIngestPort` PRD and test plan. + - Feature/product: A Holmes input port that reads contract bundle manifests + and verifies schema, law, profile, bundle, compiler, and codec hashes are + present and consistently referenced by other artifacts. + - Required output: PRD for hash validation, absent optional hashes, mismatch + errors, and bundle traceability reporting. 
+- [ ] HLAW-006 `WeslawArtifactLocator` PRD and test plan. + - Feature/product: A local adapter that resolves law evidence artifact paths + from CLI flags, workflow artifacts, and explicit bundle metadata. + - Required output: PRD for path resolution precedence, missing files, + symlink/path traversal policy, and deterministic diagnostics. +- [ ] HLAW-007 `LawEvidenceValidationResult` PRD and test plan. + - Feature/product: A typed validation result that separates input contract + errors from assurance findings so bad evidence fails before judgment. + - Required output: PRD for error taxonomy, JSON shape, CLI exit mapping, and + test fixtures for invalid artifacts. +- [ ] HLAW-008 `SemanticChangeFinding` PRD and test plan. + - Feature/product: A domain finding model for law diff events with severity, + posture, law id, subject, change fields, and source artifact references. + - Required output: PRD for stable finding IDs, severity defaults, markdown + snippets, JSON rendering, and sort order. +- [ ] HLAW-009 `LawCoverageGateDecision` PRD and test plan. + - Feature/product: A gate model that evaluates law coverage against policy + profiles and reports pass/warn/fail/unavailable outcomes. + - Required output: PRD for gate states, profile-specific required categories, + missing-subject evidence, and fallback behavior when coverage is absent. +- [ ] HLAW-010 `BundleTraceabilityGateDecision` PRD and test plan. + - Feature/product: A gate model that checks every ingested law artifact links + back to the same expected contract bundle hash family. + - Required output: PRD for cross-artifact consistency, hash mismatch + findings, unsupported manifest versions, and checkpoint playback. + +### Report Model, CLI, And Local Artifacts + +- [ ] HLAW-011 `LawAssuranceReportDocument` PRD and test plan. + - Feature/product: A structured report section family for semantic changes, + coverage, capabilities, and bundle traceability inside the Holmes + `ReportDocument`. 
+ - Required output: PRD for section ids, tables, summary metrics, attachments, + stable ordering, and renderer-neutral semantics. +- [ ] HLAW-012 `LawDiffReportSection` PRD and test plan. + - Feature/product: A report section that presents semantic law diff events in + review order while preserving machine-readable event kinds. + - Required output: PRD for field columns, grouped summaries, high-risk event + highlighting, truncation policy, and no-change behavior. +- [ ] HLAW-013 `LawCoverageReportSection` PRD and test plan. + - Feature/product: A report section that presents law coverage by profile, + category, required status, covered count, and missing subjects. + - Required output: PRD for thresholds, empty categories, required versus + advisory categories, and accessibility of table output. +- [ ] HLAW-014 `LawCapabilityReportSection` PRD and test plan. + - Feature/product: A report section that presents footprint capability + summaries while explicitly labeling them report-only. + - Required output: PRD for wording, resource grouping, empty lists, large + footprint truncation, and runtime-enforcement disclaimers. +- [ ] HLAW-015 `BundleProvenanceReportSection` PRD and test plan. + - Feature/product: A report section that shows schemaHash, lawHash, + profileHash, bundleHash, law codec, compiler identity, and generator + provenance. + - Required output: PRD for required fields, partial manifests, hash display, + copy/paste safety, and mismatch callouts. +- [ ] HLAW-016 `holmes weslaw validate` CLI PRD and test plan. + - Feature/product: A Holmes CLI command that validates a + `HolmesLawEvidenceBundle` input without making readiness judgments. + - Required output: PRD for flags, JSON/text output, exit codes, invalid + bundle diagnostics, and fixture golden outputs. +- [ ] HLAW-017 `holmes weslaw assess` CLI PRD and test plan. 
+ - Feature/product: A Holmes CLI command that evaluates validated law evidence + into gates, findings, verdict, and a structured report document. + - Required output: PRD for flags, policy selection, `--fail-on` behavior, + terminal output, JSON output, and missing optional artifact behavior. +- [ ] HLAW-018 `holmes weslaw report` CLI PRD and test plan. + - Feature/product: A Holmes CLI command that renders a `ReportDocument` as + Markdown, JSON, terminal text, or file output without publishing anywhere. + - Required output: PRD for renderer selection, output paths, stdout behavior, + overwrite policy, and snapshot tests. +- [ ] HLAW-019 `LawAssuranceArtifactWriter` PRD and test plan. + - Feature/product: A local output adapter that writes normalized validation, + assessment, and rendered report artifacts for CI and later review. + - Required output: PRD for artifact names, deterministic bytes, directory + creation, collision policy, and reproducible hash checks. +- [ ] HLAW-020 `LawAssuranceExitCodePolicy` PRD and test plan. + - Feature/product: A CLI exit-code policy for validation errors, assurance + failures, warnings, publisher failures, and internal errors. + - Required output: PRD for exit-code table, `--fail-on` gates, CI defaults, + and negative tests for each category. + +### GitHub And MCP Interfaces + +- [ ] HLAW-021 `GitHubLawAssuranceComment` PRD and test plan. + - Feature/product: A GitHub PR comment renderer/publisher for law diff, + coverage, capability, and bundle provenance summaries. + - Required output: PRD for sticky comment markers, update behavior, markdown + constraints, truncation, links, and idempotent publishing. +- [ ] HLAW-022 `GitHubLawGateCheckSummary` PRD and test plan. + - Feature/product: A GitHub-facing gate summary that tells reviewers whether + law evidence is pass, warn, fail, or unavailable. + - Required output: PRD for review wording, blocked-merge posture, required + versus advisory gates, and stale evidence detection. 
+- [ ] HLAW-023 `GitHubLawFindingAnnotations` PRD and test plan. + - Feature/product: A mapping from law findings to PR annotations or comment + bullets where file/line context exists. + - Required output: PRD for annotation eligibility, no-line findings, + deduplication, rate limits, and fallback rendering. +- [ ] HLAW-024 `GitHubLawEvidenceLinks` PRD and test plan. + - Feature/product: A link model that connects PR comments to law artifacts, + CI runs, bundle manifests, and rendered reports. + - Required output: PRD for artifact URLs, missing artifact behavior, + expiration notes, and markdown link safety. +- [ ] HLAW-025 `GitHubLawOverrideControls` PRD and test plan. + - Feature/product: A policy-controlled override surface for maintainers to + acknowledge advisory law warnings without hiding failed validation. + - Required output: PRD for labels/checkboxes, audit records, non-overridable + failures, and drift checkpoint criteria. +- [ ] HLAW-026 `McpAssessWeslawBundleTool` PRD and test plan. + - Feature/product: An MCP tool that assesses a law evidence bundle and + returns structured gates, findings, and rendered report references. + - Required output: PRD for request/response schema, workspace authorization, + error mapping, and deterministic examples. +- [ ] HLAW-027 `McpLawEvidenceResources` PRD and test plan. + - Feature/product: MCP resources exposing law diff, coverage, capability, + bundle manifest, and rendered law report data. + - Required output: PRD for resource URIs, caching, access control, invalid + bundle references, and schema examples. +- [ ] HLAW-028 `McpExplainLawFindingTool` PRD and test plan. + - Feature/product: An MCP tool that explains one Holmes law finding with + source artifact references and suggested next action. + - Required output: PRD for finding ids, explanation shape, citation fallback, + and missing finding behavior. +- [ ] HLAW-029 `McpLawPolicyTool` PRD and test plan. 
+ - Feature/product: An MCP tool that returns active law assurance policy, + thresholds, required gates, and non-overridable checks. + - Required output: PRD for policy redaction, profile selection, unknown + profile errors, and stale policy detection. +- [ ] HLAW-030 `AgentSafeLawSummary` PRD and test plan. + - Feature/product: A compact, structured summary format optimized for agents + that need law evidence without long Markdown comments. + - Required output: PRD for token budgets, severity grouping, artifact refs, + omitted-detail accounting, and MCP/CLI parity. + +### Policy, QA Harnesses, Determinism, And Budgets + +- [ ] HLAW-031 `LawAssurancePolicySchema` PRD and test plan. + - Feature/product: A versioned policy schema defining required law evidence, + thresholds, severity mappings, and override rules. + - Required output: PRD for schema versioning, defaults, profile inheritance, + unknown fields, and JSON Schema validation. +- [ ] HLAW-032 `LawSeverityMappingPolicy` PRD and test plan. + - Feature/product: A policy layer that maps law diff event kinds and coverage + gaps to Holmes severities without changing Wesley's semantic classifications. + - Required output: PRD for mapping table, unmapped event behavior, + release/local differences, and fixture coverage. +- [ ] HLAW-033 `LawCoverageThresholdPolicy` PRD and test plan. + - Feature/product: A policy layer that sets required coverage floors by + category and profile. + - Required output: PRD for pass/warn/fail thresholds, category absences, + percentage rounding, and boundary-value tests. +- [ ] HLAW-034 `LawAssuranceSuppressionPolicy` PRD and test plan. + - Feature/product: A suppression/audit model for known advisory findings that + must not suppress invalid evidence or failed binding. + - Required output: PRD for suppression ids, expiration, reason text, audit + output, and abuse-prevention tests. +- [ ] HLAW-035 `LawAssuranceAuditWitness` PRD and test plan. 
+ - Feature/product: A deterministic witness artifact recording inputs, policy, + outputs, hashes, and the exact gates evaluated by Holmes. + - Required output: PRD for witness schema, hash coverage, replay fields, + clock injection, and reproducibility tests. +- [ ] HLAW-036 `LawAssuranceGoldenFixtureCorpus` PRD and test plan. + - Feature/product: A fixture corpus covering clean, warning, failing, + malformed, stale, and missing law evidence bundles. + - Required output: PRD for fixture naming, expected outputs, snapshot + regeneration policy, and cross-platform stability. +- [ ] HLAW-037 `LawAssuranceNegativeFixtureCorpus` PRD and test plan. + - Feature/product: A negative fixture set for invalid JSON, unsupported + versions, hash mismatches, missing artifacts, unknown profiles, and malformed + policies. + - Required output: PRD for diagnostic codes, exit behavior, fixture + isolation, and panic-free guarantees. +- [ ] HLAW-038 `LawAssuranceFakeClockAndPorts` PRD and test plan. + - Feature/product: Dependency-injected clock and in-memory ports for + deterministic tests across CLI, API, MCP, and GitHub adapters. + - Required output: PRD for fake-clock API, no-wall-clock assertions, adapter + contracts, and concurrency-safe tests. +- [ ] HLAW-039 `LawAssuranceConcurrencyAndIdempotence` PRD and test plan. + - Feature/product: Test requirements for repeated, concurrent, and retried + assessment/publish operations. + - Required output: PRD for idempotent comment updates, artifact overwrite + policy, race simulation, and lock-free domain behavior. +- [ ] HLAW-040 `LawAssurancePerformanceBudget` PRD and test plan. + - Feature/product: Performance and size budgets for law evidence validation, + assessment, rendering, and publishing. + - Required output: PRD for benchmark fixtures, large report limits, timeout + seams, memory ceilings, and drift checkpoint criteria. 
+ +### Migration, Release Gates, Docs, And Closeout + +- [ ] HLAW-041 `LegacyHolmesLawEvidenceMapping` PRD and test plan. + - Feature/product: A mapping from current JavaScript Holmes workflow artifacts + to the future Rust Holmes law assurance bundle. + - Required output: PRD for retained fields, rejected fields, migration gaps, + and compatibility fixtures. +- [ ] HLAW-042 `HolmesWorkflowWeslawIntegration` PRD and test plan. + - Feature/product: CI workflow integration that runs Wesley law commands, + assembles law evidence, and invokes Holmes assessment. + - Required output: PRD for job dependencies, artifact paths, failure + propagation, retry behavior, and branch/fork permissions. +- [ ] HLAW-043 `RustHolmesCrateScaffold` PRD and test plan. + - Feature/product: The initial Rust crate/module structure needed to host + law assurance domain, application, reporting, and adapters. + - Required output: PRD for crate boundaries, public API, dependency rules, + compile-time guard tests, and no-GitHub-in-domain enforcement. +- [ ] HLAW-044 `TransitionalHolmesCliAliases` PRD and test plan. + - Feature/product: Transitional CLI aliases or wrapper behavior that lets + existing workflows call the new law assurance path without reviving legacy + Node authority. + - Required output: PRD for supported aliases, deprecation messages, exit + parity, and removal gates. +- [ ] HLAW-045 `LawAssuranceOperatorDocs` PRD and test plan. + - Feature/product: Operator documentation for generating law evidence, + running Holmes law assessment, reading findings, and resolving failures. + - Required output: PRD for docs locations, command examples, troubleshooting + matrix, docs command checks, and accessibility of examples. +- [ ] HLAW-046 `LawAssuranceSchemaVersioning` PRD and test plan. + - Feature/product: Versioning and compatibility rules for Holmes law evidence + bundle schemas, policy schemas, report schemas, and witness schemas. 
+ - Required output: PRD for semver-like compatibility, unsupported-version + diagnostics, migration notices, and schema validation tests. +- [ ] HLAW-047 `LawAssuranceArtifactRetention` PRD and test plan. + - Feature/product: Artifact retention rules for local runs, CI runs, PR + comments, and future dashboard links. + - Required output: PRD for retention names, overwrite policy, cleanup + behavior, stale link warnings, and fork-safe behavior. +- [ ] HLAW-048 `LawAssuranceEndToEndWorkflow` PRD and test plan. + - Feature/product: End-to-end workflow from GraphQL SDL and `weslaw` authoring + through Wesley law artifacts to Holmes findings and PR review output. + - Required output: PRD for full golden path, failure-path sequence, fixture + repository layout, and release-gate assertions. +- [ ] HLAW-049 `LawAssuranceReleaseGateRollout` PRD and test plan. + - Feature/product: A staged rollout plan for advisory, required, and + non-overridable law assurance gates in CI. + - Required output: PRD for rollout phases, branch protection interaction, + opt-in/opt-out policy, false-positive handling, and rollback tests. +- [ ] HLAW-050 `HolmesWeslawAssuranceCloseout` PRD and test plan. + - Feature/product: Campaign closeout artifact summarizing completed PRDs, + open decisions, implementation-ready slices, deferred scope, and next + engineering branch. + - Required output: PRD for closeout acceptance, retrospective questions, + evidence index, backlog suggestions, and BEARING update requirements. + +## Initial Recommendation + +Spend the first PR on `HLAW-001` through `HLAW-010`. + +Reasoning: + +- These slices define the evidence contracts and domain objects that every + interface depends on. +- They prevent premature CLI/GitHub/MCP design from hardcoding report shape + before the input contract is stable. 
+- They give QA a fixture vocabulary early: clean bundle, invalid bundle, + mismatched hashes, missing coverage, empty capabilities, unsupported versions, + and stale manifests. +- They preserve Wesley's ownership boundary because Holmes ingests published + law artifacts instead of recalculating semantic truth. + +## Non-Goals For The 50-Slice Planning Campaign + +- Do not implement Rust Holmes crates yet. +- Do not replace the current GitHub workflow yet. +- Do not change `weslaw` semantics, hashes, or law diff classifications. +- Do not edit Echo, jedit, Continuum, warp-ttd, git-warp, or + `wesley-postgres` from this branch. +- Do not make Holmes a law compiler. +- Do not build the Law Matrix static site in this campaign. diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/README.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/README.md new file mode 100644 index 00000000..a1003b5a --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/README.md @@ -0,0 +1,116 @@ +--- +title: Holmes weslaw Assurance PRD Artifact Template +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: active +release: v0.0.8 +--- + +# Holmes `weslaw` Assurance PRD Artifact Template + +Every `HLAW` slice in this packet must create one Markdown artifact in this +directory using the filename pattern: + +```text +HLAW-XXX-<slug>.md +``` + +Each artifact must be written from two roles at once: + +- Expert Technical Product Manager: define product value, audience, measurable + outcomes, feature boundaries, and acceptance criteria. +- Lead QA Engineer: define deterministic validation, fixtures, failure modes, + test matrices, and non-functional test coverage. + +The artifact must avoid generic filler. It should name concrete commands, +ports, schemas, fixtures, fields, artifact paths, report sections, policy +decisions, exit codes, and failure behavior wherever the feature can already be +bounded. 
+ +## Required Artifact Shape + +Copy this structure for every `HLAW` PRD/test-plan artifact. + +```markdown +# HLAW-XXX + +## Feature Overview & Objectives + +### Problem Statement + + + +### Target User/Audience + + + +### Success Metrics + +| KPI | Target | +| --- | --- | +| | | +| | | +| | | + +## Scope Definition + +### In Scope + +- +- + +### Out of Scope + +- +- + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a <role>, I want to <capability> so that <outcome>. | +| US-002 | As a <role>, I want to <capability> so that <outcome>. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | | | | +| US-002 | | | | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | | | | | + +### Happy Path Testing + +1. +2. + +### Negative/Edge Case Testing + +- Invalid inputs: +- Timeouts: +- Concurrent users or retries: +- Broken dependencies: + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | | | +| Load | | | +| Security | | | +| Accessibility | | | +``` + +## Review Rule + +A slice is not complete unless its artifact includes all five required sections, +at least one BDD acceptance criterion per user story, and explicit happy-path, +negative/edge, and non-functional test coverage. 
diff --git a/docs/design/README.md b/docs/design/README.md index 83ebfaaf..aed7bf66 100644 --- a/docs/design/README.md +++ b/docs/design/README.md @@ -51,6 +51,8 @@ Current packets: and [canonicalization and diagnostic](./0019-weslaw-semantic-law-ir/CANONICALIZATION_AND_DIAGNOSTICS.md) substrate notes +- [`0020`](./0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md): + Holmes `weslaw` assurance PRD and test-plan campaign for the next 50 slices - [Module Contract](./wesley-module-contract.md): Generic core boundary versus external module-owned domain surfaces - [Module Capability Contract](./wesley-module-capability-contract.md): The capability surfaces external modules should implement - [Contract / Artifact / Runtime Boundary](./wesley-contract-family-artifact-runtime-value.md): GraphQL-authored families, Wesley-emitted artifacts, and later runtime values From 2152a6ec116ffb7b05405d73aabdd327ed38a8c7 Mon Sep 17 00:00:00 2001 From: James Ross Date: Tue, 26 May 2026 02:29:57 -0700 Subject: [PATCH 2/9] docs(holmes): define law assurance evidence intake prds --- docs/BEARING.md | 16 +- .../holmes-weslaw-assurance-prd-test-plan.md | 42 ++++-- .../HLAW-001-holmes-law-evidence-bundle.md | 139 ++++++++++++++++++ .../prds/HLAW-002-law-diff-ingest-port.md | 128 ++++++++++++++++ .../prds/HLAW-003-law-coverage-ingest-port.md | 126 ++++++++++++++++ .../HLAW-004-law-capability-ingest-port.md | 127 ++++++++++++++++ ...05-contract-bundle-manifest-ingest-port.md | 127 ++++++++++++++++ .../prds/HLAW-006-weslaw-artifact-locator.md | 125 ++++++++++++++++ ...HLAW-007-law-evidence-validation-result.md | 125 ++++++++++++++++ .../prds/HLAW-008-semantic-change-finding.md | 125 ++++++++++++++++ .../HLAW-009-law-coverage-gate-decision.md | 124 ++++++++++++++++ ...W-010-bundle-traceability-gate-decision.md | 125 ++++++++++++++++ 12 files changed, 1308 insertions(+), 21 deletions(-) create mode 100644 
docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-001-holmes-law-evidence-bundle.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-002-law-diff-ingest-port.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-003-law-coverage-ingest-port.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-004-law-capability-ingest-port.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-005-contract-bundle-manifest-ingest-port.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-006-weslaw-artifact-locator.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-007-law-evidence-validation-result.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-008-semantic-change-finding.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-009-law-coverage-gate-decision.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-010-bundle-traceability-gate-decision.md diff --git a/docs/BEARING.md b/docs/BEARING.md index df4f5454..78d2304a 100644 --- a/docs/BEARING.md +++ b/docs/BEARING.md @@ -411,17 +411,17 @@ The active packet is Working budget: **50 slices**. -Status: **0 / 50 slices closed**. The plan allocates `HLAW-001` through +Status: **10 / 50 slices closed**. The plan allocates `HLAW-001` through `HLAW-050` across evidence intake, typed domain contracts, report models, CLI flows, GitHub publishing, MCP surfaces, policy, QA fixtures, determinism, performance budgets, migration, release gates, operator docs, and campaign closeout. Each slice must produce a PRD/test-plan artifact with explicit objectives, scope, user stories, BDD acceptance criteria, and test scenarios. 
-The first recommended pull is `HLAW-001` through `HLAW-010`: evidence intake -and typed domain contracts for law diffs, coverage reports, capability -summaries, bundle manifests, artifact location, validation results, semantic -findings, coverage gates, and traceability gates. +The first pull closed `HLAW-001` through `HLAW-010`: evidence intake and typed +domain contracts for law diffs, coverage reports, capability summaries, bundle +manifests, artifact location, validation results, semantic findings, coverage +gates, and traceability gates. ## Next Target @@ -443,9 +443,9 @@ runtime ownership into the base compiler. The next pull is: -1. `HLAW-001` through `HLAW-010`: write PRD/test-plan artifacts for the - evidence intake and typed domain contracts that let Holmes consume - `weslaw` outputs without owning semantic law truth. +1. `HLAW-011` through `HLAW-020`: write PRD/test-plan artifacts for the report + model, CLI operator flows, local artifact writer, and exit-code policy that + sit on top of the evidence intake contracts. ## Post-Retirement Freestyle Slice Log diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md index 7a9af1f7..6f88a9c4 100644 --- a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md @@ -10,7 +10,7 @@ release: v0.0.8 ## Status -Active planning packet. +Active planning packet. First PR chunk complete. ## Question @@ -83,7 +83,7 @@ failure behavior, and test fixtures where known. | Chunk | Slices | Planned PR Shape | Purpose | | --- | --- | --- | --- | -| 1 | HLAW-001..HLAW-010 | One PR | Evidence intake and typed domain contracts. | +| 1 | HLAW-001..HLAW-010 | Complete | Evidence intake and typed domain contracts. 
| | 2 | HLAW-011..HLAW-020 | One PR | Report model, CLI operator flows, and local artifacts. | | 3 | HLAW-021..HLAW-030 | One PR | GitHub and MCP interfaces over the same assurance use cases. | | 4 | HLAW-031..HLAW-040 | One PR | Policy, QA harnesses, determinism, concurrency, and budgets. | @@ -95,56 +95,56 @@ Drift checks happen after HLAW-010, HLAW-025, HLAW-040, and HLAW-050. ### Evidence Intake And Typed Domain Contracts -- [ ] HLAW-001 `HolmesLawEvidenceBundle` PRD and test plan. +- [x] HLAW-001 `HolmesLawEvidenceBundle` PRD and test plan. - Feature/product: A typed bundle contract that groups `wesley law diff`, `law coverage`, `law capabilities`, and contract bundle manifest outputs into one Holmes-readable evidence input. - Required output: PRD for schema fields, versioning, required/optional artifact references, unsupported-version diagnostics, and fixture layout. -- [ ] HLAW-002 `LawDiffIngestPort` PRD and test plan. +- [x] HLAW-002 `LawDiffIngestPort` PRD and test plan. - Feature/product: A Holmes input port that reads `wesley.law-diff/v1` JSON and normalizes it into assurance findings without reclassifying semantic law changes. - Required output: PRD for event-kind mapping, malformed JSON handling, duplicate law ids, unknown event kinds, and stable finding ids. -- [ ] HLAW-003 `LawCoverageIngestPort` PRD and test plan. +- [x] HLAW-003 `LawCoverageIngestPort` PRD and test plan. - Feature/product: A Holmes input port that reads profile/category-aware law coverage reports and turns missing release-required subjects into gates. - Required output: PRD for release/local profile behavior, threshold handling, missing-subject rendering, and coverage fixture matrices. -- [ ] HLAW-004 `LawCapabilityIngestPort` PRD and test plan. +- [x] HLAW-004 `LawCapabilityIngestPort` PRD and test plan. - Feature/product: A Holmes input port that reads report-only footprint capability summaries and reports boundary posture without claiming runtime enforcement. 
- Required output: PRD for `reportOnly`, `runtimeEnforcement`, reads/writes/ creates/forbids, empty-footprint behavior, and wording constraints. -- [ ] HLAW-005 `ContractBundleManifestIngestPort` PRD and test plan. +- [x] HLAW-005 `ContractBundleManifestIngestPort` PRD and test plan. - Feature/product: A Holmes input port that reads contract bundle manifests and verifies schema, law, profile, bundle, compiler, and codec hashes are present and consistently referenced by other artifacts. - Required output: PRD for hash validation, absent optional hashes, mismatch errors, and bundle traceability reporting. -- [ ] HLAW-006 `WeslawArtifactLocator` PRD and test plan. +- [x] HLAW-006 `WeslawArtifactLocator` PRD and test plan. - Feature/product: A local adapter that resolves law evidence artifact paths from CLI flags, workflow artifacts, and explicit bundle metadata. - Required output: PRD for path resolution precedence, missing files, symlink/path traversal policy, and deterministic diagnostics. -- [ ] HLAW-007 `LawEvidenceValidationResult` PRD and test plan. +- [x] HLAW-007 `LawEvidenceValidationResult` PRD and test plan. - Feature/product: A typed validation result that separates input contract errors from assurance findings so bad evidence fails before judgment. - Required output: PRD for error taxonomy, JSON shape, CLI exit mapping, and test fixtures for invalid artifacts. -- [ ] HLAW-008 `SemanticChangeFinding` PRD and test plan. +- [x] HLAW-008 `SemanticChangeFinding` PRD and test plan. - Feature/product: A domain finding model for law diff events with severity, posture, law id, subject, change fields, and source artifact references. - Required output: PRD for stable finding IDs, severity defaults, markdown snippets, JSON rendering, and sort order. -- [ ] HLAW-009 `LawCoverageGateDecision` PRD and test plan. +- [x] HLAW-009 `LawCoverageGateDecision` PRD and test plan. 
- Feature/product: A gate model that evaluates law coverage against policy profiles and reports pass/warn/fail/unavailable outcomes. - Required output: PRD for gate states, profile-specific required categories, missing-subject evidence, and fallback behavior when coverage is absent. -- [ ] HLAW-010 `BundleTraceabilityGateDecision` PRD and test plan. +- [x] HLAW-010 `BundleTraceabilityGateDecision` PRD and test plan. - Feature/product: A gate model that checks every ingested law artifact links back to the same expected contract bundle hash family. - Required output: PRD for cross-artifact consistency, hash mismatch @@ -369,7 +369,7 @@ Drift checks happen after HLAW-010, HLAW-025, HLAW-040, and HLAW-050. ## Initial Recommendation -Spend the first PR on `HLAW-001` through `HLAW-010`. +The first PR spends `HLAW-001` through `HLAW-010`. Reasoning: @@ -383,6 +383,22 @@ Reasoning: - They preserve Wesley's ownership boundary because Holmes ingests published law artifacts instead of recalculating semantic truth. +## Drift Check: HLAW-010 + +Date: 2026-05-26. + +Status: **10 / 50 slices closed**. + +Decision: continue with `HLAW-011` through `HLAW-020` next. The evidence intake +chunk confirmed the intended boundary: Holmes consumes Wesley-published law +artifacts, validates their shape and provenance, converts them into findings +and gate decisions, and does not recompute semantic law truth. + +No scope correction is needed. Implementation remains out of scope for this +campaign. The next chunk can safely define report sections, CLI operator flows, +artifact writing, and exit-code behavior on top of the evidence contracts +specified here. + ## Non-Goals For The 50-Slice Planning Campaign - Do not implement Rust Holmes crates yet. 
diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-001-holmes-law-evidence-bundle.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-001-holmes-law-evidence-bundle.md new file mode 100644 index 00000000..e613c922 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-001-holmes-law-evidence-bundle.md @@ -0,0 +1,139 @@ +--- +title: HLAW-001 HolmesLawEvidenceBundle +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-001 HolmesLawEvidenceBundle + +## Feature Overview & Objectives + +### Problem Statement + +Holmes needs one typed evidence input for Wesley `weslaw` assurance. Today the +future evidence surface is implied by several Wesley outputs: law diff JSON, +coverage JSON, report-only capability summaries, validation diagnostics, and +contract bundle manifests. Without a bundle contract, every Holmes interface +would invent its own flag set, required files, and failure semantics. + +`HolmesLawEvidenceBundle` defines the first-class input envelope that groups +those artifacts without letting Holmes recalculate semantic law. Wesley remains +the compiler and law authority; Holmes validates that the provided evidence is +well-formed, mutually consistent, and sufficient for an assurance judgment. + +### Target User/Audience + +- Holmes CLI operators running local or CI law assurance checks. +- GitHub workflow maintainers wiring Wesley law artifacts into Holmes reports. +- MCP agents consuming a compact bundle reference instead of raw filesystem + layout assumptions. +- Wesley maintainers reviewing whether Holmes is respecting compiler ownership + boundaries. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Bundle completeness | 100% of required evidence paths are validated before any assurance gate runs. | +| Ownership clarity | 100% of bundle validation failures distinguish bad evidence from failed assurance judgment. 
| +| Interface reuse | CLI, API, GitHub, and MCP designs all reference the same bundle schema name and version. | + +## Scope Definition + +### In Scope + +- Define a versioned `holmes.law-evidence-bundle/v1` JSON object accepted by + Holmes law assurance features. +- Require explicit references for law diff, law coverage, capability summary, + and contract bundle manifest artifacts. +- Allow optional references for Wesley validation diagnostics, law explain + extracts, source schema, source `weslaw`, and rendered reports. +- Require a `profile` field naming the assurance profile being evaluated. +- Require `expectedBundleHash` when the caller wants cross-artifact traceability + enforcement. +- Define unsupported-version, missing-path, unreadable-path, duplicate-path, and + malformed-reference diagnostics. +- Define fixture families for clean bundles, partial optional bundles, missing + required artifacts, unsupported versions, and stale manifests. + +### Out of Scope + +- Holmes will not parse GraphQL SDL or `weslaw` source to recreate semantic law. +- Holmes will not run `wesley law diff`, `wesley law coverage`, + `wesley law capabilities`, or `wesley law validate`. +- Holmes will not decide whether a law diff event is semantically strengthening + or weakening; it will consume Wesley's published event classification. +- Holmes will not publish GitHub comments in this slice. +- Holmes will not define final CLI flags in this slice beyond naming the bundle + object consumed by later CLI features. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a CI maintainer, I want to pass Holmes one bundle file so that workflow steps do not need to duplicate every law artifact path. | +| US-002 | As a Holmes domain developer, I want required and optional law artifacts separated so that validation can fail before assurance judgment begins. 
| +| US-003 | As a Wesley maintainer, I want the bundle to identify Wesley-produced artifact kinds so that Holmes cannot silently substitute its own semantic reconstruction. | +| US-004 | As an MCP agent, I want a compact bundle reference so that tool requests can assess law evidence without long path lists. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A JSON object with `version: "holmes.law-evidence-bundle/v1"` and all required artifact references | Holmes validates the bundle | The bundle is accepted and normalized into a deterministic internal input record. | +| US-001 | A workflow supplies the same artifact path as both `lawDiff.path` and `lawCoverage.path` | Holmes validates the bundle | Validation fails with a duplicate artifact diagnostic unless the artifact kind explicitly supports multi-kind content. | +| US-002 | A bundle omits `lawDiff`, `lawCoverage`, `lawCapabilities`, or `contractBundleManifest` | Holmes validates the bundle | Validation fails with `HLAW_BUNDLE_MISSING_REQUIRED_ARTIFACT` before any gate is evaluated. | +| US-002 | A bundle omits optional `sourceSchema` or `sourceWeslaw` references | Holmes validates the bundle | Validation succeeds and records those optional references as unavailable. | +| US-003 | A bundle artifact declares an unsupported artifact kind such as `custom-law-diff/v1` | Holmes validates the bundle | Validation fails with `HLAW_BUNDLE_UNSUPPORTED_ARTIFACT_KIND`. | +| US-003 | A bundle contains Wesley artifact references but no `profile` | Holmes validates the bundle | Validation fails with `HLAW_BUNDLE_MISSING_PROFILE`. | +| US-004 | An MCP caller sends a bundle URI plus profile | Holmes resolves the bundle | The same normalized bundle contract is used as on the CLI path, with no MCP-only evidence shape. 
| + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Clean release bundle with required artifacts and expected bundle hash | Happy | `fixtures/hlaw/bundles/clean-release.json` | Accepted normalized bundle. | +| TS-002 | Clean local bundle with optional source references absent | Happy | `fixtures/hlaw/bundles/clean-local-minimal.json` | Accepted with optional references marked unavailable. | +| TS-003 | Missing law diff artifact | Negative | `fixtures/hlaw/bundles/missing-law-diff.json` | `HLAW_BUNDLE_MISSING_REQUIRED_ARTIFACT`. | +| TS-004 | Unsupported bundle version | Negative | `fixtures/hlaw/bundles/unsupported-version.json` | `HLAW_BUNDLE_UNSUPPORTED_VERSION`. | +| TS-005 | Required artifact path points outside workspace through traversal | Security | `fixtures/hlaw/bundles/path-traversal.json` | Rejected before file read. | +| TS-006 | Duplicate artifact path used for distinct required artifact kinds | Edge | `fixtures/hlaw/bundles/duplicate-required-path.json` | Rejected as ambiguous evidence. | +| TS-007 | Very large bundle with many optional rendered report references | Load | generated fixture with 1,000 optional refs | Validation stays deterministic and does not run assurance gates. | + +### Happy Path Testing + +1. Write a clean `holmes.law-evidence-bundle/v1` fixture with `profile`, + `lawDiff`, `lawCoverage`, `lawCapabilities`, and `contractBundleManifest`. +2. Validate that Holmes reads the bundle and materializes a normalized input + record with sorted artifact references. +3. Verify that optional source and rendered report references are preserved + when present. +4. Verify that the validation result contains no assurance findings, because + this slice only proves input acceptability. 
+ +### Negative/Edge Case Testing + +- Invalid inputs: malformed JSON, unknown top-level keys under strict mode, + missing required artifact references, unsupported artifact kinds, unsupported + bundle versions, empty strings, non-string paths, duplicate artifact ids, and + relative paths that escape the workspace. +- Timeouts: bundle validation must not perform network IO; any filesystem read + timeout injected by an adapter is reported as input validation failure, not as + an assurance gate failure. +- Concurrent users or retries: repeated validation of the same bundle must + produce byte-identical normalized JSON and diagnostics. +- Broken dependencies: unreadable files, missing files, and invalid symlinks are + reported per artifact with the artifact role included in the diagnostic. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Validate a 1,000-reference bundle in under 250 ms on a local development machine. | Benchmark synthetic bundle parsing and normalization without reading artifact bodies. | +| Load | Validation must remain O(n log n) or better over artifact references because references are sorted for deterministic output. | Generate bundles with 10, 100, and 1,000 optional references and compare growth. | +| Security | Paths must be workspace-confined after symlink normalization unless an explicit trusted absolute-path mode is later designed. | Fixtures for `../`, absolute paths, symlink escape, and encoded traversal. | +| Accessibility | Human text diagnostics must name the artifact role and path without relying on color. | Snapshot terminal and Markdown diagnostics with color disabled. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-002-law-diff-ingest-port.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-002-law-diff-ingest-port.md new file mode 100644 index 00000000..0db9bd7e --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-002-law-diff-ingest-port.md @@ -0,0 +1,128 @@ +--- +title: HLAW-002 LawDiffIngestPort +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-002 LawDiffIngestPort + +## Feature Overview & Objectives + +### Problem Statement + +Wesley emits structured semantic law diff events, but Holmes needs an input port +that turns those events into assurance-ready findings without reinterpreting +their meaning. If every adapter reads `wesley.law-diff/v1` directly, event +ordering, unknown event kinds, duplicate law ids, and malformed JSON will drift +across CLI, GitHub, MCP, and API behavior. + +`LawDiffIngestPort` is the application boundary that accepts Wesley law diff +JSON, validates the envelope, preserves Wesley's classification, and emits +stable Holmes finding candidates. + +### Target User/Audience + +- Holmes application-layer developers defining law assurance use cases. +- QA engineers building fixture coverage for semantic law change review. +- CI maintainers who need deterministic findings from Wesley law diff output. +- Review agents that need concise machine-readable findings with source + artifact references. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Event fidelity | 100% of supported Wesley event kinds map without changing the original event kind. | +| Diagnostic determinism | Repeated ingest of the same diff produces identical finding ids and ordering. | +| Failure isolation | Malformed law diff input produces validation errors and zero assurance findings. 
| + +## Scope Definition + +### In Scope + +- Define a `LawDiffIngestPort` that accepts `wesley.law-diff/v1` JSON artifacts. +- Validate version, producer identity, source/target hash fields, event array + structure, event ids, law ids, subjects, event kind, and source spans where + present. +- Normalize supported events into `SemanticChangeFinding` inputs while + preserving Wesley event kind and severity hints. +- Reject malformed JSON, unsupported versions, duplicate event ids, missing + required fields, and unknown event kinds unless policy explicitly permits + opaque advisory passthrough in a later slice. +- Preserve source artifact path and byte offset metadata when the artifact + locator supplies it. + +### Out of Scope + +- Holmes will not compute semantic law diffs. +- Holmes will not decide whether a scalar change is strengthening, weakening, + or mixed. +- Holmes will not edit `weslaw` or GraphQL source files. +- Holmes will not render final Markdown reports in this slice. +- Holmes will not publish GitHub annotations in this slice. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a Holmes assessor, I want law diff events normalized into stable finding inputs so that later gates and reports share the same evidence. | +| US-002 | As a QA engineer, I want unknown event kinds rejected deterministically so that new Wesley event kinds cannot slip through silently. | +| US-003 | As a reviewer, I want each finding candidate to retain its law id, subject, and artifact reference so that I can trace it back to Wesley output. | +| US-004 | As a release maintainer, I want malformed law diff evidence to fail validation before readiness judgment so that bad inputs are not reported as product failures. 
| + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A valid `wesley.law-diff/v1` file with added, removed, modified, strengthened, and weakened events | The ingest port reads it | Holmes emits normalized finding inputs in deterministic review order. | +| US-001 | Two events share the same law id but have different event ids | The ingest port reads them | Both events are preserved, sorted by event ordering rules, and not deduplicated by law id. | +| US-002 | A diff event has `kind: "quantumLawShift"` | The ingest port reads it | Validation fails with `HLAW_DIFF_UNKNOWN_EVENT_KIND`. | +| US-002 | A diff file declares `version: "wesley.law-diff/v2"` | The ingest port reads it | Validation fails with `HLAW_DIFF_UNSUPPORTED_VERSION`. | +| US-003 | A diff event includes subject `operation:Mutation.replaceRangeAsTick` and law id `jedit.op.replaceRangeAsTick.footprint` | The ingest port normalizes it | The finding input contains the exact subject and law id strings from Wesley output. | +| US-004 | A diff artifact is malformed JSON | The ingest port reads it | The validation result records `HLAW_DIFF_MALFORMED_JSON` and emits zero findings. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Valid mixed event diff | Happy | `fixtures/hlaw/law-diff/mixed-events.json` | Supported events become finding inputs. | +| TS-002 | Empty diff | Happy | `fixtures/hlaw/law-diff/no-changes.json` | Accepted with zero finding inputs. | +| TS-003 | Unknown event kind | Negative | `fixtures/hlaw/law-diff/unknown-kind.json` | `HLAW_DIFF_UNKNOWN_EVENT_KIND`. | +| TS-004 | Duplicate event id | Negative | `fixtures/hlaw/law-diff/duplicate-event-id.json` | `HLAW_DIFF_DUPLICATE_EVENT_ID`. | +| TS-005 | Missing subject | Negative | `fixtures/hlaw/law-diff/missing-subject.json` | `HLAW_DIFF_MISSING_FIELD`. 
| +| TS-006 | Large diff with 10,000 events | Load | generated fixture | Accepted within budget and sorted deterministically. | +| TS-007 | Diff with source spans missing | Edge | `fixtures/hlaw/law-diff/no-spans.json` | Accepted with source location marked unavailable. | + +### Happy Path Testing + +1. Load a valid `wesley.law-diff/v1` fixture through the artifact locator. +2. Invoke `LawDiffIngestPort`. +3. Assert that each supported event kind maps to one normalized finding input. +4. Assert that Wesley fields are copied, not recomputed: event kind, law id, + subject, before/after hashes, and semantic change payload. +5. Snapshot the sorted finding input list. + +### Negative/Edge Case Testing + +- Invalid inputs: malformed JSON, unsupported version, missing `events`, missing + event `kind`, missing `lawId`, missing `subject`, duplicate event ids, wrong + primitive types, and unknown event kinds. +- Timeouts: the port receives artifact bytes from an adapter; injected read + timeout is surfaced by the artifact locator and not retried here. +- Concurrent users or retries: multiple simultaneous ingest calls over the same + bytes must produce identical normalized results and no shared mutable state. +- Broken dependencies: invalid artifact metadata from the locator must produce + an input error tied to the law diff artifact role. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Ingest 10,000 events in under 500 ms after bytes are loaded. | Benchmark generated event fixtures and assert stable sort cost. | +| Load | Memory use must scale linearly with event count. | Run heap or allocation checks over 100, 1,000, and 10,000 event fixtures. | +| Security | Event text must be escaped by later renderers; ingest must preserve raw strings without executing them. | Include Markdown and HTML-like payloads in fixtures and verify they remain data. 
| +| Accessibility | Finding inputs must carry enough fields for later renderers to produce non-color-only summaries. | Contract test that severity, kind, law id, subject, and summary are always available. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-003-law-coverage-ingest-port.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-003-law-coverage-ingest-port.md new file mode 100644 index 00000000..a239b4f6 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-003-law-coverage-ingest-port.md @@ -0,0 +1,126 @@ +--- +title: HLAW-003 LawCoverageIngestPort +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-003 LawCoverageIngestPort + +## Feature Overview & Objectives + +### Problem Statement + +Wesley can report profile/category-aware law coverage, but Holmes needs one +ingest boundary that turns that report into gate-ready evidence. Coverage is +not a generic percentage: missing law for release-required mutation footprints +has a different posture than missing optional documentation law. If coverage +handling is left to renderers, CI and GitHub comments will disagree. + +`LawCoverageIngestPort` validates Wesley coverage reports and normalizes +profile/category data into missing-subject evidence for later gate evaluation. + +### Target User/Audience + +- Release maintainers enforcing law coverage thresholds. +- QA engineers designing coverage fixture matrices. +- Holmes policy developers mapping coverage gaps to pass, warn, fail, or + unavailable outcomes. +- Reviewers who need concrete missing subjects instead of abstract percentages. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Missing-subject traceability | 100% of failed required categories include concrete missing subject coordinates. 
| +| Profile separation | Local and release profile coverage can be ingested from the same report without overwriting each other. | +| Threshold readiness | 100% of ingested categories expose covered, total, missing, required, and advisory counts. | + +## Scope Definition + +### In Scope + +- Accept `wesley.law-coverage/v1` JSON artifacts. +- Validate profile ids, category ids, required/advisory classification, + numerator/denominator consistency, missing-subject arrays, and report hashes. +- Normalize coverage by profile and category for later policy evaluation. +- Preserve missing subject coordinates and the law category expected for each. +- Report absent coverage artifacts as unavailable input when the bundle marks + coverage optional in local-only flows; required release flows are handled by + later gate policy. + +### Out of Scope + +- Holmes will not scan GraphQL or `weslaw` files to compute coverage. +- Holmes will not decide which schema subjects require law coverage; Wesley and + policy artifacts provide that information. +- Holmes will not apply final pass/warn/fail thresholds in this ingest slice. +- Holmes will not render coverage tables in this slice. +- Holmes will not create suppression or override behavior in this slice. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a release maintainer, I want coverage reports normalized by profile so that release gates do not accidentally use local-dev posture. | +| US-002 | As a QA engineer, I want inconsistent coverage counts rejected so that tests cannot pass on impossible percentages. | +| US-003 | As a reviewer, I want missing subjects preserved with categories so that coverage failures are actionable. | +| US-004 | As a Holmes developer, I want coverage ingest to expose unavailable coverage distinctly from zero coverage so that later gates can render accurate outcomes. 
| + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A coverage report with `local-dev` and `ci-release` profiles | Holmes ingests the report | The normalized result contains separate profile maps with no merged thresholds. | +| US-002 | A category reports `covered: 7`, `total: 5` | Holmes ingests the report | Validation fails with `HLAW_COVERAGE_INCONSISTENT_COUNTS`. | +| US-002 | A category reports three missing subjects but `missing: 2` | Holmes ingests the report | Validation fails with `HLAW_COVERAGE_MISSING_COUNT_MISMATCH`. | +| US-003 | `ci-release` mutation footprint coverage is missing `operation:Mutation.createCheckpoint` | Holmes ingests the report | The normalized missing subject includes the exact coordinate and category id. | +| US-004 | The bundle has no coverage artifact in a local exploratory profile | Holmes validates inputs | Coverage is represented as unavailable, not as 0% covered. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Multi-profile coverage report | Happy | `fixtures/hlaw/law-coverage/multi-profile.json` | Separate normalized profile/category maps. | +| TS-002 | Perfect release coverage | Happy | `fixtures/hlaw/law-coverage/release-perfect.json` | No missing required subjects. | +| TS-003 | Missing mutation footprints | Negative | `fixtures/hlaw/law-coverage/missing-required-footprints.json` | Missing-subject evidence is preserved. | +| TS-004 | Covered count greater than total | Negative | `fixtures/hlaw/law-coverage/impossible-counts.json` | `HLAW_COVERAGE_INCONSISTENT_COUNTS`. | +| TS-005 | Unknown category id | Edge | `fixtures/hlaw/law-coverage/unknown-category.json` | Accepted only if report marks it advisory; otherwise rejected by policy later. 
| +| TS-006 | Empty categories array | Edge | `fixtures/hlaw/law-coverage/empty-categories.json` | Accepted as no coverage categories, not as full coverage. | +| TS-007 | Large coverage report across 50,000 subjects | Load | generated fixture | Ingest stays within performance budget. | + +### Happy Path Testing + +1. Load a valid coverage report with at least two profiles and four law + categories. +2. Validate that every category exposes `covered`, `total`, `missing`, required + status, and missing subject coordinates. +3. Confirm that release-required categories and advisory categories are retained + separately. +4. Snapshot the normalized coverage result in deterministic profile/category + order. + +### Negative/Edge Case Testing + +- Invalid inputs: malformed JSON, unsupported version, missing profile ids, + duplicate profile ids, duplicate category ids within a profile, impossible + counts, missing subject count mismatch, non-coordinate missing subjects, and + invalid percentage precision. +- Timeouts: coverage ingest does not perform slow IO; adapter timeouts are input + errors from artifact loading. +- Concurrent users or retries: repeated ingest of the same bytes must return the + same category order and count calculations. +- Broken dependencies: absent coverage artifact is represented as unavailable + only when the surrounding bundle and profile allow that state. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Ingest 50,000 subject rows in under 1 second after bytes are loaded. | Benchmark generated coverage reports with deterministic sorted output. | +| Load | Coverage normalization must not duplicate missing-subject strings more than necessary. | Allocation checks over large missing-subject fixtures. | +| Security | Subject coordinates are treated as opaque strings and never used as paths. | Include path-like coordinates and assert no filesystem access. 
| +| Accessibility | Later renderers must be able to show missing subjects as text, not only percentage color. | Contract test that missing subject arrays are always present for gaps. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-004-law-capability-ingest-port.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-004-law-capability-ingest-port.md new file mode 100644 index 00000000..70067003 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-004-law-capability-ingest-port.md @@ -0,0 +1,127 @@ +--- +title: HLAW-004 LawCapabilityIngestPort +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-004 LawCapabilityIngestPort + +## Feature Overview & Objectives + +### Problem Statement + +Wesley can emit report-only footprint capability summaries from `weslaw` +operation footprint law. Holmes must consume those summaries as architectural +posture evidence without overstating runtime enforcement. The risk is wording +and data drift: a report that says an operation "forbids Diagnostics" must not +claim the runtime physically prevented access unless a later runtime witness +proves enforcement. + +`LawCapabilityIngestPort` validates report-only capability summaries and +normalizes reads, writes, creates, forbids, slots, closures, and enforcement +posture for later reporting. + +### Target User/Audience + +- Holmes report authors presenting operation footprint posture. +- Wesley maintainers ensuring `weslaw` footprint summaries are not + misrepresented. +- Runtime maintainers evaluating which operations still lack enforcement + witnesses. +- QA engineers testing empty, large, and malformed footprint summaries. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Wording safety | 100% of normalized summaries expose `reportOnly` or `runtimeEnforcement` explicitly. 
| +| Footprint fidelity | Supported reads, writes, creates, forbids, slots, and closures are preserved without reinterpretation. | +| Empty-footprint clarity | Operations without footprint law are reported as unavailable or absent, not as unrestricted access. | + +## Scope Definition + +### In Scope + +- Accept `wesley.law-capabilities/v1` JSON artifacts. +- Validate operation subject, law id, report-only posture, runtime enforcement + flag, resource arrays, slot declarations, closure declarations, and source + artifact references. +- Normalize operation capability summaries for later report sections and gates. +- Require explicit posture fields so renderers cannot imply enforcement by + omission. +- Define empty-footprint behavior for operations with no law, no capability + summary, or explicit empty resource sets. + +### Out of Scope + +- Holmes will not generate runtime capability APIs. +- Holmes will not enforce reads, writes, creates, or forbids at runtime. +- Holmes will not inspect handler code to verify resource access. +- Holmes will not compute footprint closure expansion. +- Holmes will not publish capability summaries to GitHub in this slice. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a reviewer, I want Holmes to label capability summaries as report-only so that I do not mistake footprint law for runtime enforcement. | +| US-002 | As a runtime maintainer, I want operations grouped with reads, writes, creates, and forbids so that enforcement gaps are visible. | +| US-003 | As a QA engineer, I want malformed capability summaries rejected so that reports do not hide missing posture fields. | +| US-004 | As a Holmes developer, I want explicit empty-footprint semantics so that absent law is not rendered as unrestricted permission. 
| + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A capability summary has `reportOnly: true` and `runtimeEnforcement: false` | Holmes ingests it | The normalized summary exposes both fields and a wording hint requiring report-only language. | +| US-001 | A summary omits both posture fields | Holmes ingests it | Validation fails with `HLAW_CAPABILITY_MISSING_POSTURE`. | +| US-002 | An operation summary lists reads, writes, creates, and forbids | Holmes ingests it | The normalized operation retains every resource list in deterministic order. | +| US-003 | A resource appears in both `writes` and `forbids` for the same operation | Holmes ingests it | Validation fails with `HLAW_CAPABILITY_CONTRADICTORY_RESOURCE_POSTURE`. | +| US-004 | An operation is absent from the capability artifact | Holmes reports capability posture later | The operation is unavailable for capability reporting, not marked as empty access. | +| US-004 | An operation is present with all resource arrays empty | Holmes ingests it | The summary is accepted only if the artifact explicitly marks the operation as intentionally empty. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Report-only footprint summary for jedit replace operation | Happy | `fixtures/hlaw/capabilities/jedit-replace-report-only.json` | Normalized operation capability. | +| TS-002 | Runtime-enforced capability summary from future witness | Edge | `fixtures/hlaw/capabilities/runtime-enforced.json` | Accepted with enforcement posture preserved. | +| TS-003 | Missing posture flags | Negative | `fixtures/hlaw/capabilities/missing-posture.json` | `HLAW_CAPABILITY_MISSING_POSTURE`. | +| TS-004 | Contradictory writes and forbids | Negative | `fixtures/hlaw/capabilities/contradictory-resource.json` | `HLAW_CAPABILITY_CONTRADICTORY_RESOURCE_POSTURE`. 
| +| TS-005 | Explicit empty operation | Edge | `fixtures/hlaw/capabilities/explicit-empty.json` | Accepted as intentionally empty. | +| TS-006 | Large footprint with many slots and closures | Load | generated fixture | Accepted with deterministic ordering. | +| TS-007 | Missing operation summary | Edge | bundle with no capability entry for requested operation | Later report sees unavailable, not empty. | + +### Happy Path Testing + +1. Load a valid `wesley.law-capabilities/v1` artifact. +2. Ingest operation summaries containing reads, writes, creates, forbids, slots, + closures, and posture fields. +3. Verify deterministic sorting by operation subject and resource name. +4. Verify that report-only wording metadata is emitted for renderers. + +### Negative/Edge Case Testing + +- Invalid inputs: unsupported version, missing operation subject, missing law + id, missing posture fields, both `reportOnly` and `runtimeEnforcement` true + without a witness reference, contradictory resource posture, duplicate slot + ids, and closure references to unknown slots. +- Timeouts: no network or handler inspection occurs during ingest; artifact + load failures are returned by the locator. +- Concurrent users or retries: simultaneous ingest must not mutate shared + resource registries or reorder summaries. +- Broken dependencies: absent capability artifacts are unavailable evidence, + while malformed capability artifacts fail validation. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Ingest 5,000 operation summaries in under 750 ms after bytes are loaded. | Benchmark generated capability reports. | +| Load | Large resource arrays must be normalized without quadratic duplicate checks. | Use set-based duplicate detection and benchmark growth. | +| Security | Resource names are data, not code or filesystem paths. | Include shell-like resource names and assert no execution or path access. 
| +| Accessibility | Later renderers must receive explicit posture labels for screen-reader-friendly text. | Contract test for posture label fields on every normalized summary. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-005-contract-bundle-manifest-ingest-port.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-005-contract-bundle-manifest-ingest-port.md new file mode 100644 index 00000000..107e26ab --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-005-contract-bundle-manifest-ingest-port.md @@ -0,0 +1,127 @@ +--- +title: HLAW-005 ContractBundleManifestIngestPort +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-005 ContractBundleManifestIngestPort + +## Feature Overview & Objectives + +### Problem Statement + +Holmes law assurance depends on traceability: law diffs, coverage reports, and +capability summaries are only meaningful if they were produced from the same +contract bundle family. Wesley emits manifests containing schema, law, profile, +bundle, compiler, and codec hashes. Holmes needs an ingest port that verifies +those fields are present and usable before any cross-artifact gate runs. + +`ContractBundleManifestIngestPort` turns Wesley contract bundle manifests into a +normalized provenance record for later traceability gates and reports. + +### Target User/Audience + +- Release reviewers who need to see which schema and law hashes produced an + assurance report. +- CI maintainers preventing stale law evidence from being mixed with current + branch artifacts. +- Holmes report developers rendering provenance sections. +- QA engineers testing mismatch, partial manifest, and unsupported-version + behavior. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Required hash validation | 100% of accepted release manifests include schemaHash, lawHash, profileHash, and bundleHash. 
| +| Producer traceability | 100% of accepted manifests include Wesley compiler identity and law codec version. | +| Mismatch readiness | Manifest ingest exposes normalized hash fields for later cross-artifact consistency checks. | + +## Scope Definition + +### In Scope + +- Accept `wesley.contract-bundle-manifest/v1` JSON artifacts. +- Validate manifest version, schema hash, law hash, profile hash, bundle hash, + law codec, compiler name/version, source paths, generated artifact references, + and timestamp policy. +- Normalize required and optional provenance fields. +- Distinguish absent optional hashes from invalid required hashes. +- Preserve manifest source artifact reference for report links and audit + witnesses. + +### Out of Scope + +- Holmes will not recompute schema, law, profile, or bundle hashes. +- Holmes will not decide whether the compiler version is supported beyond + manifest schema compatibility in this slice. +- Holmes will not compare hashes across law artifacts until + `BundleTraceabilityGateDecision`. +- Holmes will not download remote generated artifacts. +- Holmes will not render provenance tables in this slice. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a release reviewer, I want Holmes to ingest bundle hashes so that every law assurance report can name the exact contract bundle. | +| US-002 | As a CI maintainer, I want malformed hashes rejected so that stale or handcrafted manifests cannot pass as Wesley output. | +| US-003 | As a Holmes report developer, I want compiler and codec provenance normalized so that reports can present generation context. | +| US-004 | As a QA engineer, I want optional provenance fields handled explicitly so that partial local manifests do not masquerade as release-ready evidence. 
| + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A manifest includes valid schema, law, profile, and bundle hashes | Holmes ingests it | The normalized provenance record exposes all four hashes exactly as authored. | +| US-002 | A manifest contains `schemaHash: "abc"` | Holmes ingests it | Validation fails with `HLAW_MANIFEST_INVALID_HASH`. | +| US-002 | A release manifest omits `bundleHash` | Holmes ingests it | Validation fails with `HLAW_MANIFEST_MISSING_REQUIRED_HASH`. | +| US-003 | A manifest includes compiler `wesley` version `0.1.0` and codec `weslaw-ir-json/v1` | Holmes ingests it | Compiler and codec fields are preserved for report rendering. | +| US-004 | A local manifest omits optional generated artifact references | Holmes ingests it | Validation succeeds and marks generated artifact references unavailable. | +| US-004 | A manifest timestamp differs between two otherwise identical runs | Holmes normalizes it | The timestamp is preserved as provenance but excluded from semantic equality checks. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Complete release manifest | Happy | `fixtures/hlaw/manifests/release-complete.json` | Accepted normalized provenance. | +| TS-002 | Minimal local manifest | Happy | `fixtures/hlaw/manifests/local-minimal.json` | Accepted with unavailable optional fields. | +| TS-003 | Invalid hash syntax | Negative | `fixtures/hlaw/manifests/invalid-hash.json` | `HLAW_MANIFEST_INVALID_HASH`. | +| TS-004 | Missing bundle hash | Negative | `fixtures/hlaw/manifests/missing-bundle-hash.json` | `HLAW_MANIFEST_MISSING_REQUIRED_HASH`. | +| TS-005 | Unsupported manifest version | Negative | `fixtures/hlaw/manifests/unsupported-version.json` | `HLAW_MANIFEST_UNSUPPORTED_VERSION`. 
| +| TS-006 | Future compiler version | Edge | `fixtures/hlaw/manifests/future-compiler.json` | Accepted unless manifest schema is unsupported; policy evaluates later. | +| TS-007 | Large generated artifact list | Load | generated fixture | Accepted within manifest ingest budget. | + +### Happy Path Testing + +1. Load a complete manifest through the artifact locator. +2. Validate version, hash formats, compiler identity, codec identity, profile + id, and source references. +3. Assert that required hashes are preserved byte-for-byte. +4. Snapshot the normalized provenance record with deterministic field ordering. + +### Negative/Edge Case Testing + +- Invalid inputs: malformed JSON, unsupported manifest version, missing required + hashes, invalid hash algorithm prefix, invalid digest length, duplicate + generated artifact ids, invalid source path metadata, and non-string compiler + fields. +- Timeouts: ingest does not fetch generated artifacts; remote or filesystem + timeout behavior belongs to artifact location. +- Concurrent users or retries: repeated manifest ingest must normalize to a + semantically equal provenance record even if timestamp fields differ. +- Broken dependencies: a manifest that references missing generated artifacts is + accepted as provenance but later artifact availability checks may fail. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Ingest a manifest with 5,000 generated artifact references in under 250 ms after bytes are loaded. | Benchmark generated manifest fixtures. | +| Load | Generated artifact references must be streamed or normalized without quadratic duplicate checks. | Allocation and growth checks over generated references. | +| Security | Manifest source paths are metadata and must not trigger file reads. | Include path traversal strings and assert no access during manifest ingest. 
| +| Accessibility | Provenance fields must be available as full text, not truncated-only display strings. | Contract test normalized record includes complete hash values. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-006-weslaw-artifact-locator.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-006-weslaw-artifact-locator.md new file mode 100644 index 00000000..f5e5b4fb --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-006-weslaw-artifact-locator.md @@ -0,0 +1,125 @@ +--- +title: HLAW-006 WeslawArtifactLocator +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-006 WeslawArtifactLocator + +## Feature Overview & Objectives + +### Problem Statement + +Holmes law assurance needs to resolve evidence artifacts from local CLI flags, +bundle metadata, and CI workflow paths. If path resolution is duplicated across +commands and publishers, the system will produce inconsistent diagnostics, +security posture, and path precedence. Artifact location must be deterministic, +workspace-confined by default, and testable without network or wall-clock +dependencies. + +`WeslawArtifactLocator` is the adapter boundary that resolves local law evidence +references into loaded artifact bytes and source metadata for ingest ports. + +### Target User/Audience + +- CLI operators supplying artifact paths manually. +- GitHub workflow maintainers passing workspace-relative paths from CI jobs. +- Holmes application services that need loaded bytes without knowing filesystem + policy. +- Security reviewers checking path traversal and symlink behavior. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Resolution determinism | The same bundle and flag inputs resolve to the same ordered artifact list on repeated runs. | +| Security coverage | 100% of path traversal, symlink escape, and missing-file cases have stable diagnostics. 
| +| Adapter reuse | All first-chunk ingest ports receive bytes through the same locator contract. | + +## Scope Definition + +### In Scope + +- Define path resolution precedence: explicit CLI flag, bundle artifact path, + bundle manifest relative reference, and workflow-provided artifact directory. +- Normalize workspace-relative paths against an explicit workspace root. +- Reject path traversal, symlink escape, unsupported URI schemes, directory + paths where files are required, unreadable files, and duplicate resolved + canonical paths. +- Return loaded bytes plus artifact role, original reference, canonical path, + file size, and optional content hash. +- Provide in-memory test locator behavior for deterministic unit tests. + +### Out of Scope + +- No remote artifact download in this slice. +- No GitHub Actions artifact API calls in this slice. +- No caching layer beyond one locator invocation. +- No automatic discovery of artifacts by glob unless a later feature explicitly + designs it. +- No mutation, cleanup, or writing of artifact files. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a CLI operator, I want explicit flags to override bundle-relative defaults so that I can test replacement artifacts locally. | +| US-002 | As a CI maintainer, I want workspace-confined path resolution so that untrusted PR inputs cannot make Holmes read arbitrary files. | +| US-003 | As a Holmes developer, I want loaded bytes tagged with artifact roles so that ingest ports produce precise diagnostics. | +| US-004 | As a QA engineer, I want an in-memory locator so that ingest tests do not rely on wall-clock, filesystem layout, or shell state. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Both `--law-diff path/a.json` and a bundle `lawDiff.path` are present | The locator resolves the law diff role | The explicit CLI path wins and the source metadata records the override. 
| +| US-002 | A bundle path resolves to `../secrets.json` outside the workspace | The locator resolves artifacts | Resolution fails with `HLAW_ARTIFACT_PATH_ESCAPE`. | +| US-002 | A symlink inside the workspace points outside the workspace | The locator canonicalizes it | Resolution fails with `HLAW_ARTIFACT_SYMLINK_ESCAPE`. | +| US-003 | A law coverage artifact is unreadable | The locator loads it | The error includes artifact role `lawCoverage` and the original path reference. | +| US-004 | A unit test supplies in-memory bytes for `lawDiff` | The application requests the artifact | The in-memory locator returns bytes with deterministic metadata and no filesystem access. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Explicit flag overrides bundle path | Happy | temp workspace with two law diff files | Explicit path loaded and override recorded. | +| TS-002 | Bundle-relative path resolution | Happy | `fixtures/hlaw/bundles/clean-release.json` | Artifact loaded relative to bundle directory. | +| TS-003 | Path traversal attempt | Security | path `../outside.json` | `HLAW_ARTIFACT_PATH_ESCAPE`. | +| TS-004 | Symlink escape | Security | symlink fixture | `HLAW_ARTIFACT_SYMLINK_ESCAPE`. | +| TS-005 | Missing file | Negative | missing law coverage path | `HLAW_ARTIFACT_NOT_FOUND`. | +| TS-006 | Directory supplied as file | Edge | directory path for manifest | `HLAW_ARTIFACT_NOT_FILE`. | +| TS-007 | Large artifact file | Load | 25 MB generated JSON file | Load succeeds or fails with documented size diagnostic if over budget. | + +### Happy Path Testing + +1. Create a temporary workspace containing a bundle and all required artifacts. +2. Resolve artifacts using bundle-relative paths only. +3. Resolve again with an explicit override for one role. +4. Verify deterministic artifact order, canonical path metadata, byte lengths, + and role labels. 
+ +### Negative/Edge Case Testing + +- Invalid inputs: empty path strings, unsupported URI schemes, absolute paths + when absolute mode is disabled, path traversal, symlink escape, directories, + missing files, unreadable files, duplicate canonical paths, and files over the + configured size limit. +- Timeouts: inject a filesystem adapter timeout and assert + `HLAW_ARTIFACT_READ_TIMEOUT` with no retry loop in the domain layer. +- Concurrent users or retries: concurrent resolution of the same workspace must + not mutate the shared process working directory or global path state. +- Broken dependencies: an unavailable filesystem adapter returns an infrastructure + error mapped to input validation, not an assurance finding. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Resolve and read five 5 MB artifacts in under 1 second on local disk. | Temp-file benchmark with deterministic fixture bytes. | +| Load | Enforce a documented per-artifact and total-byte budget before parsing. | Generate files just below and above configured limits. | +| Security | Workspace confinement must happen after canonicalization and symlink resolution. | Path traversal and symlink escape fixtures on supported platforms. | +| Accessibility | Diagnostics must name role, original reference, and normalized path when safe. | Snapshot text diagnostics with color disabled. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-007-law-evidence-validation-result.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-007-law-evidence-validation-result.md new file mode 100644 index 00000000..ffd1310e --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-007-law-evidence-validation-result.md @@ -0,0 +1,125 @@ +--- +title: HLAW-007 LawEvidenceValidationResult +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-007 LawEvidenceValidationResult + +## Feature Overview & Objectives + +### Problem Statement + +Holmes must clearly separate invalid evidence from valid evidence that produces +failing assurance findings. A malformed law diff JSON file is not the same as a +coverage gate failure; unsupported bundle versions are not semantic law risks. +Without a typed validation result, CLI exit codes, GitHub comments, MCP +responses, and audit witnesses will conflate input errors with judgment. + +`LawEvidenceValidationResult` defines the typed result envelope for all +first-stage evidence validation before Holmes runs gates or produces assurance +verdicts. + +### Target User/Audience + +- Holmes CLI users who need actionable diagnostics before assessment. +- CI maintainers mapping validation errors to hard job failures. +- MCP clients distinguishing bad requests from failing law gates. +- QA engineers verifying invalid artifact fixtures never produce findings. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Error separation | 100% of invalid evidence cases return validation errors and zero assurance findings. | +| Exit-code readiness | Every validation outcome maps to a documented future CLI exit category. | +| JSON stability | Validation result JSON snapshots remain deterministic across repeated runs. 
| + +## Scope Definition + +### In Scope + +- Define a validation result object containing status, normalized bundle + reference, validation errors, validation warnings, loaded artifact metadata, + unsupported optional evidence, and future exit-code category. +- Define statuses: `valid`, `validWithWarnings`, `invalid`, and + `infrastructureError`. +- Define diagnostic fields: code, severity, artifact role, path, message, + details, source location if available, and remediation hint. +- Require invalid validation results to carry no assurance findings or gate + decisions. +- Define JSON serialization for CLI, API, MCP, and audit witness reuse. + +### Out of Scope + +- No final Holmes verdict in this slice. +- No pass/warn/fail gate model in this slice except the placeholder + exit-category hint. +- No GitHub rendering in this slice. +- No suppression behavior for validation errors. +- No automatic remediation or artifact regeneration. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a CLI user, I want invalid evidence diagnostics before assessment so that I can fix file paths or formats first. | +| US-002 | As a CI maintainer, I want validation status separated from assurance status so that infrastructure and product risks do not share one exit reason. | +| US-003 | As an MCP client, I want machine-readable diagnostics with artifact roles so that agents can explain failures without parsing prose. | +| US-004 | As a QA engineer, I want validation snapshots to prove invalid inputs never emit findings or gates. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A bundle is missing its manifest artifact | Holmes validates evidence | The result status is `invalid` and includes `HLAW_BUNDLE_MISSING_REQUIRED_ARTIFACT`. | +| US-002 | An artifact read timeout occurs | Holmes validates evidence | The result status is `infrastructureError` and assessment does not run. 
| +| US-003 | A law diff artifact has malformed JSON | Holmes validates evidence | The diagnostic includes artifact role `lawDiff`, source path, code, and remediation hint. | +| US-004 | Evidence validation fails | Holmes prepares assessment input | No `SemanticChangeFinding`, coverage gate, or traceability gate is produced. | +| US-004 | Evidence validation succeeds with optional source schema absent | Holmes validates evidence | The result status is `validWithWarnings` only if policy asks missing optional source refs to warn; otherwise `valid`. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Valid bundle and artifacts | Happy | `fixtures/hlaw/bundles/clean-release.json` | Status `valid`. | +| TS-002 | Valid with optional evidence unavailable | Happy | `fixtures/hlaw/bundles/clean-local-minimal.json` | Status `valid` or `validWithWarnings` per policy. | +| TS-003 | Missing required artifact | Negative | missing manifest path | Status `invalid`, no findings. | +| TS-004 | Malformed law diff | Negative | malformed law diff fixture | Status `invalid`, role-specific diagnostic. | +| TS-005 | Filesystem timeout | Edge | fake locator timeout | Status `infrastructureError`. | +| TS-006 | Multiple invalid artifacts | Negative | bundle with malformed diff and invalid manifest | All diagnostics sorted deterministically. | +| TS-007 | Validation warnings only | Edge | optional source references absent | No gate decisions produced. | + +### Happy Path Testing + +1. Validate a clean evidence bundle. +2. Assert that status is `valid`, errors are empty, loaded artifact metadata is + present, and normalized bundle identity is included. +3. Serialize validation result as JSON twice and assert byte equality. +4. Verify that no gate or finding fields appear in the validation result. 
+ +### Negative/Edge Case Testing + +- Invalid inputs: missing required artifacts, unsupported versions, malformed + JSON, invalid hashes, unknown event kinds, path traversal, count mismatches, + and contradictory capability posture. +- Timeouts: fake locator returns read timeout; result is + `infrastructureError` with no retry from validation result logic. +- Concurrent users or retries: validation result construction must be immutable + after creation so concurrent renderers cannot mutate diagnostic order. +- Broken dependencies: unavailable filesystem, invalid UTF-8 where JSON bytes + are expected, and adapter panic boundaries are mapped to infrastructure + diagnostics where recoverable. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Construct validation results with 1,000 diagnostics in under 100 ms. | Synthetic diagnostic benchmark. | +| Load | JSON serialization must remain deterministic for large diagnostic arrays. | Snapshot large invalid bundle output. | +| Security | Diagnostics must not leak file contents or absolute paths unless configured for trusted local mode. | Fixtures with secret-like file names and content. | +| Accessibility | Text diagnostics must include code and remediation hint without relying on color. | Snapshot terminal text with color disabled. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-008-semantic-change-finding.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-008-semantic-change-finding.md new file mode 100644 index 00000000..b554b076 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-008-semantic-change-finding.md @@ -0,0 +1,125 @@ +--- +title: HLAW-008 SemanticChangeFinding +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-008 SemanticChangeFinding + +## Feature Overview & Objectives + +### Problem Statement + +Wesley law diff events need to become Holmes findings before they can be +rendered, gated, filtered, summarized, or published. The finding model must be +stable enough for snapshots and PR comments while preserving Wesley's semantic +classification. It must not invent new law meaning, and it must be traceable to +the source diff artifact. + +`SemanticChangeFinding` defines the domain finding for law diff events, with +stable ids, severity posture, law id, subject, change payload, artifact +reference, and renderer-neutral summary fields. + +### Target User/Audience + +- Holmes domain developers implementing law assessment. +- Reviewers reading semantic change summaries. +- GitHub and MCP adapters that need stable finding ids. +- QA engineers asserting deterministic sorting and rendering inputs. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Stable identity | The same law diff event produces the same finding id across CLI, GitHub, MCP, and API flows. | +| Classification fidelity | 100% of findings preserve Wesley event kind and change posture without reclassification. | +| Renderer readiness | Each finding exposes summary, details, severity, subject, law id, and source artifact reference. 
| + +## Scope Definition + +### In Scope + +- Define `SemanticChangeFinding` fields: finding id, law id, subject, event + kind, change posture, severity, summary, details, source artifact reference, + before/after hashes where available, profile, and tags. +- Define deterministic finding id derivation from bundle hash family, law id, + subject, event kind, and event ordinal or event id. +- Define default severity mapping inputs while leaving policy override behavior + to later policy slices. +- Define sort order by severity, subject kind, subject, law id, event kind, and + event id. +- Define Markdown snippet and JSON rendering inputs without implementing final + report sections. + +### Out of Scope + +- No policy-driven severity override in this slice. +- No GitHub annotation mapping in this slice. +- No final report document composition in this slice. +- No suppression, override, or review-state behavior. +- No recomputation of semantic diffs from source law files. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a reviewer, I want each semantic law change represented as a finding so that I can inspect it consistently across reports. | +| US-002 | As a GitHub publisher, I want stable finding ids so that updated comments do not duplicate findings across reruns. | +| US-003 | As a Holmes policy author, I want severity to be explicit but policy-adjustable later so that local and release profiles can differ. | +| US-004 | As a QA engineer, I want deterministic sort order so that snapshots are stable across platforms. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A law diff event reports a weakened scalar bound | Holmes constructs a finding | The finding preserves event kind, subject, law id, before/after fields, and source artifact reference. 
| +| US-002 | The same event is ingested twice from the same bundle hash family | Holmes derives finding ids | The finding id is identical across runs. | +| US-002 | The same law id appears in two distinct events | Holmes derives finding ids | The findings have distinct ids because event identity is included. | +| US-003 | Wesley supplies a severity hint | Holmes constructs a finding | The hint is preserved as input severity and marked as policy-adjustable later. | +| US-004 | Findings arrive in random JSON order | Holmes sorts them | Output order is deterministic by the documented sort key. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Weakened scalar law event | Happy | `fixtures/hlaw/law-diff/weakened-scalar.json` | One finding with preserved posture. | +| TS-002 | Footprint expanded event | Happy | `fixtures/hlaw/law-diff/footprint-expanded.json` | Finding includes resource delta fields. | +| TS-003 | Duplicate law id across separate events | Edge | `fixtures/hlaw/law-diff/repeated-law-id.json` | Distinct stable finding ids. | +| TS-004 | Random event order | Edge | shuffled mixed-event fixture | Stable sorted finding output. | +| TS-005 | Event id missing when a test deliberately bypasses ingest validation | Negative | constructed invalid event | Finding constructor rejects input. | +| TS-006 | Long details payload | Load | event with large delta arrays | Summary remains bounded; details retain data. | +| TS-007 | Markdown-like law id or subject text | Security | crafted event strings | Treated as data for later escaping. | + +### Happy Path Testing + +1. Feed normalized law diff events into the finding constructor. +2. Verify one finding per event. +3. Assert stable id derivation over repeated runs. +4. Assert JSON rendering inputs include full traceability fields. +5. Assert Markdown snippet fields are plain data and not pre-rendered unsafe + HTML.
+ +### Negative/Edge Case Testing + +- Invalid inputs: missing law id, missing subject, missing event kind, absent + event identity, invalid severity hint, empty summary, and unsupported subject + syntax after ingest validation bypass. +- Timeouts: finding construction is CPU-only and must not include IO or clocks. +- Concurrent users or retries: finding id derivation must be pure and safe under + concurrent assessment. +- Broken dependencies: if source artifact reference is unavailable, finding + construction requires an explicit unavailable reference object rather than + null. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Construct and sort 10,000 findings in under 750 ms. | Benchmark synthetic normalized events. | +| Load | Large delta payloads must not be copied into multiple summary fields. | Allocation checks over footprint delta fixtures. | +| Security | Finding strings must be marked as untrusted data for renderers. | Fixtures with Markdown, HTML, and shell-like strings. | +| Accessibility | Each finding must include text severity and summary fields for non-color renderers. | Contract test required text fields. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-009-law-coverage-gate-decision.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-009-law-coverage-gate-decision.md new file mode 100644 index 00000000..fcaf47d0 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-009-law-coverage-gate-decision.md @@ -0,0 +1,124 @@ +--- +title: HLAW-009 LawCoverageGateDecision +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-009 LawCoverageGateDecision + +## Feature Overview & Objectives + +### Problem Statement + +Coverage evidence is only useful when Holmes can decide what it means for a +specific assurance profile. 
A local exploratory profile may tolerate advisory +coverage gaps, while a release profile may fail on missing mutation footprints +or custom scalar semantics. Holmes needs a gate model that evaluates normalized +coverage evidence against policy without recomputing coverage. + +`LawCoverageGateDecision` defines pass, warn, fail, and unavailable outcomes for +law coverage profiles and categories. + +### Target User/Audience + +- Release maintainers deciding whether law coverage is sufficient to merge or + release. +- Holmes policy developers specifying required categories and thresholds. +- CI maintainers mapping coverage gates to exit behavior. +- Reviewers who need missing-subject evidence attached to gate outcomes. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Gate specificity | 100% of failing coverage gates include profile, category, threshold, actual value, and missing subjects. | +| Profile correctness | Gates evaluate only the selected profile unless policy explicitly compares profiles. | +| Unavailable clarity | Missing coverage artifacts produce `unavailable`; they never produce a false pass, and produce `fail` only when policy explicitly treats unavailable coverage as failure. | + +## Scope Definition + +### In Scope + +- Define gate states: `pass`, `warn`, `fail`, and `unavailable`. +- Evaluate normalized coverage evidence against a policy input containing + required categories, warning thresholds, failure thresholds, and missing + subject display limits. +- Produce gate decisions with profile id, category id, actual counts, + thresholds, missing subjects, evidence artifact reference, and rationale. +- Define behavior for absent coverage evidence, absent categories, empty + categories, and percentage rounding. +- Preserve policy and evidence separation: policy decides posture, Wesley + coverage report provides facts. + +### Out of Scope + +- No policy schema design beyond the minimal policy input shape needed by this + gate. +- No CLI exit-code mapping in this slice. +- No suppression or override behavior.
+- No GitHub check summary rendering. +- No coverage computation from schema or law source. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a release maintainer, I want required coverage categories to fail when below threshold so that missing law cannot ship unnoticed. | +| US-002 | As a local developer, I want advisory coverage gaps to warn instead of fail so that exploratory runs remain useful. | +| US-003 | As a reviewer, I want gate decisions to include missing subjects so that failures are directly actionable. | +| US-004 | As a CI maintainer, I want absent coverage evidence represented explicitly so that workflow configuration errors are visible. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Release policy requires mutation footprint coverage at 100% and actual coverage is 95% | Holmes evaluates the gate | The decision is `fail` with missing mutation subjects listed. | +| US-002 | Local policy marks invariant coverage advisory with warning threshold 80% and actual coverage is 70% | Holmes evaluates the gate | The decision is `warn`, not `fail`. | +| US-003 | Coverage is below threshold with five missing subjects | Holmes emits the decision | The decision includes the missing subjects up to the policy display limit and records any omitted count. | +| US-004 | Coverage evidence is unavailable | Holmes evaluates coverage gates | The decision is `unavailable` unless policy explicitly treats unavailable coverage as failure. | +| US-004 | A required category is absent from the coverage report | Holmes evaluates the gate | The decision is `fail` or `unavailable` according to policy, with category absence called out. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Release profile full coverage | Happy | release-perfect coverage plus strict policy | Gate `pass`. 
| +| TS-002 | Release required gap | Negative | missing-required-footprints coverage | Gate `fail` with subjects. | +| TS-003 | Local advisory gap | Happy | local advisory coverage plus local policy | Gate `warn`. | +| TS-004 | Coverage unavailable | Edge | no coverage artifact | Gate `unavailable`. | +| TS-005 | Category absent | Edge | coverage report missing required category | Policy-specific fail/unavailable. | +| TS-006 | Percentage boundary at threshold | Edge | 99/100 with 99% threshold | Gate `pass`; 98/100 fails or warns per policy. | +| TS-007 | Huge missing-subject list | Load | 50,000 missing subjects | Decision truncates display but keeps counts. | + +### Happy Path Testing + +1. Load normalized coverage evidence and a minimal policy input. +2. Evaluate gates for a selected profile. +3. Verify pass/warn/fail/unavailable outcomes for representative categories. +4. Assert that decisions include evidence references and policy rationale. +5. Snapshot sorted gate decisions. + +### Negative/Edge Case Testing + +- Invalid inputs: unknown profile, policy threshold below 0 or above 100, + warning threshold lower than the failure threshold (leaving an empty warn band) where disallowed, missing + required category, absent coverage evidence, and inconsistent normalized + counts after validation bypass. +- Timeouts: gate evaluation is CPU-only and must not use wall-clock time. +- Concurrent users or retries: gate evaluation must be pure and deterministic + for the same evidence and policy. +- Broken dependencies: if coverage evidence is invalid, this gate is not run; + validation result owns that failure. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Evaluate 1,000 category gates in under 100 ms. | Synthetic normalized coverage benchmark. | +| Load | Missing-subject display truncation must avoid copying all subjects into summaries. | Large missing-subject fixture and allocation check.
| +| Security | Policy names and category ids are treated as data, not file paths or commands. | Crafted string fixtures. | +| Accessibility | Gate decisions must include text status and rationale independent of color. | Contract test status, summary, and remediation fields. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-010-bundle-traceability-gate-decision.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-010-bundle-traceability-gate-decision.md new file mode 100644 index 00000000..3c341250 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-010-bundle-traceability-gate-decision.md @@ -0,0 +1,125 @@ +--- +title: HLAW-010 BundleTraceabilityGateDecision +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-010 BundleTraceabilityGateDecision + +## Feature Overview & Objectives + +### Problem Statement + +Holmes must reject mixed evidence where law diff, coverage, capability, and +manifest artifacts come from different contract bundle hashes. Without a +traceability gate, a PR could combine a current manifest with stale coverage or +capability summaries, producing a plausible but false assurance result. + +`BundleTraceabilityGateDecision` evaluates cross-artifact hash consistency +against the expected contract bundle hash family and produces pass, fail, or +unavailable outcomes before reports claim readiness. + +### Target User/Audience + +- Release maintainers who need confidence that all law evidence belongs to the + same compiled contract bundle. +- CI maintainers preventing stale artifact reuse across jobs. +- Holmes report authors rendering provenance and mismatch callouts. +- QA engineers creating stale, mismatched, and unsupported manifest fixtures. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Stale evidence detection | 100% of mismatched required artifact hashes produce a failing traceability gate. 
| +| Bundle clarity | Passing gates name the bundle hash family used for all evidence. | +| Unsupported-version safety | Unsupported manifest or artifact versions prevent a pass outcome. | + +## Scope Definition + +### In Scope + +- Define traceability gate states: `pass`, `fail`, and `unavailable`. +- Compare expected bundle hash, manifest bundle hash, and artifact-declared + schema/law/profile/bundle hashes where available. +- Detect missing required hash anchors, mismatched schema hash, mismatched law + hash, mismatched profile hash, mismatched bundle hash, unsupported manifest + version, and artifact version mismatch. +- Produce decision details naming each artifact role, expected hash, actual hash, + mismatch type, and source artifact reference. +- Define checkpoint playback expectations: replaying a saved validation input + must reproduce the same traceability decision. + +### Out of Scope + +- Holmes will not recompute any hash from source artifacts. +- Holmes will not rebind law to a new schema hash. +- Holmes will not decide whether a mismatch can be waived; override policy is a + later slice. +- Holmes will not fetch missing artifacts. +- Holmes will not publish the gate to GitHub in this slice. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a release maintainer, I want Holmes to fail when law evidence artifacts reference different bundle hashes so that stale evidence cannot ship. | +| US-002 | As a CI maintainer, I want traceability gates to identify the exact mismatched artifact so that workflow wiring can be fixed quickly. | +| US-003 | As a reviewer, I want passing traceability gates to show the common schema, law, profile, and bundle hashes so that provenance is visible. | +| US-004 | As a QA engineer, I want replayed evidence checkpoints to produce identical traceability decisions so that audits are deterministic. 
| + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Law diff and coverage artifacts declare different `bundleHash` values | Holmes evaluates traceability | The gate decision is `fail` with both artifact roles named. | +| US-001 | The manifest omits a required release hash anchor | Holmes evaluates traceability | The gate decision is `fail` unless validation has already rejected the manifest. | +| US-002 | Capability summary has a stale `lawHash` | Holmes evaluates traceability | The mismatch detail names `lawCapabilities`, expected law hash, and actual law hash. | +| US-003 | All required artifacts share the expected hash family | Holmes evaluates traceability | The gate decision is `pass` and includes common schema, law, profile, and bundle hashes. | +| US-004 | A saved validation result is replayed with the same policy | Holmes evaluates traceability twice | Both decisions serialize to byte-identical JSON. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | All artifacts share hash family | Happy | `fixtures/hlaw/traceability/consistent-release.json` | Gate `pass`. | +| TS-002 | Law diff stale bundle hash | Negative | `fixtures/hlaw/traceability/stale-law-diff.json` | Gate `fail`, role `lawDiff`. | +| TS-003 | Coverage stale profile hash | Negative | `fixtures/hlaw/traceability/stale-coverage-profile.json` | Gate `fail`, role `lawCoverage`. | +| TS-004 | Capability summary missing optional hash in local mode | Edge | local minimal fixture | Gate `unavailable` or `pass` according to policy input. | +| TS-005 | Unsupported manifest version | Negative | unsupported manifest fixture | No pass outcome; validation or gate failure. 
| +| TS-006 | No expected bundle hash supplied | Edge | bundle without expected hash | Gate compares available manifest/artifact hashes and reports unavailable where insufficient. | +| TS-007 | Replay checkpoint | Happy | saved validation result JSON | Byte-identical gate decision across runs. | + +### Happy Path Testing + +1. Validate and normalize a clean release evidence bundle. +2. Evaluate traceability against the expected bundle hash. +3. Confirm every required artifact role reports the same schema, law, profile, + and bundle hashes. +4. Serialize the decision twice and assert byte equality. +5. Confirm the decision carries artifact references for report linking. + +### Negative/Edge Case Testing + +- Invalid inputs: mismatched schema hash, law hash, profile hash, bundle hash, + unsupported version, missing required hash, malformed hash after validation + bypass, duplicate artifact role, and absent expected bundle hash. +- Timeouts: traceability evaluation is CPU-only and must not read files or use + wall-clock time. +- Concurrent users or retries: evaluating the same normalized evidence in + parallel must produce identical decision ids and ordering. +- Broken dependencies: invalid evidence prevents gate execution; partially + unavailable optional evidence produces `unavailable` only when policy allows. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Compare hash families across 1,000 artifact references in under 50 ms. | Synthetic normalized evidence benchmark. | +| Load | Mismatch detail lists must be truncated for display while retaining machine-readable counts. | Large stale-artifact fixture. | +| Security | Hash strings are treated as data and validated by syntax before comparison. | Fixtures with malformed and injection-like hash strings. | +| Accessibility | Mismatch output must name expected and actual hash values in text. | Snapshot text gate summaries with color disabled. 
| From 90a7a7888cf4a9013f601141808118989e00bc10 Mon Sep 17 00:00:00 2001 From: James Ross Date: Tue, 26 May 2026 14:42:31 -0700 Subject: [PATCH 3/9] docs(holmes): expand law assurance prd coverage --- docs/BEARING.md | 17 +-- .../holmes-weslaw-assurance-prd-test-plan.md | 86 ++++++++----- .../HLAW-011-law-assurance-report-document.md | 119 +++++++++++++++++ .../prds/HLAW-012-law-diff-report-section.md | 114 +++++++++++++++++ .../HLAW-013-law-coverage-report-section.md | 114 +++++++++++++++++ .../HLAW-014-law-capability-report-section.md | 114 +++++++++++++++++ ...AW-015-bundle-provenance-report-section.md | 117 +++++++++++++++++ .../HLAW-016-holmes-weslaw-validate-cli.md | 114 +++++++++++++++++ .../prds/HLAW-017-holmes-weslaw-assess-cli.md | 117 +++++++++++++++++ .../prds/HLAW-018-holmes-weslaw-report-cli.md | 112 ++++++++++++++++ .../HLAW-019-law-assurance-artifact-writer.md | 120 ++++++++++++++++++ ...HLAW-020-law-assurance-exit-code-policy.md | 116 +++++++++++++++++ .../HLAW-021-github-law-assurance-comment.md | 116 +++++++++++++++++ .../HLAW-022-github-law-gate-check-summary.md | 111 ++++++++++++++++ ...HLAW-023-github-law-finding-annotations.md | 114 +++++++++++++++++ .../HLAW-024-github-law-evidence-links.md | 112 ++++++++++++++++ .../HLAW-025-github-law-override-controls.md | 115 +++++++++++++++++ .../HLAW-026-mcp-assess-weslaw-bundle-tool.md | 115 +++++++++++++++++ .../HLAW-027-mcp-law-evidence-resources.md | 112 ++++++++++++++++ .../HLAW-028-mcp-explain-law-finding-tool.md | 113 +++++++++++++++++ .../prds/HLAW-029-mcp-law-policy-tool.md | 115 +++++++++++++++++ .../prds/HLAW-030-agent-safe-law-summary.md | 113 +++++++++++++++++ .../HLAW-031-law-assurance-policy-schema.md | 114 +++++++++++++++++ .../HLAW-032-law-severity-mapping-policy.md | 111 ++++++++++++++++ .../HLAW-033-law-coverage-threshold-policy.md | 112 ++++++++++++++++ ...AW-034-law-assurance-suppression-policy.md | 115 +++++++++++++++++ .../HLAW-035-law-assurance-audit-witness.md | 115 
+++++++++++++++++ 27 files changed, 2926 insertions(+), 37 deletions(-) create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-011-law-assurance-report-document.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-012-law-diff-report-section.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-013-law-coverage-report-section.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-014-law-capability-report-section.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-015-bundle-provenance-report-section.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-016-holmes-weslaw-validate-cli.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-017-holmes-weslaw-assess-cli.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-018-holmes-weslaw-report-cli.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-019-law-assurance-artifact-writer.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-020-law-assurance-exit-code-policy.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-021-github-law-assurance-comment.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-022-github-law-gate-check-summary.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-023-github-law-finding-annotations.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-024-github-law-evidence-links.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-025-github-law-override-controls.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-026-mcp-assess-weslaw-bundle-tool.md create mode 
100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-027-mcp-law-evidence-resources.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-028-mcp-explain-law-finding-tool.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-029-mcp-law-policy-tool.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-030-agent-safe-law-summary.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-031-law-assurance-policy-schema.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-032-law-severity-mapping-policy.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-033-law-coverage-threshold-policy.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-034-law-assurance-suppression-policy.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-035-law-assurance-audit-witness.md diff --git a/docs/BEARING.md b/docs/BEARING.md index 78d2304a..f7a8558d 100644 --- a/docs/BEARING.md +++ b/docs/BEARING.md @@ -411,17 +411,17 @@ The active packet is Working budget: **50 slices**. -Status: **10 / 50 slices closed**. The plan allocates `HLAW-001` through +Status: **35 / 50 slices closed**. The plan allocates `HLAW-001` through `HLAW-050` across evidence intake, typed domain contracts, report models, CLI flows, GitHub publishing, MCP surfaces, policy, QA fixtures, determinism, performance budgets, migration, release gates, operator docs, and campaign closeout. Each slice must produce a PRD/test-plan artifact with explicit objectives, scope, user stories, BDD acceptance criteria, and test scenarios. 
-The first pull closed `HLAW-001` through `HLAW-010`: evidence intake and typed -domain contracts for law diffs, coverage reports, capability summaries, bundle -manifests, artifact location, validation results, semantic findings, coverage -gates, and traceability gates. +Closed slices now cover `HLAW-001` through `HLAW-035`: evidence intake, typed +domain contracts, report sections, CLI operator flows, local artifact writing, +exit-code policy, GitHub publishing surfaces, MCP tools/resources, agent-safe +summaries, and the first policy/audit contracts. ## Next Target @@ -443,9 +443,10 @@ runtime ownership into the base compiler. The next pull is: -1. `HLAW-011` through `HLAW-020`: write PRD/test-plan artifacts for the report - model, CLI operator flows, local artifact writer, and exit-code policy that - sit on top of the evidence intake contracts. +1. `HLAW-036` through `HLAW-040`: write PRD/test-plan artifacts for the golden + fixture corpus, negative fixture corpus, fake clock and ports, concurrency + and idempotence, and performance budget slices that complete the policy/QA + harness chunk. ## Post-Retirement Freestyle Slice Log diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md index 6f88a9c4..eaf767a6 100644 --- a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md @@ -10,7 +10,7 @@ release: v0.0.8 ## Status -Active planning packet. First PR chunk complete. +Active planning packet. Slices `HLAW-001` through `HLAW-035` are complete. ## Question @@ -84,9 +84,9 @@ failure behavior, and test fixtures where known. | Chunk | Slices | Planned PR Shape | Purpose | | --- | --- | --- | --- | | 1 | HLAW-001..HLAW-010 | Complete | Evidence intake and typed domain contracts. 
| -| 2 | HLAW-011..HLAW-020 | One PR | Report model, CLI operator flows, and local artifacts. | -| 3 | HLAW-021..HLAW-030 | One PR | GitHub and MCP interfaces over the same assurance use cases. | -| 4 | HLAW-031..HLAW-040 | One PR | Policy, QA harnesses, determinism, concurrency, and budgets. | +| 2 | HLAW-011..HLAW-020 | Complete | Report model, CLI operator flows, and local artifacts. | +| 3 | HLAW-021..HLAW-030 | Complete | GitHub and MCP interfaces over the same assurance use cases. | +| 4 | HLAW-031..HLAW-040 | In progress | Policy, QA harnesses, determinism, concurrency, and budgets. | | 5 | HLAW-041..HLAW-050 | One PR | Migration, release gates, documentation, and campaign closeout. | Drift checks happen after HLAW-010, HLAW-025, HLAW-040, and HLAW-050. @@ -152,54 +152,54 @@ Drift checks happen after HLAW-010, HLAW-025, HLAW-040, and HLAW-050. ### Report Model, CLI, And Local Artifacts -- [ ] HLAW-011 `LawAssuranceReportDocument` PRD and test plan. +- [x] HLAW-011 `LawAssuranceReportDocument` PRD and test plan. - Feature/product: A structured report section family for semantic changes, coverage, capabilities, and bundle traceability inside the Holmes `ReportDocument`. - Required output: PRD for section ids, tables, summary metrics, attachments, stable ordering, and renderer-neutral semantics. -- [ ] HLAW-012 `LawDiffReportSection` PRD and test plan. +- [x] HLAW-012 `LawDiffReportSection` PRD and test plan. - Feature/product: A report section that presents semantic law diff events in review order while preserving machine-readable event kinds. - Required output: PRD for field columns, grouped summaries, high-risk event highlighting, truncation policy, and no-change behavior. -- [ ] HLAW-013 `LawCoverageReportSection` PRD and test plan. +- [x] HLAW-013 `LawCoverageReportSection` PRD and test plan. - Feature/product: A report section that presents law coverage by profile, category, required status, covered count, and missing subjects. 
- Required output: PRD for thresholds, empty categories, required versus advisory categories, and accessibility of table output. -- [ ] HLAW-014 `LawCapabilityReportSection` PRD and test plan. +- [x] HLAW-014 `LawCapabilityReportSection` PRD and test plan. - Feature/product: A report section that presents footprint capability summaries while explicitly labeling them report-only. - Required output: PRD for wording, resource grouping, empty lists, large footprint truncation, and runtime-enforcement disclaimers. -- [ ] HLAW-015 `BundleProvenanceReportSection` PRD and test plan. +- [x] HLAW-015 `BundleProvenanceReportSection` PRD and test plan. - Feature/product: A report section that shows schemaHash, lawHash, profileHash, bundleHash, law codec, compiler identity, and generator provenance. - Required output: PRD for required fields, partial manifests, hash display, copy/paste safety, and mismatch callouts. -- [ ] HLAW-016 `holmes weslaw validate` CLI PRD and test plan. +- [x] HLAW-016 `holmes weslaw validate` CLI PRD and test plan. - Feature/product: A Holmes CLI command that validates a `HolmesLawEvidence` input bundle without making readiness judgments. - Required output: PRD for flags, JSON/text output, exit codes, invalid bundle diagnostics, and fixture golden outputs. -- [ ] HLAW-017 `holmes weslaw assess` CLI PRD and test plan. +- [x] HLAW-017 `holmes weslaw assess` CLI PRD and test plan. - Feature/product: A Holmes CLI command that evaluates validated law evidence into gates, findings, verdict, and a structured report document. - Required output: PRD for flags, policy selection, `--fail-on` behavior, terminal output, JSON output, and missing optional artifact behavior. -- [ ] HLAW-018 `holmes weslaw report` CLI PRD and test plan. +- [x] HLAW-018 `holmes weslaw report` CLI PRD and test plan. - Feature/product: A Holmes CLI command that renders a `ReportDocument` as Markdown, JSON, terminal text, or file output without publishing anywhere. 
- Required output: PRD for renderer selection, output paths, stdout behavior, overwrite policy, and snapshot tests. -- [ ] HLAW-019 `LawAssuranceArtifactWriter` PRD and test plan. +- [x] HLAW-019 `LawAssuranceArtifactWriter` PRD and test plan. - Feature/product: A local output adapter that writes normalized validation, assessment, and rendered report artifacts for CI and later review. - Required output: PRD for artifact names, deterministic bytes, directory creation, collision policy, and reproducible hash checks. -- [ ] HLAW-020 `LawAssuranceExitCodePolicy` PRD and test plan. +- [x] HLAW-020 `LawAssuranceExitCodePolicy` PRD and test plan. - Feature/product: A CLI exit-code policy for validation errors, assurance failures, warnings, publisher failures, and internal errors. - Required output: PRD for exit-code table, `--fail-on` gates, CI defaults, @@ -207,52 +207,52 @@ Drift checks happen after HLAW-010, HLAW-025, HLAW-040, and HLAW-050. ### GitHub And MCP Interfaces -- [ ] HLAW-021 `GitHubLawAssuranceComment` PRD and test plan. +- [x] HLAW-021 `GitHubLawAssuranceComment` PRD and test plan. - Feature/product: A GitHub PR comment renderer/publisher for law diff, coverage, capability, and bundle provenance summaries. - Required output: PRD for sticky comment markers, update behavior, markdown constraints, truncation, links, and idempotent publishing. -- [ ] HLAW-022 `GitHubLawGateCheckSummary` PRD and test plan. +- [x] HLAW-022 `GitHubLawGateCheckSummary` PRD and test plan. - Feature/product: A GitHub-facing gate summary that tells reviewers whether law evidence is pass, warn, fail, or unavailable. - Required output: PRD for review wording, blocked-merge posture, required versus advisory gates, and stale evidence detection. -- [ ] HLAW-023 `GitHubLawFindingAnnotations` PRD and test plan. +- [x] HLAW-023 `GitHubLawFindingAnnotations` PRD and test plan. - Feature/product: A mapping from law findings to PR annotations or comment bullets where file/line context exists. 
- Required output: PRD for annotation eligibility, no-line findings, deduplication, rate limits, and fallback rendering. -- [ ] HLAW-024 `GitHubLawEvidenceLinks` PRD and test plan. +- [x] HLAW-024 `GitHubLawEvidenceLinks` PRD and test plan. - Feature/product: A link model that connects PR comments to law artifacts, CI runs, bundle manifests, and rendered reports. - Required output: PRD for artifact URLs, missing artifact behavior, expiration notes, and markdown link safety. -- [ ] HLAW-025 `GitHubLawOverrideControls` PRD and test plan. +- [x] HLAW-025 `GitHubLawOverrideControls` PRD and test plan. - Feature/product: A policy-controlled override surface for maintainers to acknowledge advisory law warnings without hiding failed validation. - Required output: PRD for labels/checkboxes, audit records, non-overridable failures, and drift checkpoint criteria. -- [ ] HLAW-026 `McpAssessWeslawBundleTool` PRD and test plan. +- [x] HLAW-026 `McpAssessWeslawBundleTool` PRD and test plan. - Feature/product: An MCP tool that assesses a law evidence bundle and returns structured gates, findings, and rendered report references. - Required output: PRD for request/response schema, workspace authorization, error mapping, and deterministic examples. -- [ ] HLAW-027 `McpLawEvidenceResources` PRD and test plan. +- [x] HLAW-027 `McpLawEvidenceResources` PRD and test plan. - Feature/product: MCP resources exposing law diff, coverage, capability, bundle manifest, and rendered law report data. - Required output: PRD for resource URIs, caching, access control, invalid bundle references, and schema examples. -- [ ] HLAW-028 `McpExplainLawFindingTool` PRD and test plan. +- [x] HLAW-028 `McpExplainLawFindingTool` PRD and test plan. - Feature/product: An MCP tool that explains one Holmes law finding with source artifact references and suggested next action. - Required output: PRD for finding ids, explanation shape, citation fallback, and missing finding behavior. 
-- [ ] HLAW-029 `McpLawPolicyTool` PRD and test plan. +- [x] HLAW-029 `McpLawPolicyTool` PRD and test plan. - Feature/product: An MCP tool that returns active law assurance policy, thresholds, required gates, and non-overridable checks. - Required output: PRD for policy redaction, profile selection, unknown profile errors, and stale policy detection. -- [ ] HLAW-030 `AgentSafeLawSummary` PRD and test plan. +- [x] HLAW-030 `AgentSafeLawSummary` PRD and test plan. - Feature/product: A compact, structured summary format optimized for agents that need law evidence without long Markdown comments. - Required output: PRD for token budgets, severity grouping, artifact refs, @@ -260,27 +260,27 @@ Drift checks happen after HLAW-010, HLAW-025, HLAW-040, and HLAW-050. ### Policy, QA Harnesses, Determinism, And Budgets -- [ ] HLAW-031 `LawAssurancePolicySchema` PRD and test plan. +- [x] HLAW-031 `LawAssurancePolicySchema` PRD and test plan. - Feature/product: A versioned policy schema defining required law evidence, thresholds, severity mappings, and override rules. - Required output: PRD for schema versioning, defaults, profile inheritance, unknown fields, and JSON Schema validation. -- [ ] HLAW-032 `LawSeverityMappingPolicy` PRD and test plan. +- [x] HLAW-032 `LawSeverityMappingPolicy` PRD and test plan. - Feature/product: A policy layer that maps law diff event kinds and coverage gaps to Holmes severities without changing Wesley's semantic classifications. - Required output: PRD for mapping table, unmapped event behavior, release/local differences, and fixture coverage. -- [ ] HLAW-033 `LawCoverageThresholdPolicy` PRD and test plan. +- [x] HLAW-033 `LawCoverageThresholdPolicy` PRD and test plan. - Feature/product: A policy layer that sets required coverage floors by category and profile. - Required output: PRD for pass/warn/fail thresholds, category absences, percentage rounding, and boundary-value tests. -- [ ] HLAW-034 `LawAssuranceSuppressionPolicy` PRD and test plan. 
+- [x] HLAW-034 `LawAssuranceSuppressionPolicy` PRD and test plan. - Feature/product: A suppression/audit model for known advisory findings that must not suppress invalid evidence or failed binding. - Required output: PRD for suppression ids, expiration, reason text, audit output, and abuse-prevention tests. -- [ ] HLAW-035 `LawAssuranceAuditWitness` PRD and test plan. +- [x] HLAW-035 `LawAssuranceAuditWitness` PRD and test plan. - Feature/product: A deterministic witness artifact recording inputs, policy, outputs, hashes, and the exact gates evaluated by Holmes. - Required output: PRD for witness schema, hash coverage, replay fields, @@ -399,6 +399,34 @@ campaign. The next chunk can safely define report sections, CLI operator flows, artifact writing, and exit-code behavior on top of the evidence contracts specified here. +## Drift Check: HLAW-025 + +Date: 2026-05-26. + +Status: **25 / 50 slices closed**. + +Decision: continue. The report, CLI, local artifact, and GitHub slices still +fit the campaign boundary. Holmes is being specified as an assurance layer that +validates, assesses, reports, publishes, and audits Wesley-produced evidence. +The GitHub override slice deliberately keeps invalid evidence and +non-overridable required gates outside waiver scope. + +No scope correction is needed. The next surfaces should be MCP and agent-safe +interfaces over the same domain model, not new law semantics. + +## Progress Check: HLAW-035 + +Date: 2026-05-26. + +Status: **35 / 50 slices closed**. + +Decision: continue with `HLAW-036` through `HLAW-040` next. The first five +policy/audit slices establish the policy schema, severity mapping, coverage +thresholds, suppression boundaries, and audit witness requirements. The +remaining policy/QA harness work should now pin fixture corpora, fake-clock and +port requirements, concurrency/idempotence, and performance budgets before the +campaign moves into migration and closeout. 
+ ## Non-Goals For The 50-Slice Planning Campaign - Do not implement Rust Holmes crates yet. diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-011-law-assurance-report-document.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-011-law-assurance-report-document.md new file mode 100644 index 00000000..394a8a4e --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-011-law-assurance-report-document.md @@ -0,0 +1,119 @@ +--- +title: HLAW-011 LawAssuranceReportDocument +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-011 LawAssuranceReportDocument + +## Feature Overview & Objectives + +### Problem Statement + +Holmes needs a renderer-neutral report document for `weslaw` assurance. The +document must collect semantic change findings, coverage gates, capability +posture, and bundle provenance without baking in Markdown, GitHub, terminal, or +MCP output decisions. Without a common report document, every output adapter +will invent its own section order, summary language, truncation behavior, and +machine-readable fields. + +### Target User/Audience + +- Holmes application developers composing law assurance assessments. +- CLI, GitHub, API, and MCP adapter authors rendering the same assessment. +- Reviewers who need stable section ids and summary metrics across surfaces. +- QA engineers snapshotting report structure independently from presentation. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Renderer parity | 100% of law assurance renderers consume the same `LawAssuranceReportDocument`. | +| Section determinism | Repeated assessments produce identical section ordering and ids. | +| Evidence traceability | Every section can reference the source validation result, artifact role, or gate decision that produced it. 
| + +## Scope Definition + +### In Scope + +- Define `LawAssuranceReportDocument` as a structured, renderer-neutral document + with title, report id, profile, summary metrics, sections, attachments, + source bundle reference, generated-at clock value, and verdict placeholder. +- Define stable section ids for law diff, coverage, capabilities, bundle + provenance, validation diagnostics, and gate summary. +- Define section ordering and attachment reference rules. +- Define a deterministic JSON representation suitable for snapshots and future + audit witnesses. +- Define no-data behavior for empty law diffs, unavailable coverage, empty + capabilities, and missing optional provenance. + +### Out of Scope + +- No Markdown, terminal, GitHub, or MCP rendering in this slice. +- No final CLI flags or file-writing behavior. +- No GitHub comment markers, annotations, or check-run integration. +- No suppression or override policy. +- No recomputation of Wesley law evidence. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a CLI renderer, I want one report document so that text, JSON, and Markdown outputs share the same facts. | +| US-002 | As a GitHub publisher, I want stable section ids so that sticky PR comments can update sections predictably later. | +| US-003 | As a QA engineer, I want deterministic JSON so that report snapshots catch real behavior drift. | +| US-004 | As a reviewer, I want empty states explicitly represented so that absent evidence is not confused with passing evidence. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A valid assessment contains findings, gates, and provenance | Holmes builds the report document | The report contains separate sections for each domain area without renderer-specific Markdown. | +| US-002 | The same assessment is supplied twice | Holmes builds a report document each time | Section ids and ordering are byte-identical. 
| +| US-003 | A report contains attachments | Holmes serializes the report JSON | Attachment references are sorted by id and include role, media type, and path or URI. | +| US-004 | Law diff has zero events | Holmes builds the report document | The law diff section is present with explicit `noSemanticChanges` state. | +| US-004 | Coverage evidence is unavailable | Holmes builds the report document | The coverage section records `unavailable`, not `pass`. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Full assessment report | Happy | normalized findings, gates, provenance | Complete report with stable sections. | +| TS-002 | Empty law diff report | Happy | no-change diff evidence | Law diff section present with no-change state. | +| TS-003 | Unavailable coverage | Edge | validation result without coverage | Coverage section marked unavailable. | +| TS-004 | Duplicate section id construction | Negative | test-only malformed section list | Constructor rejects duplicate ids. | +| TS-005 | Large findings list | Load | 10,000 synthetic findings | Report builds within budget with summary counts. | +| TS-006 | Untrusted strings in summaries | Security | crafted law ids and subjects | Report stores data without pre-rendering HTML. | + +### Happy Path Testing + +1. Build a report document from a complete assessment fixture. +2. Assert the report contains summary, law diff, coverage, capability, + provenance, and gate sections. +3. Serialize the report JSON twice and assert byte equality. +4. Verify every section references the evidence or gate that produced it. + +### Negative/Edge Case Testing + +- Invalid inputs: duplicate section ids, missing report id, missing profile, + orphan attachment references, invalid verdict placeholder, and section data + with no evidence reference. 
+- Timeouts: report construction is CPU-only and must not read files, call GitHub, + or inspect wall-clock time directly; generated time comes from an injected clock. +- Concurrent users or retries: report construction must be pure for identical + assessment input and fake-clock value. +- Broken dependencies: renderer failures are out of scope and must not mutate + the report document. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Build a 10,000-finding report in under 500 ms. | Synthetic report benchmark. | +| Load | Summary metrics must avoid duplicating full finding payloads. | Allocation check over large report fixture. | +| Security | Report data remains escaped-data-ready and never contains trusted HTML. | Inject Markdown/HTML strings and inspect serialized JSON. | +| Accessibility | Report structure must expose text labels and summaries for every section. | Contract test required labels independent of color or layout. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-012-law-diff-report-section.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-012-law-diff-report-section.md new file mode 100644 index 00000000..607e7a50 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-012-law-diff-report-section.md @@ -0,0 +1,114 @@ +--- +title: HLAW-012 LawDiffReportSection +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-012 LawDiffReportSection + +## Feature Overview & Objectives + +### Problem Statement + +Semantic law diffs are the main reviewer-facing signal for `weslaw` changes. +Holmes needs a dedicated report section that presents findings in review order +while preserving Wesley event kinds and machine-readable fields. 
The section +must avoid vague summaries such as "law changed" and instead name subject, +law id, event kind, severity, and changed fields. + +### Target User/Audience + +- PR reviewers evaluating semantic changes. +- Release maintainers looking for high-risk law weakening or footprint changes. +- GitHub and CLI renderers presenting concise diff tables. +- QA engineers verifying no-change, truncation, and sorting behavior. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Review specificity | 100% of rendered rows include law id, subject, event kind, and severity. | +| Sort stability | The same findings produce identical row order across renderers. | +| High-risk visibility | Weakening, removal, and footprint expansion events are explicitly countable. | + +## Scope Definition + +### In Scope + +- Define `LawDiffReportSection` data shape inside `LawAssuranceReportDocument`. +- Define columns: severity, event kind, posture, subject, law id, summary, + changed fields, source artifact reference, and optional source location. +- Define grouped summary counts by event kind, severity, subject kind, and + high-risk classification. +- Define truncation policy for large event lists with omitted-row accounting. +- Define no-change behavior when no semantic law diff findings exist. + +### Out of Scope + +- No final Markdown table rendering. +- No GitHub annotations or inline comments. +- No severity remapping beyond consuming existing finding severity. +- No semantic diff computation. +- No suppression or reviewer acknowledgement. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a PR reviewer, I want semantic law changes grouped and sorted so that I can scan risk quickly. | +| US-002 | As a release maintainer, I want high-risk event counts so that weakening and removals are visible before merge. | +| US-003 | As a renderer author, I want table-ready row data so that terminal, Markdown, and JSON output stay consistent. 
| +| US-004 | As a QA engineer, I want large diffs truncated deterministically so that reports remain usable. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Findings include scalar, variant, footprint, and channel changes | Holmes builds the section | Rows are sorted by severity, subject kind, subject, law id, and event kind. | +| US-002 | Findings include a law removal and a weakening | Holmes builds the section | Summary counts include high-risk count `2` with event kinds preserved. | +| US-003 | A finding has source location metadata | Holmes builds the row | The row includes source artifact reference and location fields. | +| US-004 | Findings exceed display limit `100` | Holmes builds the section | The section includes the first deterministic 100 rows and an omitted count. | +| US-004 | Findings list is empty | Holmes builds the section | The section state is `noSemanticChanges` and contains zero rows. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Mixed findings section | Happy | mixed semantic findings | Sorted rows and grouped counts. | +| TS-002 | Empty findings | Happy | no findings | No-change state. | +| TS-003 | High-risk weakening/removal | Negative | weakening and removal findings | High-risk counts populated. | +| TS-004 | Long changed-fields list | Edge | footprint change with many resources | Row summary bounded, details preserved. | +| TS-005 | Large findings list | Load | 10,000 findings | Deterministic truncation and omitted count. | +| TS-006 | Crafted Markdown in law id | Security | malicious string fixture | Data preserved for renderer escaping. | + +### Happy Path Testing + +1. Build section from a mixed finding fixture. +2. Verify columns for severity, event kind, posture, subject, law id, summary, + changed fields, and artifact reference. +3. 
Assert grouped counts and deterministic row order. +4. Snapshot JSON row output. + +### Negative/Edge Case Testing + +- Invalid inputs: missing finding id, missing subject, missing event kind, + duplicate row id after finding normalization bypass, invalid display limit, + and changed-fields payload that exceeds summary budget. +- Timeouts: section construction is CPU-only and uses no clocks or IO. +- Concurrent users or retries: deterministic row sorting must not depend on map + iteration order. +- Broken dependencies: if finding construction failed, this section is not + built; validation owns that failure. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Build and summarize 10,000 rows in under 300 ms. | Synthetic finding benchmark. | +| Load | Truncation must retain counts without rendering every row. | Large findings fixture. | +| Security | All row strings are untrusted and renderer-escaped later. | Injection-like law ids and subjects. | +| Accessibility | Rows expose text severity and event kind for non-color renderers. | Contract test required row labels. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-013-law-coverage-report-section.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-013-law-coverage-report-section.md new file mode 100644 index 00000000..076ac813 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-013-law-coverage-report-section.md @@ -0,0 +1,114 @@ +--- +title: HLAW-013 LawCoverageReportSection +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-013 LawCoverageReportSection + +## Feature Overview & Objectives + +### Problem Statement + +Law coverage gate decisions need a report section that makes coverage gaps +actionable. 
A raw percentage is insufficient; reviewers need profile, category, +required/advisory status, thresholds, covered count, total count, missing +subjects, gate state, and omitted-missing-subject counts. The section must +distinguish unavailable coverage from passing coverage. + +### Target User/Audience + +- Release maintainers reviewing required law coverage. +- Local developers improving missing law coverage. +- CLI and GitHub renderers creating coverage tables. +- QA engineers testing thresholds, empty categories, and accessibility. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Missing-subject actionability | 100% of failing required rows include at least one missing subject or an omitted count. | +| State clarity | Coverage rows distinguish pass, warn, fail, and unavailable without color. | +| Profile fidelity | Section output always names the evaluated profile. | + +## Scope Definition + +### In Scope + +- Define `LawCoverageReportSection` rows for profile/category gate decisions. +- Include columns: profile, category, required status, state, covered, total, + percentage, warning threshold, failure threshold, missing subjects, omitted + missing count, and evidence reference. +- Define empty-category and unavailable-coverage states. +- Define percentage rounding and display precision. +- Define accessibility requirements for text status and missing-subject lists. + +### Out of Scope + +- No coverage computation from schema or law artifacts. +- No threshold policy definition beyond consuming evaluated gate decisions. +- No GitHub check status integration. +- No suppression or override behavior. +- No final renderer-specific table formatting. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a release maintainer, I want failing coverage rows to list missing subjects so that I know what law to add. | +| US-002 | As a local developer, I want advisory warnings shown separately from failures so that I can prioritize work. 
| +| US-003 | As a renderer author, I want normalized coverage row fields so that Markdown and JSON output agree. | +| US-004 | As an accessibility reviewer, I want text status labels so that coverage state does not depend on color. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A required category fails with missing subjects | Holmes builds the section | The row state is `fail` and the row includes missing subject coordinates. | +| US-002 | An advisory category is below warning threshold | Holmes builds the section | The row state is `warn`, required status is advisory, and failure text is not used. | +| US-003 | Coverage gate decisions are supplied in random order | Holmes builds the section | Rows are sorted by profile, required status, category, and state severity. | +| US-004 | A row is rendered by a non-color renderer | The renderer reads section data | The row contains text `stateLabel` and `requiredLabel`. | +| US-004 | Coverage evidence is unavailable | Holmes builds the section | The section includes an unavailable row that omits the percentage instead of reporting 0%. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Passing release coverage | Happy | pass gate decisions | All rows have state `pass`. | +| TS-002 | Required coverage failure | Negative | fail gate with missing subjects | Missing subjects included. | +| TS-003 | Advisory warning | Happy | warn gate decision | Advisory row state `warn`. | +| TS-004 | Unavailable coverage | Edge | unavailable gate decision | Unavailable row, no percentage. | +| TS-005 | Boundary percentage rounding | Edge | 2/3 coverage | Documented precision used. | +| TS-006 | Large missing subject list | Load | 50,000 missing subjects | Truncated display and omitted count. | + +### Happy Path Testing + +1. Build section from pass, warn, and fail gate decisions. +2. 
Verify row fields include profile, category, state labels, counts, + thresholds, and evidence references. +3. Assert percentage rounding matches the documented precision. +4. Snapshot deterministic row ordering. + +### Negative/Edge Case Testing + +- Invalid inputs: gate decision missing profile, missing category, percentage + inconsistent with counts after validation bypass, negative omitted count, + invalid threshold ordering, and missing evidence reference. +- Timeouts: section construction is CPU-only and uses no filesystem or network. +- Concurrent users or retries: sorting and truncation must be stable under + parallel construction. +- Broken dependencies: if coverage gate evaluation is skipped because evidence + is invalid, the section records validation status rather than fabricating rows. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Build 1,000 coverage rows in under 100 ms. | Synthetic gate decision benchmark. | +| Load | Missing subject truncation must preserve total missing count. | Large missing-subject fixture. | +| Security | Subject coordinates are display data only. | Include path-like and Markdown-like coordinates. | +| Accessibility | Every row includes text state, category, and required labels. | Contract test renderer-neutral row fields. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-014-law-capability-report-section.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-014-law-capability-report-section.md new file mode 100644 index 00000000..5433b678 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-014-law-capability-report-section.md @@ -0,0 +1,114 @@ +--- +title: HLAW-014 LawCapabilityReportSection +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-014 LawCapabilityReportSection + +## Feature Overview & Objectives + +### Problem Statement + +Footprint capability summaries explain which operations are declared to read, +write, create, or forbid resources. Holmes must report that posture without +claiming runtime enforcement unless evidence explicitly supports enforcement. +The section needs consistent wording, grouping, truncation, and empty-state +behavior so reviewers can understand operation boundaries without being misled. + +### Target User/Audience + +- Runtime maintainers reviewing operation footprint posture. +- PR reviewers checking whether semantic law changes widened access. +- Renderer authors building tables for CLI, Markdown, and GitHub. +- QA engineers testing report-only disclaimers and large footprint lists. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Disclaimer correctness | 100% of report-only rows include explicit report-only posture. | +| Resource readability | Reads, writes, creates, and forbids are grouped separately for every operation. | +| Empty-state safety | Absent capabilities are never rendered as unrestricted or enforced access. | + +## Scope Definition + +### In Scope + +- Define `LawCapabilityReportSection` rows by operation subject and law id. +- Include report-only/runtime-enforcement posture, reads, writes, creates, + forbids, slots, closures, empty-state marker, and source artifact reference. 
+- Define resource grouping and deterministic truncation for large lists. +- Define wording constraints: report-only footprint summaries must not say + "enforced", "blocked", or "prevented" without enforcement evidence. +- Define unavailable, intentionally-empty, and malformed-input states. + +### Out of Scope + +- No runtime enforcement verification. +- No handler code inspection. +- No capability API generation. +- No GitHub annotation mapping. +- No final renderer-specific output. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a reviewer, I want each operation's declared reads/writes/creates/forbids grouped so that access posture is scannable. | +| US-002 | As a runtime maintainer, I want report-only summaries labeled clearly so that no one confuses declaration with enforcement. | +| US-003 | As a renderer author, I want truncation metadata so that large footprints remain readable. | +| US-004 | As a QA engineer, I want absent and intentionally-empty capability states distinguished. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A capability summary lists reads, writes, creates, and forbids | Holmes builds the section | The row contains separate arrays for each resource posture. | +| US-002 | `reportOnly` is true and runtime enforcement is false | Holmes builds the row | The row includes a required report-only disclaimer field. | +| US-002 | Runtime enforcement evidence is present | Holmes builds the row | The row may include enforcement wording only with the evidence reference. | +| US-003 | A resource list exceeds the display limit | Holmes builds the section | The row includes truncated resources and omitted counts per group. | +| US-004 | Capability evidence lacks an operation | Holmes builds the section | The operation is unavailable, not intentionally empty. 
| + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Normal report-only operation | Happy | jedit replace capability summary | Grouped row with disclaimer. | +| TS-002 | Runtime-enforced operation | Happy | future enforced summary | Enforcement reference preserved. | +| TS-003 | Missing operation | Edge | absent capability entry | Unavailable state. | +| TS-004 | Intentionally empty operation | Edge | explicit empty summary | Empty state, not unavailable. | +| TS-005 | Large resource groups | Load | generated footprint summary | Truncation per group. | +| TS-006 | Forbidden wording regression | Negative | renderer text fixture | Report data forces non-enforcement wording. | + +### Happy Path Testing + +1. Build section from normalized capability summaries. +2. Assert each row contains operation subject, law id, posture labels, grouped + resources, and artifact reference. +3. Verify report-only disclaimer data is present. +4. Snapshot sorted rows and truncation metadata. + +### Negative/Edge Case Testing + +- Invalid inputs: missing posture, contradictory posture, duplicate resource in + mutually exclusive groups, slot reference missing after ingest bypass, invalid + display limit, and missing artifact reference. +- Timeouts: section construction performs no IO and uses no clock. +- Concurrent users or retries: resource sorting and truncation must be stable. +- Broken dependencies: invalid capability evidence prevents section creation; + unavailable evidence produces unavailable rows only if assessment policy asks + for them. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Build 5,000 operation rows in under 500 ms. | Synthetic capability benchmark. | +| Load | Truncation must avoid duplicating full resource lists into summaries. | Large footprint fixture. 
| +| Security | Resource names are untrusted display data. | Markdown/HTML/resource-name injection fixtures. | +| Accessibility | Every posture and resource group has a text label. | Contract test row labels and disclaimer field. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-015-bundle-provenance-report-section.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-015-bundle-provenance-report-section.md new file mode 100644 index 00000000..f24de0e3 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-015-bundle-provenance-report-section.md @@ -0,0 +1,117 @@ +--- +title: HLAW-015 BundleProvenanceReportSection +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-015 BundleProvenanceReportSection + +## Feature Overview & Objectives + +### Problem Statement + +Every Holmes law assurance report must be traceable to the Wesley contract +bundle that produced its evidence. Reviewers need schemaHash, lawHash, +profileHash, bundleHash, law codec, compiler identity, and source artifact +references in one stable section. Without that section, semantic findings and +coverage gates are detached from the exact compiler inputs they judge. + +### Target User/Audience + +- Release maintainers verifying evidence provenance. +- Incident reviewers tracing a report back to generated artifacts. +- GitHub and CLI renderers displaying hash families safely. +- QA engineers testing partial manifests, copy/paste safety, and mismatch + callouts. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Hash visibility | Accepted release reports show schema, law, profile, and bundle hashes in full-text data fields. | +| Provenance completeness | Compiler identity and law codec appear in 100% of provenance sections when manifest ingest accepts them. | +| Mismatch clarity | Traceability failures can link directly to provenance fields. 
| + +## Scope Definition + +### In Scope + +- Define `BundleProvenanceReportSection` data rows for manifest provenance and + cross-artifact hash consistency results. +- Include schemaHash, lawHash, profileHash, bundleHash, compiler name/version, + law codec, manifest version, source references, generated artifact references, + and traceability gate status. +- Define hash display data: full hash, short hash, algorithm, and copy-safe + string. +- Define partial manifest behavior for local runs. +- Define mismatch callout fields linking to `BundleTraceabilityGateDecision`. + +### Out of Scope + +- No hash recomputation. +- No remote artifact download. +- No release signing or signature verification. +- No final Markdown formatting. +- No policy decision about whether a partial manifest blocks merge. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a release maintainer, I want the report to show the exact contract bundle hash family so that I can audit what was assessed. | +| US-002 | As an incident reviewer, I want compiler and codec provenance so that I can reproduce or investigate old reports. | +| US-003 | As a renderer author, I want full and short hash fields so that output can be readable without losing copy/paste safety. | +| US-004 | As a QA engineer, I want mismatch callouts linked to traceability gates so that stale evidence is visible. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Manifest provenance includes all hash fields | Holmes builds the section | The section contains schemaHash, lawHash, profileHash, and bundleHash as full values. | +| US-002 | Manifest contains compiler `wesley` version and codec | Holmes builds the section | Compiler and codec fields are present and renderer-neutral. | +| US-003 | A renderer needs short display hashes | Holmes builds the section | Each hash has full and short display fields derived deterministically. 
| +| US-004 | Traceability gate failed on stale coverage | Holmes builds the section | The provenance section includes a mismatch callout referencing `lawCoverage`. | +| US-004 | Local manifest omits optional generated artifacts | Holmes builds the section | The section records those references as unavailable without failing construction. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Complete release provenance | Happy | complete manifest plus pass traceability gate | Full provenance section. | +| TS-002 | Partial local provenance | Happy | local manifest missing optional refs | Unavailable optional refs. | +| TS-003 | Stale coverage mismatch | Negative | failed traceability gate | Mismatch callout. | +| TS-004 | Long generated artifact list | Load | manifest with 5,000 artifacts | Truncated display with counts. | +| TS-005 | Malformed hash after validation bypass | Negative | constructed invalid provenance | Section constructor rejects input. | +| TS-006 | Copy-safe hash rendering | Accessibility | full hash fixture | Full text retained, short hash derived. | + +### Happy Path Testing + +1. Build section from normalized manifest provenance and traceability gate. +2. Assert full hash fields, short hash fields, compiler identity, codec, and + manifest version are present. +3. Verify optional generated artifact references are represented deterministically. +4. Snapshot section JSON. + +### Negative/Edge Case Testing + +- Invalid inputs: missing required provenance field after validation bypass, + invalid hash syntax, missing traceability gate reference, duplicate generated + artifact ids, and short-hash collision within one section. +- Timeouts: section construction uses no IO or wall-clock time. +- Concurrent users or retries: short hash derivation and artifact ordering are + deterministic under parallel construction. 
+- Broken dependencies: if manifest validation failed, this section may only + render validation diagnostics, not trusted provenance. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Build provenance with 5,000 artifact refs in under 200 ms. | Synthetic manifest benchmark. | +| Load | Artifact reference truncation must keep total counts. | Large manifest fixture. | +| Security | Paths and URLs are untrusted data for later renderers. | Injection-like URL/path fixture. | +| Accessibility | Full hashes remain available as text, not tooltip-only or color-only data. | Contract test full hash fields. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-016-holmes-weslaw-validate-cli.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-016-holmes-weslaw-validate-cli.md new file mode 100644 index 00000000..31458e4d --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-016-holmes-weslaw-validate-cli.md @@ -0,0 +1,114 @@ +--- +title: HLAW-016 holmes weslaw validate CLI +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-016 `holmes weslaw validate` CLI + +## Feature Overview & Objectives + +### Problem Statement + +Operators need a fast way to prove that law assurance evidence is readable, +version-compatible, and internally well-formed before Holmes makes any judgment. +The validation command must fail on bad evidence without reporting product risk +or merge readiness. This prevents malformed paths, stale artifact schemas, and +broken JSON from polluting later reports. + +### Target User/Audience + +- Local developers validating `weslaw` evidence before opening a PR. +- CI maintainers splitting evidence validation from assessment. +- QA engineers running negative fixture suites. +- Agents that need a narrow command for "is this evidence usable?" 
+ +### Success Metrics + +| KPI | Target | +| --- | --- | +| Validation isolation | The command never emits assurance gates or verdicts. | +| Fixture coverage | Every validation error family has at least one documented fixture. | +| Output determinism | JSON output is byte-identical for identical inputs and fake-clock settings. | + +## Scope Definition + +### In Scope + +- Define command: `holmes weslaw validate --bundle <path> [--profile <name>]`. +- Define optional artifact override flags for required evidence roles. +- Support output formats `text` and `json`. +- Emit `LawEvidenceValidationResult`. +- Define exit behavior for valid, valid-with-warnings, invalid evidence, and + infrastructure errors. +- Define fixture golden outputs for clean, malformed, missing, unsupported, and + path-policy cases. + +### Out of Scope + +- No assessment verdict, findings, gate decisions, or report rendering. +- No GitHub publishing. +- No MCP server behavior. +- No running Wesley commands to generate evidence. +- No auto-fix or regeneration of invalid evidence. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a local developer, I want to validate a law evidence bundle so that I can fix artifact problems before assessment. | +| US-002 | As a CI maintainer, I want a dedicated validation exit code so that bad inputs fail early. | +| US-003 | As an agent, I want JSON validation output so that I can explain exact evidence errors. | +| US-004 | As a QA engineer, I want golden text and JSON outputs so that command behavior remains stable. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A clean bundle path is supplied | The user runs `holmes weslaw validate --bundle bundle.json` | The command exits success and prints validation status. | +| US-002 | The law diff path is missing | The command runs | The command exits with validation-failure category and emits no findings. 
| +| US-003 | `--format json` is supplied | The command runs | Output is `LawEvidenceValidationResult` JSON. | +| US-004 | The same invalid fixture is run twice | The command runs | Text and JSON outputs are deterministic after path normalization. | +| US-004 | An unsupported bundle version is supplied | The command runs | Output includes `HLAW_BUNDLE_UNSUPPORTED_VERSION`. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Clean bundle validation | Happy | `fixtures/hlaw/bundles/clean-release.json` | Exit success, valid result. | +| TS-002 | JSON output | Happy | clean bundle with `--format json` | Deterministic JSON result. | +| TS-003 | Missing required artifact | Negative | missing law diff path | Validation failure exit. | +| TS-004 | Malformed bundle JSON | Negative | malformed bundle | Malformed diagnostic. | +| TS-005 | Path traversal | Security | traversal bundle | Path-policy diagnostic. | +| TS-006 | Filesystem timeout | Edge | fake locator timeout | Infrastructure error exit. | + +### Happy Path Testing + +1. Run `holmes weslaw validate --bundle clean-release.json --format json`. +2. Assert exit success, no assurance findings, and valid status. +3. Run text output and snapshot status, artifact roles, and warnings. +4. Confirm optional source evidence absence does not fail unless policy says so. + +### Negative/Edge Case Testing + +- Invalid inputs: missing `--bundle`, unreadable bundle, malformed JSON, + unsupported version, missing required artifact, invalid path, and unknown + output format. +- Timeouts: fake locator read timeout yields infrastructure error and no retry + loop in the command. +- Concurrent users or retries: repeated command invocations must not write files + or mutate workspace state. +- Broken dependencies: no GitHub or network dependency is allowed for validate. 
+ +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Validate a clean bundle under 500 ms after process startup. | CLI integration benchmark. | +| Load | Handle large validation diagnostics without truncating JSON. | Invalid bundle with 1,000 diagnostics. | +| Security | Path policy blocks workspace escape before artifact parsing. | Traversal and symlink tests. | +| Accessibility | Text output includes diagnostic codes and remediation without relying on color. | Snapshot with color disabled. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-017-holmes-weslaw-assess-cli.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-017-holmes-weslaw-assess-cli.md new file mode 100644 index 00000000..1a832bb8 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-017-holmes-weslaw-assess-cli.md @@ -0,0 +1,117 @@ +--- +title: HLAW-017 holmes weslaw assess CLI +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-017 `holmes weslaw assess` CLI + +## Feature Overview & Objectives + +### Problem Statement + +After evidence validates, operators need Holmes to evaluate law findings and +gates under a selected policy profile. The assessment command must produce a +structured report document and machine-readable verdict while preserving the +boundary that Wesley owns semantic law facts. It must not blur validation errors +with assurance failures. + +### Target User/Audience + +- CI maintainers running law assurance as a merge or release gate. +- Local developers checking whether law evidence would pass profile policy. +- Release reviewers consuming JSON or text summaries. +- Agents that need structured gates and findings. 
+ +### Success Metrics + +| KPI | Target | +| --- | --- | +| Gate completeness | Assessment output includes semantic findings, coverage gates, traceability gates, and provenance when evidence is valid. | +| Profile clarity | Every assessment names the selected policy profile. | +| Fail-on control | `--fail-on` behavior is documented for validation errors, failed gates, warnings, and unavailable gates. | + +## Scope Definition + +### In Scope + +- Define command: `holmes weslaw assess --bundle <path> --policy <path>`. +- Support `--profile <name>`, `--format text|json`, `--fail-on <level>`, and + `--output <path>` as design requirements. +- Run validation first; assessment proceeds only on valid evidence. +- Produce `LawAssuranceReportDocument`, findings, gate decisions, and verdict + summary. +- Define missing optional artifact behavior and policy selection behavior. + +### Out of Scope + +- No GitHub publishing. +- No final standalone rendering command behavior beyond immediate command + output. +- No policy schema details beyond consuming a valid policy. +- No suppression and override implementation. +- No calling Wesley to generate missing artifacts. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a CI maintainer, I want assessment to fail the job when release-required law gates fail. | +| US-002 | As a local developer, I want advisory output without hard failure when using a local profile. | +| US-003 | As an agent, I want JSON findings and gates so that I can summarize next actions. | +| US-004 | As a QA engineer, I want validation failures to stop assessment before any verdict is emitted. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Valid evidence with a failing release coverage gate | The user runs `holmes weslaw assess --profile ci-release --fail-on fail` | The command exits failure and emits a failed gate. 
| +| US-002 | Valid evidence with advisory warnings under local profile | The command runs with `--fail-on fail` | The command exits success and reports warnings. | +| US-003 | `--format json` is supplied | The command runs | Output contains report document, verdict, findings, gates, and validation summary. | +| US-004 | Evidence validation is invalid | The command runs | Assessment stops and emits only validation result with validation exit category. | +| US-004 | Policy profile is unknown | The command runs | The command fails with unknown-profile diagnostic before gate evaluation. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Passing release assessment | Happy | clean bundle plus strict policy | Success verdict. | +| TS-002 | Failing coverage gate | Negative | missing required coverage | Failed verdict and exit failure. | +| TS-003 | Advisory local warning | Happy | advisory coverage gap | Warning verdict, success with `--fail-on fail`. | +| TS-004 | Invalid evidence | Negative | malformed law diff | Validation result only. | +| TS-005 | Unknown profile | Negative | policy missing profile | Unknown profile diagnostic. | +| TS-006 | Unavailable optional capability evidence | Edge | local partial bundle | Unavailable section per policy. | + +### Happy Path Testing + +1. Run assessment over clean release fixtures. +2. Assert validation passes before gates run. +3. Assert report document includes law diff, coverage, capabilities, provenance, + and traceability sections. +4. Assert exit behavior follows `--fail-on`. +5. Snapshot JSON output. + +### Negative/Edge Case Testing + +- Invalid inputs: missing bundle, invalid bundle, missing policy, unknown + profile, unsupported policy version, invalid `--fail-on`, failed gates, + unavailable required evidence, and warning-only outcomes. 
+- Timeouts: adapter timeouts during artifact load produce validation or + infrastructure errors before assessment; assessment itself is CPU-only. +- Concurrent users or retries: repeated assessment with same fake clock and + inputs produces identical report JSON. +- Broken dependencies: no GitHub dependency is allowed; file writer failures + are surfaced only when `--output` is requested. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Assess a 10,000-finding bundle under 2 seconds after bytes are loaded. | CLI integration benchmark. | +| Load | JSON output must remain valid for large reports and avoid terminal-only truncation. | Large fixture snapshots. | +| Security | Policy and bundle paths follow locator confinement rules. | Path traversal policy tests. | +| Accessibility | Text output includes status words, counts, and diagnostics without color dependence. | Snapshot with color disabled. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-018-holmes-weslaw-report-cli.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-018-holmes-weslaw-report-cli.md new file mode 100644 index 00000000..a6241f29 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-018-holmes-weslaw-report-cli.md @@ -0,0 +1,112 @@ +--- +title: HLAW-018 holmes weslaw report CLI +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-018 `holmes weslaw report` CLI + +## Feature Overview & Objectives + +### Problem Statement + +Assessment and rendering are separate concerns. Operators need a command that +renders an existing `LawAssuranceReportDocument` into terminal text, Markdown, +or JSON without revalidating evidence or re-evaluating gates. This allows CI +pipelines to archive structured reports once, then render them for different +surfaces. 
+ +### Target User/Audience + +- CI workflows rendering saved law reports into artifacts. +- Local developers previewing Markdown before PR publication. +- GitHub publishers reusing Markdown from an existing report. +- QA engineers snapshotting renderer behavior separately from assessment. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Separation of concerns | Report command never reads raw law diff, coverage, or manifest artifacts. | +| Renderer parity | Markdown, text, and JSON renderers use the same report document input. | +| Output safety | File overwrite behavior is explicit and deterministic. | + +## Scope Definition + +### In Scope + +- Define command: `holmes weslaw report --report <path> --format text|markdown|json`. +- Support `--output <path>`, `--overwrite`, and stdout behavior. +- Validate report document version before rendering. +- Define Markdown, terminal text, and JSON rendering requirements at a product + level. +- Define snapshot tests for each renderer and overwrite policy. + +### Out of Scope + +- No evidence validation. +- No gate evaluation. +- No GitHub publishing. +- No report mutation except renderer-specific output. +- No remote report loading. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a CI maintainer, I want to render a saved report to Markdown so that it can be uploaded or posted later. | +| US-002 | As a local developer, I want terminal text output so that I can inspect assessment results quickly. | +| US-003 | As a QA engineer, I want renderer snapshots so that presentation drift is visible. | +| US-004 | As an operator, I want explicit overwrite controls so that report artifacts are not accidentally replaced. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A valid report document path and `--format markdown` | The command runs | Markdown is emitted to stdout or output path without re-running assessment. 
| +| US-002 | A valid report and `--format text` | The command runs | Terminal-safe text includes verdict, gates, and section summaries. | +| US-003 | The same report is rendered twice | The command runs | Output bytes are identical for the same format and width setting. | +| US-004 | Output file exists and `--overwrite` is absent | The command runs | Command fails with overwrite diagnostic and leaves file unchanged. | +| US-004 | Report document version is unsupported | The command runs | Command fails with unsupported report version diagnostic. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Markdown render | Happy | complete report fixture | Markdown snapshot. | +| TS-002 | Text render | Happy | complete report fixture | Text snapshot. | +| TS-003 | JSON pass-through render | Happy | complete report fixture | Normalized JSON snapshot. | +| TS-004 | Existing output without overwrite | Negative | temp output path exists | Overwrite diagnostic. | +| TS-005 | Unsupported report version | Negative | future report fixture | Unsupported version diagnostic. | +| TS-006 | Large report render | Load | 10,000 findings report | Truncated display, valid output. | + +### Happy Path Testing + +1. Render the same valid report to text, Markdown, and JSON. +2. Assert renderers use section order from the report document. +3. Verify stdout and file-output modes. +4. Snapshot output with stable width and color disabled. + +### Negative/Edge Case Testing + +- Invalid inputs: missing `--report`, missing file, malformed report JSON, + unsupported version, unknown format, invalid output path, existing output + without overwrite, and directory output path. +- Timeouts: file read/write timeout is an infrastructure error; rendering is + CPU-only after bytes load. 
+- Concurrent users or retries: rendering to the same output path without + overwrite must fail deterministically; distinct paths can run concurrently. +- Broken dependencies: no GitHub or network dependency is allowed. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Render a 10,000-finding report to Markdown under 1 second. | Renderer benchmark. | +| Load | Large reports must preserve machine-readable counts even when display rows truncate. | Large report fixture. | +| Security | Markdown renderer must escape untrusted law ids, subjects, and paths. | Injection fixtures. | +| Accessibility | Text and Markdown include headings and status words, not color-only signals. | Snapshot with color disabled and heading assertions. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-019-law-assurance-artifact-writer.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-019-law-assurance-artifact-writer.md new file mode 100644 index 00000000..03678f37 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-019-law-assurance-artifact-writer.md @@ -0,0 +1,120 @@ +--- +title: HLAW-019 LawAssuranceArtifactWriter +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-019 LawAssuranceArtifactWriter + +## Feature Overview & Objectives + +### Problem Statement + +CI and local workflows need deterministic artifacts from validation, +assessment, and rendering: validation JSON, report document JSON, rendered +Markdown, rendered text, and audit-ready metadata. If each command writes files +ad hoc, artifact names, overwrite behavior, byte stability, and directory +creation rules will diverge. + +`LawAssuranceArtifactWriter` defines the local output adapter for reproducible +Holmes law assurance artifacts. + +### Target User/Audience + +- CI maintainers archiving law assurance outputs. 
+- Local developers comparing generated reports. +- QA engineers snapshotting deterministic artifact bytes. +- Future GitHub publishers linking to local or workflow artifacts. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Deterministic bytes | Repeated writes from identical inputs produce byte-identical files. | +| Naming consistency | Artifact filenames follow one documented naming convention. | +| Safe writes | Existing files are not overwritten unless policy explicitly allows it. | + +## Scope Definition + +### In Scope + +- Define default artifact names: validation result, assessment report document, + rendered Markdown, rendered text, rendered JSON, and writer manifest. +- Define output directory creation, temp-file write, atomic replace, overwrite + policy, collision handling, and file permissions. +- Define deterministic serialization settings. +- Define writer manifest with artifact role, filename, media type, byte length, + content hash, and creation clock value. +- Define failure handling for unwritable directories, collisions, partial + writes, and disk-full simulation. + +### Out of Scope + +- No remote artifact upload. +- No GitHub Actions artifact API integration. +- No retention or cleanup policy beyond local write behavior. +- No compression or signing. +- No assessment logic. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a CI maintainer, I want Holmes to write predictable artifact names so that workflow upload steps are stable. | +| US-002 | As a QA engineer, I want deterministic bytes and content hashes so that snapshots catch real changes. | +| US-003 | As a local developer, I want safe overwrite controls so that manual artifacts are not lost. | +| US-004 | As a future publisher, I want a writer manifest so that report links can be generated without rediscovering files. 
| + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Output directory is empty | Holmes writes validation and report artifacts | Files use documented names and roles. | +| US-002 | The same report is written twice to separate directories | Holmes writes artifacts | File bytes and content hashes match. | +| US-003 | A target file exists and overwrite is false | Holmes writes artifacts | Writer fails before replacing the file. | +| US-004 | Artifacts are written successfully | Holmes writes writer manifest | Manifest lists role, path, media type, byte length, and hash for each artifact. | +| US-004 | A write fails after temp file creation | Holmes handles failure | Partial temp files are cleaned up where possible and final manifest is not written as success. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Write complete artifact set | Happy | report and validation fixtures | Files and manifest written. | +| TS-002 | Repeat write in fresh directory | Happy | same inputs | Byte-identical outputs. | +| TS-003 | Collision without overwrite | Negative | existing report file | Collision diagnostic. | +| TS-004 | Atomic overwrite enabled | Edge | existing generated file | Atomic replace and updated manifest. | +| TS-005 | Unwritable directory | Negative | permission-denied temp dir | Infrastructure diagnostic. | +| TS-006 | Disk-full simulation | Edge | fake writer adapter | Partial write failure, no success manifest. | + +### Happy Path Testing + +1. Write validation result, report document, Markdown, text, and JSON renderings + to an empty temp directory. +2. Verify filenames, media types, byte counts, hashes, and manifest contents. +3. Repeat in a second temp directory and compare hashes. +4. Assert all writes use injected clock for manifest timestamps. 
+ +### Negative/Edge Case Testing + +- Invalid inputs: empty output dir path, output path outside workspace, file + where directory is expected, collision without overwrite, invalid media type, + manifest hash mismatch after write, and non-UTF-8 rendered text. +- Timeouts: fake writer timeout produces infrastructure diagnostic and no + success manifest. +- Concurrent users or retries: concurrent writes to same directory must fail or + lock according to documented collision policy; distinct directories succeed. +- Broken dependencies: filesystem errors are surfaced without changing domain + assessment results. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Write a 25 MB artifact set in under 2 seconds on local disk. | Temp-directory benchmark. | +| Load | Writer manifest handles 1,000 artifacts without quadratic sorting. | Synthetic artifact list. | +| Security | Output paths are workspace-confined by default. | Traversal and symlink output tests. | +| Accessibility | Rendered text and Markdown artifacts preserve headings and status labels. | Snapshot generated outputs. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-020-law-assurance-exit-code-policy.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-020-law-assurance-exit-code-policy.md new file mode 100644 index 00000000..5456c076 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-020-law-assurance-exit-code-policy.md @@ -0,0 +1,116 @@ +--- +title: HLAW-020 LawAssuranceExitCodePolicy +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-020 LawAssuranceExitCodePolicy + +## Feature Overview & Objectives + +### Problem Statement + +Holmes law assurance commands need exit codes that are predictable for humans +and CI. 
Validation errors, failed gates, warnings, unavailable advisory +evidence, publisher failures, and internal errors are different outcomes. If +they all return one generic failure code, CI cannot decide whether to fix +workflow wiring, add law coverage, or retry infrastructure. + +### Target User/Audience + +- CI maintainers configuring branch protection and release gates. +- CLI users interpreting command failures. +- QA engineers writing negative command tests. +- Future GitHub publishers mapping local command outcomes to checks. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Outcome specificity | Every command outcome maps to one documented exit category. | +| CI predictability | `--fail-on` changes only warning/gate handling, not validation error handling. | +| Test coverage | Each exit category has at least one command-level fixture. | + +## Scope Definition + +### In Scope + +- Define exit categories for success, success-with-warnings, validation failure, + assurance failure, unavailable required evidence, publisher failure, + infrastructure error, usage error, and internal error. +- Define numeric exit code recommendations for POSIX CLI behavior. +- Define `--fail-on` semantics for warnings, failed gates, unavailable gates, + and validation failures. +- Define command-specific application for `validate`, `assess`, and `report`. +- Define negative tests for every category. + +### Out of Scope + +- No GitHub check conclusion mapping in this slice. +- No MCP error-code mapping. +- No branch protection configuration. +- No suppression or override policy. +- No retry policy beyond preserving distinct infrastructure errors. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a CI maintainer, I want failed release gates to exit differently from malformed evidence so that workflow failures are actionable. 
| +| US-002 | As a local developer, I want warning-only assessments to optionally succeed so that local exploration is not blocked. | +| US-003 | As a QA engineer, I want one fixture per exit category so that regressions are obvious. | +| US-004 | As a CLI user, I want usage errors to be distinct from internal errors so that I know when I supplied bad flags. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Evidence validation fails | Any law assurance command validates inputs | The exit category is validation failure regardless of `--fail-on`. | +| US-001 | Assessment has a failed release gate | `--fail-on fail` is active | The exit category is assurance failure. | +| US-002 | Assessment has warnings only | `--fail-on fail` is active | The command exits success-with-warnings. | +| US-002 | Assessment has warnings only | `--fail-on warn` is active | The command exits assurance failure or warning-failure according to documented code table. | +| US-004 | User supplies unknown format | The command parses flags | The exit category is usage error. | +| US-004 | An unexpected panic boundary is caught | The command exits | The exit category is internal error and no success artifact is written. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Clean validate | Happy | clean bundle | Success code. | +| TS-002 | Invalid evidence | Negative | malformed law diff | Validation failure code. | +| TS-003 | Failed gate | Negative | missing required coverage | Assurance failure code. | +| TS-004 | Warning-only local assessment | Happy | advisory warning | Success-with-warnings or warning failure per flag. | +| TS-005 | Unknown CLI flag | Negative | `--wat` | Usage error code. | +| TS-006 | Writer failure | Edge | unwritable output path | Infrastructure error code. | + +### Happy Path Testing + +1. 
Run `validate`, `assess`, and `report` happy-path fixtures. +2. Verify success or success-with-warnings codes. +3. Run assessment warning fixtures with each `--fail-on` mode. +4. Snapshot command stderr/stdout and exit categories. + +### Negative/Edge Case Testing + +- Invalid inputs: malformed evidence, unknown profile, failed gates, unavailable + required evidence, unknown format, missing required CLI args, writer failure, + publisher failure placeholder, and internal error injection. +- Timeouts: artifact locator timeout maps to infrastructure error, not internal + error or assurance failure. +- Concurrent users or retries: exit code depends only on command result, not on + concurrent run order. +- Broken dependencies: unavailable GitHub publisher in future commands maps to + publisher failure, not evidence validation failure. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Exit policy evaluation is constant-time relative to report size after summary counts are known. | Unit benchmark over synthetic summaries. | +| Load | Large diagnostic lists do not affect selected exit category except by severity maximum. | Large validation result fixture. | +| Security | Usage errors must not echo secret environment values. | Fixture with secret-like env var and bad flags. | +| Accessibility | CLI text always names the exit category and primary reason. | Snapshot stderr/stdout with color disabled. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-021-github-law-assurance-comment.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-021-github-law-assurance-comment.md new file mode 100644 index 00000000..e5808177 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-021-github-law-assurance-comment.md @@ -0,0 +1,116 @@ +--- +title: HLAW-021 GitHubLawAssuranceComment +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-021 GitHubLawAssuranceComment + +## Feature Overview & Objectives + +### Problem Statement + +Reviewers need a concise PR comment that summarizes Holmes `weslaw` assurance +without dumping full JSON or duplicating comments on every CI rerun. The comment +must be idempotent, clearly marked as Holmes output, and faithful to the report +document. It must not hide validation errors behind polished summaries or imply +that Holmes owns Wesley semantic truth. + +### Target User/Audience + +- PR reviewers reading law assurance evidence in GitHub. +- CI maintainers configuring comment publishing. +- Holmes adapter developers implementing GitHub output. +- QA engineers testing idempotent updates and Markdown safety. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Idempotent publishing | Repeated runs update one sticky comment instead of creating duplicates. | +| Reviewer usefulness | Comment includes verdict, gate summary, high-risk findings, and artifact links when available. | +| Markdown safety | Untrusted law ids, subjects, and paths are escaped in rendered Markdown. | + +## Scope Definition + +### In Scope + +- Define a sticky PR comment body for `LawAssuranceReportDocument`. +- Define hidden marker format, update behavior, author matching, and stale + comment replacement. 
+- Include summary verdict, validation status, gate table, semantic diff + highlights, coverage summary, capability posture disclaimer, provenance hash + family, and evidence links. +- Define truncation and "details omitted" behavior. +- Define behavior when validation fails and no assessment report exists. + +### Out of Scope + +- No GitHub Check Run API implementation. +- No inline annotation creation. +- No merge decision or branch protection mutation. +- No CodeRabbit interaction. +- No artifact upload. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a PR reviewer, I want one updated Holmes law assurance comment so that I can review current evidence without comment spam. | +| US-002 | As a CI maintainer, I want validation failures rendered clearly so that broken evidence wiring is not mistaken for a law failure. | +| US-003 | As a Holmes adapter developer, I want a stable hidden marker so that comment updates are deterministic. | +| US-004 | As a QA engineer, I want Markdown injection fixtures so that untrusted law fields are escaped. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A PR already has a Holmes law comment marker | Publisher runs again | The existing comment is updated rather than creating a new one. | +| US-002 | Evidence validation fails | Publisher renders comment | Comment headline says validation failed and omits assurance verdict claims. | +| US-003 | No prior marker exists | Publisher runs | A new comment is created with the hidden marker. | +| US-004 | Law id contains Markdown table syntax | Renderer builds comment | The law id is escaped or code-formatted safely. | +| US-004 | Report exceeds max comment size | Renderer builds comment | Comment truncates deterministically and links to full artifact when available. 
| + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | First publish | Happy | report fixture, no prior comment | New marked comment. | +| TS-002 | Update existing comment | Happy | prior marker comment | Single comment updated. | +| TS-003 | Validation failure | Negative | validation result only | Validation failure comment. | +| TS-004 | Large report | Load | 10,000 findings report | Truncated comment and omitted counts. | +| TS-005 | Markdown injection | Security | crafted law ids | Escaped Markdown. | +| TS-006 | GitHub API failure | Edge | fake publisher error | Publisher failure surfaced separately. | + +### Happy Path Testing + +1. Render a comment from a passing report fixture. +2. Publish to a fake GitHub comment store with no marker. +3. Publish again with updated report contents. +4. Assert one comment exists and contains current verdict, gates, provenance, + and evidence links. + +### Negative/Edge Case Testing + +- Invalid inputs: missing report and missing validation result, duplicate + existing markers, malformed artifact links, comment body over GitHub size + limit, missing PR number, and unauthorized publisher. +- Timeouts: GitHub API timeout maps to publisher failure, not assessment + failure. +- Concurrent users or retries: concurrent publishers must use marker lookup and + update semantics to avoid duplicate comments where GitHub API permits. +- Broken dependencies: if GitHub is unavailable, local artifacts remain valid + and publishing failure is reported separately. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Render comment from a large report in under 500 ms. | Renderer benchmark. | +| Load | Comment body must stay under configured size limit with omitted counts. | Large report fixture. | +| Security | Escape Markdown and HTML-sensitive fields. | Injection fixture snapshots. 
| +| Accessibility | Comment uses headings, tables, and text statuses independent of color. | Markdown snapshot review. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-022-github-law-gate-check-summary.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-022-github-law-gate-check-summary.md new file mode 100644 index 00000000..3b640806 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-022-github-law-gate-check-summary.md @@ -0,0 +1,111 @@ +--- +title: HLAW-022 GitHubLawGateCheckSummary +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-022 GitHubLawGateCheckSummary + +## Feature Overview & Objectives + +### Problem Statement + +PR reviewers need a compact GitHub-facing gate summary that distinguishes pass, +warn, fail, and unavailable law assurance states. The summary should explain +whether failures are validation failures, coverage gates, traceability gates, or +policy warnings. It should not be confused with GitHub branch protection rules, +which are configured outside Holmes. + +### Target User/Audience + +- Reviewers scanning PR readiness. +- CI maintainers deciding how Holmes output maps to required checks. +- Holmes GitHub adapter developers. +- QA engineers testing stale evidence and blocked-merge wording. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Summary clarity | 100% of summaries name primary blocking reason when state is fail or unavailable. | +| Gate fidelity | Gate states are copied from Holmes assessment, not recomputed in GitHub adapter. | +| Stale evidence detection | Stale or mismatched bundle evidence is highlighted before other advisory notes. 
| + +## Scope Definition + +### In Scope + +- Define summary fields for GitHub comment and future check-run body: conclusion, + state label, primary reason, gate counts, failed gates, warning gates, + unavailable gates, validation status, and stale evidence flag. +- Define wording for pass, warn, fail, unavailable, and validation-failed + summaries. +- Define stale evidence summary from traceability gate details. +- Define required-versus-advisory gate presentation. + +### Out of Scope + +- No creation of GitHub Checks. +- No branch protection mutation. +- No override controls beyond displaying current state. +- No rerun trigger. +- No GitHub annotation line mapping. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a reviewer, I want one sentence explaining whether law assurance is pass, warn, fail, or unavailable. | +| US-002 | As a CI maintainer, I want failed required gates separated from advisory warnings. | +| US-003 | As a release maintainer, I want stale evidence called out before semantic findings. | +| US-004 | As a QA engineer, I want each state covered by snapshots. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | All gates pass | Holmes builds GitHub summary | State is `pass` and primary reason says required law gates passed. | +| US-002 | Advisory warnings exist but required gates pass | Holmes builds summary | State is `warn` with required gate count passing. | +| US-002 | A required coverage gate fails | Holmes builds summary | State is `fail` and failed required gate count is nonzero. | +| US-003 | Traceability gate fails from stale law diff | Holmes builds summary | Primary reason names stale evidence and artifact role. | +| US-004 | Validation failed before assessment | Holmes builds summary | State is `fail` with validation failure wording, not gate failure wording. 
| + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | All pass | Happy | passing report | Pass summary. | +| TS-002 | Warning-only | Happy | advisory warning report | Warn summary. | +| TS-003 | Required gate failure | Negative | failed coverage report | Fail summary. | +| TS-004 | Traceability stale evidence | Negative | failed traceability report | Stale evidence primary reason. | +| TS-005 | Validation failure | Negative | validation result only | Validation failure summary. | +| TS-006 | Unavailable required evidence | Edge | unavailable required gate | Unavailable or fail according to policy. | + +### Happy Path Testing + +1. Build summaries from pass and warn report fixtures. +2. Assert state labels, counts, and primary reasons. +3. Verify required and advisory counts are separated. +4. Snapshot renderer-neutral summary JSON. + +### Negative/Edge Case Testing + +- Invalid inputs: no gates and no validation result, conflicting verdict and + gate counts, unknown gate state, missing primary failed gate, and stale flag + without mismatch details. +- Timeouts: summary construction is CPU-only and uses no GitHub calls. +- Concurrent users or retries: summary output is pure for the same report. +- Broken dependencies: GitHub publishing failure is not part of summary state. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Build summary in under 10 ms for 1,000 gates. | Synthetic gate benchmark. | +| Load | Gate counts scale without row rendering. | Large gate fixture. | +| Security | Summary strings escape untrusted gate names in renderers. | Crafted gate id fixtures. | +| Accessibility | State label and primary reason are text fields. | Contract test non-color summary fields. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-023-github-law-finding-annotations.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-023-github-law-finding-annotations.md new file mode 100644 index 00000000..0ed57396 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-023-github-law-finding-annotations.md @@ -0,0 +1,114 @@ +--- +title: HLAW-023 GitHubLawFindingAnnotations +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-023 GitHubLawFindingAnnotations + +## Feature Overview & Objectives + +### Problem Statement + +Some law findings have source file and line context, but many are bundle-level +or artifact-level. Holmes needs a safe mapping from findings to GitHub +annotations where line context exists, with deterministic fallback to PR comment +bullets where it does not. Bad annotation mapping can create noisy, misleading, +or rate-limited reviews. + +### Target User/Audience + +- PR reviewers looking for inline context. +- GitHub adapter developers implementing annotation publishing. +- CI maintainers concerned about API rate limits. +- QA engineers testing deduplication and no-line fallback behavior. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Annotation eligibility | 100% of annotations require file path, line, severity, and source artifact context. | +| Fallback safety | Findings without line context remain visible in summary comments. | +| Rate-limit control | Annotation count respects configurable maximums with omitted counts. | + +## Scope Definition + +### In Scope + +- Define annotation eligibility for findings with workspace-relative file path + and valid line range. +- Define annotation fields: path, start line, end line, severity, title, + message, finding id, law id, subject, and artifact reference. +- Define deduplication by finding id and source location. 
+- Define fallback row generation for no-line or invalid-line findings. +- Define rate-limit and max-annotation truncation behavior. + +### Out of Scope + +- No GitHub API implementation details. +- No Checks API decision. +- No attempt to infer line numbers from raw law ids. +- No source file mutation. +- No resolving stale annotations after file changes beyond deterministic + eligibility checks. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a reviewer, I want inline annotations for findings with precise source lines so that I can inspect law changes near source. | +| US-002 | As a reviewer, I want no-line findings still visible so that bundle-level issues are not lost. | +| US-003 | As a CI maintainer, I want annotation limits so that large reports do not exhaust GitHub API budgets. | +| US-004 | As a QA engineer, I want deterministic deduplication so that reruns do not duplicate annotations. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A finding has valid file and line context | Holmes maps annotations | One annotation candidate is produced. | +| US-002 | A finding lacks line context | Holmes maps annotations | No annotation is produced and fallback comment item is emitted. | +| US-003 | Eligible annotations exceed max count | Holmes maps annotations | First deterministic set is emitted and omitted count is recorded. | +| US-004 | Two findings share id and location | Holmes maps annotations | Duplicate annotation is suppressed. | +| US-004 | A line number is outside file bounds when bounds are available | Holmes maps annotations | Finding falls back to summary item with invalid-location reason. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Finding with valid source line | Happy | finding with path/line | Annotation candidate. 
| +| TS-002 | Bundle-level finding | Happy | no-line finding | Fallback item. | +| TS-003 | Duplicate finding location | Edge | duplicate finding ids | One annotation. | +| TS-004 | Max annotations exceeded | Load | 1,000 eligible findings | Truncated annotations and omitted count. | +| TS-005 | Invalid path escape | Security | path `../law.json` | No annotation, security diagnostic. | +| TS-006 | Invalid line range | Negative | start line greater than end | Fallback with invalid-location reason. | + +### Happy Path Testing + +1. Map a mixed finding set with line and no-line findings. +2. Assert valid-line findings produce annotation candidates. +3. Assert no-line findings produce fallback comment items. +4. Verify deterministic order and deduplication. + +### Negative/Edge Case Testing + +- Invalid inputs: path traversal, absolute path when disabled, invalid line + range, missing severity, missing finding id, duplicate candidate, and too many + annotations. +- Timeouts: annotation mapping does not call GitHub; optional file-bound checks + use injected repository file metadata. +- Concurrent users or retries: mapping is pure and deterministic. +- Broken dependencies: absent file metadata does not block annotation creation + if line context is syntactically valid, but records unchecked bounds. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Map 10,000 findings in under 300 ms. | Synthetic finding benchmark. | +| Load | Annotation truncation must preserve omitted counts. | Large eligible finding fixture. | +| Security | Annotation paths are workspace-relative and sanitized. | Traversal and absolute path fixtures. | +| Accessibility | Annotation messages include text severity and law id. | Contract test annotation fields. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-024-github-law-evidence-links.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-024-github-law-evidence-links.md new file mode 100644 index 00000000..3016fd3e --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-024-github-law-evidence-links.md @@ -0,0 +1,112 @@ +--- +title: HLAW-024 GitHubLawEvidenceLinks +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-024 GitHubLawEvidenceLinks + +## Feature Overview & Objectives + +### Problem Statement + +GitHub comments and summaries should link reviewers to law evidence artifacts, +CI runs, bundle manifests, and rendered reports. Links must be explicit, +sanitized, and honest about retention or expiration. Broken or unsafe links +make assurance output hard to audit and can expose reviewers to untrusted URL +content. + +### Target User/Audience + +- PR reviewers opening full law reports and raw evidence artifacts. +- CI maintainers configuring artifact retention and URL generation. +- Holmes GitHub adapter developers. +- QA engineers testing missing, expired, and unsafe links. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Link traceability | Every published evidence link includes role, label, URL, and retention note where known. | +| Safety | Unsafe URL schemes are rejected before Markdown rendering. | +| Missing-link clarity | Missing artifacts produce unavailable link rows, not broken Markdown. | + +## Scope Definition + +### In Scope + +- Define evidence link objects for report document, validation result, law diff, + coverage report, capability summary, bundle manifest, CI run, and workflow + artifact. +- Define allowed URL schemes and Markdown link escaping. +- Define retention metadata: expiresAt, retentionDays, unavailable, unknown, or + local-only. +- Define missing artifact behavior and stale link warnings. 
+- Define ordering and grouping in GitHub comments. + +### Out of Scope + +- No upload of artifacts. +- No GitHub API calls to discover artifact URLs. +- No link shortening service. +- No dashboard or static site. +- No authentication or signed URL generation. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a reviewer, I want links to full reports and raw evidence so that I can audit summary claims. | +| US-002 | As a CI maintainer, I want retention notes so that reviewers understand when workflow artifacts may expire. | +| US-003 | As a security reviewer, I want unsafe link schemes rejected so that PR comments cannot publish malicious links. | +| US-004 | As a QA engineer, I want missing links represented explicitly so snapshots do not contain broken anchors. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Report artifact URL is available | Holmes builds link set | Link includes role `reportDocument`, label, URL, and media type. | +| US-002 | Artifact retention is 14 days | Holmes builds link set | Link metadata includes retention note. | +| US-003 | URL uses `javascript:` scheme | Holmes validates link | Link is rejected with unsafe scheme diagnostic. | +| US-004 | Coverage artifact was unavailable | Holmes builds link set | Coverage link row is marked unavailable without Markdown URL. | +| US-004 | A local-only path has no public URL | Holmes builds GitHub links | Link row states local-only and does not expose unsafe absolute paths. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Complete CI link set | Happy | report with artifact URLs | Grouped evidence links. | +| TS-002 | Missing optional coverage link | Edge | unavailable coverage | Unavailable row. | +| TS-003 | Unsafe scheme | Security | `javascript:` URL | Link validation failure. 
| +| TS-004 | Expiring artifact | Happy | expiresAt metadata | Retention note. | +| TS-005 | Long URL | Edge | very long artifact URL | Escaped and preserved. | +| TS-006 | Local absolute path | Security | `/Users/...` path | Not published by default. | + +### Happy Path Testing + +1. Build evidence links from report attachments and CI metadata. +2. Verify roles, labels, URLs, media types, and retention notes. +3. Render Markdown link rows through a test renderer. +4. Snapshot deterministic ordering. + +### Negative/Edge Case Testing + +- Invalid inputs: unsafe URL scheme, missing label, duplicate role/id, malformed + URL, local absolute path, missing retention metadata, and expired artifact. +- Timeouts: link construction performs no network checks. +- Concurrent users or retries: link ordering is deterministic and pure. +- Broken dependencies: missing CI metadata yields unavailable links rather than + failing assessment. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Build 1,000 evidence links in under 50 ms. | Synthetic link benchmark. | +| Load | Link grouping handles large attachment sets without duplicate scans. | Large attachment fixture. | +| Security | Allow only `https` and explicitly approved repository-relative link schemes. | Unsafe scheme fixture suite. | +| Accessibility | Link labels describe destination role, not "click here". | Snapshot link labels. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-025-github-law-override-controls.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-025-github-law-override-controls.md new file mode 100644 index 00000000..2a5848f9 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-025-github-law-override-controls.md @@ -0,0 +1,115 @@ +--- +title: HLAW-025 GitHubLawOverrideControls +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-025 GitHubLawOverrideControls + +## Feature Overview & Objectives + +### Problem Statement + +Some advisory law warnings may need maintainers to acknowledge risk without +blocking a PR, but invalid evidence and non-overridable release failures must +remain blocked. Holmes needs a design for GitHub-visible override controls that +are explicit, auditable, and policy-bound. The controls must not hide findings +or create a backdoor around validation failures. + +### Target User/Audience + +- Maintainers acknowledging advisory law warnings. +- Release managers defining non-overridable gates. +- Reviewers auditing why a warning was accepted. +- QA engineers testing abuse-prevention and audit records. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Auditability | 100% of accepted overrides produce an audit record with actor, reason, finding/gate id, and timestamp source. | +| Safety | Validation failures and non-overridable gates cannot be overridden. | +| Visibility | Overridden warnings remain visible in comments and reports. | + +## Scope Definition + +### In Scope + +- Define policy-controlled override eligibility for advisory findings and + warning gates. +- Define GitHub surfaces: label, checkbox command, or explicit comment command + as candidate interfaces for later implementation. 
+- Define required audit fields: actor, source, reason, target id, previous + state, resulting state, timestamp, and policy profile. +- Define non-overridable categories: validation failure, malformed evidence, + traceability failure where policy marks required, and failed required gates. +- Define drift checkpoint at HLAW-025. + +### Out of Scope + +- No implementation of GitHub labels, slash commands, or checkbox parsing yet. +- No branch protection integration. +- No permanent storage backend. +- No suppression of findings from reports. +- No override of Wesley semantic law classifications. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a maintainer, I want to acknowledge an advisory warning with a reason so that a PR can proceed while keeping an audit trail. | +| US-002 | As a release manager, I want required failures to be non-overridable so that invalid evidence cannot ship. | +| US-003 | As a reviewer, I want overridden warnings to remain visible so that risk is not hidden. | +| US-004 | As a QA engineer, I want override eligibility tested per policy profile. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A policy marks advisory coverage warning as overridable | Maintainer submits valid override with reason | Holmes records an audit candidate and marks warning acknowledged. | +| US-002 | Evidence validation failed | Maintainer attempts override | Holmes rejects override as non-overridable. | +| US-002 | Required traceability gate failed | Maintainer attempts override | Holmes rejects override unless policy explicitly allows a non-release profile exception. | +| US-003 | A warning is overridden | Holmes renders GitHub summary | The warning remains visible with acknowledged status and reason reference. | +| US-004 | Override has no reason | Holmes validates override | Override is rejected with missing-reason diagnostic. 
| + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Advisory warning override | Happy | warning gate plus valid override | Acknowledged warning and audit record. | +| TS-002 | Missing reason | Negative | override without reason | Rejected. | +| TS-003 | Validation failure override attempt | Negative | invalid evidence result | Non-overridable rejection. | +| TS-004 | Required gate override attempt | Negative | failed required gate | Rejected by default. | +| TS-005 | Expired override | Edge | override past expiration | Ignored or rejected per policy. | +| TS-006 | Duplicate override target | Edge | two overrides for same warning | Deterministic latest/duplicate policy. | + +### Happy Path Testing + +1. Evaluate advisory warning under a policy that allows acknowledgement. +2. Apply a valid override candidate from a fake GitHub actor. +3. Verify audit fields and acknowledged warning state. +4. Render summary showing the warning remains visible. + +### Negative/Edge Case Testing + +- Invalid inputs: missing actor, missing reason, unknown finding id, expired + override, duplicate override, non-overridable validation failure, required + gate failure, and profile mismatch. +- Timeouts: override evaluation uses injected clock and no wall-clock calls. +- Concurrent users or retries: duplicate override submissions must produce + deterministic audit ordering. +- Broken dependencies: if GitHub actor identity is unavailable, override is not + accepted. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Evaluate 1,000 override candidates in under 100 ms. | Synthetic override benchmark. | +| Load | Audit records remain append-only and sorted deterministically. | Large override fixture. | +| Security | Override reason is untrusted text and must be escaped later. | Injection reason fixture. 
| +| Accessibility | Acknowledged state includes text labels and reason reference. | Contract test summary fields. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-026-mcp-assess-weslaw-bundle-tool.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-026-mcp-assess-weslaw-bundle-tool.md new file mode 100644 index 00000000..0f746e84 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-026-mcp-assess-weslaw-bundle-tool.md @@ -0,0 +1,115 @@ +--- +title: HLAW-026 McpAssessWeslawBundleTool +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-026 McpAssessWeslawBundleTool + +## Feature Overview & Objectives + +### Problem Statement + +Agents need a structured way to ask Holmes to assess a `weslaw` evidence bundle +without shelling out through CLI text or scraping GitHub comments. The MCP tool +must expose the same validation and assessment semantics as the CLI while +respecting workspace authorization and artifact path policy. + +### Target User/Audience + +- MCP agents assisting reviewers with law assurance evidence. +- Holmes MCP adapter developers. +- Workspace administrators controlling which files tools may read. +- QA engineers testing request/response schemas and error mapping. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| CLI parity | MCP assessment returns the same gates and findings as `holmes weslaw assess` for the same inputs. | +| Schema stability | Request and response schemas are versioned and fixture-tested. | +| Authorization clarity | Unauthorized bundle paths fail before artifact parsing. | + +## Scope Definition + +### In Scope + +- Define MCP tool name `holmes.assessWeslawBundle`. +- Define request fields: bundle URI/path, policy URI/path, profile, output + detail level, max findings, and include report document flag. 
+- Define response fields: validation result, verdict, gate summary, findings, + report references, diagnostics, and omitted-detail accounting. +- Define workspace authorization and path confinement requirements. +- Define error mapping for invalid request, unauthorized path, validation + failure, assessment failure, and infrastructure failure. + +### Out of Scope + +- No MCP server implementation. +- No remote URL fetching unless future workspace policy allows it. +- No GitHub publishing. +- No long-running job queue. +- No mutation of source files or evidence artifacts. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As an agent, I want to assess a law evidence bundle through MCP so that I can reason over structured gates and findings. | +| US-002 | As a workspace admin, I want MCP bundle access confined to authorized workspace paths. | +| US-003 | As a Holmes developer, I want MCP results to match CLI assessment semantics. | +| US-004 | As a QA engineer, I want deterministic examples for success and failure responses. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A valid request references a clean bundle and policy | The MCP tool runs | Response includes validation status, verdict, gates, and findings. | +| US-002 | Request path points outside authorized workspace | The MCP tool runs | Response is an authorization error before artifact parsing. | +| US-003 | CLI and MCP use the same clean fixture | Both assessments run | Gate ids, finding ids, and verdict match. | +| US-004 | Request asks for max 20 findings | The MCP tool runs on 100 findings | Response includes 20 findings and omitted count 80. | +| US-004 | Evidence validation fails | The MCP tool runs | Response includes validation result and no assessment verdict. 
| + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Clean MCP assessment | Happy | clean bundle and policy | Verdict and gates returned. | +| TS-002 | CLI parity | Happy | same fixture through CLI and MCP | Matching ids and verdict. | +| TS-003 | Unauthorized path | Security | outside workspace bundle path | Authorization error. | +| TS-004 | Invalid request schema | Negative | missing bundle field | Invalid request error. | +| TS-005 | Max findings truncation | Load | large report fixture | Omitted count reported. | +| TS-006 | Validation failure | Negative | malformed evidence | Validation result only. | + +### Happy Path Testing + +1. Submit a valid MCP request for a clean local bundle. +2. Compare response to CLI assessment fixture output. +3. Verify response schema version, verdict, gates, findings, and report + references. +4. Snapshot compact and full-detail responses. + +### Negative/Edge Case Testing + +- Invalid inputs: missing bundle, unsupported request version, unknown profile, + unauthorized path, path traversal, invalid max findings, missing policy, and + malformed evidence. +- Timeouts: request-level timeout returns MCP timeout error with partial work + discarded unless response streaming is designed later. +- Concurrent users or retries: concurrent requests over same read-only fixtures + produce identical responses. +- Broken dependencies: filesystem adapter failure maps to infrastructure error; + no GitHub dependency exists. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Respond to clean small bundle in under 1 second after request dispatch. | MCP harness benchmark. | +| Load | Large responses respect max findings and omitted-detail accounting. | 10,000-finding fixture. | +| Security | Workspace authorization happens before path canonicalization reads file contents. 
| Unauthorized path tests. | +| Accessibility | Response includes text status fields agents can quote directly. | Schema contract test. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-027-mcp-law-evidence-resources.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-027-mcp-law-evidence-resources.md new file mode 100644 index 00000000..90fc5708 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-027-mcp-law-evidence-resources.md @@ -0,0 +1,112 @@ +--- +title: HLAW-027 McpLawEvidenceResources +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-027 McpLawEvidenceResources + +## Feature Overview & Objectives + +### Problem Statement + +Agents often need to inspect raw law evidence, normalized validation results, +and rendered reports after an assessment. MCP resources provide a stable, +read-only way to expose these artifacts without inventing ad hoc file paths or +embedding large blobs in every tool response. + +### Target User/Audience + +- MCP agents retrieving evidence details on demand. +- Holmes MCP adapter developers defining resource URIs. +- Workspace admins enforcing access rules. +- QA engineers testing caching, invalid references, and schema examples. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Resource consistency | All law evidence resources use one documented URI scheme. | +| Access control | Unauthorized or stale resource ids fail with clear MCP errors. | +| Payload discipline | Large resources can be summarized or paged without corrupting canonical artifacts. | + +## Scope Definition + +### In Scope + +- Define MCP resource URI family: `holmes://weslaw/<assessment-id>/<role>`. +- Define resources for law diff, law coverage, capability summary, bundle + manifest, validation result, report document, rendered Markdown, and audit + witness placeholder. 
+- Define cache keys based on assessment id, artifact role, and content hash. +- Define stale, missing, unauthorized, and invalid resource behavior. +- Define schema examples for small resources. + +### Out of Scope + +- No remote artifact storage. +- No resource mutation. +- No long-term retention guarantees. +- No dashboard or static site. +- No authentication design beyond workspace authorization requirements. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As an agent, I want to fetch the normalized report document resource so that I can answer detailed reviewer questions. | +| US-002 | As a workspace admin, I want resources scoped to an assessment id so that arbitrary files are not exposed. | +| US-003 | As a Holmes developer, I want resource ids tied to content hashes so that stale reads are detectable. | +| US-004 | As a QA engineer, I want resource schema examples for every supported role. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Assessment response includes report resource URI | Agent reads the resource | MCP returns the report document with declared media type. | +| US-002 | Agent requests a URI for unknown assessment id | Resource resolver runs | Resolver returns not found without reading arbitrary paths. | +| US-003 | Stored content hash differs from requested resource metadata | Resolver reads resource | Resolver returns stale resource diagnostic. | +| US-004 | Each supported role has a schema example | Docs validation runs | Examples validate against resource response schema. | +| US-004 | Resource payload exceeds configured inline size | Resolver returns resource | Response includes summary or paging metadata according to policy. 
| + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Read report resource | Happy | valid assessment id | Report payload returned. | +| TS-002 | Read law diff resource | Happy | valid law diff role | Law diff payload returned. | +| TS-003 | Unknown assessment id | Negative | nonexistent id | Not found. | +| TS-004 | Unauthorized resource | Security | different workspace id | Authorization error. | +| TS-005 | Stale content hash | Edge | changed artifact bytes | Stale diagnostic. | +| TS-006 | Large resource | Load | large report document | Size policy applied. | + +### Happy Path Testing + +1. Run a fake assessment that registers resource metadata. +2. Read each supported resource role by URI. +3. Verify media type, content hash, and payload or summary metadata. +4. Snapshot response schema examples. + +### Negative/Edge Case Testing + +- Invalid inputs: malformed URI, unknown assessment id, unknown role, + unauthorized workspace, stale content hash, missing backing artifact, + unsupported media type, and resource over max inline size. +- Timeouts: resource read timeout returns MCP resource error with role and id. +- Concurrent users or retries: repeated resource reads are idempotent and do not + mutate cache state except allowed read-through metadata. +- Broken dependencies: missing local artifact after assessment returns stale or + missing resource, not reassessment. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Resolve small resource in under 100 ms after authorization. | MCP resource harness benchmark. | +| Load | Large resources respect size limits and report omitted counts. | Large artifact fixture. | +| Security | URI parser prevents path traversal and arbitrary file reads. | Malformed URI and traversal tests. | +| Accessibility | Resource summaries include text role and status fields. 
| Schema contract test. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-028-mcp-explain-law-finding-tool.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-028-mcp-explain-law-finding-tool.md new file mode 100644 index 00000000..1d728511 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-028-mcp-explain-law-finding-tool.md @@ -0,0 +1,113 @@ +--- +title: HLAW-028 McpExplainLawFindingTool +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-028 McpExplainLawFindingTool + +## Feature Overview & Objectives + +### Problem Statement + +Agents need to explain one law finding without dumping the entire report. The +explanation must cite source artifact references, preserve Wesley's event kind, +name the policy posture, and suggest next actions without pretending to +autonomously fix semantic law. + +### Target User/Audience + +- Review agents answering "what does this finding mean?" +- PR reviewers inspecting a specific failed gate or semantic change. +- Holmes MCP developers defining explanation responses. +- QA engineers testing missing finding and citation fallback behavior. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Finding specificity | 100% of explanations include finding id, law id, subject, event kind, and source artifact reference. | +| Citation discipline | Explanations cite report or evidence resources when available. | +| Boundary clarity | Suggested next actions never say Holmes should edit or rebind `weslaw` automatically. | + +## Scope Definition + +### In Scope + +- Define MCP tool `holmes.explainLawFinding`. +- Define request fields: assessment id, finding id, detail level, and optional + include related gates flag. +- Define response fields: finding identity, plain-language explanation, source + citations, related gates, policy posture, next actions, and unavailable data + reasons. 
+- Define missing finding, stale assessment, and citation fallback behavior. +- Define deterministic explanation templates by event kind and gate type. + +### Out of Scope + +- No generative free-form diagnosis as authoritative output. +- No source file edits. +- No GitHub comment posting. +- No recomputation of semantic diffs. +- No cross-repo lookup. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As an agent, I want to explain a finding id so that a reviewer gets focused context. | +| US-002 | As a reviewer, I want source citations so that I can inspect the underlying evidence. | +| US-003 | As a Wesley maintainer, I want explanations to preserve event kind and avoid reclassification. | +| US-004 | As a QA engineer, I want missing finding behavior to be deterministic. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A valid finding id exists in assessment | The MCP tool runs | Response includes explanation, finding identity, and next actions. | +| US-002 | Source artifact resource is available | The tool explains finding | Response includes citation to artifact resource URI. | +| US-002 | Source artifact resource is unavailable | The tool explains finding | Response includes citation fallback reason. | +| US-003 | Finding event kind is `footprintExpanded` | The tool explains finding | Explanation uses that event kind and does not rename posture. | +| US-004 | Finding id is unknown | The tool runs | Response is not found with available finding count and no fabricated explanation. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Explain weakened scalar | Happy | finding fixture | Explanation with citations. | +| TS-002 | Explain footprint expansion | Happy | footprint finding | Preserved event kind. 
| +| TS-003 | Unknown finding id | Negative | nonexistent id | Not found. | +| TS-004 | Missing citation resource | Edge | source unavailable | Fallback reason. | +| TS-005 | Related failed gate | Happy | finding linked to gate | Related gate included. | +| TS-006 | Crafted finding text | Security | injection strings | Explanation treats strings as data. | + +### Happy Path Testing + +1. Register assessment with findings and resources. +2. Explain one finding by id. +3. Verify identity fields, event kind, citations, policy posture, and next + actions. +4. Snapshot response for each supported high-level event template. + +### Negative/Edge Case Testing + +- Invalid inputs: missing assessment id, unknown finding id, stale assessment, + unauthorized assessment, invalid detail level, missing citation resource, and + finding without related gate. +- Timeouts: explanation uses stored report/resources and no network calls. +- Concurrent users or retries: explanation is deterministic for same assessment. +- Broken dependencies: resource lookup failure yields citation fallback, not + fabricated source. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Explain one finding in under 50 ms after assessment lookup. | MCP tool benchmark. | +| Load | Large assessments use indexed finding lookup, not linear scan where avoidable. | 100,000 finding fixture. | +| Security | Explanation escapes untrusted finding strings in renderer-facing fields. | Injection fixture. | +| Accessibility | Explanation includes plain text summary and next-action fields. | Schema contract test. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-029-mcp-law-policy-tool.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-029-mcp-law-policy-tool.md new file mode 100644 index 00000000..1fb2d5d5 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-029-mcp-law-policy-tool.md @@ -0,0 +1,115 @@ +--- +title: HLAW-029 McpLawPolicyTool +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-029 McpLawPolicyTool + +## Feature Overview & Objectives + +### Problem Statement + +Agents need to inspect active law assurance policy without parsing local config +files or guessing which profile is active. The MCP policy tool must expose +thresholds, required gates, severity mappings, override rules, and +non-overridable checks in a safe, redacted, versioned response. + +### Target User/Audience + +- Agents explaining why a gate passed or failed. +- Release maintainers reviewing active policy profile. +- Holmes policy developers validating profile selection behavior. +- QA engineers testing redaction, unknown profiles, and stale policy detection. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Policy transparency | Tool response lists required gates, thresholds, and non-overridable checks for the selected profile. | +| Redaction safety | Secret or environment-derived policy values are never exposed. | +| Profile correctness | Unknown profiles fail with deterministic diagnostics. | + +## Scope Definition + +### In Scope + +- Define MCP tool `holmes.getLawPolicy`. +- Define request fields: policy path or assessment id, profile id, include + defaults flag, and redaction mode. +- Define response fields: policy version, profile, thresholds, severity + mappings, required evidence, override eligibility, non-overridable checks, + source reference, and stale policy flag. 
+- Define redaction rules for secrets, token-like values, and local absolute + paths. +- Define unknown profile and unsupported policy version errors. + +### Out of Scope + +- No policy editing. +- No branch protection mutation. +- No remote policy discovery. +- No profile inference from environment variables. +- No private secret reveal for debugging. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As an agent, I want active policy thresholds so that I can explain why a law gate failed. | +| US-002 | As a release maintainer, I want non-overridable checks listed so that override boundaries are clear. | +| US-003 | As a security reviewer, I want sensitive policy fields redacted in MCP responses. | +| US-004 | As a QA engineer, I want unknown profiles and stale policy states tested. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Valid policy and profile are supplied | The tool runs | Response includes thresholds and severity mappings for that profile. | +| US-002 | Policy marks validation failures non-overridable | The tool runs | Response lists validation failure under non-overridable checks. | +| US-003 | Policy source contains token-like field | The tool runs | Response redacts the field and records redaction count. | +| US-004 | Profile id is unknown | The tool runs | Response is unknown-profile error with available profile ids. | +| US-004 | Assessment policy hash differs from current policy hash | The tool runs by assessment id | Response marks policy stale. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Read release policy | Happy | valid policy fixture | Thresholds and required gates returned. | +| TS-002 | Read local policy | Happy | local profile fixture | Advisory posture returned. 
| +| TS-003 | Unknown profile | Negative | missing profile id | Unknown-profile error. | +| TS-004 | Unsupported policy version | Negative | future policy fixture | Unsupported version error. | +| TS-005 | Secret field redaction | Security | token-like values | Redacted response. | +| TS-006 | Stale assessment policy | Edge | mismatched policy hash | Stale flag. | + +### Happy Path Testing + +1. Invoke tool with valid policy and release profile. +2. Assert thresholds, severity mappings, required evidence, and + non-overridable checks are present. +3. Invoke with local profile and compare advisory differences. +4. Snapshot redacted response. + +### Negative/Edge Case Testing + +- Invalid inputs: missing policy reference, unsupported version, unknown + profile, malformed policy, invalid threshold, duplicate profile id, stale + assessment policy hash, and unauthorized path. +- Timeouts: policy file read timeout maps to MCP infrastructure error. +- Concurrent users or retries: policy response is deterministic for same policy + bytes and redaction mode. +- Broken dependencies: no GitHub dependency; unavailable assessment id returns + not found. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Return policy response in under 100 ms after bytes are loaded. | MCP policy benchmark. | +| Load | Policies with 1,000 mappings remain serializable and sorted. | Large policy fixture. | +| Security | Redaction covers token-like keys, secret-like values, and local absolute paths. | Redaction fixture suite. | +| Accessibility | Response includes text profile and rule descriptions. | Schema contract test. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-030-agent-safe-law-summary.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-030-agent-safe-law-summary.md new file mode 100644 index 00000000..3718b606 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-030-agent-safe-law-summary.md @@ -0,0 +1,113 @@ +--- +title: HLAW-030 AgentSafeLawSummary +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-030 AgentSafeLawSummary + +## Feature Overview & Objectives + +### Problem Statement + +Agents need compact law assurance summaries that fit token budgets while +preserving enough structure to avoid hallucinated conclusions. A full report +document can be too large for routine context, and a Markdown comment can be +too presentation-oriented. `AgentSafeLawSummary` provides a bounded, structured +summary with omitted-detail accounting and artifact references. + +### Target User/Audience + +- Agents summarizing PR law assurance state. +- MCP tool clients requesting compact responses. +- CLI users asking for `--summary agent`. +- QA engineers testing token budgets and omitted-detail accounting. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Budget adherence | Summary respects configured max findings, max gates, and text byte budget. | +| No hidden omission | Every omitted category reports omitted count and resource reference. | +| Decision clarity | Summary includes validation status, verdict, top blockers, warnings, and next artifact references. | + +## Scope Definition + +### In Scope + +- Define `AgentSafeLawSummary` schema for CLI and MCP use. +- Include compact fields: status, verdict, profile, bundle hash, top blockers, + warning count, finding count, gate count, evidence links/resources, omitted + counts, and recommended next inspection resource. 
+- Define severity grouping and deterministic selection of top findings/gates. +- Define token/byte budget controls and fallback when budget is too small. +- Define parity requirements between CLI and MCP summary output. + +### Out of Scope + +- No natural-language generation as source of truth. +- No replacement for full report document. +- No GitHub comment formatting. +- No agent autonomy or automatic fixing. +- No cross-repo summary aggregation. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As an agent, I want a compact structured summary so that I can brief a user without loading the full report. | +| US-002 | As a reviewer, I want omitted-detail counts so that I know when to open the full report. | +| US-003 | As an MCP client, I want the same summary shape as the CLI so that tool behavior is predictable. | +| US-004 | As a QA engineer, I want deterministic top-blocker selection under tight budgets. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A failing report has multiple gates and findings | Holmes builds summary | Summary includes verdict, top blockers, counts, and report resource reference. | +| US-002 | Findings exceed max summary count | Holmes builds summary | Summary includes omitted finding count and full report reference. | +| US-003 | CLI and MCP request same budget | Holmes builds summaries | Structured fields match except transport metadata. | +| US-004 | Budget permits only one blocker | Holmes builds summary | Highest-priority blocker is selected by documented sort key. | +| US-004 | Budget is too small for required fields | Holmes builds summary | Summary returns minimal status plus budget-exceeded diagnostic. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Passing compact summary | Happy | passing report | Status, counts, resource refs. 
| +| TS-002 | Failing compact summary | Happy | failed report | Top blockers and counts. | +| TS-003 | Tight budget | Edge | budget one blocker | Deterministic top blocker. | +| TS-004 | Tiny impossible budget | Negative | budget below minimum | Minimal diagnostic summary. | +| TS-005 | CLI/MCP parity | Happy | same report and budget | Matching fields. | +| TS-006 | Large report | Load | 100,000 findings | Bounded summary time and size. | + +### Happy Path Testing + +1. Build summary from passing and failing report fixtures. +2. Verify status, verdict, profile, bundle hash, counts, blockers, warnings, + omitted counts, and artifact references. +3. Compare CLI and MCP summary output for parity. +4. Snapshot deterministic ordering under multiple budgets. + +### Negative/Edge Case Testing + +- Invalid inputs: missing report id, no resource references, invalid budget, + unsupported summary version, inconsistent counts after report bypass, and + missing verdict. +- Timeouts: summary construction is CPU-only and uses no wall-clock time. +- Concurrent users or retries: summary selection must be pure and deterministic. +- Broken dependencies: missing full report resource yields warning in summary, + not invented detail. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Summarize 100,000 findings in under 500 ms using precomputed counts. | Large report benchmark. | +| Load | Summary output must stay below configured byte budget. | Budget fixture tests. | +| Security | All strings remain untrusted data for callers/renderers. | Injection fixture. | +| Accessibility | Summary uses explicit text status and next-inspection labels. | Schema contract test. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-031-law-assurance-policy-schema.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-031-law-assurance-policy-schema.md new file mode 100644 index 00000000..3168dc48 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-031-law-assurance-policy-schema.md @@ -0,0 +1,114 @@ +--- +title: HLAW-031 LawAssurancePolicySchema +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-031 LawAssurancePolicySchema + +## Feature Overview & Objectives + +### Problem Statement + +Holmes law assurance needs a versioned policy schema that defines required +evidence, profiles, thresholds, severity mappings, override eligibility, and +non-overridable checks. Without a typed schema, local and CI policies will drift +into undocumented YAML or JSON conventions that commands, MCP tools, and GitHub +publishers interpret differently. + +### Target User/Audience + +- Release managers defining `ci-release` law assurance posture. +- Local developers using advisory profiles. +- Holmes policy developers validating policy files. +- QA engineers testing unknown fields, defaults, and profile inheritance. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Schema validation | 100% of policy files validate against `holmes.law-assurance-policy/v1`. | +| Profile explicitness | Every assessment identifies exactly one policy profile. | +| Default safety | Unknown fields and implicit environment-derived behavior are rejected unless explicitly allowed. | + +## Scope Definition + +### In Scope + +- Define `holmes.law-assurance-policy/v1` top-level fields: version, profiles, + defaultProfile, severityMappings, coverageThresholds, requiredEvidence, + overrideRules, failOn defaults, redaction policy, and schema metadata. +- Define JSON Schema publication requirement for the policy authoring format. 
+- Define profile inheritance rules and explicit override precedence. +- Define unknown-field behavior, default materialization, and deterministic + normalized policy representation. +- Define validation diagnostics for malformed policy files. + +### Out of Scope + +- No policy UI. +- No policy editing command. +- No branch protection configuration. +- No Rego/CEL expression language. +- No environment-variable-driven implicit profiles. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a release manager, I want a versioned policy file so that law assurance gates are explicit and reviewable. | +| US-002 | As a local developer, I want profile inheritance so that local policy can differ from release policy without duplicating every field. | +| US-003 | As a Holmes developer, I want normalized policy output so that CLI and MCP evaluate the same rules. | +| US-004 | As a QA engineer, I want malformed policies rejected with stable diagnostics. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A valid v1 policy file is supplied | Holmes validates policy | Policy is accepted and normalized with version preserved. | +| US-002 | Local profile inherits from base and overrides warning threshold | Holmes normalizes policy | Inherited fields are materialized deterministically with local override applied. | +| US-003 | CLI and MCP load same policy | Holmes normalizes policy | Normalized JSON is identical. | +| US-004 | Policy contains unknown top-level field | Holmes validates policy | Validation fails with `HLAW_POLICY_UNKNOWN_FIELD` unless extension mode is explicitly enabled. | +| US-004 | Policy has no profiles | Holmes validates policy | Validation fails with `HLAW_POLICY_MISSING_PROFILE`. 
| + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Valid release policy | Happy | `fixtures/hlaw/policy/release.json` | Accepted normalized policy. | +| TS-002 | Profile inheritance | Happy | base plus local profile | Materialized inherited fields. | +| TS-003 | Unknown field | Negative | unknown top-level field | Unknown-field diagnostic. | +| TS-004 | Missing profiles | Negative | empty profile map | Missing profile diagnostic. | +| TS-005 | Invalid threshold | Negative | threshold 150 | Threshold diagnostic. | +| TS-006 | Large mapping set | Load | 1,000 severity mappings | Deterministic normalization. | + +### Happy Path Testing + +1. Validate a release policy fixture. +2. Normalize inherited local and release profiles. +3. Validate against JSON Schema. +4. Snapshot normalized policy JSON and diagnostics. + +### Negative/Edge Case Testing + +- Invalid inputs: malformed JSON, unsupported version, unknown field, missing + profiles, circular inheritance, invalid threshold, duplicate profile id, + unknown severity, and implicit environment profile reference. +- Timeouts: policy validation reads only provided policy bytes and uses no + network calls. +- Concurrent users or retries: normalized policy is immutable and deterministic. +- Broken dependencies: schema file missing in test harness fails policy schema + validation tests, not runtime policy semantics. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Normalize a 1,000-rule policy in under 100 ms. | Synthetic policy benchmark. | +| Load | Inheritance resolution must detect cycles without recursion overflow. | Deep inheritance fixtures. | +| Security | Redaction policy prevents secret-like values from MCP exposure. | Policy redaction fixture. | +| Accessibility | Policy diagnostics include code, profile, and field path as text. 
| Snapshot diagnostics. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-032-law-severity-mapping-policy.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-032-law-severity-mapping-policy.md new file mode 100644 index 00000000..d673fa00 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-032-law-severity-mapping-policy.md @@ -0,0 +1,111 @@ +--- +title: HLAW-032 LawSeverityMappingPolicy +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-032 LawSeverityMappingPolicy + +## Feature Overview & Objectives + +### Problem Statement + +Wesley classifies semantic law changes, but Holmes must map those event kinds +and coverage gaps into reviewer-facing severities under a policy profile. +Severity mapping is policy, not semantic truth. It must be explicit, testable, +profile-specific, and unable to mutate Wesley's original event classification. + +### Target User/Audience + +- Release managers deciding which law changes block release. +- Local developers using warning-oriented profiles. +- Holmes assessment developers applying severity policy. +- QA engineers testing unmapped events and profile differences. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Classification preservation | 100% of findings retain original Wesley event kind after severity mapping. | +| Mapping completeness | Release profiles define explicit behavior for every supported high-risk event kind. | +| Unmapped safety | Unmapped event kinds produce deterministic diagnostics or default posture. | + +## Scope Definition + +### In Scope + +- Define severity mapping policy for law diff event kinds, coverage gate states, + traceability gates, validation warnings, and unavailable evidence. +- Define severity values: info, advisory, warning, error, critical. +- Define profile-specific mappings and fallback behavior. 
+- Define diagnostics for unmapped event kinds when policy requires exhaustive + mappings. +- Define tests proving severity mapping does not change event kind, posture, law + id, subject, or gate state. + +### Out of Scope + +- No semantic law diff classification. +- No user interface for editing mappings. +- No branch protection setup. +- No suppression or override behavior. +- No expression language for dynamic severity. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a release manager, I want law weakening events mapped to error or critical so that risky changes block release. | +| US-002 | As a local developer, I want the same event kinds mapped less strictly in local profiles. | +| US-003 | As a Wesley maintainer, I want original event kinds preserved so Holmes policy cannot rewrite compiler truth. | +| US-004 | As a QA engineer, I want unmapped event behavior covered by fixtures. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Release policy maps `lawRemoved` to critical | Holmes maps severity | Finding severity becomes critical and event kind remains `lawRemoved`. | +| US-002 | Local policy maps `coverageGap` to warning | Holmes maps severity | Gate finding is warning under local profile. | +| US-003 | A finding has event kind `footprintExpanded` | Severity mapping runs | The output still records original event kind `footprintExpanded`. | +| US-004 | Policy is exhaustive and event kind is unmapped | Mapping runs | Assessment fails with `HLAW_SEVERITY_UNMAPPED_EVENT_KIND`. | +| US-004 | Policy has default mapping | Unknown advisory event is mapped | Default severity applies and diagnostic records fallback use. 
| + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Release weakening event | Happy | weakening finding plus release policy | Error or critical severity. | +| TS-002 | Local advisory event | Happy | same finding plus local policy | Warning or advisory severity. | +| TS-003 | Unmapped exhaustive policy | Negative | unknown event | Unmapped diagnostic. | +| TS-004 | Default fallback policy | Edge | unknown advisory event | Default severity plus fallback record. | +| TS-005 | Invalid severity string | Negative | policy typo | Policy validation diagnostic. | +| TS-006 | Large mapping table | Load | 1,000 mappings | Fast deterministic lookup. | + +### Happy Path Testing + +1. Apply release and local severity policies to the same finding set. +2. Assert severities differ where profile says so. +3. Assert event kind, law id, subject, and posture are unchanged. +4. Snapshot mapped finding outputs. + +### Negative/Edge Case Testing + +- Invalid inputs: unknown severity, unmapped event under exhaustive policy, + duplicate mapping, impossible gate state, missing default, and profile not + found. +- Timeouts: mapping is CPU-only and deterministic. +- Concurrent users or retries: severity mapping uses immutable normalized policy. +- Broken dependencies: invalid policy prevents assessment before mapping. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Map 100,000 findings in under 250 ms. | Synthetic finding benchmark. | +| Load | Mapping lookup must be O(1) or O(log n). | Large mapping benchmark. | +| Security | Policy strings are data and never executed. | Injection-like mapping keys. | +| Accessibility | Mapped severity includes text label and original event kind. | Contract test mapped fields. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-033-law-coverage-threshold-policy.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-033-law-coverage-threshold-policy.md new file mode 100644 index 00000000..e1e02488 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-033-law-coverage-threshold-policy.md @@ -0,0 +1,112 @@ +--- +title: HLAW-033 LawCoverageThresholdPolicy +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-033 LawCoverageThresholdPolicy + +## Feature Overview & Objectives + +### Problem Statement + +Law coverage gates depend on profile-specific thresholds by category. A release +profile may require 100% mutation footprint coverage, while local development +may only warn below 80%. Threshold policy must define pass/warn/fail behavior, +rounding, absent-category behavior, and unavailable-evidence posture in a way +that is deterministic and easy to test. + +### Target User/Audience + +- Release managers setting coverage requirements. +- Local developers interpreting advisory coverage gaps. +- Holmes gate evaluators applying thresholds. +- QA engineers testing boundary values and missing categories. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Boundary correctness | Threshold tests cover exact-boundary, one-below, and absent-category cases. | +| Profile specificity | Each profile can define category-specific warning and failure thresholds. | +| Missing-subject actionability | Failing threshold decisions preserve missing subject evidence. | + +## Scope Definition + +### In Scope + +- Define threshold policy fields for category id, required/advisory status, + warning threshold, failure threshold, unavailable behavior, absent-category + behavior, precision, and display limit. +- Define percentage calculation and rounding rules. +- Define profile inheritance and category default behavior. 
+- Define pass/warn/fail/unavailable decision inputs for + `LawCoverageGateDecision`. +- Define boundary-value test matrix. + +### Out of Scope + +- No coverage computation. +- No severity mapping outside coverage gates. +- No suppression or override. +- No GitHub rendering. +- No policy UI. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a release manager, I want mutation footprint coverage to fail below 100% so that release-required operations have law. | +| US-002 | As a local developer, I want scalar semantic coverage to warn below a lower threshold so that local runs stay informative. | +| US-003 | As a QA engineer, I want exact rounding rules so that 99.995% does not behave differently across platforms. | +| US-004 | As a reviewer, I want absent categories called out instead of treated as 0% or 100% silently. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Release threshold is 100% and coverage is 99/100 | Holmes evaluates coverage | Gate fails and missing subjects are listed. | +| US-002 | Local warning threshold is 80% and coverage is 75% | Holmes evaluates coverage | Gate warns under local profile. | +| US-003 | Coverage ratio has repeating decimal | Holmes evaluates coverage | Percentage uses documented precision and deterministic rounding. | +| US-004 | Required category is absent | Holmes evaluates coverage | Gate state follows absent-category policy and records absence. | +| US-004 | Coverage evidence is unavailable | Holmes evaluates coverage | Gate state follows unavailable-evidence policy. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Exact pass threshold | Happy | 100/100 with 100% threshold | Pass. | +| TS-002 | One below failure threshold | Negative | 99/100 with 100% threshold | Fail. 
| +| TS-003 | Advisory warning threshold | Happy | 75/100 with 80% warn | Warn. | +| TS-004 | Boundary rounding | Edge | 2/3 with configured precision | Deterministic percentage. | +| TS-005 | Absent category | Edge | category missing | Policy-specific outcome. | +| TS-006 | Unavailable coverage | Edge | no coverage evidence | Policy-specific outcome. | + +### Happy Path Testing + +1. Load normalized policy with coverage thresholds. +2. Evaluate category coverage across release and local profiles. +3. Assert pass/warn/fail/unavailable states and missing subject preservation. +4. Snapshot boundary-value matrix outputs. + +### Negative/Edge Case Testing + +- Invalid inputs: threshold below 0 or above 100, warning threshold inconsistent + with failure threshold, invalid precision, missing category id, unknown + unavailable behavior, and duplicate category policy. +- Timeouts: threshold evaluation is CPU-only. +- Concurrent users or retries: percentage calculation and rounding are pure. +- Broken dependencies: invalid coverage evidence prevents threshold evaluation. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Evaluate 10,000 category thresholds in under 100 ms. | Synthetic coverage benchmark. | +| Load | Missing-subject display limit avoids large summary expansion. | Large missing-subject fixture. | +| Security | Category ids are data and never used as paths. | Crafted category id tests. | +| Accessibility | Decision output includes text status, actual percentage, and threshold. | Contract test decision fields. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-034-law-assurance-suppression-policy.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-034-law-assurance-suppression-policy.md new file mode 100644 index 00000000..721bc0ae --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-034-law-assurance-suppression-policy.md @@ -0,0 +1,115 @@ +--- +title: HLAW-034 LawAssuranceSuppressionPolicy +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-034 LawAssuranceSuppressionPolicy + +## Feature Overview & Objectives + +### Problem Statement + +Some known advisory findings may need temporary suppression so that teams can +ship while tracking debt. Suppression is dangerous: it can hide real risk if it +applies too broadly or if it can suppress invalid evidence. Holmes needs a +policy-bound suppression model that is narrow, expiring, auditable, and unable +to suppress validation errors or non-overridable gates. + +### Target User/Audience + +- Maintainers managing known advisory law debt. +- Release managers protecting non-overridable checks. +- Reviewers auditing suppressed findings. +- QA engineers testing expiration, abuse prevention, and matching semantics. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Suppression precision | Every suppression targets explicit finding id, gate id, or law subject pattern with bounded scope. | +| Expiration safety | Expired suppressions are ignored and reported. | +| Non-overridable protection | Validation failures and required non-overridable gates cannot be suppressed. | + +## Scope Definition + +### In Scope + +- Define suppression policy fields: id, target type, target selector, profile, + reason, owner, created date, expiration date, allowed severities, and audit + tags. +- Define matching rules for exact finding id, gate id, law id, subject, and + category selector. 
+- Define expiration behavior using injected clock. +- Define diagnostics for broad, expired, malformed, duplicate, and + non-overridable suppression attempts. +- Define report behavior: suppressed findings remain countable and visible in a + suppression summary. + +### Out of Scope + +- No GitHub UI for creating suppressions. +- No database-backed suppression store. +- No suppression of validation errors. +- No suppression of Wesley semantic law facts. +- No permanent waiver workflow. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a maintainer, I want to suppress one known advisory finding with an expiration so that debt is tracked. | +| US-002 | As a release manager, I want invalid evidence and required gate failures to ignore suppression attempts. | +| US-003 | As a reviewer, I want suppressed findings summarized so that risk is still visible. | +| US-004 | As a QA engineer, I want broad wildcard suppressions rejected unless policy explicitly permits them. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Advisory finding matches unexpired suppression | Holmes applies policy | Finding is marked suppressed with suppression id and audit metadata. | +| US-002 | Validation failure matches suppression selector | Holmes applies policy | Suppression is rejected and validation failure remains active. | +| US-002 | Required non-overridable gate matches suppression | Holmes applies policy | Gate remains active and suppression diagnostic is emitted. | +| US-003 | A finding is suppressed | Holmes builds report | Suppression summary includes id, reason, owner, expiration, and target. | +| US-004 | Suppression target is `*` and broad suppressions are disabled | Holmes validates policy | Policy fails with broad suppression diagnostic. 
| + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Exact advisory suppression | Happy | finding plus suppression | Finding marked suppressed. | +| TS-002 | Expired suppression | Edge | expired suppression | Ignored and reported expired. | +| TS-003 | Validation failure suppression | Negative | validation error target | Rejected. | +| TS-004 | Required gate suppression | Negative | non-overridable gate | Rejected. | +| TS-005 | Broad wildcard suppression | Negative | wildcard target | Rejected unless enabled. | +| TS-006 | Duplicate suppression ids | Negative | duplicate ids | Policy validation failure. | + +### Happy Path Testing + +1. Apply suppression policy to a warning-level finding. +2. Verify suppressed state, audit metadata, and report summary. +3. Confirm the underlying finding remains present in machine-readable output. +4. Snapshot suppression summary. + +### Negative/Edge Case Testing + +- Invalid inputs: missing reason, missing owner, expired suppression, duplicate + id, broad wildcard, invalid selector syntax, profile mismatch, severity not + allowed, validation error target, and non-overridable gate target. +- Timeouts: expiration uses fake clock; no wall-clock reads. +- Concurrent users or retries: suppression matching order is deterministic. +- Broken dependencies: invalid policy prevents assessment rather than silently + ignoring suppressions. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Match 10,000 suppressions against 10,000 findings under 1 second using indexed selectors. | Synthetic suppression benchmark. | +| Load | Suppression summary truncates long reason text safely. | Large reason fixture. | +| Security | Suppression reasons and selectors are untrusted display text. | Injection fixtures. | +| Accessibility | Suppressed state includes text labels and reason references. 
| Contract test report fields. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-035-law-assurance-audit-witness.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-035-law-assurance-audit-witness.md new file mode 100644 index 00000000..6241624b --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-035-law-assurance-audit-witness.md @@ -0,0 +1,115 @@ +--- +title: HLAW-035 LawAssuranceAuditWitness +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-035 LawAssuranceAuditWitness + +## Feature Overview & Objectives + +### Problem Statement + +Holmes law assurance needs a deterministic witness artifact that records what +inputs were used, what policy was active, what gates were evaluated, what report +was produced, and which hashes identify the evidence. This witness is not a +replacement for Wesley compiler truth; it is Holmes' audit trail for the +judgment it made over published evidence. + +### Target User/Audience + +- Release auditors reconstructing law assurance decisions. +- CI maintainers archiving deterministic assessment evidence. +- Holmes developers ensuring reproducible assessment outputs. +- QA engineers testing fake-clock and replay behavior. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Replayability | Replaying the same witness inputs with the same policy produces the same gate and report hashes. | +| Hash coverage | Witness records bundle, report, policy, validation, and rendered artifact hashes where available. | +| Clock determinism | Witness timestamps come only from injected clock. | + +## Scope Definition + +### In Scope + +- Define `holmes.law-assurance-witness/v1` artifact. 
+- Include assessment id, command/tool source, input bundle reference, artifact + hashes, policy hash, profile, validation result hash, report document hash, + rendered artifact hashes, gate ids and states, finding counts, suppression + audit records, override audit candidates, and generated-at timestamp. +- Define deterministic serialization and hash calculation. +- Define replay fields needed to compare future assessment output. +- Define fake-clock requirement for tests. + +### Out of Scope + +- No cryptographic signing. +- No remote attestation. +- No storage or retention backend. +- No recomputation of Wesley law hashes. +- No human approval workflow. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a release auditor, I want a witness artifact so that I can reconstruct what Holmes assessed. | +| US-002 | As a CI maintainer, I want hashes for every output artifact so that archived files can be verified later. | +| US-003 | As a QA engineer, I want fake-clock timestamps so that witness snapshots are deterministic. | +| US-004 | As a Holmes developer, I want replay fields so that future regressions can compare assessment results. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Holmes completes assessment | Witness writer runs | Witness includes assessment id, profile, input references, gates, and report hash. | +| US-002 | Artifact writer produced Markdown and JSON files | Witness writer runs | Witness records content hashes for each artifact. | +| US-003 | Fake clock is set to a fixed timestamp | Witness writer runs twice | Witness bytes are identical. | +| US-004 | Witness is replayed with same inputs | Replay comparison runs | Gate state and report hash match. | +| US-004 | Policy hash differs during replay | Replay comparison runs | Replay reports policy mismatch before comparing verdicts. 
| + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Passing assessment witness | Happy | passing report and artifacts | Witness written with hashes. | +| TS-002 | Failing assessment witness | Happy | failed gate report | Witness records failed gates. | +| TS-003 | Fake-clock determinism | Happy | fixed timestamp | Byte-identical witnesses. | +| TS-004 | Replay match | Happy | witness plus same inputs | Match result. | +| TS-005 | Replay policy mismatch | Negative | changed policy hash | Policy mismatch diagnostic. | +| TS-006 | Missing rendered artifact hash | Edge | optional artifact absent | Unavailable recorded. | + +### Happy Path Testing + +1. Generate witness from a complete assessment and artifact manifest. +2. Verify hashes, gate states, finding counts, profile, and timestamp source. +3. Serialize witness twice with fake clock and compare bytes. +4. Run replay comparison against the same normalized assessment output. + +### Negative/Edge Case Testing + +- Invalid inputs: missing assessment id, missing policy hash, invalid artifact + hash, duplicate gate ids, inconsistent finding counts, missing clock, and + replay policy mismatch. +- Timeouts: witness generation uses injected clock and already-written artifact + metadata; no network calls. +- Concurrent users or retries: witness ids and serialization are deterministic + when inputs and clock are fixed. +- Broken dependencies: an artifact writer failure prevents a success witness; a + failure witness may be emitted only if one is explicitly designed later. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Write witness for 10,000 gates/findings in under 300 ms after hashes are available. | Synthetic witness benchmark. | +| Load | Witness stores counts and ids without duplicating full report payloads. | Large assessment fixture. 
| +| Security | Witness excludes secret env vars and local absolute paths by default. | Secret/path redaction fixture. | +| Accessibility | Witness is machine-readable JSON with text status fields for audit tools. | JSON Schema validation and snapshot. | From 527f46ae2bdbec0eef9aa7366239fba57f2120b3 Mon Sep 17 00:00:00 2001 From: James Ross Date: Tue, 26 May 2026 15:26:58 -0700 Subject: [PATCH 4/9] docs(holmes): resolve assurance planning review issues --- docs/BEARING.md | 6 +++--- .../holmes-weslaw-assurance-prd-test-plan.md | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/BEARING.md b/docs/BEARING.md index f7a8558d..cf64f622 100644 --- a/docs/BEARING.md +++ b/docs/BEARING.md @@ -425,9 +425,9 @@ summaries, and the first policy/audit contracts. ## Next Target -The immediate focus is **Holmes `weslaw` assurance planning**: spend the next -50 slices writing implementation-grade PRDs and test plans before the Rust -Holmes assurance integration begins. +The immediate focus is **Holmes `weslaw` assurance planning**: continue the +50-slice PRD/test-plan campaign, with 35 slices closed and 15 remaining before +the Rust Holmes assurance integration begins. Current evidence still includes complete v0.0.5 publication proof, Rust L1 fixtures for directive-heavy SDL, schema extensions, nested list type diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md index eaf767a6..562dd6e3 100644 --- a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md @@ -81,7 +81,7 @@ failure behavior, and test fixtures where known. 
## Chunking -| Chunk | Slices | Planned PR Shape | Purpose | +| Chunk | Slices | Status | Purpose | | --- | --- | --- | --- | | 1 | HLAW-001..HLAW-010 | Complete | Evidence intake and typed domain contracts. | | 2 | HLAW-011..HLAW-020 | Complete | Report model, CLI operator flows, and local artifacts. | From 0670b80f86802fe0cb872a6eb9a2f82e62f619ae Mon Sep 17 00:00:00 2001 From: James Ross Date: Tue, 26 May 2026 15:37:46 -0700 Subject: [PATCH 5/9] docs(holmes): finish law assurance qa and migration prds --- docs/BEARING.md | 18 +-- .../holmes-weslaw-assurance-prd-test-plan.md | 59 +++++++-- ...036-law-assurance-golden-fixture-corpus.md | 122 ++++++++++++++++++ ...7-law-assurance-negative-fixture-corpus.md | 117 +++++++++++++++++ ...-038-law-assurance-fake-clock-and-ports.md | 117 +++++++++++++++++ ...w-assurance-concurrency-and-idempotence.md | 117 +++++++++++++++++ ...AW-040-law-assurance-performance-budget.md | 118 +++++++++++++++++ ...-041-legacy-holmes-law-evidence-mapping.md | 115 +++++++++++++++++ ...-042-holmes-workflow-weslaw-integration.md | 117 +++++++++++++++++ .../HLAW-043-rust-holmes-crate-scaffold.md | 115 +++++++++++++++++ ...LAW-044-transitional-holmes-cli-aliases.md | 114 ++++++++++++++++ .../HLAW-045-law-assurance-operator-docs.md | 119 +++++++++++++++++ 12 files changed, 1227 insertions(+), 21 deletions(-) create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-036-law-assurance-golden-fixture-corpus.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-037-law-assurance-negative-fixture-corpus.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-038-law-assurance-fake-clock-and-ports.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-039-law-assurance-concurrency-and-idempotence.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-040-law-assurance-performance-budget.md create mode 
100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-041-legacy-holmes-law-evidence-mapping.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-042-holmes-workflow-weslaw-integration.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-043-rust-holmes-crate-scaffold.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-044-transitional-holmes-cli-aliases.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-045-law-assurance-operator-docs.md diff --git a/docs/BEARING.md b/docs/BEARING.md index cf64f622..bd26e594 100644 --- a/docs/BEARING.md +++ b/docs/BEARING.md @@ -411,22 +411,25 @@ The active packet is Working budget: **50 slices**. -Status: **35 / 50 slices closed**. The plan allocates `HLAW-001` through +Status: **45 / 50 slices closed**. The plan allocates `HLAW-001` through `HLAW-050` across evidence intake, typed domain contracts, report models, CLI flows, GitHub publishing, MCP surfaces, policy, QA fixtures, determinism, performance budgets, migration, release gates, operator docs, and campaign closeout. Each slice must produce a PRD/test-plan artifact with explicit objectives, scope, user stories, BDD acceptance criteria, and test scenarios. -Closed slices now cover `HLAW-001` through `HLAW-035`: evidence intake, typed +Closed slices now cover `HLAW-001` through `HLAW-045`: evidence intake, typed domain contracts, report sections, CLI operator flows, local artifact writing, exit-code policy, GitHub publishing surfaces, MCP tools/resources, agent-safe -summaries, and the first policy/audit contracts. +summaries, policy/audit contracts, QA fixture corpora, deterministic clock and +port seams, concurrency/idempotence requirements, performance budgets, legacy +Holmes migration mapping, workflow integration, Rust crate scaffold boundaries, +transitional CLI aliases, and operator documentation. 
## Next Target The immediate focus is **Holmes `weslaw` assurance planning**: continue the -50-slice PRD/test-plan campaign, with 35 slices closed and 15 remaining before +50-slice PRD/test-plan campaign, with 45 slices closed and 5 remaining before the Rust Holmes assurance integration begins. Current evidence still includes complete v0.0.5 publication proof, Rust L1 @@ -443,10 +446,9 @@ runtime ownership into the base compiler. The next pull is: -1. `HLAW-036` through `HLAW-040`: write PRD/test-plan artifacts for the golden - fixture corpus, negative fixture corpus, fake clock and ports, concurrency - and idempotence, and performance budget slices that complete the policy/QA - harness chunk. +1. `HLAW-046` through `HLAW-050`: write PRD/test-plan artifacts for schema + versioning, artifact retention, end-to-end workflow, release gate rollout, + and campaign closeout so the planning packet can reach 50/50. ## Post-Retirement Freestyle Slice Log diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md index 562dd6e3..8dd3191c 100644 --- a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md @@ -10,7 +10,7 @@ release: v0.0.8 ## Status -Active planning packet. Slices `HLAW-001` through `HLAW-035` are complete. +Active planning packet. Slices `HLAW-001` through `HLAW-045` are complete. ## Question @@ -86,8 +86,8 @@ failure behavior, and test fixtures where known. | 1 | HLAW-001..HLAW-010 | Complete | Evidence intake and typed domain contracts. | | 2 | HLAW-011..HLAW-020 | Complete | Report model, CLI operator flows, and local artifacts. | | 3 | HLAW-021..HLAW-030 | Complete | GitHub and MCP interfaces over the same assurance use cases. 
| -| 4 | HLAW-031..HLAW-040 | In progress | Policy, QA harnesses, determinism, concurrency, and budgets. | -| 5 | HLAW-041..HLAW-050 | One PR | Migration, release gates, documentation, and campaign closeout. | +| 4 | HLAW-031..HLAW-040 | Complete | Policy, QA harnesses, determinism, concurrency, and budgets. | +| 5 | HLAW-041..HLAW-050 | In progress | Migration, release gates, documentation, and campaign closeout. | Drift checks happen after HLAW-010, HLAW-025, HLAW-040, and HLAW-050. @@ -285,28 +285,28 @@ Drift checks happen after HLAW-010, HLAW-025, HLAW-040, and HLAW-050. outputs, hashes, and the exact gates evaluated by Holmes. - Required output: PRD for witness schema, hash coverage, replay fields, clock injection, and reproducibility tests. -- [ ] HLAW-036 `LawAssuranceGoldenFixtureCorpus` PRD and test plan. +- [x] HLAW-036 `LawAssuranceGoldenFixtureCorpus` PRD and test plan. - Feature/product: A fixture corpus covering clean, warning, failing, malformed, stale, and missing law evidence bundles. - Required output: PRD for fixture naming, expected outputs, snapshot regeneration policy, and cross-platform stability. -- [ ] HLAW-037 `LawAssuranceNegativeFixtureCorpus` PRD and test plan. +- [x] HLAW-037 `LawAssuranceNegativeFixtureCorpus` PRD and test plan. - Feature/product: A negative fixture set for invalid JSON, unsupported versions, hash mismatches, missing artifacts, unknown profiles, and malformed policies. - Required output: PRD for diagnostic codes, exit behavior, fixture isolation, and panic-free guarantees. -- [ ] HLAW-038 `LawAssuranceFakeClockAndPorts` PRD and test plan. +- [x] HLAW-038 `LawAssuranceFakeClockAndPorts` PRD and test plan. - Feature/product: Dependency-injected clock and in-memory ports for deterministic tests across CLI, API, MCP, and GitHub adapters. - Required output: PRD for fake-clock API, no-wall-clock assertions, adapter contracts, and concurrency-safe tests. 
-- [ ] HLAW-039 `LawAssuranceConcurrencyAndIdempotence` PRD and test plan. +- [x] HLAW-039 `LawAssuranceConcurrencyAndIdempotence` PRD and test plan. - Feature/product: Test requirements for repeated, concurrent, and retried assessment/publish operations. - Required output: PRD for idempotent comment updates, artifact overwrite policy, race simulation, and lock-free domain behavior. -- [ ] HLAW-040 `LawAssurancePerformanceBudget` PRD and test plan. +- [x] HLAW-040 `LawAssurancePerformanceBudget` PRD and test plan. - Feature/product: Performance and size budgets for law evidence validation, assessment, rendering, and publishing. - Required output: PRD for benchmark fixtures, large report limits, timeout @@ -314,28 +314,28 @@ Drift checks happen after HLAW-010, HLAW-025, HLAW-040, and HLAW-050. ### Migration, Release Gates, Docs, And Closeout -- [ ] HLAW-041 `LegacyHolmesLawEvidenceMapping` PRD and test plan. +- [x] HLAW-041 `LegacyHolmesLawEvidenceMapping` PRD and test plan. - Feature/product: A mapping from current JavaScript Holmes workflow artifacts to the future Rust Holmes law assurance bundle. - Required output: PRD for retained fields, rejected fields, migration gaps, and compatibility fixtures. -- [ ] HLAW-042 `HolmesWorkflowWeslawIntegration` PRD and test plan. +- [x] HLAW-042 `HolmesWorkflowWeslawIntegration` PRD and test plan. - Feature/product: CI workflow integration that runs Wesley law commands, assembles law evidence, and invokes Holmes assessment. - Required output: PRD for job dependencies, artifact paths, failure propagation, retry behavior, and branch/fork permissions. -- [ ] HLAW-043 `RustHolmesCrateScaffold` PRD and test plan. +- [x] HLAW-043 `RustHolmesCrateScaffold` PRD and test plan. - Feature/product: The initial Rust crate/module structure needed to host law assurance domain, application, reporting, and adapters. 
- Required output: PRD for crate boundaries, public API, dependency rules, compile-time guard tests, and no-GitHub-in-domain enforcement. -- [ ] HLAW-044 `TransitionalHolmesCliAliases` PRD and test plan. +- [x] HLAW-044 `TransitionalHolmesCliAliases` PRD and test plan. - Feature/product: Transitional CLI aliases or wrapper behavior that lets existing workflows call the new law assurance path without reviving legacy Node authority. - Required output: PRD for supported aliases, deprecation messages, exit parity, and removal gates. -- [ ] HLAW-045 `LawAssuranceOperatorDocs` PRD and test plan. +- [x] HLAW-045 `LawAssuranceOperatorDocs` PRD and test plan. - Feature/product: Operator documentation for generating law evidence, running Holmes law assessment, reading findings, and resolving failures. - Required output: PRD for docs locations, command examples, troubleshooting @@ -427,6 +427,39 @@ remaining policy/QA harness work should now pin fixture corpora, fake-clock and port requirements, concurrency/idempotence, and performance budgets before the campaign moves into migration and closeout. +## Drift Check: HLAW-040 + +Date: 2026-05-26. + +Status: **40 / 50 slices closed**. + +Decision: continue. The policy and QA harness chunk stayed inside the planning +campaign boundary. The golden and negative fixture corpus slices define +repeatable evidence inputs; the fake-clock and port slice prevents wall-clock +or network nondeterminism from entering the future Rust Holmes assurance core; +the concurrency/idempotence slice keeps repeated assessment and publishing +operations boring; and the performance budget slice defines measurable limits +without making Holmes a law compiler or benchmark framework. + +No scope correction is needed. The next work should finish migration, workflow, +crate boundary, alias, and operator-documentation planning before the campaign +closes on schema versioning, retention, end-to-end workflow, release gate +rollout, and final closeout. 
+ +## Progress Check: HLAW-045 + +Date: 2026-05-26. + +Status: **45 / 50 slices closed**. + +Decision: continue with `HLAW-046` through `HLAW-050` next. The migration and +operator-readiness slices now specify how existing JavaScript Holmes artifacts +map into the future Rust law assurance bundle, how CI assembles and invokes the +evidence path, where the Rust crate boundaries must sit, which transitional CLI +aliases are allowed, and what operator documentation must prove. The final five +slices should close schema compatibility, artifact retention, end-to-end +workflow, release-gate rollout, and campaign closeout. + ## Non-Goals For The 50-Slice Planning Campaign - Do not implement Rust Holmes crates yet. diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-036-law-assurance-golden-fixture-corpus.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-036-law-assurance-golden-fixture-corpus.md new file mode 100644 index 00000000..07c7d844 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-036-law-assurance-golden-fixture-corpus.md @@ -0,0 +1,122 @@ +--- +title: HLAW-036 LawAssuranceGoldenFixtureCorpus +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-036 LawAssuranceGoldenFixtureCorpus + +## Feature Overview & Objectives + +### Problem Statement + +Holmes `weslaw` assurance needs a golden fixture corpus before implementation +starts. The corpus must define clean, warning, failing, stale, missing, and +large-but-valid evidence bundles with expected validation, assessment, report, +and witness outputs. Without named golden fixtures, future Rust, CLI, GitHub, +and MCP implementations will drift while still claiming to implement the same +PRDs. + +### Target User/Audience + +- Holmes implementers building the Rust domain and adapter tests. +- QA engineers maintaining cross-platform snapshot fixtures. 
+- CI maintainers running deterministic regression suites. +- Reviewers checking whether a future implementation is faithful to this design + packet. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Fixture coverage | Golden corpus contains at least one clean, warning, failing, stale, missing-optional, and large valid bundle. | +| Snapshot determinism | Golden outputs are byte-identical across repeated local and CI runs with a fake clock. | +| Interface parity | CLI, API, GitHub, and MCP tests reuse the same corpus names and expected outcomes. | + +## Scope Definition + +### In Scope + +- Define `fixtures/hlaw/golden/` as the planned corpus root. +- Define fixture families for clean release, clean local, advisory warnings, + failed coverage, failed traceability, stale evidence, missing optional + evidence, and large valid evidence. +- Define expected outputs for validation result, assessment report document, + rendered Markdown summary, agent-safe summary, audit witness, and exit + category. +- Define fixture naming conventions, metadata file shape, fake-clock timestamp, + bundle hash placeholders, and snapshot regeneration policy. +- Define cross-platform stability requirements for paths, sorting, timestamps, + and newlines. + +### Out of Scope + +- No fixture files are generated in this PRD slice. +- No implementation test harness is written in this slice. +- No runtime execution witness is required. +- No GitHub API fixture is required beyond renderer/publisher input snapshots. +- No source-of-truth recomputation of Wesley law artifacts by Holmes. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a Holmes implementer, I want named golden fixtures so that each domain service has shared expected inputs and outputs. | +| US-002 | As a QA engineer, I want deterministic snapshots so that report drift is caught during review. 
| +| US-003 | As a CI maintainer, I want fixture metadata to identify which commands and adapters each fixture exercises so that each surface's tests can select fixtures without copying inputs. | +| US-004 | As a reviewer, I want golden failures and golden warnings represented separately so that risk posture is not ambiguous. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A future test references `golden/clean-release` | The harness loads metadata | It finds bundle, policy, expected validation, expected assessment, expected report, and witness expectations. | +| US-002 | The same fixture is evaluated twice with fake clock `2026-01-01T00:00:00Z` | Outputs are serialized | Snapshot bytes are identical. | +| US-003 | A fixture is marked `surfaces: [cli, mcp, github]` | Test selection runs | CLI, MCP, and GitHub tests can include the fixture without copying inputs. | +| US-004 | A warning-only fixture is assessed with `--fail-on fail` | Assessment runs | Expected exit category is success-with-warnings, not assurance failure. | +| US-004 | A failed traceability fixture is assessed | Assessment runs | Expected gate state is fail and provenance mismatch details are present. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Clean release fixture | Happy | `fixtures/hlaw/golden/clean-release` | Valid evidence, pass verdict, success exit. | +| TS-002 | Clean local fixture | Happy | `fixtures/hlaw/golden/clean-local` | Valid evidence, local profile, advisory policy. | +| TS-003 | Warning-only fixture | Happy | `fixtures/hlaw/golden/warning-advisory` | Warning verdict and deterministic warning summary. | +| TS-004 | Failing coverage fixture | Negative | `fixtures/hlaw/golden/fail-coverage` | Failed coverage gate with missing subjects. 
| +| TS-005 | Stale traceability fixture | Negative | `fixtures/hlaw/golden/fail-traceability-stale` | Failed traceability gate with stale artifact role. | +| TS-006 | Large valid fixture | Load | `fixtures/hlaw/golden/large-valid` | Outputs remain deterministic within performance budget. | + +### Happy Path Testing + +1. Define metadata schema for each golden fixture directory. +2. Load clean release and clean local fixtures through the planned artifact + locator. +3. Assert expected validation, assessment, report document, summary, and witness + outputs. +4. Re-run with the same fake clock and compare bytes. + +### Negative/Edge Case Testing + +- Invalid inputs: fixture metadata missing expected output, duplicate fixture + id, unsupported fixture schema version, absent fake-clock timestamp, invalid + profile name, and output snapshot not matching declared media type. +- Timeouts: fixture evaluation uses fake clocks and in-memory adapters where + possible; slow filesystem reads are surfaced by the harness. +- Concurrent users or retries: two test processes reading the corpus must not + mutate fixture directories or regenerate snapshots implicitly. +- Broken dependencies: if a fixture references a missing artifact, the corpus + validation fails before implementation tests run. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Corpus metadata validation completes in under 500 ms for 100 fixtures. | Synthetic fixture index benchmark. | +| Load | Large fixture outputs may be stored as compressed test assets only if snapshot comparison remains deterministic. | Large fixture storage review. | +| Security | Fixture paths are workspace-relative and must not include local absolute paths. | Path lint over fixture metadata. | +| Accessibility | Golden Markdown snapshots include headings and text statuses for screen-reader-friendly review. | Markdown snapshot assertions. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-037-law-assurance-negative-fixture-corpus.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-037-law-assurance-negative-fixture-corpus.md new file mode 100644 index 00000000..f0d68e04 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-037-law-assurance-negative-fixture-corpus.md @@ -0,0 +1,117 @@ +--- +title: HLAW-037 LawAssuranceNegativeFixtureCorpus +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-037 LawAssuranceNegativeFixtureCorpus + +## Feature Overview & Objectives + +### Problem Statement + +Holmes must fail safely on malformed, unsupported, unauthorized, stale, and +contradictory `weslaw` assurance evidence. A negative fixture corpus is required +so invalid inputs never accidentally produce findings, gates, reports, or +success artifacts. The corpus must distinguish validation failures from +assessment failures and infrastructure errors. + +### Target User/Audience + +- QA engineers building validation and panic-free guarantees. +- Holmes implementers verifying error taxonomy. +- Security reviewers testing path and link handling. +- CI maintainers ensuring invalid evidence fails early. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Error coverage | Every validation diagnostic family has at least one negative fixture. | +| Panic-free behavior | 100% of negative fixtures return typed diagnostics instead of panics. | +| Failure isolation | Invalid evidence fixtures emit zero assurance findings and zero gate decisions. | + +## Scope Definition + +### In Scope + +- Define `fixtures/hlaw/negative/` as the planned negative corpus root. 
+- Include invalid JSON, unsupported versions, missing required artifacts, + invalid hashes, hash mismatches, unknown profiles, malformed policies, + unsafe paths, unsafe URLs, duplicate ids, contradictory capability posture, + broad suppressions, expired suppressions, and unauthorized MCP resources. +- Define expected diagnostic codes, exit categories, and no-assessment + guarantees. +- Define isolation rules so each fixture fails for one primary reason unless it + is explicitly a multi-error fixture. +- Define panic-free and no-success-artifact assertions. + +### Out of Scope + +- No fuzzing engine in this slice. +- No negative fixture implementation files in this PRD slice. +- No live GitHub API failure simulation. +- No remote URL fetch tests. +- No runtime handler access violation tests. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a QA engineer, I want one negative fixture per diagnostic family so that error coverage is measurable. | +| US-002 | As a Holmes implementer, I want invalid evidence to stop before assessment so that bad inputs cannot become findings. | +| US-003 | As a security reviewer, I want unsafe path and URL fixtures so that publishing and artifact loading remain safe. | +| US-004 | As a CI maintainer, I want expected exit categories for each negative fixture so that regressions break fast. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A diagnostic code exists in the validation taxonomy | Corpus coverage is checked | At least one negative fixture declares that code as its primary expected diagnostic. | +| US-002 | A malformed law diff fixture is evaluated | Holmes validates evidence | Validation fails and no assessment report is produced. | +| US-003 | A bundle path escapes workspace root | Artifact locator runs | Fixture expects `HLAW_ARTIFACT_PATH_ESCAPE` and no file content read. 
| +| US-004 | An unknown profile policy fixture runs through CLI assessment | Command exits | Exit category is usage or validation failure as documented, not success. | +| US-004 | A multi-error fixture is marked `multiError: true` | Corpus validation runs | Multiple diagnostics are allowed and sorted deterministically. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Malformed bundle JSON | Negative | `negative/bundle-malformed-json` | Malformed JSON diagnostic. | +| TS-002 | Unsupported law diff version | Negative | `negative/law-diff-unsupported-version` | Unsupported version diagnostic. | +| TS-003 | Missing required manifest | Negative | `negative/missing-manifest` | Missing required artifact diagnostic. | +| TS-004 | Unsafe path traversal | Security | `negative/path-traversal` | Workspace escape diagnostic. | +| TS-005 | Unsafe evidence link | Security | `negative/link-unsafe-scheme` | Unsafe URL diagnostic. | +| TS-006 | Contradictory capability posture | Negative | `negative/capability-contradiction` | Contradiction diagnostic. | + +### Happy Path Testing + +1. Validate the negative corpus index. +2. For each fixture, run the command or domain validator named in metadata. +3. Assert the expected primary diagnostic, exit category, and no-success-output + guarantees. +4. Snapshot diagnostic order for multi-error fixtures. + +### Negative/Edge Case Testing + +- Invalid inputs: negative fixture metadata missing primary diagnostic, fixture + that unexpectedly passes, fixture that emits a different primary diagnostic, + fixture with local absolute path, and fixture that produces success artifact. +- Timeouts: timeout fixtures must use injected fake adapters, not real sleeps. +- Concurrent users or retries: negative fixtures must be read-only and safe for + parallel test execution. 
+- Broken dependencies: if a fixture references missing expected output, corpus + validation fails before the negative test body runs. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Negative corpus validation completes in under 1 second for 200 fixtures. | Fixture index benchmark. | +| Load | Multi-error fixtures can assert at least 100 diagnostics without unstable ordering. | Synthetic multi-error fixture. | +| Security | Corpus lint rejects absolute paths, unsafe URLs, and shell snippets in executable fields. | Metadata security lint. | +| Accessibility | Diagnostic snapshots include code, artifact role, and remediation text. | Snapshot assertions. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-038-law-assurance-fake-clock-and-ports.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-038-law-assurance-fake-clock-and-ports.md new file mode 100644 index 00000000..17e99497 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-038-law-assurance-fake-clock-and-ports.md @@ -0,0 +1,117 @@ +--- +title: HLAW-038 LawAssuranceFakeClockAndPorts +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-038 LawAssuranceFakeClockAndPorts + +## Feature Overview & Objectives + +### Problem Statement + +Holmes law assurance outputs include timestamps, timeouts, artifact metadata, +publisher attempts, resource reads, and witness generation. Tests will become +non-deterministic if implementations read wall-clock time, global filesystem +state, or live GitHub/MCP services directly. The architecture needs +dependency-injected clocks and ports before any implementation begins. + +### Target User/Audience + +- Holmes Rust implementers building domain and application services. +- QA engineers requiring deterministic snapshots. +- CI maintainers diagnosing flaky tests. 
+- Reviewers verifying that wall-clock and external-service behavior is isolated. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| No wall-clock dependency | Domain and application tests can run with a fake clock and zero direct system-time calls. | +| Port isolation | Filesystem, GitHub, MCP, artifact writer, and policy readers are replaceable with in-memory fakes. | +| Snapshot stability | Timestamped reports and witnesses are byte-identical under the same fake clock. | + +## Scope Definition + +### In Scope + +- Define `ClockPort` with fixed, advancing, and timeout-injection fake + implementations. +- Define planned ports for artifact loading, artifact writing, GitHub + publishing, MCP resource registry, policy loading, report rendering, and + command IO. +- Define test-only in-memory adapters with deterministic ordering and failure + injection. +- Define rules forbidding domain services from using wall-clock time, process + current directory, network clients, or ambient environment variables directly. +- Define concurrency-safe fake behavior. + +### Out of Scope + +- No production adapter implementation in this slice. +- No real GitHub or MCP server integration. +- No async runtime selection. +- No mocking framework choice beyond port contracts. +- No test code generation. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a QA engineer, I want fake clocks so that audit witness timestamps are deterministic. | +| US-002 | As a Holmes implementer, I want in-memory ports so that domain tests do not depend on filesystem or GitHub state. | +| US-003 | As a CI maintainer, I want timeout behavior injected deterministically instead of using real sleeps. | +| US-004 | As a reviewer, I want tests that fail if domain code reads wall-clock time directly. 
| + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Fake clock is set to `2026-01-01T00:00:00Z` | Report and witness are generated twice | Serialized timestamps are identical. | +| US-002 | In-memory artifact loader contains law diff bytes | Ingest service runs | Service reads through the port and never touches the filesystem. | +| US-003 | Fake artifact loader is configured to time out | Validation runs | Validation returns infrastructure timeout diagnostic without sleeping. | +| US-004 | Domain code attempts to import system clock API | Architecture guard runs | Guard fails with direct-clock-use diagnostic. | +| US-004 | Concurrent tests share fake adapters | Tests run | Adapter state remains deterministic or explicitly isolated per test. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Fixed fake clock | Happy | fixed timestamp | Identical timestamps in outputs. | +| TS-002 | Advancing fake clock | Happy | configured increments | Predictable sequence. | +| TS-003 | Artifact read timeout | Edge | fake loader timeout | Timeout diagnostic, no sleep. | +| TS-004 | GitHub publish failure | Edge | fake publisher error | Publisher failure reported separately from the assessment verdict. | +| TS-005 | Direct clock import guard | Negative | domain module with forbidden import | Architecture guard fails. | +| TS-006 | Concurrent fake adapter use | Load | parallel assessment tests | No nondeterministic ordering. | + +### Happy Path Testing + +1. Build assessment services with fake clock and in-memory ports. +2. Generate validation, report, artifact writer manifest, and witness outputs. +3. Assert byte equality across repeated runs. +4. Confirm all side effects are visible through fake adapter state. 
+ +### Negative/Edge Case Testing + +- Invalid inputs: missing fake clock, adapter not registered, fake port state + reused without reset, direct wall-clock call, direct filesystem call in domain + service, and real sleep in timeout test. +- Timeouts: timeout behavior is injected through ports and never waits on real + elapsed time. +- Concurrent users or retries: fake adapters must either be immutable or guard + mutable state with deterministic ordering. +- Broken dependencies: fake adapter failure modes map to the same diagnostics as + production ports. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Fake-port tests add less than 10 ms overhead per small assessment fixture. | Test harness benchmark. | +| Load | In-memory ports can hold 100 MB synthetic artifact bytes for load tests without global state. | Large fake artifact fixture. | +| Security | Fakes must enforce the same path and URL policy as production adapters unless a test explicitly disables it. | Policy parity tests. | +| Accessibility | Fake-rendered text snapshots include the same status labels as production renderers. | Snapshot comparison. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-039-law-assurance-concurrency-and-idempotence.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-039-law-assurance-concurrency-and-idempotence.md new file mode 100644 index 00000000..c050bde1 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-039-law-assurance-concurrency-and-idempotence.md @@ -0,0 +1,117 @@ +--- +title: HLAW-039 LawAssuranceConcurrencyAndIdempotence +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-039 LawAssuranceConcurrencyAndIdempotence + +## Feature Overview & Objectives + +### Problem Statement + +Holmes law assurance will run in CI retries, local repeated commands, parallel +MCP requests, and GitHub publishing reruns. These operations must be idempotent +where safe, deterministic under concurrency, and explicit where collisions are +not allowed. The design must prevent duplicated PR comments, unstable artifact +bytes, race-prone output paths, and report ids that change without input +changes. + +### Target User/Audience + +- CI maintainers relying on retry-safe law assurance jobs. +- Holmes adapter developers implementing GitHub, CLI, and MCP surfaces. +- QA engineers designing race simulations. +- Reviewers checking that reruns do not create noisy duplicate output. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Retry safety | Re-running assessment with identical inputs produces identical report ids, finding ids, gate ids, and witness hashes. | +| Publish idempotence | GitHub comment publishing updates one sticky comment across reruns. | +| Collision clarity | Concurrent writes to the same output directory either serialize safely or fail with a documented collision diagnostic. 
| + +## Scope Definition + +### In Scope + +- Define idempotence keys for validation, assessment, report document, artifact + writer, witness, GitHub comment, MCP assessment, and resource registry. +- Define deterministic ids derived from input bundle hash family, policy hash, + profile, command/tool version, and fake-clock value where appropriate. +- Define concurrent artifact write policy: unique output directory required by + default, explicit overwrite/replace behavior only when enabled. +- Define GitHub sticky comment update race behavior. +- Define race simulation fixtures and retry tests. + +### Out of Scope + +- No distributed lock service. +- No database transaction model. +- No live GitHub race test. +- No queue or job scheduler. +- No cross-repo concurrency coordination. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a CI maintainer, I want retried assessment jobs to produce identical outputs so that reruns are trustworthy. | +| US-002 | As a GitHub adapter developer, I want sticky comment idempotence so that reruns update rather than duplicate comments. | +| US-003 | As a CLI user, I want concurrent output collisions to fail clearly instead of corrupting files. | +| US-004 | As a QA engineer, I want deterministic race simulations without real sleeps. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Same bundle, policy, profile, and fake clock are assessed twice | Assessment runs | Report id, finding ids, gate ids, and witness hash match. | +| US-002 | Two publishers race to update the same marker comment | Fake GitHub publisher runs | Exactly one latest comment body is retained or a documented retry diagnostic is emitted. | +| US-003 | Two CLI runs write to the same output directory without overwrite | Artifact writer runs | One run succeeds and the other fails with collision diagnostic, or both fail before partial writes according to policy. 
| +| US-004 | Race fixture injects interleaving at write step | Test harness runs | Outcome is deterministic across repeated runs. | +| US-004 | MCP assessment request repeats with same idempotence key | MCP adapter runs | Response references the same assessment id or documented cached result. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Assessment retry | Happy | same clean bundle twice | Identical ids and hashes. | +| TS-002 | Warning assessment retry | Happy | warning fixture twice | Identical warnings and report. | +| TS-003 | Concurrent output collision | Edge | same output dir | Collision diagnostic or serialized success per policy. | +| TS-004 | Sticky comment race | Edge | fake GitHub interleaving | One final marker comment. | +| TS-005 | MCP duplicate request | Happy | same idempotence key | Same assessment id. | +| TS-006 | Random input order | Load | shuffled findings/gates | Stable output ordering. | + +### Happy Path Testing + +1. Run the same valid assessment twice with fixed fake clock. +2. Compare validation result, report document, artifact manifest, and witness + hashes. +3. Run GitHub fake publisher twice and assert one marker comment. +4. Run MCP assessment twice and assert stable assessment id. + +### Negative/Edge Case Testing + +- Invalid inputs: missing idempotence key where required, conflicting profile + with same key, output collision, partial write failure, duplicate marker + comments, stale cached result, and nondeterministic map iteration order. +- Timeouts: race simulations use fake interleaving controls, not wall-clock + sleeps. +- Concurrent users or retries: tests must cover parallel reads, parallel writes + to distinct directories, and conflicting writes to the same directory. +- Broken dependencies: GitHub API retry exhaustion yields publisher failure + without changing assessment verdict. 
+ +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Idempotence key derivation adds under 10 ms for large reports. | Hash/id benchmark. | +| Load | Sorting 100,000 finding/gate ids remains deterministic and within budget. | Large shuffled fixture. | +| Security | Idempotence keys must not embed secrets or local absolute paths. | Key redaction test. | +| Accessibility | Retry/collision diagnostics include text reason and remediation. | Snapshot diagnostics. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-040-law-assurance-performance-budget.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-040-law-assurance-performance-budget.md new file mode 100644 index 00000000..c9551b69 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-040-law-assurance-performance-budget.md @@ -0,0 +1,118 @@ +--- +title: HLAW-040 LawAssurancePerformanceBudget +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-040 LawAssurancePerformanceBudget + +## Feature Overview & Objectives + +### Problem Statement + +Holmes law assurance will process potentially large law diffs, coverage reports, +capability summaries, reports, witnesses, and GitHub/MCP summaries. Without +performance and size budgets, future implementation can become slow, memory +hungry, or timeout-prone while still passing functional tests. Budgets must be +defined before implementation so regressions are measurable. + +### Target User/Audience + +- Holmes implementers choosing data structures and serialization strategies. +- CI maintainers setting job timeouts. +- QA engineers writing benchmark fixtures. +- Reviewers deciding whether performance regressions are acceptable. 
+ +### Success Metrics + +| KPI | Target | +| --- | --- | +| Budget coverage | Validation, assessment, report construction, rendering, artifact writing, MCP summary, and GitHub comment rendering each have a named budget. | +| Large-fixture readiness | At least one large valid fixture exercises 10,000 findings and 50,000 coverage subjects. | +| Regression visibility | Benchmarks fail when runtime or memory exceeds documented thresholds by more than allowed tolerance. | + +## Scope Definition + +### In Scope + +- Define performance budgets for evidence validation, law diff ingest, coverage + ingest, capability ingest, gate evaluation, report construction, rendering, + artifact writing, MCP response construction, GitHub comment rendering, and + witness generation. +- Define size budgets for bundle JSON, law diff events, coverage subjects, + capability summaries, report document JSON, rendered Markdown, and comments. +- Define benchmark fixture sizes, timeout values, memory ceilings, and tolerance + policy. +- Define drift checkpoint at HLAW-040. +- Define reporting format for benchmark results. + +### Out of Scope + +- No benchmark implementation in this slice. +- No production timeout mechanism. +- No performance optimization work. +- No cloud-host-specific tuning. +- No live GitHub or MCP latency budget beyond adapter timeout classification. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a CI maintainer, I want documented timeout budgets so that law assurance jobs do not grow without review. | +| US-002 | As a Holmes implementer, I want size limits so that parser and renderer behavior is bounded. | +| US-003 | As a QA engineer, I want large benchmark fixtures so that performance is tested before release. | +| US-004 | As a reviewer, I want benchmark reports to show which budget failed and by how much. 
| + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Assessment benchmark exceeds the configured time budget | Benchmark suite runs | Suite fails with component name, budget, actual value, and tolerance. | +| US-002 | Bundle file exceeds max configured bytes | Artifact locator validates input | Validation fails with size-budget diagnostic before parsing. | +| US-003 | Large fixture has 10,000 findings and 50,000 coverage subjects | Benchmark suite runs | Validation, assessment, and rendering budgets are measured separately. | +| US-004 | Markdown rendering exceeds size limit | Renderer budget check runs | Output is truncated or fails according to renderer policy with omitted counts. | +| US-004 | Benchmark platform is slower than baseline | Suite runs | Tolerance policy determines warning versus failure consistently. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Small clean bundle benchmark | Happy | clean release fixture | Under all budgets. | +| TS-002 | Large valid benchmark | Load | 10,000 findings and 50,000 coverage subjects | Under documented large budgets. | +| TS-003 | Oversized bundle | Negative | bundle above max bytes | Size-budget diagnostic. | +| TS-004 | Oversized comment | Edge | report above comment size | Truncation and omitted counts. | +| TS-005 | Slow fake adapter | Edge | injected timeout | Timeout diagnostic. | +| TS-006 | Memory ceiling breach | Load | generated huge capability report | Benchmark failure. | + +### Happy Path Testing + +1. Run benchmark harness over small and large valid fixtures. +2. Record component-level durations, input sizes, output sizes, and peak memory + where available. +3. Assert budgets and tolerances. +4. Serialize benchmark report deterministically for CI artifacts. 
+ +### Negative/Edge Case Testing + +- Invalid inputs: oversized bundle, oversized report, too many findings, too + many coverage subjects, invalid tolerance, missing benchmark fixture, and fake + timeout adapter. +- Timeouts: benchmark timeout tests use injected fake adapters where possible; + production timeout classification is specified but not implemented here. +- Concurrent users or retries: benchmarks should isolate temp directories and + fake clocks to avoid cross-run interference. +- Broken dependencies: unavailable memory measurement tool downgrades to warning + only if policy permits. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Component budgets are explicit and measured independently. | Benchmark report assertions. | +| Load | Large fixture covers at least 10,000 findings, 50,000 coverage subjects, and 5,000 capability summaries. | Generated fixture validation. | +| Security | Size limits prevent unbounded artifact parsing and comment rendering. | Oversized input tests. | +| Accessibility | Benchmark failure reports include text component names and values. | Snapshot benchmark report. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-041-legacy-holmes-law-evidence-mapping.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-041-legacy-holmes-law-evidence-mapping.md new file mode 100644 index 00000000..140611fd --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-041-legacy-holmes-law-evidence-mapping.md @@ -0,0 +1,115 @@ +--- +title: HLAW-041 LegacyHolmesLawEvidenceMapping +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-041 LegacyHolmesLawEvidenceMapping + +## Feature Overview & Objectives + +### Problem Statement + +The current Holmes workflow already emits PR comments, investigation reports, +Watson verification output, and CI artifacts. 
A Rust Holmes `weslaw` assurance +path must not blindly port legacy behavior or revive retired Node authority. It +needs an explicit mapping that identifies which legacy report concepts are +retained, which are rejected, and which migration gaps require new Rust-native +contracts. + +### Target User/Audience + +- Holmes migration implementers. +- Maintainers reviewing legacy workflow compatibility. +- CI owners avoiding breakage during cutover. +- QA engineers building compatibility fixtures. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Mapping completeness | Every legacy Holmes law-relevant artifact has a retain, adapt, reject, or defer decision. | +| No authority regression | No rejected legacy field becomes part of the new law assurance source of truth. | +| Compatibility coverage | Retained fields have fixture-backed migration expectations. | + +## Scope Definition + +### In Scope + +- Inventory legacy Holmes PR comment fields, investigation report fields, + Watson verification fields, Moriarty forecast fields, CI artifact links, and + workflow status fields that overlap with law assurance. +- Classify each field as retain, adapt, reject, or defer. +- Define compatibility fixture names for retained/adapted fields. +- Define rejected-field rationale for Node-only authority, vague citation + quality, non-law-specific readiness language, and runtime assumptions. +- Define migration gap list for fields that need new Rust Holmes contracts. + +### Out of Scope + +- No legacy Node code modification. +- No implementation of Rust mapping code. +- No migration execution. +- No deletion of legacy workflow files. +- No new Holmes readiness scoring model. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a Holmes implementer, I want a field-by-field legacy mapping so that Rust law assurance preserves only intentional behavior.
| +| US-002 | As a maintainer, I want rejected legacy fields documented so that old Node authority does not sneak back in. | +| US-003 | As a CI owner, I want adapted fields identified so that workflow comments can change without surprise. | +| US-004 | As a QA engineer, I want compatibility fixtures for retained fields. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A legacy Holmes report field is law-relevant | Mapping is reviewed | Field has retain, adapt, reject, or defer classification. | +| US-002 | A field claims broad ship readiness from non-law evidence | Mapping is reviewed | Field is rejected or adapted so it cannot become law assurance truth. | +| US-003 | A legacy PR comment link maps to evidence link behavior | Mapping is reviewed | Field is marked adapt and tied to `GitHubLawEvidenceLinks`. | +| US-004 | A retained field is named | Fixture plan is generated | At least one compatibility fixture is listed for that field. | +| US-004 | A rejected field is named | Fixture plan is generated | No compatibility fixture is required except a negative assertion that it is absent. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Retain evidence link field | Happy | legacy PR comment fixture | Maps to new evidence link contract. | +| TS-002 | Adapt verdict summary | Happy | legacy Holmes verdict text | Maps to law-specific verdict summary only. | +| TS-003 | Reject broad readiness score | Negative | legacy weighted completion | Not included in law assurance truth. | +| TS-004 | Reject Node-only artifact path | Negative | legacy cache path | Not retained as source of truth. | +| TS-005 | Defer forecast output | Edge | Moriarty insufficient-data output | Deferred outside law assurance v1. | +| TS-006 | Missing mapping row | Negative | unmapped legacy field | Mapping audit fails. 
| + +### Happy Path Testing + +1. Parse a representative legacy PR comment fixture. +2. Apply mapping classifications manually or through future tooling. +3. Assert retained/adapted fields land in planned law assurance contracts. +4. Assert rejected fields are absent from the new report document expectations. + +### Negative/Edge Case Testing + +- Invalid inputs: unmapped legacy field, retained field without target contract, + adapted field without rationale, rejected field still present in new output, + and deferred field used by release gate. +- Timeouts: mapping tests are static and use no network or wall-clock time. +- Concurrent users or retries: mapping fixture evaluation is read-only and + deterministic. +- Broken dependencies: missing legacy fixture fails mapping coverage, not new + law assurance behavior. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Mapping coverage audit completes in under 100 ms for 500 fields. | Synthetic mapping table benchmark. | +| Load | Mapping table remains readable when legacy inventory grows. | Field inventory size check. | +| Security | Rejected local paths and cache paths are not emitted in new public reports. | Negative fixture assertions. | +| Accessibility | Adapted comment fields retain text headings and status labels. | Snapshot mapped comment output.
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-042-holmes-workflow-weslaw-integration.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-042-holmes-workflow-weslaw-integration.md new file mode 100644 index 00000000..5708713b --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-042-holmes-workflow-weslaw-integration.md @@ -0,0 +1,117 @@ +--- +title: HLAW-042 HolmesWorkflowWeslawIntegration +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-042 HolmesWorkflowWeslawIntegration + +## Feature Overview & Objectives + +### Problem Statement + +CI must assemble Wesley `weslaw` evidence and invoke Holmes law assurance in a +predictable order. The workflow integration must define job dependencies, +artifact paths, failure propagation, fork permissions, retry behavior, and +publishing posture before implementation starts. This prevents a future workflow +from mixing stale artifacts or publishing misleading comments on invalid +evidence. + +### Target User/Audience + +- GitHub Actions maintainers. +- Holmes CLI implementers. +- Release engineers configuring law assurance gates. +- QA engineers testing branch, fork, and artifact failure paths. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Dependency clarity | Workflow jobs declare evidence generation before Holmes assessment. | +| Artifact traceability | All Holmes inputs and outputs use documented artifact paths. | +| Fork safety | Untrusted fork PRs do not receive privileged publishing tokens. | + +## Scope Definition + +### In Scope + +- Define planned GitHub Actions job flow: Wesley law + validate/diff/coverage/capabilities, bundle assembly, Holmes validate, Holmes + assess, Holmes report, artifact upload, optional PR comment publish. +- Define artifact path conventions under `.wesley/holmes-law/` or workflow + artifact roots.
+- Define failure propagation for evidence generation, validation, assessment, + rendering, artifact upload, and publishing. +- Define branch/fork permission policy and read-only fallback. +- Define retry and stale artifact behavior. + +### Out of Scope + +- No workflow YAML implementation in this slice. +- No branch protection changes. +- No secret management implementation. +- No live GitHub artifact API tests. +- No sibling repo workflow edits. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a CI maintainer, I want the workflow to generate Wesley law artifacts before Holmes runs so that assessment inputs are fresh. | +| US-002 | As a release engineer, I want failed validation to stop publishing readiness comments. | +| US-003 | As a security reviewer, I want fork PRs to avoid privileged comment publishing. | +| US-004 | As a QA engineer, I want workflow failure paths documented with expected job conclusions. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Wesley law artifact job succeeds | Holmes assessment job starts | It reads documented artifact paths and expected bundle hash. | +| US-002 | Holmes validation fails | Workflow continues to reporting | It uploads validation failure artifacts but does not publish pass/warn readiness. | +| US-003 | PR originates from untrusted fork | Workflow runs | Publishing step is skipped or runs read-only without secrets. | +| US-004 | Artifact upload fails after assessment | Workflow completes | Job reports artifact failure separately from assessment verdict. | +| US-004 | Retry reruns same SHA | Workflow runs | Stale artifact checks use current run artifacts, not old run artifacts. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Clean main-branch PR | Happy | workflow fixture | Generate, assess, report, publish. 
| +| TS-002 | Validation failure | Negative | malformed evidence | Upload validation artifact, no pass comment. | +| TS-003 | Failed gate | Negative | coverage failure | Assessment failure conclusion. | +| TS-004 | Fork PR | Security | fork event fixture | Publish skipped/read-only. | +| TS-005 | Artifact upload failure | Edge | fake upload failure | Artifact failure surfaced separately. | +| TS-006 | Retry stale artifact | Edge | rerun event fixture | Uses fresh artifact set. | + +### Happy Path Testing + +1. Simulate a clean PR workflow with generated Wesley law artifacts. +2. Assemble Holmes law evidence bundle. +3. Run validate, assess, report, artifact upload, and optional publish steps. +4. Assert job outputs and artifact paths match the documented contract. + +### Negative/Edge Case Testing + +- Invalid inputs: missing Wesley artifact, stale bundle hash, invalid bundle, + failed coverage gate, artifact upload failure, comment publish failure, + missing token, fork PR, and workflow retry. +- Timeouts: workflow step timeouts are classified by step and do not rewrite + assessment verdict. +- Concurrent users or retries: workflow reruns use run-scoped artifact paths and + idempotent comment markers. +- Broken dependencies: GitHub API outages affect publishing only when artifacts + and assessment already succeeded. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | End-to-end law assurance workflow completes under the configured CI budget for large fixtures. | Workflow simulation with performance fixture. | +| Load | Artifact upload handles large report and witness artifacts within retention limits. | Large artifact upload fixture. | +| Security | Fork PRs never expose write tokens or local absolute paths in comments. | Event-permission fixture tests. | +| Accessibility | Published comments retain headings and text state labels. | Comment snapshot assertions. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-043-rust-holmes-crate-scaffold.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-043-rust-holmes-crate-scaffold.md new file mode 100644 index 00000000..3c01ea65 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-043-rust-holmes-crate-scaffold.md @@ -0,0 +1,115 @@ +--- +title: HLAW-043 RustHolmesCrateScaffold +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-043 RustHolmesCrateScaffold + +## Feature Overview & Objectives + +### Problem Statement + +The Rust Holmes rewrite needs crate and module boundaries that enforce the +assurance hexagon before code lands. Domain logic must stay independent from +GitHub, MCP, filesystem, CLI, and renderer adapters. `weslaw` law evidence must +enter through ports, and Holmes must not import compiler internals in a way that +turns assurance into law authorship. + +### Target User/Audience + +- Rust Holmes implementers. +- Architecture reviewers enforcing hexagonal boundaries. +- QA engineers writing compile-time guard tests. +- Future adapter authors for CLI, API, MCP, and GitHub. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Boundary enforcement | Domain crate has no GitHub, MCP, CLI, filesystem, or network dependencies. | +| Port clarity | All external effects enter through named ports/adapters. | +| Compile-time guard coverage | Dependency direction tests fail on forbidden imports. | + +## Scope Definition + +### In Scope + +- Define planned crate/module layout: domain, application, ports, adapters-cli, + adapters-github, adapters-mcp, reporting, fixtures, and test-support. +- Define public API boundaries for evidence validation, assessment, report + construction, policy evaluation, and witness generation. +- Define dependency rules and forbidden imports. 
+- Define compile-time guard tests for no-GitHub-in-domain, + no-filesystem-in-domain, no-wall-clock-in-domain, and + no-Wesley-compiler-internals-as-authority. +- Define fixture crate responsibilities. + +### Out of Scope + +- No Rust crate files are created in this slice. +- No implementation code. +- No package publishing. +- No async runtime selection. +- No replacement of existing workflows. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a Rust implementer, I want crate boundaries so that domain code can be built and tested without adapters. | +| US-002 | As an architecture reviewer, I want compile-time guard tests so that forbidden dependencies cannot creep in. | +| US-003 | As an adapter author, I want ports defined before adapters so that CLI, GitHub, and MCP reuse application services. | +| US-004 | As a QA engineer, I want fixture/test-support boundaries so that tests do not depend on production side effects. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Domain crate compiles alone | Dependency audit runs | It has no GitHub, MCP, CLI, filesystem, network, or wall-clock dependencies. | +| US-002 | Adapter dependency is added to domain | Guard test runs | Test fails with forbidden dependency diagnostic. | +| US-003 | CLI adapter needs assessment | It calls application service | Adapter passes bundle and policy through ports, not by reimplementing domain logic. | +| US-004 | Test-support crate provides fake clock | Domain tests run | Tests use fake clock and in-memory ports. | +| US-004 | Domain tries to call Wesley CLI | Guard test runs | Test fails because Holmes consumes artifacts, not compiler commands. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Domain-only compile | Happy | planned crate graph | Domain compiles without adapters.
| +| TS-002 | Forbidden GitHub import | Negative | test fixture import | Guard fails. | +| TS-003 | Forbidden filesystem import | Negative | test fixture import | Guard fails. | +| TS-004 | Application uses ports | Happy | application service fixture | Ports injected. | +| TS-005 | Adapter calls domain directly bypassing app | Negative | architecture fixture | Guard warns or fails. | +| TS-006 | Fixture crate dependency direction | Happy | test-support graph | Test-support may depend inward, not vice versa. | + +### Happy Path Testing + +1. Validate planned crate graph against allowed dependency matrix. +2. Compile domain and application units with fake ports. +3. Compile adapters against ports/application services. +4. Run guard tests and ensure allowed graph passes. + +### Negative/Edge Case Testing + +- Invalid inputs: domain imports GitHub, domain imports filesystem, domain reads + wall-clock, domain invokes Wesley CLI, reporting imports GitHub publisher, and + fixture crate leaks into production dependencies. +- Timeouts: compile-time guards do not use wall-clock sleeps. +- Concurrent users or retries: dependency audits are deterministic over the + crate graph. +- Broken dependencies: missing optional adapter dependency fails only adapter + crate, not domain crate. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Workspace compile remains within CI budget after scaffold. | Cargo check benchmark. | +| Load | Dependency graph audit handles at least 50 crates/modules. | Synthetic graph test. | +| Security | Secret-bearing adapters cannot be depended on by domain/application core. | Dependency matrix guard. | +| Accessibility | Architecture diagnostics name forbidden dependency and owning crate. | Guard output snapshot. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-044-transitional-holmes-cli-aliases.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-044-transitional-holmes-cli-aliases.md new file mode 100644 index 00000000..f9d4087c --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-044-transitional-holmes-cli-aliases.md @@ -0,0 +1,114 @@ +--- +title: HLAW-044 TransitionalHolmesCliAliases +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-044 TransitionalHolmesCliAliases + +## Feature Overview & Objectives + +### Problem Statement + +Existing workflows and operator habits may call legacy Holmes commands while +the Rust law assurance path is introduced. Transitional aliases can reduce +cutover friction, but they must not preserve Node as an authority or make old +command names look permanent. The aliases need explicit deprecation messages, +exit parity expectations, and removal gates. + +### Target User/Audience + +- Operators running existing Holmes CLI workflows. +- CI maintainers migrating workflow commands. +- Rust Holmes implementers designing command routing. +- QA engineers testing deprecation and parity behavior. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Cutover safety | Supported aliases route to the Rust law assurance path without invoking legacy Node authority. | +| Deprecation clarity | Every alias emits a planned removal version or removal condition. | +| Exit parity | Alias commands preserve documented exit categories for validation and assessment outcomes. | + +## Scope Definition + +### In Scope + +- Define supported transitional aliases for law assurance validation, + assessment, reporting, and PR comment preview. +- Define deprecation message content and stderr/stdout placement. +- Define exit category parity with canonical `holmes weslaw` commands. 
+- Define command help behavior and migration examples. +- Define removal gates: all workflows migrated, docs updated, CI proving no + alias use, and release note published. + +### Out of Scope + +- No alias implementation. +- No support for arbitrary legacy flags. +- No Node command invocation. +- No long-term compatibility guarantee. +- No workflow YAML change in this slice. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As an operator, I want old command invocations to point me to the new Rust law assurance command. | +| US-002 | As a CI maintainer, I want aliases to preserve exit categories while workflows migrate. | +| US-003 | As a maintainer, I want deprecation text so old aliases do not become permanent. | +| US-004 | As a QA engineer, I want unsupported legacy flags rejected explicitly. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | User runs a supported alias for validation | CLI routes command | It invokes the canonical validation behavior and prints deprecation guidance. | +| US-002 | Alias assessment hits failed gate | CLI exits | Exit category matches canonical `holmes weslaw assess`. | +| US-003 | User runs alias help | CLI prints help | Help names canonical command and removal gate. | +| US-004 | User supplies unsupported legacy flag | CLI parses args | Command fails with unsupported-flag diagnostic and migration hint. | +| US-004 | Alias would require Node legacy behavior | CLI routes command | Command fails; no Node path is invoked. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Supported validate alias | Happy | alias command fixture | Canonical validate behavior plus deprecation text. | +| TS-002 | Supported assess alias | Happy | warning/fail fixtures | Exit parity. 
| +| TS-003 | Unsupported legacy flag | Negative | old flag fixture | Unsupported diagnostic. | +| TS-004 | Alias help | Happy | `--help` | Canonical command and removal gate shown. | +| TS-005 | Alias would call Node | Negative | retired command fixture | Fails without invoking Node. | +| TS-006 | Removal gate check | Edge | docs/workflow inventory | Alias retained or removable per criteria. | + +### Happy Path Testing + +1. Run supported aliases against clean validation and assessment fixtures. +2. Compare outputs and exit categories with canonical commands. +3. Assert deprecation text appears exactly once. +4. Verify help text includes canonical replacement. + +### Negative/Edge Case Testing + +- Invalid inputs: unsupported legacy flag, retired command, command requiring + Node-only behavior, missing bundle, conflicting alias and canonical flags, and + unknown subcommand. +- Timeouts: aliases use the same fake ports and timeout behavior as canonical + commands. +- Concurrent users or retries: aliases must not write extra state beyond + canonical command outputs. +- Broken dependencies: if canonical command fails, alias exits with canonical + category and adds only deprecation context. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Alias dispatch adds under 5 ms before canonical command execution. | CLI dispatch benchmark. | +| Load | Help text remains concise and does not duplicate full legacy docs. | Help snapshot review. | +| Security | Alias routing must not shell out to Node or read legacy env vars. | Architecture guard and command fixture. | +| Accessibility | Deprecation text is plain text and includes replacement command. | Snapshot with color disabled. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-045-law-assurance-operator-docs.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-045-law-assurance-operator-docs.md new file mode 100644 index 00000000..0ed1d489 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-045-law-assurance-operator-docs.md @@ -0,0 +1,119 @@ +--- +title: HLAW-045 LawAssuranceOperatorDocs +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-045 LawAssuranceOperatorDocs + +## Feature Overview & Objectives + +### Problem Statement + +Operators need clear documentation for generating Wesley law evidence, running +Holmes validation and assessment, reading findings, handling failures, and +publishing or inspecting reports. Without operator docs, even a correct Rust +implementation will be difficult to adopt and likely misused in CI. + +### Target User/Audience + +- Local developers running law assurance before PRs. +- CI maintainers wiring law assurance into workflows. +- Release managers interpreting failed gates. +- Agents and reviewers following troubleshooting paths. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Command coverage | Docs include examples for validate, assess, report, artifact writing, and GitHub/MCP inspection. | +| Troubleshooting completeness | Docs cover validation failure, failed gates, unavailable evidence, stale hashes, and publisher failures. | +| Accessibility | Examples and tables are readable without color and do not rely on screenshots. | + +## Scope Definition + +### In Scope + +- Define planned docs locations for operator guide, CI integration guide, + troubleshooting matrix, command reference, GitHub comment guide, MCP resource + guide, and FAQ. +- Define required command examples for generating Wesley artifacts, assembling + bundle, validating, assessing, rendering, writing artifacts, and publishing. 
+- Define troubleshooting matrix by symptom, likely cause, command to run, and + remediation. +- Define docs validation checks for links, command snippets, fixture references, + and accessibility. +- Define update rule: operator docs must change whenever command flags, exit + codes, artifact paths, or report fields change. + +### Out of Scope + +- No final operator docs are written in this PRD slice. +- No screenshots or marketing pages. +- No live hosted documentation site. +- No product tutorial for GraphQL or `weslaw` authoring beyond links to Wesley + docs. +- No support escalation process. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a local developer, I want a copy/paste-safe command path so that I can validate law evidence before opening a PR. | +| US-002 | As a CI maintainer, I want workflow examples so that law assurance artifacts are generated and archived correctly. | +| US-003 | As a release manager, I want troubleshooting guidance for failed gates and stale evidence. | +| US-004 | As a docs reviewer, I want command snippets and links checked automatically. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Developer opens operator guide | They follow local commands | The guide shows validate, assess, report, and artifact output examples with placeholders clearly marked. | +| US-002 | CI maintainer opens integration guide | They copy workflow outline | The guide names job order, artifact paths, permissions, and fork-safe publishing posture. | +| US-003 | Release gate fails on traceability | Operator reads troubleshooting matrix | Matrix points to bundle hash mismatch, evidence artifact links, and rerun guidance. | +| US-004 | Command flag changes in implementation | Docs check runs | Missing docs update is caught by command snippet or reference check. | +| US-004 | Docs include external link | Link check runs | Broken link fails docs validation. 
| + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Local happy-path docs | Happy | operator guide command snippets | Snippets parse and reference known fixtures. | +| TS-002 | CI guide docs | Happy | workflow guide | Artifact paths and permissions documented. | +| TS-003 | Troubleshooting stale evidence | Negative | matrix row | Stale hash remediation present. | +| TS-004 | Broken docs link | Negative | invalid link fixture | Link checker fails. | +| TS-005 | Missing command flag docs | Negative | command manifest diff | Docs coverage check fails. | +| TS-006 | Accessibility review | Happy | Markdown docs | Headings, tables, text statuses pass. | + +### Happy Path Testing + +1. Validate all operator docs links. +2. Extract command snippets and compare command names/flags to the CLI manifest. +3. Validate fixture references exist. +4. Review troubleshooting matrix rows for validation failure, failed gate, + unavailable evidence, stale evidence, publisher failure, and internal error. + +### Negative/Edge Case Testing + +- Invalid inputs: broken links, stale command flags, missing fixture references, + outdated exit code table, missing fork-safety warning, docs using screenshots + as sole information, and absolute local paths. +- Timeouts: docs checks must be local and deterministic; external link checks + should be pinned or separated to avoid flaky PR validation. +- Concurrent users or retries: docs generation/checks are read-only and + deterministic. +- Broken dependencies: if CLI manifest is unavailable, docs command parity check + reports infrastructure error instead of silently passing. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Docs checks complete in under 30 seconds in CI. | Docs validation benchmark. | +| Load | Troubleshooting matrix remains scan-friendly under 50 rows. 
| Markdown lint and reviewer checklist. | +| Security | Docs do not include secrets, local absolute paths, or privileged token examples. | Secret/path lint. | +| Accessibility | Docs use headings, tables, and text status labels; no screenshot-only instructions. | Accessibility checklist and Markdown snapshots. | From 180da8205c0441454a978220cb7087b03db3190e Mon Sep 17 00:00:00 2001 From: James Ross Date: Tue, 26 May 2026 15:47:13 -0700 Subject: [PATCH 6/9] docs(holmes): close law assurance planning campaign --- docs/BEARING.md | 35 +++-- .../holmes-weslaw-assurance-prd-test-plan.md | 47 +++++-- ...LAW-046-law-assurance-schema-versioning.md | 127 +++++++++++++++++ ...AW-047-law-assurance-artifact-retention.md | 122 ++++++++++++++++ ...W-048-law-assurance-end-to-end-workflow.md | 131 ++++++++++++++++++ ...-049-law-assurance-release-gate-rollout.md | 125 +++++++++++++++++ ...AW-050-holmes-weslaw-assurance-closeout.md | 120 ++++++++++++++++ docs/design/README.md | 2 +- 8 files changed, 686 insertions(+), 23 deletions(-) create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-046-law-assurance-schema-versioning.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-047-law-assurance-artifact-retention.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-048-law-assurance-end-to-end-workflow.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-049-law-assurance-release-gate-rollout.md create mode 100644 docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-050-holmes-weslaw-assurance-closeout.md diff --git a/docs/BEARING.md b/docs/BEARING.md index bd26e594..0a9fb065 100644 --- a/docs/BEARING.md +++ b/docs/BEARING.md @@ -402,35 +402,38 @@ deferral, and v1 playback/retrospective closeout evidence. ### 12. 
Holmes `weslaw` Assurance Planning -The next chunk is a 50-slice PRD and test-plan campaign that turns the merged -`weslaw` v1 outputs into Holmes-facing assurance requirements before -implementation begins. +This completed 50-slice PRD and test-plan campaign turns the merged `weslaw` +v1 outputs into Holmes-facing assurance requirements before implementation +begins. -The active packet is +The completed packet is [0020-holmes-weslaw-assurance-prd-test-plan](./design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md). Working budget: **50 slices**. -Status: **45 / 50 slices closed**. The plan allocates `HLAW-001` through +Status: **50 / 50 slices closed**. The plan allocates `HLAW-001` through `HLAW-050` across evidence intake, typed domain contracts, report models, CLI flows, GitHub publishing, MCP surfaces, policy, QA fixtures, determinism, performance budgets, migration, release gates, operator docs, and campaign closeout. Each slice must produce a PRD/test-plan artifact with explicit objectives, scope, user stories, BDD acceptance criteria, and test scenarios. -Closed slices now cover `HLAW-001` through `HLAW-045`: evidence intake, typed +Closed slices now cover `HLAW-001` through `HLAW-050`: evidence intake, typed domain contracts, report sections, CLI operator flows, local artifact writing, exit-code policy, GitHub publishing surfaces, MCP tools/resources, agent-safe summaries, policy/audit contracts, QA fixture corpora, deterministic clock and port seams, concurrency/idempotence requirements, performance budgets, legacy Holmes migration mapping, workflow integration, Rust crate scaffold boundaries, -transitional CLI aliases, and operator documentation. +transitional CLI aliases, operator documentation, schema-version compatibility, +artifact retention, end-to-end workflow, release-gate rollout, and campaign +closeout. 
## Next Target -The immediate focus is **Holmes `weslaw` assurance planning**: continue the -50-slice PRD/test-plan campaign, with 45 slices closed and 5 remaining before -the Rust Holmes assurance integration begins. +The immediate focus is **Holmes `weslaw` assurance planning PR review**: the +50-slice PRD/test-plan campaign is complete, and the next branch should begin +Rust Holmes assurance implementation from the evidence and validation core +before publishers or branch-protection gates. Current evidence still includes complete v0.0.5 publication proof, Rust L1 fixtures for directive-heavy SDL, schema extensions, nested list type @@ -444,11 +447,15 @@ without pinning Wesley to legacy Node. The `0019` packet names the semantic law architecture that lets Wesley compile meaning alongside shape without smuggling runtime ownership into the base compiler. -The next pull is: +The next implementation pull after this planning PR should start with: -1. `HLAW-046` through `HLAW-050`: write PRD/test-plan artifacts for schema - versioning, artifact retention, end-to-end workflow, release gate rollout, - and campaign closeout so the planning packet can reach 50/50. +1. `HLAW-001`, `HLAW-006`, `HLAW-007`, and `HLAW-046`: evidence bundle, + artifact locator, validation result, and schema-version validation. +2. `HLAW-002` through `HLAW-005`: law diff, coverage, capability, and contract + manifest ingest ports. +3. `HLAW-008` through `HLAW-010` plus `HLAW-035`: semantic findings, gate + decisions, provenance, and audit witness. +4. `HLAW-036` and `HLAW-037`: golden and negative fixture corpora. 
## Post-Retirement Freestyle Slice Log diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md index 8dd3191c..9981980d 100644 --- a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md @@ -2,7 +2,7 @@ title: Holmes weslaw Assurance PRD And Test Plan Campaign legend: SPEC packet: 0020-holmes-weslaw-assurance-prd-test-plan -status: active +status: complete release: v0.0.8 --- @@ -10,7 +10,7 @@ release: v0.0.8 ## Status -Active planning packet. Slices `HLAW-001` through `HLAW-045` are complete. +Planning packet complete. Slices `HLAW-001` through `HLAW-050` are complete. ## Question @@ -87,7 +87,7 @@ failure behavior, and test fixtures where known. | 2 | HLAW-011..HLAW-020 | Complete | Report model, CLI operator flows, and local artifacts. | | 3 | HLAW-021..HLAW-030 | Complete | GitHub and MCP interfaces over the same assurance use cases. | | 4 | HLAW-031..HLAW-040 | Complete | Policy, QA harnesses, determinism, concurrency, and budgets. | -| 5 | HLAW-041..HLAW-050 | In progress | Migration, release gates, documentation, and campaign closeout. | +| 5 | HLAW-041..HLAW-050 | Complete | Migration, release gates, documentation, and campaign closeout. | Drift checks happen after HLAW-010, HLAW-025, HLAW-040, and HLAW-050. @@ -340,27 +340,27 @@ Drift checks happen after HLAW-010, HLAW-025, HLAW-040, and HLAW-050. running Holmes law assessment, reading findings, and resolving failures. - Required output: PRD for docs locations, command examples, troubleshooting matrix, docs command checks, and accessibility of examples. -- [ ] HLAW-046 `LawAssuranceSchemaVersioning` PRD and test plan. +- [x] HLAW-046 `LawAssuranceSchemaVersioning` PRD and test plan. 
- Feature/product: Versioning and compatibility rules for Holmes law evidence bundle schemas, policy schemas, report schemas, and witness schemas. - Required output: PRD for semver-like compatibility, unsupported-version diagnostics, migration notices, and schema validation tests. -- [ ] HLAW-047 `LawAssuranceArtifactRetention` PRD and test plan. +- [x] HLAW-047 `LawAssuranceArtifactRetention` PRD and test plan. - Feature/product: Artifact retention rules for local runs, CI runs, PR comments, and future dashboard links. - Required output: PRD for retention names, overwrite policy, cleanup behavior, stale link warnings, and fork-safe behavior. -- [ ] HLAW-048 `LawAssuranceEndToEndWorkflow` PRD and test plan. +- [x] HLAW-048 `LawAssuranceEndToEndWorkflow` PRD and test plan. - Feature/product: End-to-end workflow from GraphQL SDL and `weslaw` authoring through Wesley law artifacts to Holmes findings and PR review output. - Required output: PRD for full golden path, failure-path sequence, fixture repository layout, and release-gate assertions. -- [ ] HLAW-049 `LawAssuranceReleaseGateRollout` PRD and test plan. +- [x] HLAW-049 `LawAssuranceReleaseGateRollout` PRD and test plan. - Feature/product: A staged rollout plan for advisory, required, and non-overridable law assurance gates in CI. - Required output: PRD for rollout phases, branch protection interaction, opt-in/opt-out policy, false-positive handling, and rollback tests. -- [ ] HLAW-050 `HolmesWeslawAssuranceCloseout` PRD and test plan. +- [x] HLAW-050 `HolmesWeslawAssuranceCloseout` PRD and test plan. - Feature/product: Campaign closeout artifact summarizing completed PRDs, open decisions, implementation-ready slices, deferred scope, and next engineering branch. @@ -460,6 +460,37 @@ aliases are allowed, and what operator documentation must prove. The final five slices should close schema compatibility, artifact retention, end-to-end workflow, release-gate rollout, and campaign closeout. 
+## Drift Check: HLAW-050 + +Date: 2026-05-26. + +Status: **50 / 50 slices closed**. + +Decision: close the planning campaign. The final schema-versioning, artifact +retention, end-to-end workflow, release-gate rollout, and closeout slices keep +Holmes on the intended side of the boundary: Holmes validates and judges +Wesley-published law evidence, but it does not compile law, mutate shape, +invent semantic diffs, or reach into external repos for product truth. + +No scope correction is needed before PR review. The recommended next +engineering branch should start with the evidence and validation core before +publishers or branch-protection gates: + +1. Implement `HolmesLawEvidenceBundle`, artifact locators, validation results, + and version validation from `HLAW-001`, `HLAW-006`, `HLAW-007`, and + `HLAW-046`. +2. Implement law diff, coverage, capability, and manifest ingest ports from + `HLAW-002` through `HLAW-005`. +3. Implement semantic finding, coverage gate, provenance gate, and audit + witness models from `HLAW-008` through `HLAW-010` and `HLAW-035`. +4. Add golden and negative fixture corpora from `HLAW-036` and `HLAW-037`. +5. Only then add CLI/report/publisher surfaces from later HLAW slices. + +Deferred scope remains explicit: Law Matrix, LSP support, hosted dashboards, +external repo adoption, live branch-protection rollout, and Rust Holmes +implementation are outside this planning packet and need their own execution +branches. + ## Non-Goals For The 50-Slice Planning Campaign - Do not implement Rust Holmes crates yet.
diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-046-law-assurance-schema-versioning.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-046-law-assurance-schema-versioning.md new file mode 100644 index 00000000..38e71fd5 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-046-law-assurance-schema-versioning.md @@ -0,0 +1,127 @@ +--- +title: HLAW-046 LawAssuranceSchemaVersioning +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-046 LawAssuranceSchemaVersioning + +## Feature Overview & Objectives + +### Problem Statement + +Holmes law assurance will ingest several versioned artifact schemas: evidence +bundle, policy, report document, audit witness, agent summary, MCP responses, +and GitHub publication payloads. Without explicit compatibility rules, a future +Holmes implementation can accidentally accept evidence it does not understand, +reject compatible patch-level changes, or publish reports with mixed schema +generations. The versioning contract must be planned before implementation so +every adapter handles schema evolution consistently. + +### Target User/Audience + +- Holmes implementers defining Rust parsers and schema validators. +- Wesley maintainers publishing law evidence artifact schemas. +- CI maintainers upgrading workflows across multiple repos. +- Release managers interpreting unsupported-version failures. +- Agents and MCP clients consuming report and summary schemas. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Version coverage | Evidence bundle, policy, report, witness, MCP, agent summary, and GitHub payload schemas each have compatibility rules. | +| Unsupported-version clarity | 100% of unsupported major, malformed version, and missing version fixtures emit stable diagnostic codes. 
| +| Upgrade safety | Compatible additive changes are accepted only when their schema rules explicitly allow unknown fields or extensions. | + +## Scope Definition + +### In Scope + +- Define semantic version fields for all Holmes law assurance artifact families. +- Define compatibility rules for major, minor, and patch changes in JSON schema + artifacts and rendered report metadata. +- Define validator behavior for missing versions, malformed versions, + unsupported major versions, unsupported minor versions, prerelease strings, + and build metadata. +- Define migration notice behavior for supported-but-deprecated versions. +- Define schema registry layout for local validation fixtures. +- Define how version errors affect CLI exit codes, MCP error responses, GitHub + comments, and audit witness output. + +### Out of Scope + +- No schema migration implementation is built in this slice. +- No automatic rewrite from old artifact versions to new artifact versions. +- No remote schema registry, package publishing, or hosted docs site. +- No changes to Wesley `weslaw` semantic Law IR versioning. +- No compatibility promise for draft or provisional fixture artifacts. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a Holmes implementer, I want every law assurance artifact to declare its schema version so that parsers can reject unknown formats deterministically. | +| US-002 | As a CI maintainer, I want compatible patch and minor changes to remain usable so that workflow upgrades do not break unnecessarily. | +| US-003 | As a release manager, I want unsupported-version diagnostics to name the artifact family and supported range so that the remediation path is obvious. | +| US-004 | As an MCP client author, I want schema version metadata on responses so that clients can detect and handle incompatible changes. | +| US-005 | As a QA engineer, I want fixtures for each compatibility boundary so that schema versioning cannot regress silently. 
| + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | A law evidence bundle omits `schemaVersion` | Holmes validates the bundle | Validation fails with a missing-version diagnostic for the evidence bundle artifact. | +| US-001 | A report document declares `schemaVersion: "2.0.0"` while Holmes supports `1.x` | Holmes validates the report | Validation fails before report rendering and names the unsupported major version. | +| US-002 | A policy artifact declares a supported minor version with an additive optional field | Holmes validates the policy | Validation succeeds only if the policy schema marks the field as extension-safe. | +| US-003 | A witness artifact declares a malformed version string | CLI validation runs | Exit code is validation failure and the diagnostic includes artifact family, value, and accepted format. | +| US-004 | MCP assessment response is returned | Client inspects metadata | Response includes schema family, schema version, and stable URI or identifier for the response shape. | +| US-005 | Compatibility fixture matrix runs | Each boundary fixture is loaded | Accept/reject outcome matches the declared compatibility table. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Current evidence bundle version | Happy | `fixtures/versioning/evidence-bundle-v1.json` | Accepted. | +| TS-002 | Missing schema version | Negative | bundle without `schemaVersion` | Missing-version diagnostic. | +| TS-003 | Unsupported major version | Negative | `schemaVersion: "2.0.0"` | Unsupported-major diagnostic. | +| TS-004 | Unsupported minor version | Edge | `schemaVersion: "1.99.0"` | Unsupported-minor diagnostic unless configured range admits it. | +| TS-005 | Patch-compatible schema | Happy | `schemaVersion: "1.0.7"` | Accepted with normalized version metadata. 
| +| TS-006 | Malformed version string | Negative | `schemaVersion: "v1"` | Malformed-version diagnostic. | +| TS-007 | Deprecated but supported version | Edge | `schemaVersion: "1.0.0"` with deprecation table | Accepted with migration notice. | +| TS-008 | Mixed artifact generations | Negative | bundle v1, policy v2, witness v1 | Validation fails with mixed-family compatibility diagnostic. | + +### Happy Path Testing + +1. Load a complete evidence bundle where every artifact declares the current + supported version. +2. Validate bundle, policy, report, witness, MCP summary, and GitHub payload + schema versions against the registry. +3. Assert parsed domain objects carry normalized artifact family and version + metadata. +4. Render report and audit witness with the same normalized versions. +5. Assert no migration notice is emitted for the current version set. + +### Negative/Edge Case Testing + +- Invalid inputs: missing version, null version, non-string version, malformed + semver, unsupported major, unsupported minor, prerelease not allowed, mixed + incompatible artifact families, and extension fields outside extension points. +- Timeouts: version validation is local and must not call network registries. +- Concurrent users or retries: registry reads are immutable and shared safely + across concurrent validations. +- Broken dependencies: missing local schema registry is an infrastructure error, + not a valid unsupported-version finding. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Version validation adds less than 25 ms for a complete artifact bundle. | Microbenchmark over fixture matrix. | +| Load | Registry lookup handles at least 100 schema families without changing diagnostic order. | Generated registry fixture. | +| Security | Unsupported versions fail closed and never skip structural validation. | Negative fixtures and audit witness assertion. 
| +| Accessibility | Version diagnostics include plain text family, supplied version, supported range, and remediation. | Snapshot CLI, MCP, and report diagnostics. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-047-law-assurance-artifact-retention.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-047-law-assurance-artifact-retention.md new file mode 100644 index 00000000..3e13fe2c --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-047-law-assurance-artifact-retention.md @@ -0,0 +1,122 @@ +--- +title: HLAW-047 LawAssuranceArtifactRetention +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-047 LawAssuranceArtifactRetention + +## Feature Overview & Objectives + +### Problem Statement + +Holmes law assurance will write local artifacts, CI artifacts, PR comment links, +audit witnesses, and future dashboard references. If retention rules are not +explicit, operators can lose the evidence needed to debug a failed gate, PR +comments can point to stale or inaccessible files, and local runs can +accumulate unbounded data. Retention rules must balance traceability, +determinism, storage cost, and fork-safe publication. + +### Target User/Audience + +- CI maintainers configuring artifact upload and cleanup. +- Local developers running repeated assessments. +- Release managers preserving evidence for merge decisions. +- GitHub publisher adapter implementers. +- Security reviewers checking fork and token behavior. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Retention coverage | Local, CI, PR comment, audit witness, and future dashboard artifact lifecycles are documented. | +| Link safety | PR comments never publish file paths or URLs that are known inaccessible to the PR author context. | +| Cleanup determinism | Local cleanup removes only Holmes-owned run directories and preserves pinned evidence by default. 
| + +## Scope Definition + +### In Scope + +- Define artifact naming scheme for local runs, CI runs, PR comments, audit + witness files, and future dashboard handles. +- Define default retention periods and cleanup rules for local `.wesley-cache` + output versus CI-uploaded artifacts. +- Define overwrite behavior for reruns, retries, and repeated commits. +- Define stale link detection and warning behavior for PR comments. +- Define fork-safe behavior when artifact upload, comment publication, or token + permissions are unavailable. +- Define pinned evidence behavior for release gates and audit replay. + +### Out of Scope + +- No hosted dashboard implementation. +- No object-store integration. +- No artifact signing beyond consuming existing bundle and law hashes. +- No cleanup of non-Holmes files. +- No retention policy for external repos outside the current Wesley workflow. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a local developer, I want repeated Holmes law assurance runs to write predictable artifact directories so that I can compare outputs without guessing filenames. | +| US-002 | As a CI maintainer, I want artifact retention periods and names documented so that workflow storage stays bounded. | +| US-003 | As a release manager, I want failed gate evidence retained long enough to audit a merge decision. | +| US-004 | As a fork PR contributor, I want the workflow to avoid publishing inaccessible or privileged artifact links. | +| US-005 | As a GitHub publisher adapter, I want stale link rules so that comments do not imply unavailable evidence is reviewable. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Developer runs assessment twice for the same commit | Artifacts are written locally | Runs use deterministic run ids or sequence ids and do not overwrite pinned evidence. 
| +| US-002 | CI uploads law assurance artifacts | Retention policy is applied | Artifact names include PR or commit identity, schema family, and run purpose. | +| US-003 | A release gate fails | Holmes writes audit evidence | Evidence is marked retention-required and referenced from report metadata. | +| US-004 | Workflow runs on an untrusted fork | Publisher checks permissions | GitHub comment omits privileged links and includes local artifact names instead. | +| US-005 | Previously published artifact link is no longer available | Publisher renders next comment | Comment marks the previous link stale and points to current retained evidence if available. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Local happy path | Happy | two local assessment runs | Deterministic directories, no pinned overwrite. | +| TS-002 | CI artifact naming | Happy | PR run context | Names include PR number, commit SHA, and artifact family. | +| TS-003 | Cleanup old unpinned runs | Happy | local cache with old runs | Only Holmes-owned expired runs removed. | +| TS-004 | Preserve pinned failed gate | Edge | pinned release evidence | Cleanup skips pinned evidence. | +| TS-005 | Fork-safe publication | Security | read-only fork context | No privileged links emitted. | +| TS-006 | Stale comment link | Edge | previous missing artifact URL | Stale link warning emitted. | +| TS-007 | Broken upload dependency | Negative | artifact upload unavailable | Report records unavailable artifact with deterministic diagnostic. | + +### Happy Path Testing + +1. Run local validate, assess, report, and artifact writer over a clean bundle. +2. Assert artifact directory names are deterministic and include run purpose. +3. Simulate CI context with PR number and commit SHA. +4. Assert uploaded artifact names match retention policy. +5. Render a GitHub comment with accessible artifact references. +6. 
Assert audit witness records retained artifact identifiers. + +### Negative/Edge Case Testing + +- Invalid inputs: malformed retention config, negative retention days, missing + run id, path traversal in artifact names, duplicate artifact ids, and cleanup + roots outside Holmes-owned directories. +- Timeouts: artifact upload timeout becomes publisher-unavailable diagnostic; + local artifact writing remains complete. +- Concurrent users or retries: concurrent runs must not race on the same output + directory unless the run id is explicitly identical and idempotent. +- Broken dependencies: missing GitHub token, artifact upload failure, read-only + filesystem, and stale artifact URL are reported without losing local evidence. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Cleanup over 1,000 local run directories completes in under 2 seconds. | Generated cache fixture. | +| Load | Retention metadata handles at least 500 artifact references in one report. | Large report fixture. | +| Security | Artifact paths are relative, normalized, and cannot escape Holmes output root. | Path traversal tests. | +| Accessibility | Stale and unavailable artifact states are text labels, not color-only signals. | Snapshot report and comment rendering. 
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-048-law-assurance-end-to-end-workflow.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-048-law-assurance-end-to-end-workflow.md new file mode 100644 index 00000000..46e3e561 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-048-law-assurance-end-to-end-workflow.md @@ -0,0 +1,131 @@ +--- +title: HLAW-048 LawAssuranceEndToEndWorkflow +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-048 LawAssuranceEndToEndWorkflow + +## Feature Overview & Objectives + +### Problem Statement + +The preceding slices define individual Holmes law assurance inputs, policies, +reports, adapters, fixtures, and operator surfaces. The implementation still +needs one end-to-end workflow contract that ties GraphQL SDL and `weslaw` +authoring to Wesley law artifacts, Holmes assessment, report publication, PR +review output, and release gate decisions. Without a full workflow PRD, future +implementation can pass isolated unit tests while failing the operator journey. + +### Target User/Audience + +- Holmes implementation leads planning the first Rust engineering branch. +- QA engineers building fixture repositories and integration tests. +- CI maintainers wiring Wesley and Holmes commands together. +- Reviewers verifying PR comments and gate outcomes. +- Release managers requiring auditable evidence from authoring to merge. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Golden path coverage | One fixture repository proves SDL plus `weslaw` authoring through Holmes PR output. | +| Failure path coverage | At least five named failure paths are tested end to end with stable diagnostics. | +| Gate traceability | Final gate decision links back to schema hash, law hash, policy hash, and audit witness id. 
| + +## Scope Definition + +### In Scope + +- Define complete golden path from authored GraphQL SDL and `weslaw` documents + through Wesley `law validate`, `law diff`, `law coverage`, `law capabilities`, + bundle manifest assembly, Holmes validation, assessment, rendering, artifact + writing, GitHub publication, and audit witness output. +- Define fixture repository layout for happy path, semantic-law warning, + required gate failure, invalid evidence, stale hash, and publisher-unavailable + paths. +- Define required command order and artifact handoff contract between Wesley + and Holmes. +- Define end-to-end assertions for report content, gate decision, exit code, + artifact files, PR comment, MCP parity, and audit witness. +- Define what the end-to-end workflow proves and what remains out of scope. + +### Out of Scope + +- No implementation of the Rust Holmes end-to-end runner. +- No changes to Wesley law command semantics. +- No live GitHub API dependency in deterministic end-to-end tests. +- No external Echo, Continuum, jedit, warp-ttd, git-warp, or + `wesley-postgres` repo edits. +- No release certification beyond law assurance gate behavior. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a developer, I want one documented command chain from SDL and `weslaw` changes to a Holmes law assurance report so that I can reproduce CI locally. | +| US-002 | As a QA engineer, I want fixture repositories for golden and failure paths so that the whole workflow is tested instead of isolated parsers. | +| US-003 | As a reviewer, I want PR output to summarize semantic law changes, coverage gates, provenance, and required actions. | +| US-004 | As a release manager, I want final gate decisions to cite hashes and audit witness ids so that merge decisions are traceable. | +| US-005 | As an MCP client, I want the end-to-end assessment result to match CLI and GitHub summaries. 
| + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Developer has SDL, `weslaw`, policy, and base refs | They run the documented local workflow | Wesley artifacts are produced and Holmes emits validation, assessment, report, artifacts, and witness outputs. | +| US-002 | Golden fixture repository is checked out | End-to-end suite runs | Report is advisory-clean, exit code is success, and audit witness contains schema, law, policy, and bundle hashes. | +| US-002 | Required gate failure fixture is checked out | End-to-end suite runs | Exit code is required-gate failure and the PR summary names the blocking law finding. | +| US-003 | Semantic law diff includes strengthening and weakening events | PR renderer runs | Comment groups events by severity and includes omitted-count metadata if truncated. | +| US-004 | Release gate decision is required | Audit witness is written | Witness records exact evidence ids and policy gates evaluated. | +| US-005 | CLI, MCP, and GitHub summaries are generated from one assessment | Parity check runs | Shared finding ids and gate decisions match across all three surfaces. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Full golden path | Happy | fixture repo with valid SDL and `weslaw` | Success exit, report, comment, artifacts, witness. | +| TS-002 | Semantic warning path | Happy | law strengthening advisory fixture | Warning finding, non-blocking gate. | +| TS-003 | Required gate failure | Negative | weakened variant or coverage gap fixture | Required failure exit and blocking finding. | +| TS-004 | Invalid evidence | Negative | malformed law diff JSON | Validation failure before assessment. | +| TS-005 | Stale hash | Negative | bundle hash mismatch | Traceability gate failure. 
| +| TS-006 | Publisher unavailable | Edge | fake GitHub timeout | Local artifacts and witness succeed, publish diagnostic emitted. | +| TS-007 | MCP parity | Happy | assessment result fixture | MCP response matches CLI report model. | + +### Happy Path Testing + +1. Prepare fixture repository with base SDL, head SDL, base `weslaw`, head + `weslaw`, policy, and expected artifact manifest. +2. Run Wesley law commands to create validation, diff, coverage, capabilities, + and bundle manifest artifacts. +3. Assemble Holmes law evidence bundle from those artifacts. +4. Run Holmes validate, assess, report, and artifact writer. +5. Render GitHub comment through fake publisher and MCP summary through in-memory + adapter. +6. Assert all surfaces share finding ids, gate decisions, hashes, and witness id. + +### Negative/Edge Case Testing + +- Invalid inputs: malformed SDL, malformed `weslaw`, unsupported artifact + version, stale schema hash, missing policy, missing coverage, invalid bundle + manifest, weakened required law, and unknown report field. +- Timeouts: fake GitHub publisher, fake artifact upload, and fake clock + deadline tests classify timeout without changing assessment result. +- Concurrent users or retries: repeated end-to-end runs with same commit context + are idempotent and update the same fake PR comment. +- Broken dependencies: unavailable Wesley artifact, unavailable publisher, + read-only artifact directory, and missing MCP resource are separate + diagnostics. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Golden end-to-end local fixture completes in under 10 seconds excluding Rust compile time. | Integration benchmark. | +| Load | Large fixture with 10,000 findings still emits truncated GitHub comment and full local report. | Load integration fixture. | +| Security | No test path requires live secrets or network access; fake adapters enforce token absence. 
| Environment scrub and fake adapter tests. | +| Accessibility | Rendered Markdown summary uses headings, tables, and text labels for all gate states. | Snapshot review and Markdown lint. | diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-049-law-assurance-release-gate-rollout.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-049-law-assurance-release-gate-rollout.md new file mode 100644 index 00000000..f688c3c6 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-049-law-assurance-release-gate-rollout.md @@ -0,0 +1,125 @@ +--- +title: HLAW-049 LawAssuranceReleaseGateRollout +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-049 LawAssuranceReleaseGateRollout + +## Feature Overview & Objectives + +### Problem Statement + +Law assurance should become a release gate gradually. If it is enabled as a hard +branch-protection requirement on day one, false positives and incomplete +fixtures can block useful work. If it remains advisory forever, semantic law +regressions will continue to slip through review. The rollout must define +advisory, required, and non-overridable phases with explicit promotion, +rollback, and false-positive handling rules. + +### Target User/Audience + +- Release managers deciding when Holmes law assurance blocks merges. +- CI maintainers updating branch protection and workflow requirements. +- Holmes implementers adding gate modes and diagnostics. +- Reviewers responding to advisory and required findings. +- Maintainers handling false positives without weakening semantic law. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Rollout clarity | Advisory, required, and non-overridable phases each have entry and exit criteria. | +| False-positive handling | Every false-positive path records evidence, owner, expiration, and rollback or suppression decision. 
| +| Branch protection readiness | Required checks and admin-override behavior are documented before enforcement. | + +## Scope Definition + +### In Scope + +- Define rollout phases: local-only preview, advisory CI, required CI, and + non-overridable release gate. +- Define promotion criteria for moving from one phase to the next. +- Define rollback criteria and emergency disable behavior. +- Define branch protection interactions, including check names, required + statuses, admin override posture, and stale-check behavior. +- Define false-positive handling flow using suppression policy, issue tracking, + expiration, and audit witness. +- Define communication artifacts required before enforcement. + +### Out of Scope + +- No branch protection changes are applied in this slice. +- No production rollout is executed. +- No bypass mechanism for invalid evidence or failed required non-overridable + gates. +- No organization-wide policy outside Wesley. +- No live GitHub settings automation. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a release manager, I want phased enforcement so that law assurance becomes blocking only after evidence quality is proven. | +| US-002 | As a CI maintainer, I want stable check names and branch-protection rules so that required gates are predictable. | +| US-003 | As a reviewer, I want advisory findings to show future blocking impact so that fixes happen before enforcement. | +| US-004 | As a maintainer, I want an auditable false-positive process so that suppressions never hide invalid evidence. | +| US-005 | As an admin, I want rollback criteria so that a broken gate can be disabled safely without erasing the incident trail. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | Project is in advisory phase | Required law finding occurs | CI succeeds or warns according to policy, and report marks the finding as future-blocking.
| +| US-001 | Promotion criteria are satisfied for the required phase | Release manager approves rollout | Required gate becomes a branch-protection candidate with documented check name. | +| US-002 | Required CI phase is active | Holmes emits required failure | Check conclusion is failure and branch protection can block merge. | +| US-003 | Advisory finding is rendered | Reviewer reads PR comment | Comment names severity, future phase impact, and remediation action. | +| US-004 | Maintainer claims false positive | Suppression process runs | Suppression requires id, owner, reason, evidence, expiration, and audit witness entry. | +| US-005 | Gate implementation produces widespread infrastructure failures | Rollback procedure runs | Gate returns to advisory or disabled mode with incident record and follow-up task. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Advisory required finding | Happy | advisory phase policy | Non-blocking check with future-blocking label. | +| TS-002 | Required phase failure | Negative | required phase policy | Failing check and blocking report. | +| TS-003 | Non-overridable invalid evidence | Negative | invalid evidence fixture | Cannot be suppressed or waived. | +| TS-004 | False-positive suppression | Edge | suppression with owner and expiration | Accepted and audited if policy permits. | +| TS-005 | Expired suppression | Negative | expired suppression fixture | Required finding reappears. | +| TS-006 | Rollback mode | Edge | rollback config | Gate downgrades to advisory and records incident id. | +| TS-007 | Stale branch protection check | Negative | stale commit SHA | Check marked stale and not reused for merge readiness. | + +### Happy Path Testing + +1. Run advisory policy over a fixture with one required finding. +2. Assert CI surface succeeds or warns while report marks future blocking impact. +3. 
Run required policy over the same fixture. +4. Assert check fails and branch-protection metadata names the required check. +5. Run non-overridable policy over invalid evidence. +6. Assert invalid evidence fails regardless of suppression or override inputs. + +### Negative/Edge Case Testing + +- Invalid inputs: unknown rollout phase, missing check name, malformed + suppression, expired suppression, suppression without owner, rollback without + incident id, stale commit SHA, and policy that tries to suppress invalid + evidence. +- Timeouts: a CI timeout in the required phase is an infrastructure failure, + not an advisory pass. +- Concurrent users or retries: repeated status updates for one commit are + idempotent and do not create conflicting check conclusions. +- Broken dependencies: an unavailable GitHub status API records a publication + failure while the local report and audit witness remain complete. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Rollout policy evaluation adds less than 50 ms to assessment. | Policy benchmark. | +| Load | Suppression table supports at least 1,000 active entries with deterministic lookup. | Generated suppression fixture. | +| Security | Non-overridable invalid evidence and required binding failures cannot be waived. | Abuse-prevention fixtures. | +| Accessibility | Advisory, required, rollback, and non-overridable states use explicit text labels. | PR comment and report snapshots.
| diff --git a/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-050-holmes-weslaw-assurance-closeout.md b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-050-holmes-weslaw-assurance-closeout.md new file mode 100644 index 00000000..6de0c429 --- /dev/null +++ b/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-050-holmes-weslaw-assurance-closeout.md @@ -0,0 +1,120 @@ +--- +title: HLAW-050 HolmesWeslawAssuranceCloseout +legend: SPEC +packet: 0020-holmes-weslaw-assurance-prd-test-plan +status: complete +release: v0.0.8 +--- + +# HLAW-050 HolmesWeslawAssuranceCloseout + +## Feature Overview & Objectives + +### Problem Statement + +The 50-slice Holmes `weslaw` assurance campaign produces a large planning +surface. Without a closeout requirement, the campaign can end with many PRD +files but no clear implementation order, list of open decisions, evidence +index, or next-branch recommendation. The closeout artifact must turn the +planning packet into a usable handoff for Rust Holmes implementation. + +### Target User/Audience + +- Project maintainers deciding whether the planning campaign is complete. +- Holmes implementation leads starting the Rust engineering branch. +- QA engineers translating PRDs into fixtures and test suites. +- Reviewers checking that no planning debt was hidden. +- Future agents recovering context from `BEARING` and the design packet. + +### Success Metrics + +| KPI | Target | +| --- | --- | +| Campaign completion | 50 / 50 HLAW checklist items are checked and linked to PRD/test-plan artifacts. | +| Handoff clarity | Closeout names first implementation tranche, deferred scope, open decisions, and required validation. | +| Drift control | `BEARING` and the packet status agree on 50 / 50 closed before PR review. | + +## Scope Definition + +### In Scope + +- Define acceptance requirements for closing the 50-slice campaign. +- Define evidence index expectations for all HLAW PRD/test-plan artifacts.
+- Define retrospective questions to answer before implementation begins. +- Define backlog suggestions for deferred work, including Law Matrix, LSP, + hosted dashboard, and future external repo adoption. +- Define next implementation branch recommendation and first engineering + tranche selection. +- Define `BEARING` update requirements and validation commands. + +### Out of Scope + +- No Rust Holmes implementation. +- No final merge decision or admin merge. +- No external repo edits. +- No branch protection change. +- No additional HLAW slices beyond 50 without a new packet or explicit + extension decision. + +## Detailed User Stories + +| ID | User Story | +| --- | --- | +| US-001 | As a maintainer, I want a campaign closeout checklist so that I can see whether planning is actually complete. | +| US-002 | As an implementation lead, I want the first Rust Holmes tranche identified so that engineering starts with the safest dependency order. | +| US-003 | As a QA engineer, I want the PRD index and fixture responsibilities summarized so that test work can begin immediately. | +| US-004 | As a reviewer, I want open decisions and deferred scope called out so that hidden work does not masquerade as completion. | +| US-005 | As a future agent, I want `BEARING` to point at the completed packet and next target so that I can recover the current direction without rereading every slice. | + +## Acceptance Criteria (BDD Format) + +| Story | Given | When | Then | +| --- | --- | --- | --- | +| US-001 | HLAW campaign reaches slice 50 | Closeout runs | Packet status says 50 / 50 complete and all checklist entries are checked. | +| US-002 | Implementation branch is planned | Closeout summary is written | First tranche starts with evidence bundle, ingest ports, validation result, and fixture corpus before publishers. | +| US-003 | QA prepares implementation fixtures | Evidence index is reviewed | Every artifact family has at least one owning PRD and one test-plan source.
| +| US-004 | Deferred scope exists | Closeout summary is reviewed | Deferred items are listed as backlog suggestions, not silent omissions. | +| US-005 | Future agent reads `BEARING` | They inspect current direction | `BEARING` names planning complete and points to Rust Holmes assurance implementation as next direction. | + +## Detailed Test Plan + +### Test Scenarios + +| ID | Scenario | Type | Fixture/Input | Expected Result | +| --- | --- | --- | --- | --- | +| TS-001 | Checklist complete | Happy | HLAW packet checklist | 50 checked entries. | +| TS-002 | Missing PRD artifact | Negative | removed HLAW file | Closeout audit fails. | +| TS-003 | Missing required heading | Negative | malformed PRD file | Heading audit fails. | +| TS-004 | BEARING mismatch | Negative | packet 50/50, BEARING 45/50 | Drift check fails review. | +| TS-005 | Deferred scope hidden | Negative | closeout without backlog list | Reviewer blocks closeout. | +| TS-006 | First tranche undefined | Negative | closeout without next implementation order | Handoff incomplete. | + +### Happy Path Testing + +1. Count `HLAW-001` through `HLAW-050` PRD/test-plan artifacts. +2. Verify each artifact includes the five required PRD/test-plan sections. +3. Verify checklist marks all 50 slices complete. +4. Verify `BEARING` and packet status both report 50 / 50 closed. +5. Verify closeout names first implementation tranche and deferred scope. +6. Run repository documentation and preflight checks. + +### Negative/Edge Case Testing + +- Invalid inputs: missing HLAW file, duplicate HLAW number, unchecked checklist + item, missing required heading, stale `BEARING`, missing closeout drift check, + and next branch recommendation that skips evidence validation. +- Timeouts: closeout validation is local file inspection and should not depend + on GitHub or network calls. +- Concurrent users or retries: repeated closeout audits are read-only and + deterministic. 
+- Broken dependencies: if docs validation scripts are unavailable, closeout + reports infrastructure failure instead of claiming campaign completion. + +### Non-Functional Testing + +| Category | Requirement | Test Method | +| --- | --- | --- | +| Performance | Closeout audit over all HLAW docs completes in under 10 seconds. | Local heading and checklist audit. | +| Load | Packet remains navigable with 50 artifacts and future extensions. | README and packet index review. | +| Security | Closeout does not require secrets, network tokens, or external repo access. | Environment scrub during validation. | +| Accessibility | Closeout progress and deferred scope use text counts and tables, not color-only state. | Markdown review. | diff --git a/docs/design/README.md b/docs/design/README.md index aed7bf66..83eef14e 100644 --- a/docs/design/README.md +++ b/docs/design/README.md @@ -52,7 +52,7 @@ Current packets: [canonicalization and diagnostic](./0019-weslaw-semantic-law-ir/CANONICALIZATION_AND_DIAGNOSTICS.md) substrate notes - [`0020`](./0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md): - Holmes `weslaw` assurance PRD and test-plan campaign for the next 50 slices + completed 50-slice Holmes `weslaw` assurance PRD and test-plan campaign - [Module Contract](./wesley-module-contract.md): Generic core boundary versus external module-owned domain surfaces - [Module Capability Contract](./wesley-module-capability-contract.md): The capability surfaces external modules should implement - [Contract / Artifact / Runtime Boundary](./wesley-contract-family-artifact-runtime-value.md): GraphQL-authored families, Wesley-emitted artifacts, and later runtime values From 06e50e7ded0151bfd247f67e11ad59e68fa4e712 Mon Sep 17 00:00:00 2001 From: James Ross Date: Tue, 26 May 2026 16:52:56 -0700 Subject: [PATCH 7/9] docs(bearing): add Holmes implementation slice ledger --- docs/BEARING.md | 161 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 
152 insertions(+), 9 deletions(-) diff --git a/docs/BEARING.md b/docs/BEARING.md index 0a9fb065..78b9404d 100644 --- a/docs/BEARING.md +++ b/docs/BEARING.md @@ -447,15 +447,158 @@ without pinning Wesley to legacy Node. The `0019` packet names the semantic law architecture that lets Wesley compile meaning alongside shape without smuggling runtime ownership into the base compiler. -The next implementation pull after this planning PR should start with: - -1. `HLAW-001`, `HLAW-006`, `HLAW-007`, and `HLAW-046`: evidence bundle, - artifact locator, validation result, and schema-version validation. -2. `HLAW-002` through `HLAW-005`: law diff, coverage, capability, and contract - manifest ingest ports. -3. `HLAW-008` through `HLAW-010` plus `HLAW-035`: semantic findings, gate - decisions, provenance, and audit witness. -4. `HLAW-036` and `HLAW-037`: golden and negative fixture corpora. +The implementation budget is **90 slices**. The first implementation PR should +take `HIMP-001` through `HIMP-015`, because those slices establish the Rust +Holmes assurance shell, evidence bundle, artifact locator, schema-version +validation, and first ingest ports before any publisher or branch-protection +surface exists. + +Every implementation slice below references the completed `0020` PRD/test-plan +artifact it implements. + +| Slice | Work | Design refs | +| --- | --- | --- | +| HIMP-001 | Create the Rust Holmes assurance crate or module shell with domain, application, reporting, and adapter namespaces. | [HLAW-043], [HLAW-050] | +| HIMP-002 | Add dependency-boundary tests proving the domain layer cannot import GitHub, filesystem, network, or process adapters. | [HLAW-043], [HLAW-038] | +| HIMP-003 | Define shared clock, filesystem, artifact, GitHub, and MCP port traits with fake implementations for tests. | [HLAW-038], [HLAW-043] | +| HIMP-004 | Add the first deterministic diagnostic-code taxonomy and error envelope shared by validation and ingest. 
| [HLAW-007], [HLAW-037] | +| HIMP-005 | Wire the new crate/module into workspace preflight without exposing public CLI commands yet. | [HLAW-043], [HLAW-042] | +| HIMP-006 | Document the implementation boundary and update local design navigation for the new Rust Holmes assurance work. | [HLAW-045], [HLAW-050] | +| HIMP-007 | Implement the typed `HolmesLawEvidenceBundle` model and required artifact-family fields. | [HLAW-001], [HLAW-046] | +| HIMP-008 | Implement `WeslawArtifactLocator` path resolution with relative-path normalization and path traversal rejection. | [HLAW-006], [HLAW-047] | +| HIMP-009 | Add the schema-version registry and accepted family/version table for bundle, policy, report, witness, MCP, and GitHub payloads. | [HLAW-046] | +| HIMP-010 | Implement semantic-version parsing and malformed, missing, unsupported-major, and unsupported-minor diagnostics. | [HLAW-046], [HLAW-037] | +| HIMP-011 | Implement bundle structural validation before artifact parsing, including required versus optional artifact references. | [HLAW-001], [HLAW-007] | +| HIMP-012 | Implement bundle provenance validation for schema hash, law hash, policy hash, bundle hash, and source identity fields. | [HLAW-005], [HLAW-010], [HLAW-015] | +| HIMP-013 | Implement missing, unavailable, oversized, and unreadable artifact diagnostics without panics. | [HLAW-006], [HLAW-007], [HLAW-047] | +| HIMP-014 | Add versioning fixtures for current, deprecated, malformed, unsupported, and mixed-generation artifacts. | [HLAW-046], [HLAW-037] | +| HIMP-015 | Add bundle validation golden and negative tests for the first local preflight gate. | [HLAW-001], [HLAW-036], [HLAW-037] | +| HIMP-016 | Implement the `LawDiffIngestPort` parser for `wesley law diff` JSON. | [HLAW-002] | +| HIMP-017 | Normalize law diff events into stable internal event records without reclassifying Wesley semantics. 
| [HLAW-002], [HLAW-008] | +| HIMP-018 | Add law diff negative handling for duplicate law ids, unknown event kinds, malformed JSON, and unsupported versions. | [HLAW-002], [HLAW-037] | +| HIMP-019 | Implement the `LawCoverageIngestPort` parser for category/profile-aware coverage artifacts. | [HLAW-003], [HLAW-033] | +| HIMP-020 | Normalize law coverage subjects, category totals, threshold inputs, and omitted-category accounting. | [HLAW-003], [HLAW-013], [HLAW-033] | +| HIMP-021 | Implement the `LawCapabilityIngestPort` parser for report-only capability summaries. | [HLAW-004], [HLAW-014] | +| HIMP-022 | Implement the `ContractBundleManifestIngestPort` parser and cross-check manifest hashes against bundle metadata. | [HLAW-005], [HLAW-015] | +| HIMP-023 | Create `SemanticChangeFinding` with stable finding ids, source coordinates, event refs, and remediation fields. | [HLAW-008], [HLAW-012] | +| HIMP-024 | Map law diff events to findings while preserving Wesley's original event classification. | [HLAW-002], [HLAW-008], [HLAW-032] | +| HIMP-025 | Implement `LawCoverageGateDecision` with profile/category threshold evaluation and boundary-value rounding. | [HLAW-009], [HLAW-033] | +| HIMP-026 | Implement `BundleTraceabilityGateDecision` for schema, law, policy, manifest, and artifact hash agreement. | [HLAW-010], [HLAW-015] | +| HIMP-027 | Add provenance report data structures for bundle source, artifact hashes, generator metadata, and evidence links. | [HLAW-015], [HLAW-047] | +| HIMP-028 | Add gate aggregation rules that produce one assessment outcome from validation, findings, coverage, and provenance gates. | [HLAW-009], [HLAW-010], [HLAW-020] | +| HIMP-029 | Add omitted-detail accounting for large finding sets and summaries. | [HLAW-030], [HLAW-040] | +| HIMP-030 | Add domain-level snapshot tests for findings, gate decisions, validation results, and provenance decisions. 
| [HLAW-007], [HLAW-008], [HLAW-009], [HLAW-010] | +| HIMP-031 | Implement `LawAssurancePolicySchema` loading and JSON/schema validation. | [HLAW-031], [HLAW-046] | +| HIMP-032 | Implement profile selection, profile inheritance, defaults, and unknown-profile diagnostics. | [HLAW-029], [HLAW-031], [HLAW-033] | +| HIMP-033 | Implement severity mapping from law diff event kind and coverage gap to Holmes severity. | [HLAW-032], [HLAW-008] | +| HIMP-034 | Implement coverage threshold policy with category absence, pass/warn/fail thresholds, and boundary rounding. | [HLAW-033], [HLAW-009] | +| HIMP-035 | Implement suppression policy with ids, owner, reason text, expiration, and audit fields. | [HLAW-034], [HLAW-035] | +| HIMP-036 | Enforce suppression abuse prevention for invalid evidence, failed binding, and non-overridable required gates. | [HLAW-034], [HLAW-049] | +| HIMP-037 | Implement rollout phase policy for local-preview, advisory, required, and non-overridable modes. | [HLAW-049], [HLAW-020] | +| HIMP-038 | Add policy fixture matrix and negative tests for malformed policy, expired suppression, and attempted invalid-evidence override. | [HLAW-031], [HLAW-034], [HLAW-037], [HLAW-049] | +| HIMP-039 | Implement `LawAssuranceAuditWitness` schema and deterministic witness construction. | [HLAW-035], [HLAW-046] | +| HIMP-040 | Record bundle, policy, report, finding, gate, hash, clock, and adapter evidence in the audit witness. | [HLAW-035], [HLAW-047] | +| HIMP-041 | Implement `LawAssuranceArtifactWriter` for local validation, assessment, report, summary, and witness artifacts. | [HLAW-019], [HLAW-047] | +| HIMP-042 | Implement retention metadata, deterministic artifact names, pinned evidence markers, and overwrite policy. | [HLAW-047], [HLAW-035] | +| HIMP-043 | Implement cleanup behavior for expired unpinned Holmes-owned local artifacts. | [HLAW-047], [HLAW-019] | +| HIMP-044 | Add writer and witness replay tests proving deterministic output across repeated runs. 
| [HLAW-019], [HLAW-035], [HLAW-039] | +| HIMP-045 | Build the golden fixture corpus for clean, warning, failing, malformed, stale, and missing evidence bundles. | [HLAW-036] | +| HIMP-046 | Build the negative fixture corpus for invalid JSON, unsupported versions, hash mismatches, unknown profiles, and malformed policies. | [HLAW-037] | +| HIMP-047 | Add fake clock and no-wall-clock assertions across validation, assessment, artifact writing, and publishing tests. | [HLAW-038], [HLAW-035] | +| HIMP-048 | Add in-memory ports for filesystem, artifact repository, GitHub publisher, MCP adapter, and workflow context. | [HLAW-038], [HLAW-039] | +| HIMP-049 | Add concurrency and idempotence tests for repeated assessment, repeated artifact writing, and retried publication. | [HLAW-039], [HLAW-021] | +| HIMP-050 | Add large-fixture performance budget harness for validation, assessment, rendering, artifact writing, and summaries. | [HLAW-040], [HLAW-030] | +| HIMP-051 | Add snapshot regeneration policy and fixture documentation for maintainers. | [HLAW-036], [HLAW-037], [HLAW-045] | +| HIMP-052 | Add fixture coverage checks so required scenario classes cannot disappear silently. | [HLAW-036], [HLAW-037], [HLAW-050] | +| HIMP-053 | Add hidden or internal CLI command routing for `holmes weslaw` without publishing GitHub behavior yet. | [HLAW-016], [HLAW-043] | +| HIMP-054 | Implement `holmes weslaw validate` using the evidence bundle, locator, version, and structural validation core. | [HLAW-016], [HLAW-001], [HLAW-007] | +| HIMP-055 | Implement `holmes weslaw assess` using ingest ports, policy, findings, and gate decisions. | [HLAW-017], [HLAW-008], [HLAW-009], [HLAW-031] | +| HIMP-056 | Implement `holmes weslaw report` for JSON and Markdown report outputs. | [HLAW-018], [HLAW-011] | +| HIMP-057 | Add artifact output flags, output-directory policy, and report/witness writer integration. 
| [HLAW-019], [HLAW-047] | +| HIMP-058 | Implement exit-code policy for success, advisory findings, required failure, invalid evidence, unavailable dependency, and internal error. | [HLAW-020], [HLAW-007] | +| HIMP-059 | Implement transitional CLI aliases and deprecation messages without reviving Node authority. | [HLAW-044], [HLAW-041] | +| HIMP-060 | Add CLI help, examples, and operator-path tests. | [HLAW-016], [HLAW-017], [HLAW-018], [HLAW-045] | +| HIMP-061 | Implement `LawAssuranceReportDocument` JSON model with metadata, findings, gates, sections, and artifact refs. | [HLAW-011], [HLAW-046] | +| HIMP-062 | Implement law diff report section grouped by event kind, severity, subject, and remediation. | [HLAW-012], [HLAW-008] | +| HIMP-063 | Implement law coverage report section with profile, category, threshold, and omitted-detail accounting. | [HLAW-013], [HLAW-033] | +| HIMP-064 | Implement law capability and bundle provenance report sections. | [HLAW-014], [HLAW-015] | +| HIMP-065 | Implement Markdown renderer with truncation, omitted counts, and no color-only status semantics. | [HLAW-011], [HLAW-030], [HLAW-045] | +| HIMP-066 | Implement agent-safe summary output with token budgets and artifact refs. | [HLAW-030], [HLAW-027] | +| HIMP-067 | Add report rendering snapshots for clean, warning, failing, invalid, and large fixtures. | [HLAW-011], [HLAW-036], [HLAW-040] | +| HIMP-068 | Implement GitHub law assurance PR comment renderer from the report model. | [HLAW-021], [HLAW-011] | +| HIMP-069 | Implement GitHub check summary/status payloads for advisory, required, invalid, and unavailable states. | [HLAW-022], [HLAW-049] | +| HIMP-070 | Implement GitHub finding annotations with file, line, subject, severity, and stable finding ids. | [HLAW-023], [HLAW-008] | +| HIMP-071 | Implement evidence link rendering with stale, unavailable, fork-safe, and retention-aware states. 
| [HLAW-024], [HLAW-047] | +| HIMP-072 | Implement override controls and suppression handoff without allowing invalid evidence or non-overridable gates to pass. | [HLAW-025], [HLAW-034], [HLAW-049] | +| HIMP-073 | Implement idempotent comment update and retry behavior against the fake GitHub publisher. | [HLAW-021], [HLAW-039] | +| HIMP-074 | Add GitHub adapter tests for permissions, fork contexts, publisher timeouts, and stale check conclusions. | [HLAW-021], [HLAW-022], [HLAW-047], [HLAW-049] | +| HIMP-075 | Implement MCP `assessWeslawBundle` tool using the same application service as CLI assessment. | [HLAW-026], [HLAW-017] | +| HIMP-076 | Implement MCP law evidence resources with redaction and artifact availability handling. | [HLAW-027], [HLAW-047] | +| HIMP-077 | Implement MCP `explainLawFinding` tool for finding ids, source event refs, gates, and remediation. | [HLAW-028], [HLAW-008] | +| HIMP-078 | Implement MCP law policy tool for active profile, thresholds, suppression posture, and rollout phase. | [HLAW-029], [HLAW-031], [HLAW-049] | +| HIMP-079 | Add MCP/CLI/GitHub parity tests for shared finding ids, gate decisions, and summaries. | [HLAW-026], [HLAW-030], [HLAW-048] | +| HIMP-080 | Build the end-to-end golden workflow from SDL and `weslaw` authoring through Wesley artifacts to Holmes report output. | [HLAW-048], [HLAW-001] | +| HIMP-081 | Add end-to-end required-failure, invalid-evidence, stale-hash, and publisher-unavailable workflows. | [HLAW-048], [HLAW-037] | +| HIMP-082 | Add CI workflow integration that assembles Wesley law artifacts and invokes Holmes law assurance. | [HLAW-042], [HLAW-048] | +| HIMP-083 | Add workflow artifact upload, retention, and fork-permission tests. | [HLAW-042], [HLAW-047] | +| HIMP-084 | Add end-to-end release-gate assertions for advisory, required, and non-overridable rollout phases. 
| [HLAW-048], [HLAW-049] | +| HIMP-085 | Write operator docs for local generation, validation, assessment, reporting, troubleshooting, and fixture maintenance. | [HLAW-045], [HLAW-050] | +| HIMP-086 | Update changelog and design docs for shipped Rust Holmes assurance behavior. | [HLAW-045], [HLAW-050] | +| HIMP-087 | Add command-snippet and docs parity checks for the new CLI and workflow surfaces. | [HLAW-045], [HLAW-016], [HLAW-042] | +| HIMP-088 | Run Code Lawyer self-review and resolve discovered implementation/documentation issues. | [HLAW-050], [HLAW-040] | +| HIMP-089 | Harden performance, timeout, and memory budgets after full adapter integration. | [HLAW-040], [HLAW-039] | +| HIMP-090 | Close the implementation campaign with final retrospective, backlog suggestions, and next-BEARING update. | [HLAW-050], [HLAW-049] | + +[HLAW-001]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-001-holmes-law-evidence-bundle.md +[HLAW-002]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-002-law-diff-ingest-port.md +[HLAW-003]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-003-law-coverage-ingest-port.md +[HLAW-004]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-004-law-capability-ingest-port.md +[HLAW-005]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-005-contract-bundle-manifest-ingest-port.md +[HLAW-006]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-006-weslaw-artifact-locator.md +[HLAW-007]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-007-law-evidence-validation-result.md +[HLAW-008]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-008-semantic-change-finding.md +[HLAW-009]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-009-law-coverage-gate-decision.md +[HLAW-010]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-010-bundle-traceability-gate-decision.md +[HLAW-011]: 
./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-011-law-assurance-report-document.md +[HLAW-012]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-012-law-diff-report-section.md +[HLAW-013]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-013-law-coverage-report-section.md +[HLAW-014]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-014-law-capability-report-section.md +[HLAW-015]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-015-bundle-provenance-report-section.md +[HLAW-016]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-016-holmes-weslaw-validate-cli.md +[HLAW-017]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-017-holmes-weslaw-assess-cli.md +[HLAW-018]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-018-holmes-weslaw-report-cli.md +[HLAW-019]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-019-law-assurance-artifact-writer.md +[HLAW-020]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-020-law-assurance-exit-code-policy.md +[HLAW-021]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-021-github-law-assurance-comment.md +[HLAW-022]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-022-github-law-gate-check-summary.md +[HLAW-023]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-023-github-law-finding-annotations.md +[HLAW-024]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-024-github-law-evidence-links.md +[HLAW-025]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-025-github-law-override-controls.md +[HLAW-026]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-026-mcp-assess-weslaw-bundle-tool.md +[HLAW-027]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-027-mcp-law-evidence-resources.md +[HLAW-028]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-028-mcp-explain-law-finding-tool.md +[HLAW-029]: 
./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-029-mcp-law-policy-tool.md +[HLAW-030]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-030-agent-safe-law-summary.md +[HLAW-031]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-031-law-assurance-policy-schema.md +[HLAW-032]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-032-law-severity-mapping-policy.md +[HLAW-033]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-033-law-coverage-threshold-policy.md +[HLAW-034]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-034-law-assurance-suppression-policy.md +[HLAW-035]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-035-law-assurance-audit-witness.md +[HLAW-036]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-036-law-assurance-golden-fixture-corpus.md +[HLAW-037]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-037-law-assurance-negative-fixture-corpus.md +[HLAW-038]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-038-law-assurance-fake-clock-and-ports.md +[HLAW-039]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-039-law-assurance-concurrency-and-idempotence.md +[HLAW-040]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-040-law-assurance-performance-budget.md +[HLAW-041]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-041-legacy-holmes-law-evidence-mapping.md +[HLAW-042]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-042-holmes-workflow-weslaw-integration.md +[HLAW-043]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-043-rust-holmes-crate-scaffold.md +[HLAW-044]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-044-transitional-holmes-cli-aliases.md +[HLAW-045]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-045-law-assurance-operator-docs.md +[HLAW-046]: 
./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-046-law-assurance-schema-versioning.md +[HLAW-047]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-047-law-assurance-artifact-retention.md +[HLAW-048]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-048-law-assurance-end-to-end-workflow.md +[HLAW-049]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-049-law-assurance-release-gate-rollout.md +[HLAW-050]: ./design/0020-holmes-weslaw-assurance-prd-test-plan/prds/HLAW-050-holmes-weslaw-assurance-closeout.md ## Post-Retirement Freestyle Slice Log From dcfa5cfb9f3ab61a8ff930bacd7bb0f269ffdb9c Mon Sep 17 00:00:00 2001 From: James Ross Date: Tue, 26 May 2026 18:22:33 -0700 Subject: [PATCH 8/9] feat(holmes): add Rust assurance foundation --- CHANGELOG.md | 6 + Cargo.lock | 8 + Cargo.toml | 1 + crates/wesley-holmes/Cargo.toml | 16 ++ crates/wesley-holmes/README.md | 32 +++ crates/wesley-holmes/src/adapters/mod.rs | 5 + .../src/application/artifact_locator.rs | 97 +++++++ crates/wesley-holmes/src/application/mod.rs | 5 + crates/wesley-holmes/src/domain/diagnostic.rs | 96 +++++++ crates/wesley-holmes/src/domain/evidence.rs | 119 ++++++++ crates/wesley-holmes/src/domain/mod.rs | 13 + crates/wesley-holmes/src/domain/versioning.rs | 217 +++++++++++++++ crates/wesley-holmes/src/lib.rs | 28 ++ crates/wesley-holmes/src/ports/mod.rs | 261 ++++++++++++++++++ crates/wesley-holmes/src/reporting/mod.rs | 4 + crates/wesley-holmes/tests/architecture.rs | 49 ++++ crates/wesley-holmes/tests/foundation.rs | 178 ++++++++++++ docs/ARCHITECTURE.md | 13 +- docs/BEARING.md | 24 +- docs/GUIDE.md | 4 +- docs/design/README.md | 4 +- 21 files changed, 1166 insertions(+), 14 deletions(-) create mode 100644 crates/wesley-holmes/Cargo.toml create mode 100644 crates/wesley-holmes/README.md create mode 100644 crates/wesley-holmes/src/adapters/mod.rs create mode 100644 crates/wesley-holmes/src/application/artifact_locator.rs create mode 100644 
crates/wesley-holmes/src/application/mod.rs create mode 100644 crates/wesley-holmes/src/domain/diagnostic.rs create mode 100644 crates/wesley-holmes/src/domain/evidence.rs create mode 100644 crates/wesley-holmes/src/domain/mod.rs create mode 100644 crates/wesley-holmes/src/domain/versioning.rs create mode 100644 crates/wesley-holmes/src/lib.rs create mode 100644 crates/wesley-holmes/src/ports/mod.rs create mode 100644 crates/wesley-holmes/src/reporting/mod.rs create mode 100644 crates/wesley-holmes/tests/architecture.rs create mode 100644 crates/wesley-holmes/tests/foundation.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index d4faadf6..1506de5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,12 @@ The format is based on Keep a Changelog, and this project adheres to Semantic Ve ### Added +- **Rust Holmes assurance foundation**: Added the unpublished + `crates/wesley-holmes` workspace crate with a hexagonal module shell, domain + dependency-boundary tests, deterministic port traits and fakes, a structured + diagnostic envelope, typed `HolmesLawEvidenceBundle` model, safe + workspace-relative artifact path locator, and artifact-family schema-version + registry for the first ten Holmes implementation slices. 
- **`weslaw` v1 consumer payoff**: `wesley emit rust --law ` now emits law-backed helper validators for integer scalar semantics and discriminated input variant rules, `wesley law capabilities` emits report-only diff --git a/Cargo.lock b/Cargo.lock index a387e285..98e35556 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1561,6 +1561,14 @@ dependencies = [ "wesley-core", ] +[[package]] +name = "wesley-holmes" +version = "0.0.5" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "windows-core" version = "0.62.2" diff --git a/Cargo.toml b/Cargo.toml index 5cb62f04..685d3bbc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ members = [ "crates/wesley-core", "crates/wesley-emit-typescript", "crates/wesley-emit-rust", + "crates/wesley-holmes", "crates/wesley-cli", "xtask", ] diff --git a/crates/wesley-holmes/Cargo.toml b/crates/wesley-holmes/Cargo.toml new file mode 100644 index 00000000..e95604b0 --- /dev/null +++ b/crates/wesley-holmes/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "wesley-holmes" +version = "0.0.5" +edition = "2021" +description = "Rust Holmes law assurance foundation for Wesley semantic evidence" +license = "MIT" +repository = "https://github.com/flyingrobots/wesley" +homepage = "https://github.com/flyingrobots/wesley" +documentation = "https://docs.rs/wesley-holmes" +readme = "README.md" +publish = false + +[dependencies] +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" + diff --git a/crates/wesley-holmes/README.md b/crates/wesley-holmes/README.md new file mode 100644 index 00000000..97de26e5 --- /dev/null +++ b/crates/wesley-holmes/README.md @@ -0,0 +1,32 @@ +# wesley-holmes + +`wesley-holmes` is the Rust foundation for Holmes law assurance work inside +Wesley. 
It consumes Wesley-published law evidence, policy, witness, MCP, and +GitHub payload artifacts; validates their envelope shape and version posture; +and prepares deterministic diagnostics and reporting surfaces for later CLI, +API, and MCP interfaces. + +This crate is intentionally not published yet. It is a workspace implementation +crate for the Holmes redesign described in the Wesley design packet: + +- [Holmes As Law Assurance Consumer](https://github.com/flyingrobots/wesley/blob/main/docs/design/0020-holmes-law-assurance-consumer.md) +- [Holmes End-to-End](https://github.com/flyingrobots/wesley/blob/main/docs/design/0018-holmes-end-to-end.md) + +## Boundary + +The crate follows the planned hexagonal boundary: + +- `domain`: pure law-assurance data, diagnostics, evidence models, and version + rules. Domain code must not import filesystem, network, process, GitHub, MCP, + or wall-clock dependencies. +- `application`: deterministic orchestration utilities that bind domain facts + to ports without owning external side effects. +- `ports`: abstract clock, artifact, policy, reporting, GitHub, MCP, and command + I/O traits plus deterministic fakes for tests. +- `adapters`: future concrete integrations for filesystem, GitHub, MCP, and CLI + surfaces. +- `reporting`: future renderer-facing DTOs and report assembly helpers. + +The current slice establishes the foundation only. No public Holmes CLI command +is exposed from Wesley yet. + diff --git a/crates/wesley-holmes/src/adapters/mod.rs b/crates/wesley-holmes/src/adapters/mod.rs new file mode 100644 index 00000000..bd3d2c70 --- /dev/null +++ b/crates/wesley-holmes/src/adapters/mod.rs @@ -0,0 +1,5 @@ +//! Concrete adapter boundary for future filesystem, GitHub, MCP, and CLI integrations. +//! +//! The first Holmes implementation slice deliberately leaves this namespace +//! empty. Concrete adapters will land only after the domain and port contracts +//! have proven stable. 
diff --git a/crates/wesley-holmes/src/application/artifact_locator.rs b/crates/wesley-holmes/src/application/artifact_locator.rs new file mode 100644 index 00000000..24c30bfa --- /dev/null +++ b/crates/wesley-holmes/src/application/artifact_locator.rs @@ -0,0 +1,97 @@ +//! Workspace-relative artifact path resolution. + +use std::path::{Component, Path}; + +/// Error returned when an artifact path cannot be normalized safely. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ArtifactLocatorError { + /// Human-readable explanation of the failed path normalization. + pub message: String, +} + +impl ArtifactLocatorError { + /// Build a new locator error. + pub fn new(message: impl Into<String>) -> Self { + Self { + message: message.into(), + } + } +} + +/// A normalized artifact path that stays inside the configured workspace root. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ResolvedArtifactPath { + /// Workspace-relative path using `/` separators. + pub workspace_relative: String, +} + +/// Resolves `weslaw` artifact references without touching the filesystem. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct WeslawArtifactLocator { + workspace_root: String, +} + +impl WeslawArtifactLocator { + /// Create a locator for a workspace root label. + pub fn new(workspace_root: impl Into<String>) -> Self { + Self { + workspace_root: workspace_root.into(), + } + } + + /// Return the configured workspace root label. + pub fn workspace_root(&self) -> &str { + &self.workspace_root + } + + /// Normalize a user-authored artifact path relative to the workspace. + /// + /// The resolver is lexical by design. It rejects absolute paths, Windows + /// prefixes, empty paths, and `..` components that would escape the + /// workspace root. It does not perform symlink or filesystem + /// canonicalization. 
+ pub fn resolve(&self, path: &str) -> Result<ResolvedArtifactPath, ArtifactLocatorError> { + if path.trim().is_empty() { + return Err(ArtifactLocatorError::new("artifact path must not be empty")); + } + + let path = Path::new(path); + if path.is_absolute() { + return Err(ArtifactLocatorError::new( + "artifact path must be workspace-relative", + )); + } + + let mut normalized = Vec::new(); + for component in path.components() { + match component { + Component::CurDir => {} + Component::Normal(segment) => { + normalized.push(segment.to_string_lossy().into_owned()) + } + Component::ParentDir => { + if normalized.pop().is_none() { + return Err(ArtifactLocatorError::new( + "artifact path must not escape the workspace root", + )); + } + } + Component::Prefix(_) | Component::RootDir => { + return Err(ArtifactLocatorError::new( + "artifact path must be workspace-relative", + )); + } + } + } + + if normalized.is_empty() { + return Err(ArtifactLocatorError::new( + "artifact path must reference a file", + )); + } + + Ok(ResolvedArtifactPath { + workspace_relative: normalized.join("/"), + }) + } +} diff --git a/crates/wesley-holmes/src/application/mod.rs b/crates/wesley-holmes/src/application/mod.rs new file mode 100644 index 00000000..e1571707 --- /dev/null +++ b/crates/wesley-holmes/src/application/mod.rs @@ -0,0 +1,5 @@ +//! Application services for deterministic Holmes law-assurance orchestration. + +mod artifact_locator; + +pub use artifact_locator::{ArtifactLocatorError, ResolvedArtifactPath, WeslawArtifactLocator}; diff --git a/crates/wesley-holmes/src/domain/diagnostic.rs b/crates/wesley-holmes/src/domain/diagnostic.rs new file mode 100644 index 00000000..42dab9cb --- /dev/null +++ b/crates/wesley-holmes/src/domain/diagnostic.rs @@ -0,0 +1,96 @@ +//! Deterministic diagnostic envelopes for Holmes law assurance. + +use std::error::Error; +use std::fmt; + +use serde::{Deserialize, Serialize}; + +/// Stable diagnostic code emitted by Holmes validation and ingest paths. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] +#[serde(rename_all = "SCREAMING_SNAKE_CASE")] +pub enum HolmesDiagnosticCode { + /// A required `schemaVersion` field was absent or blank. + HlawSchemaVersionMissing, + /// A `schemaVersion` field was not valid semantic version syntax. + HlawSchemaVersionMalformed, + /// A `schemaVersion` major version is not supported by this Holmes build. + HlawSchemaVersionUnsupportedMajor, + /// A `schemaVersion` minor version is newer than this Holmes build accepts. + HlawSchemaVersionUnsupportedMinor, + /// An artifact path attempted to escape the workspace root. + HlawArtifactPathEscape, + /// A law evidence bundle was missing a required artifact reference. + HlawEvidenceBundleInvalid, + /// A requested artifact was unavailable through its port. + HlawArtifactUnavailable, +} + +/// Severity attached to a Holmes diagnostic. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] +#[serde(rename_all = "SCREAMING_SNAKE_CASE")] +pub enum HolmesSeverity { + /// A hard failure that prevents safe continuation. + Error, + /// A non-blocking issue that should be visible in reports. + Warning, + /// Informational context attached to a report. + Info, +} + +/// Structured diagnostic envelope shared by validation and ingest flows. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct HolmesDiagnostic { + /// Stable diagnostic code. + pub code: HolmesDiagnosticCode, + /// Diagnostic severity. + pub severity: HolmesSeverity, + /// Human-readable explanation. + pub message: String, + /// Optional artifact family associated with this diagnostic. + #[serde(skip_serializing_if = "Option::is_none")] + pub artifact_family: Option<String>, + /// Optional field path associated with this diagnostic. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub field_path: Option<String>, +} + +impl HolmesDiagnostic { + /// Create a new diagnostic envelope. + pub fn new( + code: HolmesDiagnosticCode, + severity: HolmesSeverity, + message: impl Into<String>, + ) -> Self { + Self { + code, + severity, + message: message.into(), + artifact_family: None, + field_path: None, + } + } + + /// Attach an artifact-family label. + pub fn for_family(mut self, family: impl Into<String>) -> Self { + self.artifact_family = Some(family.into()); + self + } + + /// Attach a field path. + pub fn at_field(mut self, field_path: impl Into<String>) -> Self { + self.field_path = Some(field_path.into()); + self + } +} + +impl fmt::Display for HolmesDiagnostic { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(formatter, "{:?}: {}", self.code, self.message) + } +} + +impl Error for HolmesDiagnostic {} + +/// Result alias for Holmes domain and port operations. +pub type HolmesResult<T> = Result<T, HolmesDiagnostic>; diff --git a/crates/wesley-holmes/src/domain/evidence.rs b/crates/wesley-holmes/src/domain/evidence.rs new file mode 100644 index 00000000..a2842e40 --- /dev/null +++ b/crates/wesley-holmes/src/domain/evidence.rs @@ -0,0 +1,119 @@ +//! Law evidence bundle model consumed by Holmes. + +use serde::{Deserialize, Serialize}; + +use super::diagnostic::{HolmesDiagnostic, HolmesDiagnosticCode, HolmesResult, HolmesSeverity}; + +/// Workspace-relative reference to a Wesley-published artifact. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ArtifactRef { + /// Workspace-relative artifact path. + pub path: String, + /// Optional artifact-local schema version. + #[serde(skip_serializing_if = "Option::is_none")] + pub schema_version: Option<String>, + /// Optional expected SHA-256 digest. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub sha256: Option<String>, +} + +impl ArtifactRef { + /// Create an artifact reference with only a path. 
+ pub fn new(path: impl Into<String>) -> Self { + Self { + path: path.into(), + schema_version: None, + sha256: None, + } + } + + fn is_blank(&self) -> bool { + self.path.trim().is_empty() + } +} + +/// Required artifact families in a Holmes law evidence bundle. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct LawEvidenceArtifacts { + /// Machine-readable law diff artifact. + pub law_diff: ArtifactRef, + /// Law coverage artifact for the active assurance profile. + pub law_coverage: ArtifactRef, + /// Capability model artifact derived from operation footprint law. + pub law_capabilities: ArtifactRef, + /// Contract bundle manifest artifact. + pub contract_bundle_manifest: ArtifactRef, + /// Optional active policy artifact. + #[serde(skip_serializing_if = "Option::is_none")] + pub policy: Option<ArtifactRef>, + /// Optional rendered report artifact. + #[serde(skip_serializing_if = "Option::is_none")] + pub report: Option<ArtifactRef>, + /// Optional witness artifact. + #[serde(skip_serializing_if = "Option::is_none")] + pub witness: Option<ArtifactRef>, +} + +/// Hash and source provenance for a law evidence bundle. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct BundleProvenance { + /// Canonical schema hash that the evidence was derived from. + pub schema_hash: String, + /// Canonical law hash that the evidence was derived from. + pub law_hash: String, + /// Optional policy hash active during evidence production. + #[serde(skip_serializing_if = "Option::is_none")] + pub policy_hash: Option<String>, + /// Canonical contract bundle hash. + pub bundle_hash: String, + /// Human-readable source label for the bundle. + pub source: String, +} + +/// Top-level Holmes law evidence bundle envelope. 
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct HolmesLawEvidenceBundle { + /// Evidence bundle schema version. 
+ pub schema_version: String, + /// Stable evidence bundle identifier. + pub bundle_id: String, + /// Required and optional artifacts that make up the bundle. + pub artifacts: LawEvidenceArtifacts, + /// Hash and source provenance. + pub provenance: BundleProvenance, +} + +impl HolmesLawEvidenceBundle { + /// Validate that all required artifact references are present. + pub fn validate_required_artifacts(&self) -> HolmesResult<()> { + let required = [ + ("artifacts.lawDiff", &self.artifacts.law_diff), + ("artifacts.lawCoverage", &self.artifacts.law_coverage), + ( + "artifacts.lawCapabilities", + &self.artifacts.law_capabilities, + ), + ( + "artifacts.contractBundleManifest", + &self.artifacts.contract_bundle_manifest, + ), + ]; + + for (field_path, artifact) in required { + if artifact.is_blank() { + return Err(HolmesDiagnostic::new( + HolmesDiagnosticCode::HlawEvidenceBundleInvalid, + HolmesSeverity::Error, + "law evidence bundle is missing a required artifact reference", + ) + .at_field(field_path)); + } + } + + Ok(()) + } +} diff --git a/crates/wesley-holmes/src/domain/mod.rs b/crates/wesley-holmes/src/domain/mod.rs new file mode 100644 index 00000000..a37a57dd --- /dev/null +++ b/crates/wesley-holmes/src/domain/mod.rs @@ -0,0 +1,13 @@ +//! Pure Holmes law-assurance domain model. +//! +//! Domain code owns data, deterministic validation, and diagnostics. It must +//! not import ambient filesystem, network, process, GitHub, MCP, or wall-clock +//! dependencies. 
+ +mod diagnostic; +mod evidence; +mod versioning; + +pub use diagnostic::{HolmesDiagnostic, HolmesDiagnosticCode, HolmesResult, HolmesSeverity}; +pub use evidence::{ArtifactRef, BundleProvenance, HolmesLawEvidenceBundle, LawEvidenceArtifacts}; +pub use versioning::{ArtifactFamily, ParsedSchemaVersion, VersionRegistry, VersionRequirement}; diff --git a/crates/wesley-holmes/src/domain/versioning.rs b/crates/wesley-holmes/src/domain/versioning.rs new file mode 100644 index 00000000..c096eed7 --- /dev/null +++ b/crates/wesley-holmes/src/domain/versioning.rs @@ -0,0 +1,217 @@ +//! Schema-version registry for Holmes artifact families. + +use std::collections::BTreeMap; + +use serde::{Deserialize, Serialize}; + +use super::diagnostic::{HolmesDiagnostic, HolmesDiagnosticCode, HolmesResult, HolmesSeverity}; + +/// Artifact families that Holmes accepts at its ingest boundary. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum ArtifactFamily { + /// Law evidence bundle envelope. + EvidenceBundle, + /// Assurance policy artifact. + Policy, + /// Rendered or structured assurance report artifact. + Report, + /// Audit witness artifact. + AuditWitness, + /// MCP response payload artifact. + McpResponse, + /// Agent summary payload artifact. + AgentSummary, + /// GitHub PR comment or review payload artifact. + GithubPayload, +} + +impl ArtifactFamily { + /// Return the stable artifact-family identifier. 
+ pub fn id(self) -> &'static str { + match self { + ArtifactFamily::EvidenceBundle => "evidence-bundle", + ArtifactFamily::Policy => "policy", + ArtifactFamily::Report => "report", + ArtifactFamily::AuditWitness => "audit-witness", + ArtifactFamily::McpResponse => "mcp-response", + ArtifactFamily::AgentSummary => "agent-summary", + ArtifactFamily::GithubPayload => "github-payload", + } + } + + fn all() -> [ArtifactFamily; 7] { + [ + ArtifactFamily::EvidenceBundle, + ArtifactFamily::Policy, + ArtifactFamily::Report, + ArtifactFamily::AuditWitness, + ArtifactFamily::McpResponse, + ArtifactFamily::AgentSummary, + ArtifactFamily::GithubPayload, + ] + } +} + +/// Parsed three-part semantic schema version. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ParsedSchemaVersion { + /// Major version. + pub major: u64, + /// Minor version. + pub minor: u64, + /// Patch version. + pub patch: u64, +} + +impl ParsedSchemaVersion { + /// Parse a strict `MAJOR.MINOR.PATCH` schema version. + pub fn parse(value: &str) -> HolmesResult<Self> { + let parts = value.split('.').collect::<Vec<_>>(); + if parts.len() != 3 || parts.iter().any(|part| part.is_empty()) { + return Err(malformed_version(value)); + } + + let parse_part = |part: &str| { + if !part.bytes().all(|byte| byte.is_ascii_digit()) { + return Err(malformed_version(value)); + } + part.parse::<u64>().map_err(|_| malformed_version(value)) + }; + + Ok(Self { + major: parse_part(parts[0])?, + minor: parse_part(parts[1])?, + patch: parse_part(parts[2])?, + }) + } +} + +/// Accepted version requirement for one artifact family. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct VersionRequirement { + /// Artifact family covered by the requirement. + pub family: ArtifactFamily, + /// Accepted major version. 
+ pub major: u64, + /// Highest accepted minor version for the accepted major. 
+ pub max_minor: u64, +} + +impl VersionRequirement { + /// Create a version requirement. + pub fn new(family: ArtifactFamily, major: u64, max_minor: u64) -> Self { + Self { + family, + major, + max_minor, + } + } +} + +/// Local registry of accepted artifact-family schema versions. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VersionRegistry { + requirements: BTreeMap<ArtifactFamily, VersionRequirement>, +} + +impl VersionRegistry { + /// Create a registry from explicit requirements. + pub fn new(requirements: impl IntoIterator<Item = VersionRequirement>) -> Self { + Self { + requirements: requirements + .into_iter() + .map(|requirement| (requirement.family, requirement)) + .collect(), + } + } + + /// Return the requirement for an artifact family. + pub fn requirement(&self, family: ArtifactFamily) -> Option<VersionRequirement> { + self.requirements.get(&family).copied() + } + + /// Validate a schema version for an artifact family. 
+ pub fn validate( + &self, + family: ArtifactFamily, + schema_version: Option<&str>, + ) -> HolmesResult<ParsedSchemaVersion> { + let Some(raw_version) = schema_version else { + return Err(missing_version(family)); + }; + + if raw_version.trim().is_empty() { + return Err(missing_version(family)); + } + + let parsed = ParsedSchemaVersion::parse(raw_version) + .map_err(|diagnostic| diagnostic.for_family(family.id()))?; + let requirement = self + .requirement(family) + .unwrap_or_else(|| VersionRequirement::new(family, parsed.major, parsed.minor)); + + if parsed.major != requirement.major { + return Err(HolmesDiagnostic::new( + HolmesDiagnosticCode::HlawSchemaVersionUnsupportedMajor, + HolmesSeverity::Error, + format!( + "unsupported {} schemaVersion major {}; expected {}", + family.id(), + parsed.major, + requirement.major + ), + ) + .for_family(family.id()) + .at_field("schemaVersion")); + } + + if parsed.minor > requirement.max_minor { + return Err(HolmesDiagnostic::new( + HolmesDiagnosticCode::HlawSchemaVersionUnsupportedMinor, + HolmesSeverity::Error, + format!( + "unsupported {} schemaVersion minor {}; maximum accepted minor is 
{}", + family.id(), + parsed.minor, + requirement.max_minor + ), + ) + .for_family(family.id()) + .at_field("schemaVersion")); + } + + Ok(parsed) + } +} + +impl Default for VersionRegistry { + fn default() -> Self { + Self::new( + ArtifactFamily::all() + .into_iter() + .map(|family| VersionRequirement::new(family, 1, 0)), + ) + } +} + +fn missing_version(family: ArtifactFamily) -> HolmesDiagnostic { + HolmesDiagnostic::new( + HolmesDiagnosticCode::HlawSchemaVersionMissing, + HolmesSeverity::Error, + format!("{} artifact is missing schemaVersion", family.id()), + ) + .for_family(family.id()) + .at_field("schemaVersion") +} + +fn malformed_version(value: &str) -> HolmesDiagnostic { + HolmesDiagnostic::new( + HolmesDiagnosticCode::HlawSchemaVersionMalformed, + HolmesSeverity::Error, + format!("schemaVersion must use MAJOR.MINOR.PATCH digits, got {value:?}"), + ) + .at_field("schemaVersion") +} diff --git a/crates/wesley-holmes/src/lib.rs b/crates/wesley-holmes/src/lib.rs new file mode 100644 index 00000000..b5506680 --- /dev/null +++ b/crates/wesley-holmes/src/lib.rs @@ -0,0 +1,28 @@ +#![deny(warnings)] +#![deny(missing_docs)] + +//! Rust Holmes law assurance foundation for Wesley. +//! +//! This crate hosts the new Holmes boundary that consumes Wesley-published law +//! evidence. The first implementation slice keeps the domain pure, exposes +//! deterministic ports, and validates artifact-family version envelopes without +//! adding public CLI commands. 
+ +pub mod adapters; +pub mod application; +pub mod domain; +pub mod ports; +pub mod reporting; + +pub use application::{ArtifactLocatorError, ResolvedArtifactPath, WeslawArtifactLocator}; +pub use domain::{ + ArtifactFamily, ArtifactRef, BundleProvenance, HolmesDiagnostic, HolmesDiagnosticCode, + HolmesLawEvidenceBundle, HolmesResult, HolmesSeverity, LawEvidenceArtifacts, + ParsedSchemaVersion, VersionRegistry, VersionRequirement, +}; +pub use ports::{ + ArtifactLoadPort, ArtifactWritePort, ClockPort, CommandIoPort, FilesystemPort, FixedClock, + GithubPublishPort, InMemoryArtifactStore, InMemoryMcpResourceRegistry, McpResourcePort, + PolicyLoadPort, RecordingCommandIo, RecordingGithubPublisher, ReportRenderPort, + StaticPolicyLoader, Timestamp, +}; diff --git a/crates/wesley-holmes/src/ports/mod.rs b/crates/wesley-holmes/src/ports/mod.rs new file mode 100644 index 00000000..73f971d4 --- /dev/null +++ b/crates/wesley-holmes/src/ports/mod.rs @@ -0,0 +1,261 @@ +//! Abstract Holmes side-effect ports and deterministic fakes. + +use std::collections::BTreeMap; + +use crate::domain::{ + ArtifactRef, HolmesDiagnostic, HolmesDiagnosticCode, HolmesResult, HolmesSeverity, +}; + +/// Deterministic timestamp value supplied through a clock port. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Timestamp { + /// Stable timestamp text. + pub value: String, +} + +impl Timestamp { + /// Create a timestamp from stable text. + pub fn new(value: impl Into<String>) -> Self { + Self { + value: value.into(), + } + } +} + +/// Port for deterministic time access. +pub trait ClockPort { + /// Return the current timestamp according to this clock. + fn now(&self) -> Timestamp; +} + +/// Clock implementation that always returns the same timestamp. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FixedClock { + now: Timestamp, +} + +impl FixedClock { + /// Create a fixed clock. 
+ pub fn new(now: Timestamp) -> Self { + Self { now } + } +} + +impl ClockPort for FixedClock { + fn now(&self) -> Timestamp { + self.now.clone() + } +} + +/// Port for loading artifact bytes. +pub trait ArtifactLoadPort { + /// Load an artifact reference. + fn read_artifact(&self, artifact: &ArtifactRef) -> HolmesResult<Vec<u8>>; +} + +/// Port for writing artifact bytes. +pub trait ArtifactWritePort { + /// Write artifact bytes to a workspace-relative path. + fn write_artifact(&mut self, path: &str, bytes: &[u8]) -> HolmesResult<()>; +} + +/// Port for workspace-local byte-oriented filesystem access. +pub trait FilesystemPort { + /// Read bytes from a workspace-relative file. + fn read_workspace_file(&self, path: &str) -> HolmesResult<Vec<u8>>; + + /// Write bytes to a workspace-relative file. + fn write_workspace_file(&mut self, path: &str, bytes: &[u8]) -> HolmesResult<()>; +} + +/// In-memory artifact store for deterministic tests. +#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct InMemoryArtifactStore { + artifacts: BTreeMap<String, Vec<u8>>, + writes: BTreeMap<String, Vec<u8>>, +} + +impl InMemoryArtifactStore { + /// Insert a readable artifact. + pub fn insert(&mut self, path: impl Into<String>, bytes: impl Into<Vec<u8>>) { + self.artifacts.insert(path.into(), bytes.into()); + } + + /// Return bytes written to a path. 
+ pub fn written(&self, path: &str) -> Option<&[u8]> { + self.writes.get(path).map(Vec::as_slice) + } +} + +impl ArtifactLoadPort for InMemoryArtifactStore { + fn read_artifact(&self, artifact: &ArtifactRef) -> HolmesResult<Vec<u8>> { + self.artifacts.get(&artifact.path).cloned().ok_or_else(|| { + HolmesDiagnostic::new( + HolmesDiagnosticCode::HlawArtifactUnavailable, + HolmesSeverity::Error, + format!("artifact {:?} is unavailable", artifact.path), + ) + .at_field("path") + }) + } +} + +impl ArtifactWritePort for InMemoryArtifactStore { + fn write_artifact(&mut self, path: &str, bytes: &[u8]) -> HolmesResult<()> { + self.writes.insert(path.to_owned(), bytes.to_vec()); + Ok(()) + } +} + +impl FilesystemPort for InMemoryArtifactStore { + fn read_workspace_file(&self, path: &str) -> HolmesResult<Vec<u8>> { + self.artifacts.get(path).cloned().ok_or_else(|| { + HolmesDiagnostic::new( + HolmesDiagnosticCode::HlawArtifactUnavailable, + HolmesSeverity::Error, + format!("workspace file {path:?} is unavailable"), + ) + .at_field("path") + }) + } + + fn write_workspace_file(&mut self, path: &str, bytes: &[u8]) -> HolmesResult<()> { + self.writes.insert(path.to_owned(), bytes.to_vec()); + Ok(()) + } +} + +/// Port for publishing GitHub PR comments or review summaries. +pub trait GithubPublishPort { + /// Publish a PR-facing comment body. + fn publish_pr_comment(&mut self, body: &str) -> HolmesResult<()>; +} + +/// Recording GitHub publisher fake. +#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct RecordingGithubPublisher { + comments: Vec<String>, +} + +impl RecordingGithubPublisher { + /// Return recorded comment bodies. + pub fn comments(&self) -> &[String] { + &self.comments + } +} + +impl GithubPublishPort for RecordingGithubPublisher { + fn publish_pr_comment(&mut self, body: &str) -> HolmesResult<()> { + self.comments.push(body.to_owned()); + Ok(()) + } +} + +/// Port for registering MCP resources. 
+pub trait McpResourcePort { + /// Register a named MCP resource payload. 
+ fn put_resource(&mut self, resource_id: &str, bytes: &[u8]) -> HolmesResult<()>; +} + +/// In-memory MCP resource registry fake. +#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct InMemoryMcpResourceRegistry { + resources: BTreeMap<String, Vec<u8>>, +} + +impl InMemoryMcpResourceRegistry { + /// Return bytes for a registered resource. + pub fn resource(&self, resource_id: &str) -> Option<&[u8]> { + self.resources.get(resource_id).map(Vec::as_slice) + } +} + +impl McpResourcePort for InMemoryMcpResourceRegistry { + fn put_resource(&mut self, resource_id: &str, bytes: &[u8]) -> HolmesResult<()> { + self.resources + .insert(resource_id.to_owned(), bytes.to_vec()); + Ok(()) + } +} + +/// Port for loading active policy bytes. +pub trait PolicyLoadPort { + /// Load policy bytes from a workspace-relative path. + fn load_policy(&self, path: &str) -> HolmesResult<Vec<u8>>; +} + +/// Static policy loader fake. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct StaticPolicyLoader { + path: String, + bytes: Vec<u8>, +} + +impl StaticPolicyLoader { + /// Create a static policy loader. + pub fn new(path: impl Into<String>, bytes: impl Into<Vec<u8>>) -> Self { + Self { + path: path.into(), + bytes: bytes.into(), + } + } +} + +impl PolicyLoadPort for StaticPolicyLoader { + fn load_policy(&self, path: &str) -> HolmesResult<Vec<u8>> { + if path == self.path { + Ok(self.bytes.clone()) + } else { + Err(HolmesDiagnostic::new( + HolmesDiagnosticCode::HlawArtifactUnavailable, + HolmesSeverity::Error, + format!("policy artifact {path:?} is unavailable"), + ) + .at_field("path")) + } + } +} + +/// Port for rendering report payloads. +pub trait ReportRenderPort { + /// Render a report payload. + fn render_report(&self, body: &str) -> HolmesResult<String>; +} + +/// Port for command standard output and error streams. +pub trait CommandIoPort { + /// Write standard output text. + fn stdout(&mut self, text: &str); + + /// Write standard error text. 
+ fn stderr(&mut self, text: &str); +} + +/// Command I/O fake that records output streams. 
+#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct RecordingCommandIo { + stdout: Vec, + stderr: Vec, +} + +impl RecordingCommandIo { + /// Return recorded standard output writes. + pub fn stdout_lines(&self) -> &[String] { + &self.stdout + } + + /// Return recorded standard error writes. + pub fn stderr_lines(&self) -> &[String] { + &self.stderr + } +} + +impl CommandIoPort for RecordingCommandIo { + fn stdout(&mut self, text: &str) { + self.stdout.push(text.to_owned()); + } + + fn stderr(&mut self, text: &str) { + self.stderr.push(text.to_owned()); + } +} diff --git a/crates/wesley-holmes/src/reporting/mod.rs b/crates/wesley-holmes/src/reporting/mod.rs new file mode 100644 index 00000000..b855b09c --- /dev/null +++ b/crates/wesley-holmes/src/reporting/mod.rs @@ -0,0 +1,4 @@ +//! Reporting DTO and renderer boundary for future Holmes outputs. +//! +//! This namespace intentionally starts empty while the first implementation +//! slice establishes diagnostic, evidence, and versioning primitives. 
diff --git a/crates/wesley-holmes/tests/architecture.rs b/crates/wesley-holmes/tests/architecture.rs new file mode 100644 index 00000000..c4708ee0 --- /dev/null +++ b/crates/wesley-holmes/tests/architecture.rs @@ -0,0 +1,49 @@ +use std::fs; +use std::path::{Path, PathBuf}; + +#[test] +fn domain_sources_do_not_import_ambient_adapters() { + let domain_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("src/domain"); + let mut files = Vec::new(); + collect_rs_files(&domain_dir, &mut files); + + let forbidden = [ + ("use std::fs", "filesystem imports"), + ("std::fs::", "filesystem references"), + ("use std::net", "network imports"), + ("std::net::", "network references"), + ("use std::process", "process imports"), + ("std::process::", "process references"), + ("SystemTime", "wall-clock access"), + ("Instant::now", "wall-clock access"), + ("chrono::Utc::now", "wall-clock access"), + ("reqwest", "HTTP client dependency"), + ("octocrab", "GitHub client dependency"), + ]; + + for file in files { + let source = fs::read_to_string(&file) + .unwrap_or_else(|error| panic!("failed to read {}: {error}", file.display())); + for (token, reason) in forbidden { + assert!( + !source.contains(token), + "domain file {} contains forbidden {reason}: {token}", + file.display() + ); + } + } +} + +fn collect_rs_files(directory: &Path, files: &mut Vec) { + for entry in fs::read_dir(directory) + .unwrap_or_else(|error| panic!("failed to read {}: {error}", directory.display())) + { + let entry = entry.expect("failed to read directory entry"); + let path = entry.path(); + if path.is_dir() { + collect_rs_files(&path, files); + } else if path.extension().and_then(|extension| extension.to_str()) == Some("rs") { + files.push(path); + } + } +} diff --git a/crates/wesley-holmes/tests/foundation.rs b/crates/wesley-holmes/tests/foundation.rs new file mode 100644 index 00000000..d4e9a562 --- /dev/null +++ b/crates/wesley-holmes/tests/foundation.rs @@ -0,0 +1,178 @@ +use wesley_holmes::{ + ArtifactFamily, 
ArtifactLoadPort, ArtifactRef, ArtifactWritePort, BundleProvenance, ClockPort, + CommandIoPort, FilesystemPort, FixedClock, HolmesDiagnosticCode, HolmesLawEvidenceBundle, + InMemoryArtifactStore, LawEvidenceArtifacts, McpResourcePort, RecordingCommandIo, Timestamp, + VersionRegistry, WeslawArtifactLocator, +}; + +#[test] +fn fixed_clock_returns_deterministic_timestamp() { + let clock = FixedClock::new(Timestamp::new("2026-05-26T00:00:00Z")); + + assert_eq!(clock.now(), Timestamp::new("2026-05-26T00:00:00Z")); +} + +#[test] +fn in_memory_artifact_store_reads_and_writes() { + let mut store = InMemoryArtifactStore::default(); + store.insert("evidence/law-diff.json", b"diff".to_vec()); + + let bytes = store + .read_artifact(&ArtifactRef::new("evidence/law-diff.json")) + .expect("artifact should be readable"); + store + .write_artifact("reports/summary.md", b"summary") + .expect("artifact should be writable"); + + assert_eq!(bytes, b"diff"); + assert_eq!(store.written("reports/summary.md"), Some(&b"summary"[..])); +} + +#[test] +fn filesystem_port_uses_workspace_relative_bytes_without_real_filesystem() { + let mut store = InMemoryArtifactStore::default(); + store.insert("policy/release.json", b"policy".to_vec()); + + let bytes = store + .read_workspace_file("policy/release.json") + .expect("workspace file should be readable"); + store + .write_workspace_file("reports/holmes.md", b"report") + .expect("workspace file should be writable"); + + assert_eq!(bytes, b"policy"); + assert_eq!(store.written("reports/holmes.md"), Some(&b"report"[..])); +} + +#[test] +fn evidence_bundle_requires_core_artifacts() { + let bundle = evidence_bundle_with_law_diff_path(""); + + let diagnostic = bundle + .validate_required_artifacts() + .expect_err("blank law diff path should fail validation"); + + assert_eq!( + diagnostic.code, + HolmesDiagnosticCode::HlawEvidenceBundleInvalid + ); + assert_eq!(diagnostic.field_path.as_deref(), Some("artifacts.lawDiff")); +} + +#[test] +fn 
artifact_locator_normalizes_workspace_relative_paths() { + let locator = WeslawArtifactLocator::new("/workspace"); + + let resolved = locator + .resolve("./evidence/../evidence/law-diff.json") + .expect("path should normalize inside workspace"); + + assert_eq!(resolved.workspace_relative, "evidence/law-diff.json"); +} + +#[test] +fn artifact_locator_rejects_escape_and_absolute_paths() { + let locator = WeslawArtifactLocator::new("/workspace"); + + let escape = locator + .resolve("../outside.json") + .expect_err("parent traversal should be rejected"); + let absolute = locator + .resolve("/tmp/outside.json") + .expect_err("absolute paths should be rejected"); + + assert!(escape.message.contains("escape")); + assert!(absolute.message.contains("workspace-relative")); +} + +#[test] +fn version_registry_accepts_current_versions() { + let registry = VersionRegistry::default(); + + for family in [ + ArtifactFamily::EvidenceBundle, + ArtifactFamily::Policy, + ArtifactFamily::Report, + ArtifactFamily::AuditWitness, + ArtifactFamily::McpResponse, + ArtifactFamily::AgentSummary, + ArtifactFamily::GithubPayload, + ] { + let parsed = registry + .validate(family, Some("1.0.0")) + .expect("current schema version should be accepted"); + assert_eq!(parsed.major, 1); + assert_eq!(parsed.minor, 0); + assert_eq!(parsed.patch, 0); + } +} + +#[test] +fn version_registry_rejects_missing_malformed_and_unsupported_versions() { + let registry = VersionRegistry::default(); + + let missing = registry + .validate(ArtifactFamily::EvidenceBundle, None) + .expect_err("missing schema version should fail"); + let malformed = registry + .validate(ArtifactFamily::EvidenceBundle, Some("v1.0.0")) + .expect_err("malformed schema version should fail"); + let unsupported_major = registry + .validate(ArtifactFamily::EvidenceBundle, Some("2.0.0")) + .expect_err("unsupported major should fail"); + let unsupported_minor = registry + .validate(ArtifactFamily::EvidenceBundle, Some("1.1.0")) + 
.expect_err("unsupported minor should fail"); + + assert_eq!(missing.code, HolmesDiagnosticCode::HlawSchemaVersionMissing); + assert_eq!( + malformed.code, + HolmesDiagnosticCode::HlawSchemaVersionMalformed + ); + assert_eq!( + unsupported_major.code, + HolmesDiagnosticCode::HlawSchemaVersionUnsupportedMajor + ); + assert_eq!( + unsupported_minor.code, + HolmesDiagnosticCode::HlawSchemaVersionUnsupportedMinor + ); +} + +#[test] +fn recording_ports_capture_outputs() { + let mut io = RecordingCommandIo::default(); + let mut mcp = wesley_holmes::InMemoryMcpResourceRegistry::default(); + + io.stdout("ready"); + io.stderr("diagnostic"); + mcp.put_resource("holmes://summary", b"payload") + .expect("MCP resource should record"); + + assert_eq!(io.stdout_lines(), &["ready".to_owned()]); + assert_eq!(io.stderr_lines(), &["diagnostic".to_owned()]); + assert_eq!(mcp.resource("holmes://summary"), Some(&b"payload"[..])); +} + +fn evidence_bundle_with_law_diff_path(path: &str) -> HolmesLawEvidenceBundle { + HolmesLawEvidenceBundle { + schema_version: "1.0.0".to_owned(), + bundle_id: "bundle-001".to_owned(), + artifacts: LawEvidenceArtifacts { + law_diff: ArtifactRef::new(path), + law_coverage: ArtifactRef::new("evidence/law-coverage.json"), + law_capabilities: ArtifactRef::new("evidence/law-capabilities.json"), + contract_bundle_manifest: ArtifactRef::new("evidence/bundle-manifest.json"), + policy: None, + report: None, + witness: None, + }, + provenance: BundleProvenance { + schema_hash: "sha256:schema".to_owned(), + law_hash: "sha256:law".to_owned(), + policy_hash: None, + bundle_hash: "sha256:bundle".to_owned(), + source: "test".to_owned(), + }, + } +} diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index ddac4083..e116c493 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -34,7 +34,11 @@ The repo is now split into three practical layers: exposes schema lowering, schema hashing, schema operation listing, schema diffing, Rust/TypeScript emission, 
operation selection analysis, and directive argument extraction from Rust crates. -3. **Non-compiler JavaScript surfaces**: `packages/` now contains Holmes +3. **Rust Holmes assurance foundation**: `crates/wesley-holmes` is the new + law-assurance foundation for Holmes evidence, versioning, ports, and future + reports. It consumes Wesley-published artifacts and does not expose product + CLI commands yet. +4. **Non-compiler JavaScript surfaces**: `packages/` now contains Holmes assurance tooling and browser/Bun/Deno host smoke experiments only. These packages are not release authority, compiler authority, or product entrypoints. @@ -56,6 +60,7 @@ flowchart LR Core[wesley-core] RustEmitter[wesley-emit-rust] TsEmitter[wesley-emit-typescript] + RustHolmes[wesley-holmes] NativeCli[wesley-cli] Xtask[xtask] end @@ -87,11 +92,12 @@ flowchart LR NativeCli --> TsEmitter RustEmitter --> Core TsEmitter --> Core + RustHolmes --> Fixtures Xtask --> NativeCli Xtask --> Core Core --> Fixtures - Holmes --> Fixtures + Holmes -. transitional assurance UI .-> RustHolmes Scripts --> Docs Scripts -. package hygiene .-> JS @@ -114,8 +120,9 @@ semantics. | `crates/wesley-cli/` | Native Rust `wesley` binary for schema deltas, schema hashes, Rust/TypeScript artifacts, and operation facts. | | `crates/wesley-emit-rust/` | Rust projection crate. Builds a Rust item/type AST from L1 IR and `SchemaOperation` data, then prints deterministic model and operation declarations. | | `crates/wesley-emit-typescript/` | Rust TypeScript projection crate. Builds a TypeScript declaration AST from L1 IR and `SchemaOperation` data, then prints deterministic model and operation declarations. | +| `crates/wesley-holmes/` | Rust Holmes law-assurance foundation. Defines pure domain models, deterministic ports/fakes, evidence bundle validation, artifact path resolution, and version diagnostics without exposing public CLI commands yet. 
| | `xtask/` | Rust repository automation: docs checks, tests, native preflight, release check, and package hygiene bridge. | -| `packages/wesley-holmes/` | Self-contained assurance, verification, counterfactual, Holmes/Moriarty-era tooling outside compiler authority. | +| `packages/wesley-holmes/` | Existing JavaScript Holmes surface outside compiler authority while the Rust assurance foundation grows behind it. | | `packages/wesley-host-browser/`, `wesley-host-bun/`, `wesley-host-deno/` | External host smoke experiments pending deletion or externalization. | | `schemas/` | JSON schemas and generic directive/schema assets used by tooling and tests. | | `test/fixtures/` | GraphQL fixtures, Rust L1 goldens, package examples, and reference schemas. | diff --git a/docs/BEARING.md b/docs/BEARING.md index 78b9404d..a08b4e73 100644 --- a/docs/BEARING.md +++ b/docs/BEARING.md @@ -430,10 +430,9 @@ closeout. ## Next Target -The immediate focus is **Holmes `weslaw` assurance planning PR review**: the -50-slice PRD/test-plan campaign is complete, and the next branch should begin -Rust Holmes assurance implementation from the evidence and validation core -before publishers or branch-protection gates. +The immediate focus is **Rust Holmes `weslaw` assurance implementation**: the +50-slice PRD/test-plan campaign is complete, and this branch is now building +the evidence and validation core before publishers or branch-protection gates. Current evidence still includes complete v0.0.5 publication proof, Rust L1 fixtures for directive-heavy SDL, schema extensions, nested list type @@ -447,11 +446,18 @@ without pinning Wesley to legacy Node. The `0019` packet names the semantic law architecture that lets Wesley compile meaning alongside shape without smuggling runtime ownership into the base compiler. -The implementation budget is **90 slices**. 
The first implementation PR should -take `HIMP-001` through `HIMP-015`, because those slices establish the Rust -Holmes assurance shell, evidence bundle, artifact locator, schema-version -validation, and first ingest ports before any publisher or branch-protection -surface exists. +The implementation budget is **90 slices**. Status: **10 / 90 slices closed**. +Closed implementation slices now cover `HIMP-001` through `HIMP-010`: the +workspace-local Rust Holmes assurance crate shell, domain dependency-boundary +tests, deterministic port traits and fakes, the first diagnostic taxonomy, the +workspace preflight hook, implementation-boundary docs, the typed +`HolmesLawEvidenceBundle`, safe artifact path normalization, accepted +artifact-family version registry, and schema-version diagnostics. + +The first implementation PR should take `HIMP-001` through `HIMP-015`, because +those slices establish the Rust Holmes assurance shell, evidence bundle, +artifact locator, schema-version validation, and first ingest ports before any +publisher or branch-protection surface exists. Every implementation slice below references the completed `0020` PRD/test-plan artifact it implements. diff --git a/docs/GUIDE.md b/docs/GUIDE.md index 8239d1b2..b855bbd3 100644 --- a/docs/GUIDE.md +++ b/docs/GUIDE.md @@ -68,7 +68,9 @@ The direct replacements are `wesley schema lower`, `wesley schema hash`, `wesley law rebind`, `wesley law capabilities`, `wesley law coverage`, `wesley doctor`, and `wesley emit typescript` or `wesley emit rust`. Zod and certificate commands are no longer generic compiler-front-door work. -Holmes-family commands live under `@wesley/holmes`. +Holmes-family commands still live under `@wesley/holmes`; the Rust foundation +for future Holmes law-assurance ingestion lives in `crates/wesley-holmes` and +does not expose public CLI commands yet. Zod output is no longer treated as core Wesley retirement work. 
Reintroduce it through an external target module or package when a consumer needs JavaScript diff --git a/docs/design/README.md b/docs/design/README.md index 83eef14e..1c186330 100644 --- a/docs/design/README.md +++ b/docs/design/README.md @@ -42,7 +42,9 @@ Current packets: [generator JS deletion audit](./0017-rust-native-front-door-and-node-retirement/GENERATOR_JS_DELETION_AUDIT.md), ending with the [final closeout](./0017-rust-native-front-door-and-node-retirement/FINAL_CLOSEOUT.md) -- [`0018`](./0018-holmes-assurance-hexagon/holmes-assurance-hexagon.md): Holmes assurance hexagon redesign with CLI, API, MCP, and reporting adapters +- [`0018`](./0018-holmes-assurance-hexagon/holmes-assurance-hexagon.md): + Holmes assurance hexagon redesign with CLI, API, MCP, and reporting adapters, + now backed by the initial `crates/wesley-holmes` Rust foundation - [`0019`](./0019-weslaw-semantic-law-ir/weslaw-semantic-law-ir.md): `weslaw` semantic Law IR for contract bundles, strict binding, canonical law hashes, semantic diffs, and deferred SDL+ syntax, including the v1 From 1b33fa3a3ad5efdd5bb57a29f32a160d8f86873d Mon Sep 17 00:00:00 2001 From: James Ross Date: Tue, 26 May 2026 18:36:25 -0700 Subject: [PATCH 9/9] fix(holmes): close assurance review gaps --- CHANGELOG.md | 7 ++ Cargo.lock | 1 - crates/wesley-holmes/Cargo.toml | 3 - crates/wesley-holmes/README.md | 5 +- .../src/application/artifact_locator.rs | 65 +++++++------ crates/wesley-holmes/src/application/mod.rs | 2 +- crates/wesley-holmes/src/domain/diagnostic.rs | 4 + crates/wesley-holmes/src/domain/versioning.rs | 18 +++- crates/wesley-holmes/src/lib.rs | 10 +- crates/wesley-holmes/src/ports/mod.rs | 29 +++++- crates/wesley-holmes/tests/foundation.rs | 92 ++++++++++++++++++- 11 files changed, 188 insertions(+), 48 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1506de5b..7d74a3db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,13 @@ The format is based on Keep a Changelog, and this project 
adheres to Semantic Ve ### Fixed +- **Rust Holmes assurance review fixes**: The new Holmes artifact locator now + returns stable Holmes diagnostics for invalid and escaping paths, rejects + platform-specific backslash and drive-path input before normalization, the + schema-version registry now fails closed when a family requirement is absent, + semantic-version parsing rejects leading-zero identifiers, in-memory port + writes are readable through the same fake store, and the crate metadata/docs + no longer point at nonexistent or unpublished documentation. - **`weslaw` semantic diff review fixes**: Law diffs now classify existing channel and invariant law modifications as modification events instead of additions, emit registry/tag/schema-hash events so changed `lawHash` values diff --git a/Cargo.lock b/Cargo.lock index 98e35556..fde4aff7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1566,7 +1566,6 @@ name = "wesley-holmes" version = "0.0.5" dependencies = [ "serde", - "serde_json", ] [[package]] diff --git a/crates/wesley-holmes/Cargo.toml b/crates/wesley-holmes/Cargo.toml index e95604b0..bc28c7b3 100644 --- a/crates/wesley-holmes/Cargo.toml +++ b/crates/wesley-holmes/Cargo.toml @@ -6,11 +6,8 @@ description = "Rust Holmes law assurance foundation for Wesley semantic evidence license = "MIT" repository = "https://github.com/flyingrobots/wesley" homepage = "https://github.com/flyingrobots/wesley" -documentation = "https://docs.rs/wesley-holmes" readme = "README.md" publish = false [dependencies] serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" - diff --git a/crates/wesley-holmes/README.md b/crates/wesley-holmes/README.md index 97de26e5..a022832c 100644 --- a/crates/wesley-holmes/README.md +++ b/crates/wesley-holmes/README.md @@ -9,8 +9,8 @@ API, and MCP interfaces. This crate is intentionally not published yet. 
It is a workspace implementation crate for the Holmes redesign described in the Wesley design packet: -- [Holmes As Law Assurance Consumer](https://github.com/flyingrobots/wesley/blob/main/docs/design/0020-holmes-law-assurance-consumer.md) -- [Holmes End-to-End](https://github.com/flyingrobots/wesley/blob/main/docs/design/0018-holmes-end-to-end.md) +- [Holmes `weslaw` Assurance PRD/Test Plan](https://github.com/flyingrobots/wesley/blob/main/docs/design/0020-holmes-weslaw-assurance-prd-test-plan/holmes-weslaw-assurance-prd-test-plan.md) +- [Holmes Assurance Hexagon](https://github.com/flyingrobots/wesley/blob/main/docs/design/0018-holmes-assurance-hexagon/holmes-assurance-hexagon.md) ## Boundary @@ -29,4 +29,3 @@ The crate follows the planned hexagonal boundary: The current slice establishes the foundation only. No public Holmes CLI command is exposed from Wesley yet. - diff --git a/crates/wesley-holmes/src/application/artifact_locator.rs b/crates/wesley-holmes/src/application/artifact_locator.rs index 24c30bfa..039f7f52 100644 --- a/crates/wesley-holmes/src/application/artifact_locator.rs +++ b/crates/wesley-holmes/src/application/artifact_locator.rs @@ -2,21 +2,7 @@ use std::path::{Component, Path}; -/// Error returned when an artifact path cannot be normalized safely. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ArtifactLocatorError { - /// Human-readable explanation of the failed path normalization. - pub message: String, -} - -impl ArtifactLocatorError { - /// Build a new locator error. - pub fn new(message: impl Into) -> Self { - Self { - message: message.into(), - } - } -} +use crate::domain::{HolmesDiagnostic, HolmesDiagnosticCode, HolmesResult, HolmesSeverity}; /// A normalized artifact path that stays inside the configured workspace root. #[derive(Debug, Clone, PartialEq, Eq)] @@ -50,16 +36,22 @@ impl WeslawArtifactLocator { /// prefixes, empty paths, and `..` components that would escape the /// workspace root. 
It does not perform symlink or filesystem /// canonicalization. - pub fn resolve(&self, path: &str) -> Result { + pub fn resolve(&self, path: &str) -> HolmesResult { if path.trim().is_empty() { - return Err(ArtifactLocatorError::new("artifact path must not be empty")); + return Err(invalid_path("artifact path must not be empty")); + } + + if path.contains('\\') { + return Err(path_escape("artifact path must use `/` separators")); + } + + if looks_like_windows_drive_path(path) { + return Err(path_escape("artifact path must be workspace-relative")); } let path = Path::new(path); if path.is_absolute() { - return Err(ArtifactLocatorError::new( - "artifact path must be workspace-relative", - )); + return Err(path_escape("artifact path must be workspace-relative")); } let mut normalized = Vec::new(); @@ -71,23 +63,19 @@ impl WeslawArtifactLocator { } Component::ParentDir => { if normalized.pop().is_none() { - return Err(ArtifactLocatorError::new( + return Err(path_escape( "artifact path must not escape the workspace root", )); } } Component::Prefix(_) | Component::RootDir => { - return Err(ArtifactLocatorError::new( - "artifact path must be workspace-relative", - )); + return Err(path_escape("artifact path must be workspace-relative")); } } } if normalized.is_empty() { - return Err(ArtifactLocatorError::new( - "artifact path must reference a file", - )); + return Err(invalid_path("artifact path must reference a file")); } Ok(ResolvedArtifactPath { @@ -95,3 +83,26 @@ impl WeslawArtifactLocator { }) } } + +fn invalid_path(message: impl Into) -> HolmesDiagnostic { + HolmesDiagnostic::new( + HolmesDiagnosticCode::HlawArtifactPathInvalid, + HolmesSeverity::Error, + message, + ) + .at_field("path") +} + +fn path_escape(message: impl Into) -> HolmesDiagnostic { + HolmesDiagnostic::new( + HolmesDiagnosticCode::HlawArtifactPathEscape, + HolmesSeverity::Error, + message, + ) + .at_field("path") +} + +fn looks_like_windows_drive_path(path: &str) -> bool { + let bytes = 
path.as_bytes(); + bytes.len() >= 2 && bytes[0].is_ascii_alphabetic() && bytes[1] == b':' +} diff --git a/crates/wesley-holmes/src/application/mod.rs b/crates/wesley-holmes/src/application/mod.rs index e1571707..c212ab14 100644 --- a/crates/wesley-holmes/src/application/mod.rs +++ b/crates/wesley-holmes/src/application/mod.rs @@ -2,4 +2,4 @@ mod artifact_locator; -pub use artifact_locator::{ArtifactLocatorError, ResolvedArtifactPath, WeslawArtifactLocator}; +pub use artifact_locator::{ResolvedArtifactPath, WeslawArtifactLocator}; diff --git a/crates/wesley-holmes/src/domain/diagnostic.rs b/crates/wesley-holmes/src/domain/diagnostic.rs index 42dab9cb..c57b37be 100644 --- a/crates/wesley-holmes/src/domain/diagnostic.rs +++ b/crates/wesley-holmes/src/domain/diagnostic.rs @@ -17,8 +17,12 @@ pub enum HolmesDiagnosticCode { HlawSchemaVersionUnsupportedMajor, /// A `schemaVersion` minor version is newer than this Holmes build accepts. HlawSchemaVersionUnsupportedMinor, + /// No local version requirement was configured for an artifact family. + HlawSchemaVersionRequirementMissing, /// An artifact path attempted to escape the workspace root. HlawArtifactPathEscape, + /// An artifact path was malformed before resolution. + HlawArtifactPathInvalid, /// A law evidence bundle was missing a required artifact reference. HlawEvidenceBundleInvalid, /// A requested artifact was unavailable through its port. 
diff --git a/crates/wesley-holmes/src/domain/versioning.rs b/crates/wesley-holmes/src/domain/versioning.rs index c096eed7..9e21d66f 100644 --- a/crates/wesley-holmes/src/domain/versioning.rs +++ b/crates/wesley-holmes/src/domain/versioning.rs @@ -77,6 +77,9 @@ impl ParsedSchemaVersion { if !part.bytes().all(|byte| byte.is_ascii_digit()) { return Err(malformed_version(value)); } + if part.len() > 1 && part.starts_with('0') { + return Err(malformed_version(value)); + } part.parse::().map_err(|_| malformed_version(value)) }; @@ -151,7 +154,7 @@ impl VersionRegistry { .map_err(|diagnostic| diagnostic.for_family(family.id()))?; let requirement = self .requirement(family) - .unwrap_or_else(|| VersionRequirement::new(family, parsed.major, parsed.minor)); + .ok_or_else(|| missing_requirement(family))?; if parsed.major != requirement.major { return Err(HolmesDiagnostic::new( @@ -215,3 +218,16 @@ fn malformed_version(value: &str) -> HolmesDiagnostic { ) .at_field("schemaVersion") } + +fn missing_requirement(family: ArtifactFamily) -> HolmesDiagnostic { + HolmesDiagnostic::new( + HolmesDiagnosticCode::HlawSchemaVersionRequirementMissing, + HolmesSeverity::Error, + format!( + "no schemaVersion requirement is configured for {}", + family.id() + ), + ) + .for_family(family.id()) + .at_field("schemaVersion") +} diff --git a/crates/wesley-holmes/src/lib.rs b/crates/wesley-holmes/src/lib.rs index b5506680..501b7070 100644 --- a/crates/wesley-holmes/src/lib.rs +++ b/crates/wesley-holmes/src/lib.rs @@ -14,15 +14,15 @@ pub mod domain; pub mod ports; pub mod reporting; -pub use application::{ArtifactLocatorError, ResolvedArtifactPath, WeslawArtifactLocator}; +pub use application::{ResolvedArtifactPath, WeslawArtifactLocator}; pub use domain::{ ArtifactFamily, ArtifactRef, BundleProvenance, HolmesDiagnostic, HolmesDiagnosticCode, HolmesLawEvidenceBundle, HolmesResult, HolmesSeverity, LawEvidenceArtifacts, ParsedSchemaVersion, VersionRegistry, VersionRequirement, }; pub use ports::{ - 
ArtifactLoadPort, ArtifactWritePort, ClockPort, CommandIoPort, FilesystemPort, FixedClock, - GithubPublishPort, InMemoryArtifactStore, InMemoryMcpResourceRegistry, McpResourcePort, - PolicyLoadPort, RecordingCommandIo, RecordingGithubPublisher, ReportRenderPort, - StaticPolicyLoader, Timestamp, + ArtifactLoadPort, ArtifactWritePort, ClockPort, CommandIoPort, EchoReportRenderer, + FilesystemPort, FixedClock, GithubPublishPort, InMemoryArtifactStore, + InMemoryMcpResourceRegistry, McpResourcePort, PolicyLoadPort, RecordingCommandIo, + RecordingGithubPublisher, ReportRenderPort, StaticPolicyLoader, Timestamp, }; diff --git a/crates/wesley-holmes/src/ports/mod.rs b/crates/wesley-holmes/src/ports/mod.rs index 73f971d4..0d3453b5 100644 --- a/crates/wesley-holmes/src/ports/mod.rs +++ b/crates/wesley-holmes/src/ports/mod.rs @@ -102,7 +102,9 @@ impl ArtifactLoadPort for InMemoryArtifactStore { impl ArtifactWritePort for InMemoryArtifactStore { fn write_artifact(&mut self, path: &str, bytes: &[u8]) -> HolmesResult<()> { - self.writes.insert(path.to_owned(), bytes.to_vec()); + let data = bytes.to_vec(); + self.writes.insert(path.to_owned(), data.clone()); + self.artifacts.insert(path.to_owned(), data); Ok(()) } } @@ -120,7 +122,9 @@ impl FilesystemPort for InMemoryArtifactStore { } fn write_workspace_file(&mut self, path: &str, bytes: &[u8]) -> HolmesResult<()> { - self.writes.insert(path.to_owned(), bytes.to_vec()); + let data = bytes.to_vec(); + self.writes.insert(path.to_owned(), data.clone()); + self.artifacts.insert(path.to_owned(), data); Ok(()) } } @@ -222,6 +226,27 @@ pub trait ReportRenderPort { fn render_report(&self, body: &str) -> HolmesResult; } +/// Deterministic report renderer fake that prefixes report bodies. +#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct EchoReportRenderer { + prefix: String, +} + +impl EchoReportRenderer { + /// Create a report renderer with a static prefix. 
+ pub fn new(prefix: impl Into) -> Self { + Self { + prefix: prefix.into(), + } + } +} + +impl ReportRenderPort for EchoReportRenderer { + fn render_report(&self, body: &str) -> HolmesResult { + Ok(format!("{}{body}", self.prefix)) + } +} + /// Port for command standard output and error streams. pub trait CommandIoPort { /// Write standard output text. diff --git a/crates/wesley-holmes/tests/foundation.rs b/crates/wesley-holmes/tests/foundation.rs index d4e9a562..f8991eb8 100644 --- a/crates/wesley-holmes/tests/foundation.rs +++ b/crates/wesley-holmes/tests/foundation.rs @@ -1,8 +1,8 @@ use wesley_holmes::{ ArtifactFamily, ArtifactLoadPort, ArtifactRef, ArtifactWritePort, BundleProvenance, ClockPort, - CommandIoPort, FilesystemPort, FixedClock, HolmesDiagnosticCode, HolmesLawEvidenceBundle, - InMemoryArtifactStore, LawEvidenceArtifacts, McpResourcePort, RecordingCommandIo, Timestamp, - VersionRegistry, WeslawArtifactLocator, + CommandIoPort, EchoReportRenderer, FilesystemPort, FixedClock, HolmesDiagnosticCode, + HolmesLawEvidenceBundle, InMemoryArtifactStore, LawEvidenceArtifacts, McpResourcePort, + RecordingCommandIo, ReportRenderPort, Timestamp, VersionRegistry, WeslawArtifactLocator, }; #[test] @@ -26,6 +26,12 @@ fn in_memory_artifact_store_reads_and_writes() { assert_eq!(bytes, b"diff"); assert_eq!(store.written("reports/summary.md"), Some(&b"summary"[..])); + assert_eq!( + store + .read_artifact(&ArtifactRef::new("reports/summary.md")) + .expect("written artifact should be readable"), + b"summary" + ); } #[test] @@ -42,6 +48,12 @@ fn filesystem_port_uses_workspace_relative_bytes_without_real_filesystem() { assert_eq!(bytes, b"policy"); assert_eq!(store.written("reports/holmes.md"), Some(&b"report"[..])); + assert_eq!( + store + .read_workspace_file("reports/holmes.md") + .expect("written workspace file should be readable"), + b"report" + ); } #[test] @@ -81,8 +93,28 @@ fn artifact_locator_rejects_escape_and_absolute_paths() { .resolve("/tmp/outside.json") 
.expect_err("absolute paths should be rejected"); - assert!(escape.message.contains("escape")); - assert!(absolute.message.contains("workspace-relative")); + assert_eq!(escape.code, HolmesDiagnosticCode::HlawArtifactPathEscape); + assert_eq!(absolute.code, HolmesDiagnosticCode::HlawArtifactPathEscape); +} + +#[test] +fn artifact_locator_rejects_backslash_and_windows_drive_paths() { + let locator = WeslawArtifactLocator::new("/workspace"); + + for path in [ + "..\\outside.json", + "C:\\tmp\\outside.json", + "\\\\server\\share\\outside.json", + "C:/tmp/outside.json", + ] { + let diagnostic = locator + .resolve(path) + .expect_err("platform-specific path syntax should be rejected"); + assert_eq!( + diagnostic.code, + HolmesDiagnosticCode::HlawArtifactPathEscape + ); + } } #[test] @@ -123,6 +155,9 @@ fn version_registry_rejects_missing_malformed_and_unsupported_versions() { let unsupported_minor = registry .validate(ArtifactFamily::EvidenceBundle, Some("1.1.0")) .expect_err("unsupported minor should fail"); + let leading_zero = registry + .validate(ArtifactFamily::EvidenceBundle, Some("01.0.0")) + .expect_err("leading-zero schema version should fail"); assert_eq!(missing.code, HolmesDiagnosticCode::HlawSchemaVersionMissing); assert_eq!( @@ -137,6 +172,41 @@ fn version_registry_rejects_missing_malformed_and_unsupported_versions() { unsupported_minor.code, HolmesDiagnosticCode::HlawSchemaVersionUnsupportedMinor ); + assert_eq!( + leading_zero.code, + HolmesDiagnosticCode::HlawSchemaVersionMalformed + ); +} + +#[test] +fn version_registry_fails_closed_when_requirement_is_missing() { + let registry = VersionRegistry::new([]); + + let diagnostic = registry + .validate(ArtifactFamily::EvidenceBundle, Some("999.999.0")) + .expect_err("missing registry entry should fail closed"); + + assert_eq!( + diagnostic.code, + HolmesDiagnosticCode::HlawSchemaVersionRequirementMissing + ); + assert_eq!( + diagnostic.artifact_family.as_deref(), + Some("evidence-bundle") + ); +} + +#[test] 
+fn schema_version_parser_rejects_leading_zero_identifiers() { + for version in ["01.0.0", "1.01.0", "1.0.01"] { + let diagnostic = VersionRegistry::default() + .validate(ArtifactFamily::EvidenceBundle, Some(version)) + .expect_err("leading-zero semver identifiers should be malformed"); + assert_eq!( + diagnostic.code, + HolmesDiagnosticCode::HlawSchemaVersionMalformed + ); + } } #[test] @@ -154,6 +224,18 @@ fn recording_ports_capture_outputs() { assert_eq!(mcp.resource("holmes://summary"), Some(&b"payload"[..])); } +#[test] +fn report_renderer_fake_is_deterministic() { + let renderer = EchoReportRenderer::new("prefix:"); + + assert_eq!( + renderer + .render_report("body") + .expect("report rendering should be deterministic"), + "prefix:body" + ); +} + fn evidence_bundle_with_law_diff_path(path: &str) -> HolmesLawEvidenceBundle { HolmesLawEvidenceBundle { schema_version: "1.0.0".to_owned(),