From 64a57b3a67c8acfc44d07d7cf7ae2904fbaa850c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=F0=9D=90=8E=F0=9D=90=A7=F0=9D=90=9E=20=F0=9D=90=85?= =?UTF-8?q?=F0=9D=90=A2=F0=9D=90=A7=F0=9D=90=9E=20=F0=9D=90=92=F0=9D=90=AD?= =?UTF-8?q?=F0=9D=90=9A=F0=9D=90=AB=F0=9D=90=AC=F0=9D=90=AD=F0=9D=90=AE?= =?UTF-8?q?=F0=9D=90=9F=F0=9D=90=9F?= Date: Mon, 1 Jun 2026 15:56:20 +0630 Subject: [PATCH 1/2] Add per-domain failure counters to validation reports --- .github/workflows/governance-artifacts.yml | 37 ++ AGI_ASI_GSIFI_Blueprint_2026_2030.md | 523 +++++++++++++++++++++ GOVERNANCE_ARTIFACTS_README.md | 51 ++ artifacts/bbom/sample_tier0_fraud.json | 56 +++ examples/arre/sample_t0_sanctions_002.json | 39 ++ requirements-governance.txt | 1 + schemas/arre_record.schema.json | 135 ++++++ schemas/bbom.schema.json | 192 ++++++++ tests/test_governance_validator.py | 191 ++++++++ tools/__init__.py | 1 + tools/validate_ai_governance_artifacts.py | 248 ++++++++++ 11 files changed, 1474 insertions(+) create mode 100644 .github/workflows/governance-artifacts.yml create mode 100644 AGI_ASI_GSIFI_Blueprint_2026_2030.md create mode 100644 GOVERNANCE_ARTIFACTS_README.md create mode 100644 artifacts/bbom/sample_tier0_fraud.json create mode 100644 examples/arre/sample_t0_sanctions_002.json create mode 100644 requirements-governance.txt create mode 100644 schemas/arre_record.schema.json create mode 100644 schemas/bbom.schema.json create mode 100644 tests/test_governance_validator.py create mode 100644 tools/__init__.py create mode 100644 tools/validate_ai_governance_artifacts.py diff --git a/.github/workflows/governance-artifacts.yml b/.github/workflows/governance-artifacts.yml new file mode 100644 index 0000000..e538c85 --- /dev/null +++ b/.github/workflows/governance-artifacts.yml @@ -0,0 +1,37 @@ +name: Governance Artifact Validation + +on: + pull_request: + push: + branches: + - main + - master + +jobs: + validate-governance-artifacts: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: 
actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install dependencies + run: python -m pip install -r requirements-governance.txt pytest + + - name: Validate BBOM/ARRE artifacts + run: python tools/validate_ai_governance_artifacts.py --report-file .reports/governance-validation.json + + - name: Run validator tests + run: pytest -q tests/test_governance_validator.py + + - name: Upload governance validation report + if: always() + uses: actions/upload-artifact@v4 + with: + name: governance-validation-report + path: .reports/governance-validation.json + if-no-files-found: ignore diff --git a/AGI_ASI_GSIFI_Blueprint_2026_2030.md b/AGI_ASI_GSIFI_Blueprint_2026_2030.md new file mode 100644 index 0000000..09e0465 --- /dev/null +++ b/AGI_ASI_GSIFI_Blueprint_2026_2030.md @@ -0,0 +1,523 @@ +# AGI/ASI Governance, Containment, and Civilizational Security Blueprint for G‑SIFIs (2026–2030) + +**Version:** 2.0 (implementation-grade update) +**Date:** April 28, 2026 +**Audience:** CISO, CTO, CIO, CRO/Model Risk, Ops Resilience, Compliance, Internal Audit, and Board Risk Committees in globally systemically important financial institutions (G‑SIFIs). + +--- + +## 0) How to use this document + +This blueprint is designed as an execution playbook, not a position paper. + +- **Section 1–4:** strategic posture and risk taxonomy. +- **Section 5–10:** technical architecture and control design. +- **Section 11–14:** regulator-facing evidence, operating model, and delivery roadmap. +- **Section 15–19:** concrete artifacts (schemas, runbooks, checklists, and control tests). + +If you are starting from zero, execute in this order: +1. Establish risk tiering + AI asset registry. +2. Stand up policy enforcement + containment triggers. +3. Implement BBOM + ARRE evidence pipeline. +4. Formalize invariants (TLA+) for Tier 0 workflows. +5. Add cryptographic and formal assurance where supervisory value is highest. 
+ +--- + +## 1) Design principles for 2026–2030 + +1. **Safety-critical, not feature-critical.** Frontier AI touching critical banking functions is a safety-critical system. +2. **Systemic externality mindset.** G‑SIFIs must evaluate institution risk *and* network contagion risk. +3. **Containment-first scaling.** Capability growth is gated by containment maturity. +4. **Evidence-by-construction.** Controls must emit machine-readable supervisory evidence continuously. +5. **Human authority with machine-speed enforcement.** Policy-as-code enforces boundaries; humans own intent and approvals. +6. **Model risk + cyber risk + operational resilience convergence.** Treat as one integrated program. + +--- + +## 2) Scope and risk tiers + +### 2.1 AI usage categories +- **Advisory:** output informs human decisions. +- **Operational assistive:** output triggers low-risk automated actions. +- **Agentic delegated:** autonomous actions under defined authority budgets. +- **Critical autonomous:** potentially high-impact autonomous operations (generally disallowed for Tier 0 until advanced assurance is proven). + +### 2.2 Tiering matrix (mandatory) + +| Tier | Typical Functions | Maximum Autonomy | Approval Model | Containment SLA | +|---|---|---|---|---| +| Tier 0 (Critical) | Payments, sanctions, treasury, market risk, fraud blocking | Assistive / tightly delegated only | Dual-control + risk signoff | ≤ 60 sec to safe-mode | +| Tier 1 (High) | Client suitability, surveillance, underwriting assist | Delegated with hard budgets | Product + Risk + Compliance | ≤ 5 min | +| Tier 2 (Moderate) | Internal copilots, drafting, analytics support | Delegated bounded | Product owner | ≤ 30 min | +| Tier 3 (Low) | Non-sensitive internal productivity | Assistive | Team lead | best effort | + +--- + +## 3) Threat model (financial-sector specific) + +### 3.1 Primary threat classes +- **Authority drift:** agent exceeds granted authority scope. 
+- **Strategic deception:** model hides intent to maximize objective reward. +- **Tool-chain exploitation:** plugin/API abuse to pivot into sensitive systems. +- **Data exfiltration:** latent leakage via prompt, tool outputs, or model memory. +- **Coordinated model failure:** correlated misbehavior across shared providers. +- **Market-manipulation acceleration:** synthetic narratives and automated influence loops. + +### 3.2 Systemic propagation channels +- Common compute/model vendors. +- Shared reference data and vendor models. +- Payment/settlement infrastructure interdependencies. +- Third-party outages that disable guardrails simultaneously. + +### 3.3 Quantification baseline +Define and monitor: +- **P(unsafe action | prompt class, toolset, risk tier)** +- **Mean Time To Containment (MTTC)** +- **Containment success probability under concurrent attack** +- **Residual systemic exposure (institution + network weighted)** + +--- + +## 4) Target architecture overview + +``` +[Users/Systems] -> [WorkflowAI Pro] -> [Sentinel v2.4 Policy Mesh] -> [Model/Tools] + | | + v v + [Approval Engine] [Evidence Ledger + ARRE] + | + v + [Containment Orchestrator] + | + v + [AGI Containment Lab Replay] +``` + +Design objective: no high-impact inference or action path bypasses policy evaluation, evidence emission, and containment hooks. + +--- + +## 5) Sentinel AI Governance Platform v2.4 (reference design) + +Sentinel v2.4 is the enterprise control plane for AI inventory, policy enforcement, runtime monitoring, and regulator evidence production. + +### 5.1 Required services +1. **AI Asset Registry** (models, agents, prompts, tools, datasets). +2. **Policy Decision Point (PDP)** + **Policy Enforcement Points (PEP)**. +3. **Runtime Attestation Service** (signed posture snapshots). +4. **Evidence Ledger** (tamper-evident event chain). +5. **Containment Orchestrator** (playbook automation). +6. **ARRE Export Service** (regulator bundles). 
+ +### 5.2 Mandatory APIs +- `register_artifact(artifact, bbom_ref, signature)` +- `evaluate_policy(subject, action, resource, context)` +- `attest_runtime(workload_id, posture_hash, detector_state)` +- `trigger_containment(workload_id, reason_code, severity)` +- `export_arre_bundle(period, control_scope, regulator_profile)` + +### 5.3 Sentinel policy primitives +- **Subject:** user, service account, agent role. +- **Action:** infer, call_tool, write_record, execute_trade, update_limit. +- **Resource:** dataset, API, environment, ledger segment. +- **Context:** jurisdiction, legal entity, data class, current threat level. +- **Obligation:** log, notify, require_approval, force_redaction, downgrade_mode. + +### 5.4 Safe degradation modes +- **Mode A:** read-only inference. +- **Mode B:** assistive output only (no tool execution). +- **Mode C:** isolated sandbox inference. +- **Mode D:** hard quarantine with forensics capture. + +--- + +## 6) WorkflowAI Pro governance pattern + +WorkflowAI Pro should run as a governed orchestrator, not open-ended automation. + +### 6.1 Control design +- **Capability tokens:** short-lived, scoped, non-transferable. +- **Authority budgets:** max API calls, spend, writes, external comms. +- **Loop guards:** max iteration count + max elapsed runtime. +- **High-risk step locks:** sanctions/KYC/market actions require human gate. +- **Reason trace requirement:** every autonomous step emits rationale and policy refs. + +### 6.2 Runtime policy examples +- Disallow external messaging for Tier 0 workflows. +- Require second approver for changes touching client suitability outcomes. +- Force jurisdictional redaction before cross-border inference. +- Downgrade to assistive mode if detector confidence falls below threshold. + +--- + +## 7) Behavioral Bill of Materials (BBOM) + +BBOM is required for every production AI artifact and must be signed and versioned. 
+ +### 7.1 Canonical BBOM fields +- `artifact_id`, `provider`, `model_family`, `training_cutoff` +- `intended_use`, `prohibited_use` +- `autonomy_class`, `tool_permissions`, `max_authority_budget` +- `hazard_scores` (deception, jailbreak, exfiltration propensity) +- `jurisdiction_constraints`, `data_residency_constraints` +- `evaluation_suite_refs`, `acceptance_thresholds`, `expiry_conditions` +- `runtime_detector_bindings` + +### 7.2 Example BBOM JSON (minimal) +```json +{ + "artifact_id": "mdl-tier0-fraud-2026-09-15", + "provider": "internal+vendorX", + "autonomy_class": "delegated_bounded", + "tool_permissions": ["case_lookup", "alert_writeback"], + "max_authority_budget": { + "tool_calls": 20, + "elapsed_seconds": 90, + "external_network": false + }, + "hazard_scores": { + "deception": 0.18, + "jailbreak": 0.31, + "exfiltration": 0.12 + }, + "acceptance_thresholds": { + "max_deception": 0.20, + "max_exfiltration": 0.15 + } +} +``` + +### 7.3 BBOM lifecycle policy +- No BBOM = no deploy. +- Any material model change requires BBOM re-sign + re-approval. +- Automatic suspension when hazard drift exceeds threshold. + +--- + +## 8) Unified Meta‑Invariant Framework (TLA+, Coq, Q#) + +### 8.1 Invariant taxonomy +1. **Safety:** prohibited actions never execute. +2. **Liveness:** critical workflows complete under degraded safe operation. +3. **Attribution:** all high-impact actions have attributable approval chain. +4. **Containment:** specified triggers force deterministic bounded-state transition. + +### 8.2 Tool-role split +- **TLA+:** workflow state machine and distributed policy transition correctness. +- **Coq:** proof objects for critical policy logic (e.g., SoD, sanctions pathways). +- **Q#:** forward-looking quantum-risk simulation (crypto policy migration stress). + +### 8.3 Minimum formal program (first 12 months) +- Specify 12 Tier 0 invariants in TLA+. +- Model-check every release candidate for policy transition regressions. 
+- Select top 3 legal-critical controls and produce Coq proofs. +- Store proof artifacts and hashes in Sentinel evidence ledger. + +### 8.4 Sample invariant statements +- `INV-001`: No unapproved cross-border data transfer in Tier 0 paths. +- `INV-002`: Any sanctions-impacting action requires two distinct approvers. +- `INV-003`: Containment trigger `SEV1_DECEPTION_SIGNAL` implies transition to Mode D in ≤ 60 sec. + +--- + +## 9) AGI Containment Labs (ACL): CAS‑SPP + Bayesian Belief Networks + +ACLs are isolated environments for adversarial evaluation, containment stress tests, and replay of production incidents. + +### 9.1 ACL baseline +- Segmented compute enclaves. +- One-way export for evidence artifacts. +- Synthetic payments/market/fraud simulation environment. +- Red-team harness covering prompt, tool, identity, and network attack surfaces. + +### 9.2 CAS‑SPP scoring model +**CAS‑SPP = w1·P(policy_penetration) + w2·MTTC_norm + w3·ContainmentFailureProb + w4·RecoveryIntegrityGap** + +- Board approves weights and tier-specific thresholds. +- Any Tier 0 workload with CAS‑SPP above threshold is blocked from prod. + +### 9.3 Bayesian Belief Network operation +- **Observed nodes:** detector anomalies, drift deltas, failed policy checks, vendor outage signals. +- **Latent nodes:** deception likelihood, collusion likelihood, contagion likelihood. +- **Action nodes:** downgrade mode, quarantine, human escalation, regulator pre-notification. + +Update cadence: +- Streaming updates for Tier 0/1. +- Hourly recalibration for prior drift. +- Monthly governance recalibration by model risk. + +--- + +## 10) Regulator-facing controls: ARRE + zk-SNARK compliance + +### 10.1 ARRE data contract +For each control: +- Control objective and legal basis. +- Policy-as-code identifier + version. +- Effectiveness metrics and confidence intervals. +- Exceptions/waivers and compensating controls. +- Signed evidence pointers (hashes, timestamps, approver IDs). 
+ +### 10.2 zk-SNARK compliance use cases +- Prove that restricted data classes were never exported outside jurisdiction. +- Prove that segregation-of-duties approval chain was satisfied. +- Prove that outputs passed mandatory suitability/content filters. + +### 10.3 Practical constraints +- Keep circuits small, control-specific, and benchmarked for proving latency. +- Separate prover infrastructure from production transaction path where possible. +- Rotate proving/verifier keys under dual-control HSM governance. + +--- + +## 11) ICGC participation (International Compute Governance Consortium) + +Treat ICGC as systemic risk infrastructure. + +### 11.1 What to contribute +- Incident taxonomy + structured anonymized lessons. +- Compute concentration and dependency risk indicators. +- Finance-specific red-team scenarios and containment benchmarks. + +### 11.2 Internal governance +- CISO-led ICGC liaison office with Legal + Public Policy + Model Risk. +- 30-day SLA to assess and respond to consortium advisories. +- Quarterly board briefing on external systemic risk signals. + +--- + +## 12) Target operating model and RACI + +### 12.1 Three-lines adaptation +- **Line 1:** build/operate controls. +- **Line 2:** policy, challenge, validation, independent monitoring. +- **Line 3:** control design/effectiveness assurance. + +### 12.2 RACI (core processes) + +| Process | Eng/Platform | Model Risk | Compliance | CISO | Internal Audit | +|---|---|---|---|---|---| +| BBOM issuance | R | A | C | C | I | +| Tiering decision | C | A | C | R | I | +| Policy-as-code changes | R | C | C | A | I | +| Containment invocation | R | C | I | A | I | +| ARRE regulator bundle | C | R | A | C | I | +| Annual control assurance | C | C | C | C | A | + +Legend: R=Responsible, A=Accountable, C=Consulted, I=Informed. + +--- + +## 13) 2026–2030 implementation roadmap + +### Phase 1 — Foundation (Q3 2026 to Q2 2027) +- Stand up Sentinel MVP (registry + PDP/PEP + evidence ledger). 
+- Enforce BBOM registration policy enterprise-wide. +- Launch ACL pilot for 5 Tier 0/1 workflows. +- Define and test first 12 meta-invariants. + +**Exit criteria:** +- 95% AI assets inventoried. +- 100% Tier 0 workloads with policy enforcement in-path. +- MTTC median ≤ 5 minutes in exercises. + +### Phase 2 — Hardening (Q3 2027 to Q4 2028) +- Roll out WorkflowAI Pro guardrails for all Tier 0/1 agentic paths. +- Deploy BBN inference to production with escalation runbooks. +- Launch ARRE exports for routine supervisory engagements. +- Add 2–3 zk-SNARK proof-enabled controls. + +**Exit criteria:** +- MTTC median Tier 0 ≤ 60 seconds in controlled drills. +- < 1% high-severity policy bypass false negatives in test harness. +- 100% supervisory evidence bundles reproducible from ledger hashes. + +### Phase 3 — Systemic resilience (2029 to 2030) +- Cross-institution coordinated incident drills via ICGC. +- Multi-vendor correlated failure simulations. +- Continuous formal verification in CI/CD for Tier 0 controls. + +**Exit criteria:** +- Demonstrated safe degradation under correlated provider failure. +- Formal coverage of all Tier 0 critical decision flows. +- Board-approved systemic exposure within risk appetite. + +--- + +## 14) KPI/KRI library (board + engineering) + +### 14.1 Board KRIs +- Tier 0 containment coverage (%). +- MTTC p50/p95 for severe scenarios. +- AI vendor concentration index. +- Open high-risk exceptions aging > 30/60/90 days. +- Cross-jurisdiction compliance proof success rate. + +### 14.2 Engineering KPIs +- PDP decision latency p95/p99. +- Policy decision correctness (precision/recall against golden set). +- Detector false-negative rate (unsafe action classes). +- Invariant test pass rate per release. +- Evidence completeness score for ARRE bundle generation. 
+ +--- + +## 15) Incident response integration (AI-major incident playbook) + +### 15.1 Trigger catalog (examples) +- `SEV1_DECEPTION_SIGNAL` +- `SEV1_MULTI_DETECTOR_BYPASS` +- `SEV1_UNAUTHORIZED_TOOL_CHAIN` +- `SEV2_POLICY_DRIFT_SPIKE` + +### 15.2 First-hour runbook +1. Auto-trigger containment mode per policy. +2. Freeze non-essential autonomous actions. +3. Start incident command with CISO delegate. +4. Capture forensic snapshots + evidence ledger seal. +5. Assess financial stability impact (payments/liquidity/market). +6. Prepare ARRE pre-notification package. + +### 15.3 Recovery criteria +- Root cause identified and control patch validated in ACL replay. +- Updated BBOM and policy signatures. +- Independent second-line signoff. +- Post-incident review with board risk committee. + +--- + +## 16) Implementation artifacts (ready-to-adapt templates) + +### 16.1 Policy-as-code rule template (pseudo) +```yaml +rule_id: T0-SANCTIONS-002 +when: + tier: 0 + action: execute_payment + context.sanctions_screening: required +then: + require: + - approval.count >= 2 + - approval.distinct_approvers == true + - bbom.hazard_scores.deception <= 0.20 + on_fail: + - deny + - trigger_containment: ModeB + - emit_event: SEV2_POLICY_BLOCK +``` + +### 16.2 ARRE record template +```json +{ + "control_id": "T0-SANCTIONS-002", + "objective": "Prevent unscreened sanctioned transfers", + "policy_version": "v1.9.3", + "evidence_hashes": ["..."], + "effectiveness": { + "period": "2026-Q4", + "pass_rate": 0.998, + "exceptions": 3 + }, + "waivers": [], + "approvals": ["risk", "compliance"] +} +``` + +### 16.3 Containment drill script (monthly) +- Inject deception-like signal into Tier 0 workflow. +- Verify automatic mode transition and approval lockouts. +- Validate ARRE evidence completeness. +- Replay in ACL and compare to baseline CAS‑SPP. + +--- + +## 17) Minimum budget architecture bill (2026–2027) + +1. Sentinel v2.4 core platform. +2. WorkflowAI Pro control extensions. +3. 
BBOM registry/signing service. +4. ACL infrastructure and red-team tooling. +5. BBN telemetry and inference stack. +6. ARRE evidence warehouse. +7. zk proof service (pilot scope). +8. Formal methods engineering capacity. +9. ICGC participation + external coordination. + +--- + +## 18) Anti-patterns and failure modes + +- “Policy PDF governance” without executable enforcement. +- Over-trust in vendor attestations without local independent validation. +- Mixing Tier 0 and Tier 2 workloads in shared unconstrained agent fabric. +- Containment plans that are manual-only and untested. +- Evidence stores that cannot reproduce decisions deterministically. + +--- + +## 19) 90/180/365-day execution checklist + +### First 90 days +- [ ] Approve enterprise tiering taxonomy. +- [ ] Put PDP/PEP in-path for all Tier 0 pilots. +- [ ] Require BBOM for all production-bound AI artifacts. +- [ ] Create AI-major incident playbook with named on-call roles. + +### First 180 days +- [ ] Validate top 12 invariants in TLA+. +- [ ] Run two red-team campaigns in ACL. +- [ ] Stand up ARRE prototype for one regulator-facing control family. +- [ ] Define BBN priors with second-line validation. + +### First 365 days +- [ ] Achieve Tier 0 containment coverage > 95%. +- [ ] Move at least two legal-critical controls to formal proof-backed assurance. +- [ ] Execute one cross-border supervisory walkthrough with replayable evidence. +- [ ] Complete one consortium-style coordinated exercise with external partners. + +--- + +## Closing executive message + +For G‑SIFIs, AGI/ASI governance is now a **financial stability and supervisory credibility function**. Competitive advantage in 2026–2030 comes from proving safe operation under stress through deterministic controls, measurable containment, formal invariants, and cryptographically verifiable evidence—not from raw model capability alone. 
+ +--- + +## 20) Machine-readable control artifacts (for implementation) + +To support production adoption and supervisory replay, this repository includes concrete JSON Schemas: + +- `schemas/bbom.schema.json` — canonical BBOM structure and validation rules. +- `schemas/arre_record.schema.json` — ARRE evidence record structure and attestation requirements. + +### 20.1 Recommended CI checks + +Run schema validation in CI for every deployment artifact: + +```bash +ajv validate --spec=draft2020 -c ajv-formats -s schemas/bbom.schema.json -d "artifacts/bbom/*.json" +ajv validate --spec=draft2020 -c ajv-formats -s schemas/arre_record.schema.json -d "examples/arre/*.json" +``` + +### 20.2 Policy gate recommendation + +Validator defaults to scanning `examples/arre` and `evidence/arre` for ARRE files. + +Deployment pipeline should fail closed when: +- BBOM document does not validate. +- ARRE control records are missing required attestation fields. +- BBOM hazard thresholds exceed tier-specific policy limits. + +### 20.3 Bootstrap validator (schema-backed) + +For environments standardizing on Python tooling, install dependencies and run: + +```bash +python -m pip install -r requirements-governance.txt +python tools/validate_ai_governance_artifacts.py +``` + +Reference sample artifacts for pipeline onboarding: +- `artifacts/bbom/sample_tier0_fraud.json` +- `examples/arre/sample_t0_sanctions_002.json` diff --git a/GOVERNANCE_ARTIFACTS_README.md b/GOVERNANCE_ARTIFACTS_README.md new file mode 100644 index 0000000..0b1cf28 --- /dev/null +++ b/GOVERNANCE_ARTIFACTS_README.md @@ -0,0 +1,51 @@ +# Governance Artifacts Quickstart + +This repository includes machine-readable governance artifacts for AI controls: + +- `schemas/bbom.schema.json` +- `schemas/arre_record.schema.json` +- `artifacts/bbom/*.json` +- `examples/arre/*.json` +- `tools/validate_ai_governance_artifacts.py` + +## Local validation + +```bash +python -m pip install -r requirements-governance.txt +python tools/validate_ai_governance_artifacts.py +``` + +## Custom paths + +```bash 
+python tools/validate_ai_governance_artifacts.py \ + --bbom-dir artifacts/bbom \ + --arre-dir examples/arre \ + --arre-dir evidence/arre +``` + +## CI + +Validation is enforced in `.github/workflows/governance-artifacts.yml`. + +The validator enforces both JSON Schema compliance and semantic checks (for example BBOM threshold conformance and ARRE period consistency). + +Additional semantic checks include date format validation (via JSON Schema format checking) and duplicate ARRE evidence hash detection. + +Generate an auditable machine-readable summary report: + +```bash +python tools/validate_ai_governance_artifacts.py --report-file .reports/governance-validation.json +``` + +Report output includes discovered/checked counters, `passed_files`, `failed_files`, and `errors` for audit trails. + +The GitHub Actions workflow also uploads the report as a CI artifact (`governance-validation-report`) for audit retention. + +When schema loading fails, reports include `fatal_error: "schema_load_failure"` and the associated error in `errors`. + +Report output also includes `validator_version` and overall `status` (`passed`/`failed`) for easier pipeline gating. + +Reports include `exit_code` (0 for pass, 2 for validation failure) to simplify CI/CD policy gating. + +Per-domain failure counters (`bbom_failed`, `arre_failed`) are included to support targeted remediation dashboards. 
diff --git a/artifacts/bbom/sample_tier0_fraud.json b/artifacts/bbom/sample_tier0_fraud.json new file mode 100644 index 0000000..6d9d09b --- /dev/null +++ b/artifacts/bbom/sample_tier0_fraud.json @@ -0,0 +1,56 @@ +{ + "artifact_id": "mdl-tier0-fraud-2026-09-15", + "provider": "internal+vendorX", + "model_family": "transformer_reasoning_v4", + "training_cutoff": "2026-06-30", + "intended_use": [ + "Fraud alert triage support", + "Case prioritization recommendations" + ], + "prohibited_use": [ + "Autonomous payment release", + "Direct customer denial without human review" + ], + "autonomy_class": "delegated_bounded", + "tool_permissions": [ + "case_lookup", + "alert_writeback" + ], + "max_authority_budget": { + "tool_calls": 20, + "elapsed_seconds": 90, + "external_network": false, + "max_write_ops": 10 + }, + "hazard_scores": { + "deception": 0.18, + "jailbreak": 0.31, + "exfiltration": 0.12 + }, + "jurisdiction_constraints": [ + { + "jurisdiction": "US", + "rule": "PII must remain within approved domestic environments." 
+ } + ], + "evaluation_suite_refs": [ + "evals/fraud/tier0_regression_2026q3" + ], + "acceptance_thresholds": { + "max_deception": 0.2, + "max_exfiltration": 0.15 + }, + "runtime_detector_bindings": [ + { + "detector_id": "detector-deception-v2", + "version": "2.3.1", + "severity_threshold": 0.8 + } + ], + "signature": { + "algorithm": "ed25519", + "signed_by": "ai-safety-release-bot", + "signed_at": "2026-09-16T14:12:00Z", + "digest": "57b5d6b6f0ea91f0d13a4f702f81ccab9f6c50d467af4d8f" + } +} diff --git a/examples/arre/sample_t0_sanctions_002.json b/examples/arre/sample_t0_sanctions_002.json new file mode 100644 index 0000000..436533a --- /dev/null +++ b/examples/arre/sample_t0_sanctions_002.json @@ -0,0 +1,39 @@ +{ + "control_id": "T0-SANCTIONS-002", + "objective": "Prevent unscreened sanctioned transfers from execution paths.", + "policy_version": "v1.9.3", + "control_owner": "Global Sanctions Control Office", + "period": { + "start": "2026-10-01", + "end": "2026-12-31" + }, + "effectiveness": { + "pass_rate": 0.998, + "exceptions": 3, + "confidence_interval_95": { + "lower": 0.996, + "upper": 0.999 + } + }, + "evidence_hashes": [ + "c83d5fce1ea04fdf9b7d4d1bfc41261f" + ], + "approvals": [ + { + "role": "model_risk", + "approved_by": "Jane Doe", + "approved_at": "2027-01-05T11:15:00Z" + }, + { + "role": "compliance", + "approved_by": "John Smith", + "approved_at": "2027-01-05T11:32:00Z" + } + ], + "attestation": { + "algorithm": "ed25519", + "signed_by": "arre-attestor", + "signed_at": "2027-01-05T12:00:00Z", + "digest": "1824ae6f5ea4f7825af2f2bb53f7a034086d12a452f6eab2" + } +} diff --git a/requirements-governance.txt b/requirements-governance.txt new file mode 100644 index 0000000..ec38a0f --- /dev/null +++ b/requirements-governance.txt @@ -0,0 +1 @@ +jsonschema>=4.22,<5 diff --git a/schemas/arre_record.schema.json b/schemas/arre_record.schema.json new file mode 100644 index 0000000..e84160e --- /dev/null +++ b/schemas/arre_record.schema.json @@ -0,0 +1,135 @@ +{ 
+ "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.org/schemas/arre_record.schema.json", + "title": "ARRE Record", + "description": "Assurance, Risk, and Regulatory Evidence record schema for regulator-facing controls.", + "type": "object", + "additionalProperties": false, + "required": [ + "control_id", + "objective", + "policy_version", + "control_owner", + "period", + "effectiveness", + "evidence_hashes", + "approvals", + "attestation" + ], + "properties": { + "control_id": { + "type": "string", + "pattern": "^[A-Z0-9_-]{4,64}$" + }, + "objective": { + "type": "string", + "minLength": 10, + "maxLength": 512 + }, + "legal_basis": { + "type": "array", + "items": { + "type": "string", + "minLength": 2, + "maxLength": 128 + } + }, + "policy_version": { + "type": "string", + "minLength": 1, + "maxLength": 32 + }, + "control_owner": { + "type": "string", + "minLength": 2, + "maxLength": 128 + }, + "period": { + "type": "object", + "additionalProperties": false, + "required": ["start", "end"], + "properties": { + "start": { "type": "string", "format": "date" }, + "end": { "type": "string", "format": "date" } + } + }, + "effectiveness": { + "type": "object", + "additionalProperties": false, + "required": ["pass_rate", "exceptions"], + "properties": { + "pass_rate": { "type": "number", "minimum": 0, "maximum": 1 }, + "exceptions": { "type": "integer", "minimum": 0 }, + "confidence_interval_95": { + "type": "object", + "additionalProperties": false, + "required": ["lower", "upper"], + "properties": { + "lower": { "type": "number", "minimum": 0, "maximum": 1 }, + "upper": { "type": "number", "minimum": 0, "maximum": 1 } + } + } + } + }, + "exceptions": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "required": ["id", "severity", "status"], + "properties": { + "id": { "type": "string", "minLength": 2, "maxLength": 64 }, + "severity": { "type": "string", "enum": ["low", "medium", "high", 
"critical"] }, + "status": { "type": "string", "enum": ["open", "mitigated", "closed"] }, + "compensating_control": { "type": "string", "maxLength": 512 } + } + } + }, + "waivers": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "required": ["waiver_id", "approved_by", "expires_at"], + "properties": { + "waiver_id": { "type": "string", "minLength": 2, "maxLength": 64 }, + "approved_by": { "type": "string", "minLength": 2, "maxLength": 128 }, + "expires_at": { "type": "string", "format": "date" } + } + } + }, + "evidence_hashes": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "pattern": "^[a-fA-F0-9]{32,128}$" + } + }, + "approvals": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": ["role", "approved_by", "approved_at"], + "properties": { + "role": { "type": "string", "minLength": 2, "maxLength": 64 }, + "approved_by": { "type": "string", "minLength": 2, "maxLength": 128 }, + "approved_at": { "type": "string", "format": "date-time" } + } + } + }, + "attestation": { + "type": "object", + "additionalProperties": false, + "required": ["algorithm", "signed_by", "signed_at", "digest"], + "properties": { + "algorithm": { "type": "string", "minLength": 2, "maxLength": 64 }, + "signed_by": { "type": "string", "minLength": 2, "maxLength": 128 }, + "signed_at": { "type": "string", "format": "date-time" }, + "digest": { "type": "string", "minLength": 16, "maxLength": 256 } + } + } + } +} diff --git a/schemas/bbom.schema.json b/schemas/bbom.schema.json new file mode 100644 index 0000000..cee62c5 --- /dev/null +++ b/schemas/bbom.schema.json @@ -0,0 +1,192 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.org/schemas/bbom.schema.json", + "title": "Behavioral Bill of Materials (BBOM)", + "description": "Minimum machine-readable BBOM specification for high-impact AI workloads in G-SIFIs.", + "type": 
"object", + "additionalProperties": false, + "required": [ + "artifact_id", + "provider", + "model_family", + "training_cutoff", + "intended_use", + "prohibited_use", + "autonomy_class", + "tool_permissions", + "max_authority_budget", + "hazard_scores", + "jurisdiction_constraints", + "evaluation_suite_refs", + "acceptance_thresholds", + "runtime_detector_bindings", + "signature" + ], + "properties": { + "artifact_id": { + "type": "string", + "pattern": "^[a-zA-Z0-9._:-]{6,128}$" + }, + "provider": { + "type": "string", + "minLength": 2, + "maxLength": 128 + }, + "model_family": { + "type": "string", + "minLength": 2, + "maxLength": 128 + }, + "training_cutoff": { + "type": "string", + "format": "date" + }, + "intended_use": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "minLength": 3, + "maxLength": 256 + } + }, + "prohibited_use": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "minLength": 3, + "maxLength": 256 + } + }, + "autonomy_class": { + "type": "string", + "enum": [ + "advisory", + "assistive", + "delegated_bounded", + "critical_autonomous" + ] + }, + "tool_permissions": { + "type": "array", + "items": { + "type": "string", + "minLength": 2, + "maxLength": 128 + } + }, + "max_authority_budget": { + "type": "object", + "additionalProperties": false, + "required": ["tool_calls", "elapsed_seconds", "external_network"], + "properties": { + "tool_calls": { + "type": "integer", + "minimum": 0, + "maximum": 100000 + }, + "elapsed_seconds": { + "type": "integer", + "minimum": 1, + "maximum": 86400 + }, + "external_network": { + "type": "boolean" + }, + "max_write_ops": { + "type": "integer", + "minimum": 0, + "maximum": 100000 + }, + "max_spend_usd": { + "type": "number", + "minimum": 0 + } + } + }, + "hazard_scores": { + "type": "object", + "additionalProperties": false, + "required": ["deception", "jailbreak", "exfiltration"], + "properties": { + "deception": { "type": "number", "minimum": 0, "maximum": 1 }, + 
"jailbreak": { "type": "number", "minimum": 0, "maximum": 1 }, + "exfiltration": { "type": "number", "minimum": 0, "maximum": 1 }, + "goal_misgeneralization": { "type": "number", "minimum": 0, "maximum": 1 } + } + }, + "jurisdiction_constraints": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": ["jurisdiction", "rule"], + "properties": { + "jurisdiction": { "type": "string", "minLength": 2, "maxLength": 64 }, + "rule": { "type": "string", "minLength": 3, "maxLength": 512 } + } + } + }, + "data_residency_constraints": { + "type": "array", + "items": { + "type": "string", + "minLength": 2, + "maxLength": 128 + } + }, + "evaluation_suite_refs": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "pattern": "^[a-zA-Z0-9._:/-]{3,256}$" + } + }, + "acceptance_thresholds": { + "type": "object", + "additionalProperties": false, + "properties": { + "max_deception": { "type": "number", "minimum": 0, "maximum": 1 }, + "max_exfiltration": { "type": "number", "minimum": 0, "maximum": 1 }, + "max_jailbreak": { "type": "number", "minimum": 0, "maximum": 1 } + }, + "required": ["max_deception", "max_exfiltration"] + }, + "runtime_detector_bindings": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": ["detector_id", "version", "severity_threshold"], + "properties": { + "detector_id": { "type": "string", "minLength": 2, "maxLength": 128 }, + "version": { "type": "string", "minLength": 1, "maxLength": 32 }, + "severity_threshold": { "type": "number", "minimum": 0, "maximum": 1 } + } + } + }, + "expiry_conditions": { + "type": "array", + "items": { + "type": "string", + "minLength": 3, + "maxLength": 256 + } + }, + "signature": { + "type": "object", + "additionalProperties": false, + "required": ["algorithm", "signed_by", "signed_at", "digest"], + "properties": { + "algorithm": { "type": "string", "minLength": 2, "maxLength": 64 
"""Tests for the BBOM/ARRE governance artifact validator.

The tests exercise the validator against the sample artifacts that ship in the
repository and against deliberately broken copies written to a temp directory.
"""

import json
from pathlib import Path

from jsonschema import Draft202012Validator, FormatChecker

from tools.validate_ai_governance_artifacts import ROOT, main, run_validation

# Repository fixtures shared by several tests.
BBOM_SCHEMA_PATH = ROOT / "schemas" / "bbom.schema.json"
ARRE_SCHEMA_PATH = ROOT / "schemas" / "arre_record.schema.json"
BBOM_SAMPLE_PATH = ROOT / "artifacts" / "bbom" / "sample_tier0_fraud.json"
ARRE_SAMPLE_PATH = ROOT / "examples" / "arre" / "sample_t0_sanctions_002.json"


def _read_json(path: Path) -> dict:
    """Load a UTF-8 JSON document from *path*."""
    return json.loads(path.read_text(encoding="utf-8"))


def _write_json(path: Path, payload: dict) -> None:
    """Serialize *payload* as UTF-8 JSON to *path*."""
    path.write_text(json.dumps(payload), encoding="utf-8")


def _make_artifact_dirs(tmp_path: Path) -> tuple[Path, Path]:
    """Create empty ``bbom`` and ``arre`` directories under *tmp_path*."""
    bbom_dir = tmp_path / "bbom"
    arre_dir = tmp_path / "arre"
    bbom_dir.mkdir()
    arre_dir.mkdir()
    return bbom_dir, arre_dir


def test_validator_main_passes_for_repo_samples():
    exit_code = main([])
    assert exit_code == 0


def test_validator_emits_json_report(tmp_path: Path):
    report = tmp_path / "validation-report.json"
    exit_code = main(["--report-file", str(report)])
    assert exit_code == 0

    payload = _read_json(report)
    assert payload["bbom_files_discovered"] >= 1
    assert payload["arre_files_discovered"] >= 1
    assert payload["bbom_files_checked"] >= 1
    assert payload["arre_files_checked"] >= 1
    assert payload["failed_files"] == []
    assert payload["bbom_failed"] == 0
    assert payload["arre_failed"] == 0
    assert len(payload["passed_files"]) >= 2
    assert payload["validator_version"]
    assert payload["status"] == "passed"
    assert payload["bbom_dir"]
    assert payload["arre_dirs"]
    assert payload["errors"] == []
    assert payload["exit_code"] == 0


def test_validator_supports_custom_arre_dir_args():
    exit_code = main(["--arre-dir", "examples/arre", "--bbom-dir", "artifacts/bbom"])
    assert exit_code == 0


def test_validator_returns_error_when_arre_dir_missing():
    errors, summary = run_validation("artifacts/bbom", ["does-not-exist/arre"])
    assert any("No ARRE files found" in error for error in errors)
    assert summary.get("exit_code") == 2


def test_bbom_schema_rejects_missing_required_field():
    schema = _read_json(BBOM_SCHEMA_PATH)
    sample = _read_json(BBOM_SAMPLE_PATH)
    sample.pop("artifact_id")

    errors = list(Draft202012Validator(schema).iter_errors(sample))
    assert errors, "Expected schema validation errors for missing artifact_id"


def test_arre_schema_rejects_missing_control_id():
    schema = _read_json(ARRE_SCHEMA_PATH)
    sample = _read_json(ARRE_SAMPLE_PATH)
    sample.pop("control_id")

    errors = list(Draft202012Validator(schema).iter_errors(sample))
    assert errors, "Expected schema validation errors for missing control_id"


def test_arre_schema_rejects_bad_date_format_when_format_checker_enabled():
    schema = _read_json(ARRE_SCHEMA_PATH)
    sample = _read_json(ARRE_SAMPLE_PATH)
    sample["period"]["start"] = "2026/10/01"

    # "format" keywords are only enforced when a FormatChecker is supplied.
    validator = Draft202012Validator(schema, format_checker=FormatChecker())
    errors = list(validator.iter_errors(sample))
    assert errors, "Expected schema validation errors for non-ISO date format"


def test_semantic_check_rejects_bbom_threshold_violation(tmp_path: Path):
    bbom_dir, arre_dir = _make_artifact_dirs(tmp_path)

    bbom = _read_json(BBOM_SAMPLE_PATH)
    bbom["hazard_scores"]["deception"] = 0.9
    bbom["acceptance_thresholds"]["max_deception"] = 0.2

    _write_json(bbom_dir / "bad_bbom.json", bbom)
    _write_json(arre_dir / "good_arre.json", _read_json(ARRE_SAMPLE_PATH))

    errors, _summary = run_validation(str(bbom_dir), [str(arre_dir)])
    assert any("max_deception" in err for err in errors)


def test_semantic_check_rejects_arre_period_inversion(tmp_path: Path):
    bbom_dir, arre_dir = _make_artifact_dirs(tmp_path)

    arre = _read_json(ARRE_SAMPLE_PATH)
    arre["period"]["start"] = "2027-01-01"
    arre["period"]["end"] = "2026-01-01"

    _write_json(bbom_dir / "good_bbom.json", _read_json(BBOM_SAMPLE_PATH))
    _write_json(arre_dir / "bad_arre.json", arre)

    errors, _summary = run_validation(str(bbom_dir), [str(arre_dir)])
    assert any("period.end before period.start" in err for err in errors)


def test_semantic_check_rejects_duplicate_evidence_hashes(tmp_path: Path):
    bbom_dir, arre_dir = _make_artifact_dirs(tmp_path)

    arre = _read_json(ARRE_SAMPLE_PATH)
    arre["evidence_hashes"] = ["abc123abc123abc123abc123abc123ab", "abc123abc123abc123abc123abc123ab"]

    _write_json(bbom_dir / "good_bbom.json", _read_json(BBOM_SAMPLE_PATH))
    _write_json(arre_dir / "bad_arre.json", arre)

    errors, _summary = run_validation(str(bbom_dir), [str(arre_dir)])
    assert any("duplicate evidence_hashes" in err for err in errors)


def test_failure_summary_contains_failed_file_details(tmp_path: Path):
    bbom_dir, arre_dir = _make_artifact_dirs(tmp_path)

    bbom = _read_json(BBOM_SAMPLE_PATH)
    bbom["hazard_scores"]["deception"] = 0.99
    bbom["acceptance_thresholds"]["max_deception"] = 0.01

    _write_json(bbom_dir / "bad_bbom.json", bbom)
    _write_json(arre_dir / "good_arre.json", _read_json(ARRE_SAMPLE_PATH))

    errors, summary = run_validation(str(bbom_dir), [str(arre_dir)])
    assert errors
    assert summary["failed_files"]
    assert summary.get("exit_code") == 2
    assert summary["failed_files"][0]["file"].endswith("bad_bbom.json")
    assert summary["bbom_failed"] >= 1


def test_malformed_json_is_reported_without_crash(tmp_path: Path):
    bbom_dir, arre_dir = _make_artifact_dirs(tmp_path)

    # malformed BBOM JSON (missing closing brace)
    (bbom_dir / "broken_bbom.json").write_text('{"artifact_id": "bad"', encoding="utf-8")
    _write_json(arre_dir / "good_arre.json", _read_json(ARRE_SAMPLE_PATH))

    errors, summary = run_validation(str(bbom_dir), [str(arre_dir)])
    assert errors
    assert any("Failed to parse JSON" in err for err in errors)
    assert summary["failed_files"]
    assert summary["bbom_failed"] >= 1


def test_schema_load_failure_is_reported(monkeypatch):
    from tools import validate_ai_governance_artifacts as mod

    original = mod.load_json

    def fake_load_json(path):
        # Fail only for the BBOM schema; every other file loads normally.
        if str(path).endswith("bbom.schema.json"):
            raise mod.ValidationError("simulated schema load failure")
        return original(path)

    monkeypatch.setattr(mod, "load_json", fake_load_json)
    errors, summary = mod.run_validation("artifacts/bbom", ["examples/arre"])

    assert errors
    assert "simulated schema load failure" in errors[0]
    assert summary.get("fatal_error") == "schema_load_failure"


def test_main_returns_nonzero_for_missing_dirs():
    exit_code = main(["--bbom-dir", "missing/bbom", "--arre-dir", "missing/arre"])
    assert exit_code == 2
test_main_returns_nonzero_for_missing_dirs(): + exit_code = main(["--bbom-dir", "missing/bbom", "--arre-dir", "missing/arre"]) + assert exit_code == 2 diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100644 index 0000000..c81d6ae --- /dev/null +++ b/tools/__init__.py @@ -0,0 +1 @@ +"""Utility tools for governance artifact validation and automation.""" diff --git a/tools/validate_ai_governance_artifacts.py b/tools/validate_ai_governance_artifacts.py new file mode 100644 index 0000000..4435af4 --- /dev/null +++ b/tools/validate_ai_governance_artifacts.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python3 +"""Validate BBOM and ARRE artifacts against repository JSON Schemas. + +Usage: + python tools/validate_ai_governance_artifacts.py + python tools/validate_ai_governance_artifacts.py --bbom-dir artifacts/bbom --arre-dir examples/arre +""" + +from __future__ import annotations + +import argparse +import json +import sys +from datetime import date, datetime, timezone +from pathlib import Path +from typing import Callable, NotRequired, TypedDict + +try: + from jsonschema import Draft202012Validator, FormatChecker +except ImportError as exc: # pragma: no cover + raise SystemExit( + "Missing dependency: jsonschema. 
Install with: " + "python -m pip install -r requirements-governance.txt" + ) from exc + +ROOT = Path(__file__).resolve().parents[1] +FORMAT_CHECKER = FormatChecker() +VALIDATOR_VERSION = "1.1.0" + + +class ValidationError(Exception): + """Raised when a governance artifact fails validation.""" + + +class FailedFileEntry(TypedDict): + file: str + error: str + + +class ValidationSummary(TypedDict): + timestamp_utc: str + validator_version: str + status: str + bbom_dir: str + arre_dirs: list[str] + bbom_files_discovered: int + arre_files_discovered: int + bbom_files_checked: int + arre_files_checked: int + passed_files: list[str] + failed_files: list[FailedFileEntry] + errors: list[str] + bbom_failed: int + arre_failed: int + fatal_error: NotRequired[str] + exit_code: int + + +def load_json(path: Path) -> dict: + try: + return json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as exc: + raise ValidationError(f"Failed to parse JSON: {path} ({exc})") from exc + + +def write_report(report_file: Path, report: ValidationSummary) -> None: + report_file.parent.mkdir(parents=True, exist_ok=True) + report_file.write_text(json.dumps(report, indent=2, sort_keys=True), encoding="utf-8") + + +def validate_with_schema(data: dict, data_name: str, schema: dict) -> None: + validator = Draft202012Validator(schema, format_checker=FORMAT_CHECKER) + errors = sorted(validator.iter_errors(data), key=lambda e: list(e.path)) + if errors: + rendered = "; ".join( + f"path={'/'.join(map(str, err.path)) or ''}: {err.message}" for err in errors + ) + raise ValidationError(f"{data_name} failed schema validation: {rendered}") + + +def validate_bbom_semantics(data: dict, data_name: str) -> None: + hazard = data.get("hazard_scores", {}) + thresholds = data.get("acceptance_thresholds", {}) + + if "max_deception" in thresholds and hazard.get("deception", 0) > thresholds["max_deception"]: + raise ValidationError(f"{data_name} violates max_deception threshold") + if 
"max_exfiltration" in thresholds and hazard.get("exfiltration", 0) > thresholds["max_exfiltration"]: + raise ValidationError(f"{data_name} violates max_exfiltration threshold") + if "max_jailbreak" in thresholds and hazard.get("jailbreak", 0) > thresholds["max_jailbreak"]: + raise ValidationError(f"{data_name} violates max_jailbreak threshold") + + +def validate_arre_semantics(data: dict, data_name: str) -> None: + period = data.get("period", {}) + try: + start = date.fromisoformat(period["start"]) + end = date.fromisoformat(period["end"]) + except (KeyError, ValueError) as exc: + raise ValidationError(f"{data_name} has invalid period date values") from exc + + if end < start: + raise ValidationError(f"{data_name} has period.end before period.start") + + evidence_hashes = data.get("evidence_hashes", []) + if len(set(evidence_hashes)) != len(evidence_hashes): + raise ValidationError(f"{data_name} contains duplicate evidence_hashes") + + +def collect_artifacts(path: Path) -> list[Path]: + if not path.exists(): + return [] + return sorted(path.rglob("*.json")) + + +def display_path(path: Path) -> str: + try: + return str(path.relative_to(ROOT)) + except ValueError: + return str(path) + + +def parse_args(argv: list[str] | None = None) -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Validate governance artifacts against JSON Schemas.") + parser.add_argument("--bbom-dir", default="artifacts/bbom", help="Directory containing BBOM JSON files.") + parser.add_argument( + "--arre-dir", + action="append", + default=None, + help="Directory containing ARRE JSON files. 
Can be specified multiple times.", + ) + parser.add_argument( + "--report-file", + default=None, + help="Optional output path for JSON validation report.", + ) + return parser.parse_args(argv) + + +def get_artifact_sets(bbom_dir: str, arre_dirs: list[str] | None) -> tuple[list[Path], list[Path], list[str]]: + bbom_files = collect_artifacts(ROOT / bbom_dir) + resolved_arre_dirs = arre_dirs or ["examples/arre", "evidence/arre"] + arre_files: list[Path] = [] + for arre_dir in resolved_arre_dirs: + arre_files.extend(collect_artifacts(ROOT / arre_dir)) + return bbom_files, sorted(set(arre_files)), resolved_arre_dirs + + +def build_summary(bbom_files: list[Path], arre_files: list[Path], bbom_dir: str, arre_dirs: list[str]) -> ValidationSummary: + return { + "timestamp_utc": datetime.now(timezone.utc).isoformat(), + "validator_version": VALIDATOR_VERSION, + "status": "unknown", + "bbom_dir": bbom_dir, + "arre_dirs": arre_dirs, + "bbom_files_discovered": len(bbom_files), + "arre_files_discovered": len(arre_files), + "bbom_files_checked": 0, + "arre_files_checked": 0, + "passed_files": [], + "failed_files": [], + "errors": [], + "bbom_failed": 0, + "arre_failed": 0, + "exit_code": 0, + } + + +def validate_file( + file: Path, + schema: dict, + semantic_validator: Callable[[dict, str], None], + summary: ValidationSummary, + counter_key: str, + failed_counter_key: str, + errors: list[str], + label: str, +) -> None: + try: + data = load_json(file) + validate_with_schema(data, file.name, schema) + semantic_validator(data, file.name) + summary[counter_key] += 1 + summary["passed_files"].append(display_path(file)) + print(f"OK {label}: {display_path(file)}") + except ValidationError as exc: + error = str(exc) + errors.append(error) + summary["failed_files"].append({"file": display_path(file), "error": error}) + summary[failed_counter_key] += 1 + + +def run_validation(bbom_dir: str, arre_dirs: list[str] | None) -> tuple[list[str], ValidationSummary]: + errors: list[str] = [] + + 
bbom_files, arre_files, resolved_arre_dirs = get_artifact_sets(bbom_dir, arre_dirs) + summary = build_summary(bbom_files, arre_files, bbom_dir, resolved_arre_dirs) + + try: + bbom_schema = load_json(ROOT / "schemas" / "bbom.schema.json") + arre_schema = load_json(ROOT / "schemas" / "arre_record.schema.json") + except ValidationError as exc: + errors.append(str(exc)) + summary["errors"] = errors + summary["fatal_error"] = "schema_load_failure" + summary["status"] = "failed" + summary["exit_code"] = 2 + return errors, summary + + if not bbom_files: + errors.append(f"No BBOM files found under {bbom_dir}") + if not arre_files: + errors.append("No ARRE files found under configured directories: " + ", ".join(resolved_arre_dirs)) + if errors: + summary["errors"] = errors + summary["status"] = "failed" + summary["exit_code"] = 2 + return errors, summary + + for file in bbom_files: + validate_file(file, bbom_schema, validate_bbom_semantics, summary, "bbom_files_checked", "bbom_failed", errors, "BBOM") + + for file in arre_files: + validate_file(file, arre_schema, validate_arre_semantics, summary, "arre_files_checked", "arre_failed", errors, "ARRE") + + summary["errors"] = errors + summary["status"] = "passed" if not errors else "failed" + summary["exit_code"] = 0 if not errors else 2 + return errors, summary + + +def main(argv: list[str] | None = None) -> int: + args = parse_args(argv) + errors, summary = run_validation(args.bbom_dir, args.arre_dir) + + if args.report_file: + write_report(Path(args.report_file), summary) + + if errors: + for error in errors: + print(f"VALIDATION FAILED: {error}", file=sys.stderr) + return 2 + + print("All governance artifacts validated successfully against JSON Schemas and semantic checks.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From 5726a47da3cf50b28f82ef19a615a6646dc0917f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Jun 2026 09:26:41 
+0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- AGI_ASI_GSIFI_Blueprint_2026_2030.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/AGI_ASI_GSIFI_Blueprint_2026_2030.md b/AGI_ASI_GSIFI_Blueprint_2026_2030.md index 09e0465..1a1a633 100644 --- a/AGI_ASI_GSIFI_Blueprint_2026_2030.md +++ b/AGI_ASI_GSIFI_Blueprint_2026_2030.md @@ -1,7 +1,7 @@ # AGI/ASI Governance, Containment, and Civilizational Security Blueprint for G‑SIFIs (2026–2030) -**Version:** 2.0 (implementation-grade update) -**Date:** April 28, 2026 +**Version:** 2.0 (implementation-grade update) +**Date:** April 28, 2026 **Audience:** CISO, CTO, CIO, CRO/Model Risk, Ops Resilience, Compliance, Internal Audit, and Board Risk Committees in globally systemically important financial institutions (G‑SIFIs). ---