From bb0820864670b9c1dfd38337205838705cc4ac3d Mon Sep 17 00:00:00 2001 From: mdheller <21163552+mdheller@users.noreply.github.com> Date: Wed, 20 May 2026 19:48:05 -0400 Subject: [PATCH] Add check path --- Makefile | 3 +- docs/steering-loader.md | 33 +++- .../steering-loader/synthetic-model.txt | 1 + .../steering-loader/synthetic-sae.txt | 1 + ...c.available.steering-artifact-receipt.json | 63 ++++++++ scripts/load-steering-receipt.py | 58 +++++++ src/agent_machine/steering_loader.py | 141 +++++++++++++++++- 7 files changed, 287 insertions(+), 13 deletions(-) create mode 100644 examples/fixtures/steering-loader/synthetic-model.txt create mode 100644 examples/fixtures/steering-loader/synthetic-sae.txt create mode 100644 examples/steering-artifact-receipts/synthetic.available.steering-artifact-receipt.json create mode 100755 scripts/load-steering-receipt.py diff --git a/Makefile b/Makefile index 92a31cb..461e8d1 100644 --- a/Makefile +++ b/Makefile @@ -91,7 +91,8 @@ validate-cli: $(PYCLI) steer preflight --sourceset gpt2-small.res-jb --pretty >/tmp/agent-machine-pycli-steer-preflight.json $(BOOTSTRAP_CLI) steer preflight --sourceset gpt2-small.res-jb --pretty >/tmp/agent-machine-bootstrap-steer-preflight.json $(PYCLI) steer resolve-artifacts --sourceset gpt2-small.res-jb --local-dir /tmp/agent-machine-steering-artifacts --receipt-out /tmp/agent-machine-steering-artifact-receipt.json --dry-run --pretty >/tmp/agent-machine-pycli-artifact-receipt.json - $(PYTHON) scripts/verify-steering-receipt.py examples/steering-artifact-receipts/gpt2-small-res-jb.missing.steering-artifact-receipt.json --expect-status not_configured --pretty >/tmp/agent-machine-steering-load-preflight.json + $(PYTHON) scripts/verify-steering-receipt.py examples/steering-artifact-receipts/gpt2-small-res-jb.missing.steering-artifact-receipt.json --expect-status not_configured --pretty >/tmp/agent-machine-steering-verify-preflight.json + $(PYTHON) scripts/load-steering-receipt.py examples/steering-artifact-receipts/synthetic.available.steering-artifact-receipt.json --attempt-load --expect-status available --expect-model-loaded true --expect-sae-loaded true --pretty >/tmp/agent-machine-steering-synthetic-load.json $(PYCLI) version $(PYCLI) paths --format json $(PYCLI) doctor --format json diff --git a/docs/steering-loader.md b/docs/steering-loader.md index cfc215d..b235d4a 100644 --- a/docs/steering-loader.md +++ b/docs/steering-loader.md @@ -1,12 +1,12 @@ # Steering Receipt Loader -Status: receipt verification tranche for local steering work. +Status: receipt verification and CI-safe synthetic loading tranche for local steering work. ## Purpose -Before any local steering runtime may load model or SAE files, Agent Machine must verify that every artifact referenced by a `SteeringArtifactReceipt` exists locally and matches the receipt's SHA-256 digest. +Before any local steering runtime may use model or SAE files, Agent Machine must verify that every artifact referenced by a `SteeringArtifactReceipt` exists locally and matches the receipt's SHA-256 digest. -This document describes the fail-closed loader preflight. It does not claim applied steering. +The loader re-verifies the receipt immediately before any load attempt. This prevents a stale preflight check from being trusted after files on disk have changed. ## Verification command @@ -19,9 +19,23 @@ scripts/verify-steering-receipt.py \ The fixture paths intentionally do not exist. The expected result is `status: not_configured`, with missing-file diagnostics for each absent artifact. +## CI-safe load command + +```bash +scripts/load-steering-receipt.py \ + examples/steering-artifact-receipts/synthetic.available.steering-artifact-receipt.json \ + --attempt-load \ + --expect-status available \ + --expect-model-loaded true \ + --expect-sae-loaded true \ + --pretty +``` + +The synthetic fixture contains small text artifacts, not model or SAE weights. It proves the `SteeringLoader.load()` path re-verifies digests at load time and only reports loaded after the receipt is valid. + ## Runtime rule -A future runtime loader must not attempt to load GPT-2 Small or the residual-stream SAE until: +A future runtime loader must not attempt to use GPT-2 Small or the residual-stream SAE until: - the receipt validates against `contracts/steering-artifact-receipt.schema.json` - each referenced local path exists @@ -30,14 +44,17 @@ A future runtime loader must not attempt to load GPT-2 Small or the residual-str If any check fails, the runtime must fail closed and return a non-applied posture. +## Operator runtime imports + +The loader contains an optional runtime-import path for operator machines after a complete artifact receipt exists. Optional runtime dependencies are not part of normal validation and must remain outside the default bootstrap path. + ## Boundary -This tranche verifies receipt integrity only. It does not: +This tranche does not: -- load GPT-2 Small into memory -- load the SAE into memory - run inference - inject activations - return `status: applied` +- claim runtime readiness -The next implementation tranche may add optional runtime loading after this digest gate succeeds. +It adds the digest-gated load envelope that future activation code must use. diff --git a/examples/fixtures/steering-loader/synthetic-model.txt b/examples/fixtures/steering-loader/synthetic-model.txt new file mode 100644 index 0000000..4fc75f4 --- /dev/null +++ b/examples/fixtures/steering-loader/synthetic-model.txt @@ -0,0 +1 @@ +synthetic model artifact diff --git a/examples/fixtures/steering-loader/synthetic-sae.txt b/examples/fixtures/steering-loader/synthetic-sae.txt new file mode 100644 index 0000000..9bfa153 --- /dev/null +++ b/examples/fixtures/steering-loader/synthetic-sae.txt @@ -0,0 +1 @@ +synthetic SAE artifact diff --git a/examples/steering-artifact-receipts/synthetic.available.steering-artifact-receipt.json b/examples/steering-artifact-receipts/synthetic.available.steering-artifact-receipt.json new file mode 100644 index 0000000..b0b53ea --- /dev/null +++ b/examples/steering-artifact-receipts/synthetic.available.steering-artifact-receipt.json @@ -0,0 +1,63 @@ +{ + "specVersion": "0.1.0", + "id": "urn:srcos:agent-machine:steering-artifact-receipt:synthetic.available", + "kind": "SteeringArtifactReceipt", + "sourcesetId": "synthetic.available", + "status": "complete", + "generatedAt": "1970-01-01T00:00:00Z", + "activationIssue": "active-steering-work", + "artifactRecords": [ + { + "role": "model-weight", + "source": { + "type": "local", + "repo": "synthetic/model", + "filePath": "synthetic-model.txt", + "resolvedRevision": "synthetic-fixture", + "url": "local://examples/fixtures/steering-loader/synthetic-model.txt" + }, + "storage": { + "localPath": "examples/fixtures/steering-loader/synthetic-model.txt", + "sizeBytes": 25, + "storageReceiptRef": null + }, + "digest": { + "algorithm": "sha256", + "sha256": "e127f6c93d77b6e8d5ce59f9e98996484486bdbbe6d85cc2fccb1115e8c79bb0", + "verified": true + } + }, + { + "role": "sae-artifact", + "source": { + "type": "local", + "repo": "synthetic/sae", + "filePath": "synthetic-sae.txt", + "resolvedRevision": "synthetic-fixture", + "url": "local://examples/fixtures/steering-loader/synthetic-sae.txt" + }, + "storage": { + "localPath": "examples/fixtures/steering-loader/synthetic-sae.txt", + "sizeBytes": 23, + "storageReceiptRef": null + }, + "digest": { + "algorithm": "sha256", + "sha256": "a758389c63a7db69e811c4f26a4834e02a1630b2b29c08bdca5db49627d91415", + "verified": true + } + } + ], + "missing": [], + "storageReceiptRefs": [], + "policyRefs": [], + "agentRegistryGrantRefs": [], + "receiptSafety": { + "includeRawArtifacts": false, + "includeAuthMaterial": false + }, + "notes": [ + "Synthetic fixture for CI-only loader validation.", + "This fixture proves digest re-verification and load-path gating without model weights." + ] +} diff --git a/scripts/load-steering-receipt.py b/scripts/load-steering-receipt.py new file mode 100755 index 0000000..48df2e1 --- /dev/null +++ b/scripts/load-steering-receipt.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +"""Run receipt-backed steering loader preflight or synthetic load.""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "src")) + +from agent_machine.steering_loader import SteeringLoader # noqa: E402 + + +def main() -> int: + parser = argparse.ArgumentParser(description="Load or preflight a steering artifact receipt") + parser.add_argument("receipt", type=Path) + parser.add_argument("--attempt-load", action="store_true") + parser.add_argument("--allow-runtime-imports", action="store_true") + parser.add_argument("--expect-status", choices=["available", "not_configured"]) + parser.add_argument("--expect-model-loaded", choices=["true", "false"]) + parser.add_argument("--expect-sae-loaded", choices=["true", "false"]) + parser.add_argument("--pretty", action="store_true") + args = parser.parse_args() + + if not args.attempt_load and args.allow_runtime_imports: + print("--allow-runtime-imports requires --attempt-load", file=sys.stderr) + return 2 + + if args.attempt_load: + result = SteeringLoader().load(args.receipt, allow_runtime_imports=args.allow_runtime_imports) + else: + from agent_machine.steering_loader import verify_receipt_files + + result = verify_receipt_files(args.receipt) + + print(json.dumps(result, indent=2 if args.pretty else None, sort_keys=True)) + + if args.expect_status and result.get("status") != args.expect_status: + print(f"expected status {args.expect_status}, got {result.get('status')}", file=sys.stderr) + return 1 + if args.expect_model_loaded is not None: + expected = args.expect_model_loaded == "true" + if bool(result.get("modelLoaded")) is not expected: + print(f"expected modelLoaded={expected}, got {result.get('modelLoaded')}", file=sys.stderr) + return 1 + if args.expect_sae_loaded is not None: + expected = args.expect_sae_loaded == "true" + if bool(result.get("saeLoaded")) is not expected: + print(f"expected saeLoaded={expected}, got {result.get('saeLoaded')}", file=sys.stderr) + return 1 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/agent_machine/steering_loader.py b/src/agent_machine/steering_loader.py index a1bbdf1..e806724 100644 --- a/src/agent_machine/steering_loader.py +++ b/src/agent_machine/steering_loader.py @@ -1,13 +1,15 @@ -"""Receipt-backed local artifact verification for steering runtime. +"""Receipt-backed local artifact verification and loading gate. -This module verifies a SteeringArtifactReceipt before any runtime may use the -referenced files. It is deliberately fail-closed: absent files or digest mismatch -produce a not_configured result rather than a runtime claim. +This module verifies a SteeringArtifactReceipt immediately before any runtime use +of the referenced files. It is deliberately fail-closed: absent files or digest +mismatch produce a not_configured result rather than a runtime claim. """ from __future__ import annotations import hashlib +import json +from dataclasses import dataclass from pathlib import Path from typing import Any @@ -17,6 +19,72 @@ REPO_ROOT = repo_root_from_file(__file__) +@dataclass(frozen=True) +class SteeringLoadedArtifacts: + """Minimal loaded-artifact envelope consumed by future steering engines.""" + + sourceset_id: str + model_artifacts: list[Path] + sae_artifacts: list[Path] + receipt_path: Path + synthetic: bool = False + runtime_model: Any | None = None + runtime_tokenizer: Any | None = None + runtime_sae: Any | None = None + + +class SteeringLoader: + """Fail-closed loader that re-verifies receipt digests before loading.""" + + def load(self, receipt_path: Path, *, allow_runtime_imports: bool = False) -> dict[str, Any]: + verified = verify_receipt_files(receipt_path) + if verified["status"] != "available": + return { + **verified, + "loadAttempted": False, + "modelLoaded": False, + "saeLoaded": False, + "loadStatus": "not_configured", + "loadError": "receipt files must exist and match SHA-256 before loading", + } + + receipt = load_json(Path(receipt_path)) + records = receipt.get("artifactRecords", []) + model_files = files_for_roles(records, {"model-config", "model-weight", "tokenizer"}) + sae_files = files_for_roles(records, {"sae-config", "sae-artifact"}) + synthetic = is_synthetic_receipt(records) + + if synthetic: + loaded = SteeringLoadedArtifacts( + sourceset_id=str(receipt.get("sourcesetId")), + model_artifacts=model_files, + sae_artifacts=sae_files, + receipt_path=Path(receipt_path), + synthetic=True, + ) + return { + **verified, + "loadAttempted": True, + "modelLoaded": True, + "saeLoaded": True, + "loadStatus": "available", + "synthetic": True, + "loadedArtifactCount": len(loaded.model_artifacts) + len(loaded.sae_artifacts), + } + + if not allow_runtime_imports: + return { + **verified, + "loadAttempted": False, + "modelLoaded": False, + "saeLoaded": False, + "loadStatus": "not_configured", + "loadError": "runtime imports disabled; pass allow_runtime_imports on an operator machine after artifact verification", + } + + return load_runtime_artifacts(receipt, model_files, sae_files, Path(receipt_path), verified) + + def verify_receipt_files(receipt_path: Path) -> dict[str, Any]: """Verify receipt local paths and SHA-256 digests without loading artifacts.""" receipt_path = Path(receipt_path) @@ -77,6 +145,71 @@ def verify_artifact_record(record: dict[str, Any]) -> dict[str, Any]: return result +def load_runtime_artifacts( + receipt: dict[str, Any], + model_files: list[Path], + sae_files: list[Path], + receipt_path: Path, + verified: dict[str, Any], +) -> dict[str, Any]: + try: + from transformers import AutoModelForCausalLM, AutoTokenizer + except ImportError as exc: + return {**verified, "loadAttempted": True, "modelLoaded": False, "saeLoaded": False, "loadStatus": "not_configured", "loadError": f"missing optional dependency: transformers: {exc}"} + try: + from safetensors.torch import load_file as load_safetensors + except ImportError as exc: + return {**verified, "loadAttempted": True, "modelLoaded": False, "saeLoaded": False, "loadStatus": "not_configured", "loadError": f"missing optional dependency: safetensors: {exc}"} + + model_dir = common_parent(model_files) + if model_dir is None: + return {**verified, "loadAttempted": True, "modelLoaded": False, "saeLoaded": False, "loadStatus": "not_configured", "loadError": "could not determine local model directory from receipt"} + if not sae_files: + return {**verified, "loadAttempted": True, "modelLoaded": False, "saeLoaded": False, "loadStatus": "not_configured", "loadError": "receipt does not contain SAE files"} + + model = AutoModelForCausalLM.from_pretrained(str(model_dir), local_files_only=True) + tokenizer = AutoTokenizer.from_pretrained(str(model_dir), local_files_only=True) + sae_payloads: list[Any] = [] + for path in sae_files: + if path.name.endswith(".safetensors"): + sae_payloads.append(load_safetensors(str(path))) + elif path.name == "cfg.json": + sae_payloads.append(json.loads(path.read_text(encoding="utf-8"))) + + loaded = SteeringLoadedArtifacts( + sourceset_id=str(receipt.get("sourcesetId")), + model_artifacts=model_files, + sae_artifacts=sae_files, + receipt_path=receipt_path, + runtime_model=model, + runtime_tokenizer=tokenizer, + runtime_sae=sae_payloads, + ) + return {**verified, "loadAttempted": True, "modelLoaded": loaded.runtime_model is not None and loaded.runtime_tokenizer is not None, "saeLoaded": bool(loaded.runtime_sae), "loadStatus": "available" if loaded.runtime_model is not None and loaded.runtime_tokenizer is not None and loaded.runtime_sae else "not_configured", "synthetic": False} + + +def files_for_roles(records: list[dict[str, Any]], roles: set[str]) -> list[Path]: + return [Path(str(record.get("storage", {}).get("localPath"))) for record in records if record.get("role") in roles] + + +def is_synthetic_receipt(records: list[dict[str, Any]]) -> bool: + repos = {str(record.get("source", {}).get("repo", "")) for record in records if isinstance(record.get("source"), dict)} + return bool(repos) and all(repo.startswith("synthetic/") for repo in repos) + + +def common_parent(paths: list[Path]) -> Path | None: + if not paths: + return None + common = paths[0].parent + for path in paths[1:]: + parent = path.parent + while common != parent and common not in parent.parents: + if common.parent == common: + return None + common = common.parent + return common + + def sha256_file(path: Path) -> str: hasher = hashlib.sha256() with path.open("rb") as handle: