From 4f664a353cc023c2d76de879070fc16fdb8f9a2f Mon Sep 17 00:00:00 2001 From: mdheller <21163552+mdheller@users.noreply.github.com> Date: Wed, 20 May 2026 17:47:31 -0400 Subject: [PATCH] Add steering artifact resolver --- Makefile | 1 + docs/index.md | 1 + docs/steering-artifact-resolution.md | 75 +++++++ src/agent_machine/cli.py | 159 +++------------ src/agent_machine/steering_artifacts.py | 259 ++++++++++++++++++++++++ 5 files changed, 368 insertions(+), 127 deletions(-) create mode 100644 docs/steering-artifact-resolution.md create mode 100644 src/agent_machine/steering_artifacts.py diff --git a/Makefile b/Makefile index 9393c50..01f7e19 100644 --- a/Makefile +++ b/Makefile @@ -90,6 +90,7 @@ validate-cli: $(BOOTSTRAP_CLI) steer stub-response /tmp/agent-machine-steer-request.json --pretty >/tmp/agent-machine-bootstrap-steer-stub-response.json $(PYCLI) steer preflight --sourceset gpt2-small.res-jb --pretty >/tmp/agent-machine-pycli-steer-preflight.json $(BOOTSTRAP_CLI) steer preflight --sourceset gpt2-small.res-jb --pretty >/tmp/agent-machine-bootstrap-steer-preflight.json + $(PYCLI) steer resolve-artifacts --sourceset gpt2-small.res-jb --local-dir /tmp/agent-machine-steering-artifacts --receipt-out /tmp/agent-machine-steering-artifact-receipt.json --dry-run --pretty >/tmp/agent-machine-pycli-artifact-receipt.json $(PYCLI) version $(PYCLI) paths --format json $(PYCLI) doctor --format json diff --git a/docs/index.md b/docs/index.md index d5f298e..cf2fc5e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -15,6 +15,7 @@ Agent Machine is a bootstrap runtime-control substrate for SourceOS agent worklo | [Local /steer endpoint contract](local-steer-endpoint.md) | Noetica-compatible local steering endpoint contract and stub behavior. | | [Steering sourceset registry](steering-sourcesets.md) | Registered model/SAE sourceset records for local steering work. 
| | [Steering artifact receipts](steering-artifact-receipts.md) | Artifact-resolution receipt contract for model and SAE files. | +| [Steering artifact resolution](steering-artifact-resolution.md) | Operator command for resolving model/SAE files and emitting a complete receipt. | | [GPT-2 Small steering activation path](steering-activation-path.md) | Fail-closed real-path entrypoint and remaining blockers for controlled activation. | ## Architecture diff --git a/docs/steering-artifact-resolution.md b/docs/steering-artifact-resolution.md new file mode 100644 index 0000000..75335ad --- /dev/null +++ b/docs/steering-artifact-resolution.md @@ -0,0 +1,75 @@ +# Steering Artifact Resolution + +Status: operator command for producing a complete `SteeringArtifactReceipt`. + +## Purpose + +The local steering runtime must not claim an applied steering result unless the model and SAE files are resolved to exact source files and verified by SHA-256 digest. + +This command prepares the receipt that records that resolution. It does not load GPT-2 Small, load the SAE, run inference, or inject activations. + +## Dry run + +CI and contributors can validate the receipt shape without network access: + +```bash +agent-machine steer resolve-artifacts \ + --sourceset gpt2-small.res-jb \ + --local-dir /tmp/agent-machine-steering-artifacts \ + --receipt-out /tmp/agent-machine-steering-artifact-receipt.json \ + --dry-run \ + --pretty +``` + +Dry run emits a pending receipt and does not contact Hugging Face. 
+ +## Real operator run + +On an operator machine with optional steering dependencies installed: + +```bash +python3 -m pip install -r requirements-steering.txt + +agent-machine steer resolve-artifacts \ + --sourceset gpt2-small.res-jb \ + --local-dir /var/lib/agent-machine/models/steering \ + --receipt-out /var/lib/agent-machine/evidence/gpt2-small-res-jb.steering-artifact-receipt.json \ + --allow-network \ + --pretty +``` + +The resolver uses `huggingface_hub` with an explicit `local_dir` so the receipt records stable local paths rather than opaque default cache paths. Pass `--revision` to resolve a branch or tag other than the default `main`; the same value is applied to both repositories, and the receipt records the commit SHA each one resolves to. + +## Receipt requirements + +For each resolved model, tokenizer, and SAE file, the receipt records: + +- source repository +- exact file path +- resolved immutable revision / commit SHA +- local path +- file size +- SHA-256 digest +- digest verification status + +The GPT-2 Small resolver currently resolves: + +```text +openai-community/gpt2: + config.json + generation_config.json + merges.txt + model.safetensors + tokenizer.json + tokenizer_config.json + vocab.json + +jbloom/GPT2-Small-SAEs-Reformatted: + blocks.6.hook_resid_pre/cfg.json + blocks.6.hook_resid_pre/sae_weights.safetensors + blocks.6.hook_resid_pre/sparsity.safetensors +``` + +## Boundary + +A complete artifact receipt is necessary but not sufficient for applied steering. The active steering gate still requires storage receipt references, policy/grant admission, model loading, SAE loading, activation injection, and a local smoke record with `status: applied`. 
diff --git a/src/agent_machine/cli.py b/src/agent_machine/cli.py index 1ee63af..a1abb88 100644 --- a/src/agent_machine/cli.py +++ b/src/agent_machine/cli.py @@ -27,7 +27,6 @@ def load_json(path: Path) -> dict[str, Any]: - """Load a JSON object without importing optional validation dependencies.""" try: with path.open("r", encoding="utf-8") as handle: value = json.load(handle) @@ -47,10 +46,7 @@ def dependency_hint(error: BaseException) -> str: missing = "jsonschema" else: missing = "a required Python dependency" - return ( - f"Agent Machine Python dependency missing: {missing}.\n" - f"Install dependencies with: python3 -m pip install -r {REQUIREMENTS_PATH}" - ) + return f"Agent Machine Python dependency missing: {missing}.\nInstall dependencies with: python3 -m pip install -r {REQUIREMENTS_PATH}" def import_renderer(importer: Callable[[], Any]) -> Any: @@ -93,37 +89,11 @@ def probe_payload() -> dict[str, Any]: return { "specVersion": "0.1.0", "kind": "AgentMachineProbe", - "host": { - "hostname": platform.node() or command_output(["hostname"]), - "os": platform.system() or "unknown", - "kernel": platform.release() or "unknown", - "arch": platform.machine() or "unknown", - }, - "runtime": { - "systemdAvailable": command_available("systemctl"), - "podmanAvailable": command_available("podman"), - "dockerAvailable": command_available("docker"), - "selinuxMode": selinux_mode(), - "cgroupMode": cgroup_mode(), - }, - "storage": { - "lvmAvailable": command_available("lvs"), - "modelCache": str(default_model_cache_path()), - "runtimeCache": str(default_runtime_cache_path()), - "evidencePath": str(default_evidence_path()), - }, - "accelerators": { - "cpuAvailable": True, - "vulkanProbeAvailable": command_available("vulkaninfo"), - "cudaProbeAvailable": command_available("nvidia-smi"), - "rocmProbeAvailable": command_available("rocminfo"), - "metalAvailable": False, - }, - "safety": { - "rawPromptContentIncluded": False, - "rawKvCacheContentIncluded": False, - 
"secretValuesIncluded": False, - }, + "host": {"hostname": platform.node() or command_output(["hostname"]), "os": platform.system() or "unknown", "kernel": platform.release() or "unknown", "arch": platform.machine() or "unknown"}, + "runtime": {"systemdAvailable": command_available("systemctl"), "podmanAvailable": command_available("podman"), "dockerAvailable": command_available("docker"), "selinuxMode": selinux_mode(), "cgroupMode": cgroup_mode()}, + "storage": {"lvmAvailable": command_available("lvs"), "modelCache": str(default_model_cache_path()), "runtimeCache": str(default_runtime_cache_path()), "evidencePath": str(default_evidence_path())}, + "accelerators": {"cpuAvailable": True, "vulkanProbeAvailable": command_available("vulkaninfo"), "cudaProbeAvailable": command_available("nvidia-smi"), "rocmProbeAvailable": command_available("rocminfo"), "metalAvailable": False}, + "safety": {"rawPromptContentIncluded": False, "rawKvCacheContentIncluded": False, "secretValuesIncluded": False}, } @@ -131,21 +101,9 @@ def doctor_payload() -> dict[str, Any]: return { "specVersion": "0.1.0", "kind": "AgentMachineDoctor", - "install": { - "cliVersion": __version__, - "homebrewAvailable": command_available("brew"), - "bootstrapOnly": True, - "runtimeDirectoriesManaged": False, - }, - "readiness": { - "podmanAvailable": command_available("podman"), - "lvmAvailable": command_available("lvs"), - "probeSecretFree": True, - }, - "nextActions": [ - "agent-machine probe --format json", - "review docs/install.md before privileged runtime activation", - ], + "install": {"cliVersion": __version__, "homebrewAvailable": command_available("brew"), "bootstrapOnly": True, "runtimeDirectoriesManaged": False}, + "readiness": {"podmanAvailable": command_available("podman"), "lvmAvailable": command_available("lvs"), "probeSecretFree": True}, + "nextActions": ["agent-machine probe --format json", "review docs/install.md before privileged runtime activation"], } @@ -187,14 +145,7 @@ def 
cmd_version(_args: argparse.Namespace) -> int: def cmd_paths(args: argparse.Namespace) -> int: - paths = { - "config": str(default_config_path()), - "state": str(default_state_path()), - "models": str(default_model_cache_path()), - "cache": str(default_runtime_cache_path()), - "evidence": str(default_evidence_path()), - "runtime": str(default_runtime_path()), - } + paths = {"config": str(default_config_path()), "state": str(default_state_path()), "models": str(default_model_cache_path()), "cache": str(default_runtime_cache_path()), "evidence": str(default_evidence_path()), "runtime": str(default_runtime_path())} if args.format == "json": print(json.dumps(paths, sort_keys=True)) else: @@ -210,7 +161,6 @@ def cmd_doctor(args: argparse.Namespace) -> int: def cmd_probe(args: argparse.Namespace) -> int: - # --fail-closed is accepted now so shell and Python CLIs remain compatible. print_json_or_text(probe_payload(), args.format, print_probe_text) return 0 @@ -265,28 +215,12 @@ def cmd_policy_resolve(args: argparse.Namespace) -> int: policy_fabric = import_renderer(lambda: __import__("agent_machine.policy_fabric", fromlist=["_unused"])) agentpod = load_json(args.agentpod_json) policies = policy_fabric.load_policy_admissions(files=args.policy_file, directories=args.policy_dir, root=REPO_ROOT) - policy = policy_fabric.resolve_policy_admission( - policies=policies, - agentpod_id=str(agentpod.get("id")), - request_type=args.request_type, - deployment_receipt_id=args.deployment_receipt_id, - agent_machine_id=args.agent_machine_id, - provider_id=args.provider_id, - policy_id=args.policy_id, - expected_status=args.expected_status, - allow_missing_stub=not args.no_missing_stub, - decided_at=args.decided_at, - root=REPO_ROOT, - ) - if args.pretty: - print(json.dumps(policy, indent=2, sort_keys=True)) - else: - print(json.dumps(policy, sort_keys=True, separators=(",", ":"))) + policy = policy_fabric.resolve_policy_admission(policies=policies, agentpod_id=str(agentpod.get("id")), 
request_type=args.request_type, deployment_receipt_id=args.deployment_receipt_id, agent_machine_id=args.agent_machine_id, provider_id=args.provider_id, policy_id=args.policy_id, expected_status=args.expected_status, allow_missing_stub=not args.no_missing_stub, decided_at=args.decided_at, root=REPO_ROOT) + print(json.dumps(policy, indent=2 if args.pretty else None, sort_keys=True)) return 0 def resolve_activation_policy_and_grant(args: argparse.Namespace, agentpod: dict[str, Any], policy_fabric: Any) -> tuple[dict[str, Any], dict[str, Any]]: - """Resolve activation policy/grant from explicit files or local policy store.""" policy_json = args.policy_json grant_json = args.grant_json resolver_requested = bool(args.policy_file or args.policy_dir or args.policy_id or args.expected_status) @@ -294,26 +228,11 @@ def resolve_activation_policy_and_grant(args: argparse.Namespace, agentpod: dict grant_json = policy_json policy_json = None if grant_json is None: - raise AssertionError( - "grant JSON is required. Use either ` ` " - "or ` --policy-dir `" - ) + raise AssertionError("grant JSON is required. 
Use either ` ` or ` --policy-dir `") if policy_json is not None: return load_json(policy_json), load_json(grant_json) policies = policy_fabric.load_policy_admissions(files=args.policy_file, directories=args.policy_dir, root=REPO_ROOT) - policy = policy_fabric.resolve_policy_admission( - policies=policies, - agentpod_id=str(agentpod.get("id")), - request_type="activation", - deployment_receipt_id=args.deployment_receipt_id, - agent_machine_id=args.agent_machine_id, - provider_id=args.provider_id, - policy_id=args.policy_id, - expected_status=args.expected_status, - allow_missing_stub=not args.no_missing_stub, - decided_at=args.decided_at, - root=REPO_ROOT, - ) + policy = policy_fabric.resolve_policy_admission(policies=policies, agentpod_id=str(agentpod.get("id")), request_type="activation", deployment_receipt_id=args.deployment_receipt_id, agent_machine_id=args.agent_machine_id, provider_id=args.provider_id, policy_id=args.policy_id, expected_status=args.expected_status, allow_missing_stub=not args.no_missing_stub, decided_at=args.decided_at, root=REPO_ROOT) return policy, load_json(grant_json) @@ -326,22 +245,9 @@ def cmd_activate_evaluate(args: argparse.Namespace) -> int: storage_receipt_refs = list(args.storage_receipt_ref or []) if not storage_receipt_refs and storage_receipts: storage_receipt_refs = [str(receipt.get("id")) for receipt in storage_receipts] - decision = activation.evaluate_activation( - agentpod=agentpod, - policy=policy, - grant=grant, - deployment_receipt_id=args.deployment_receipt_id, - storage_receipt_refs=storage_receipt_refs, - storage_receipts=storage_receipts if storage_receipts else None, - decided_at=args.decided_at, - decision_id=args.decision_id, - root=REPO_ROOT, - ) + decision = activation.evaluate_activation(agentpod=agentpod, policy=policy, grant=grant, deployment_receipt_id=args.deployment_receipt_id, storage_receipt_refs=storage_receipt_refs, storage_receipts=storage_receipts if storage_receipts else None, 
decided_at=args.decided_at, decision_id=args.decision_id, root=REPO_ROOT) activation.validate_activation_decision_payload(decision, REPO_ROOT) - if args.pretty: - print(json.dumps(decision, indent=2, sort_keys=True)) - else: - print(json.dumps(decision, sort_keys=True, separators=(",", ":"))) + print(json.dumps(decision, indent=2 if args.pretty else None, sort_keys=True)) return 0 @@ -370,50 +276,47 @@ def cmd_steer_serve(args: argparse.Namespace) -> int: return int(steering_runtime.serve_sourceset(args.sourceset, host=args.host, port=args.port)) +def cmd_steer_resolve_artifacts(args: argparse.Namespace) -> int: + steering_artifacts = __import__("agent_machine.steering_artifacts", fromlist=["_unused"]) + result = steering_artifacts.resolve_steering_artifacts(args.sourceset, args.local_dir, args.receipt_out, allow_network=args.allow_network, dry_run=args.dry_run, revision=args.revision) + print(json.dumps(result, indent=2 if args.pretty else None, sort_keys=True)) + return 0 + + def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser(description="Agent Machine Python CLI") subcommands = parser.add_subparsers(dest="command", required=True) - version = subcommands.add_parser("version", help="Print package version") version.set_defaults(func=cmd_version) - paths = subcommands.add_parser("paths", help="Print default runtime paths") paths.add_argument("--format", choices=["text", "json"], default="text") paths.set_defaults(func=cmd_paths) - doctor = subcommands.add_parser("doctor", help="Run conservative install/readiness diagnostics") doctor.add_argument("--format", choices=["text", "json"], default="text") doctor.set_defaults(func=cmd_doctor) - probe = subcommands.add_parser("probe", help="Run conservative host/runtime probe") probe.add_argument("--format", choices=["text", "json"], default="text") probe.add_argument("--fail-closed", action="store_true") probe.set_defaults(func=cmd_probe) - render = subcommands.add_parser("render", help="Render 
AgentPod-derived artifacts") render_subcommands = render.add_subparsers(dest="render_command", required=True) - render_plan = render_subcommands.add_parser("plan", help="Render AgentPodDeploymentPlan JSON") render_plan.add_argument("agentpod_json", type=Path) render_plan.add_argument("--pretty", action="store_true") render_plan.set_defaults(func=cmd_render_plan) - render_receipt = render_subcommands.add_parser("receipt", help="Render DeploymentReceipt JSON") render_receipt.add_argument("agentpod_json", type=Path) render_receipt.add_argument("--pretty", action="store_true") render_receipt.add_argument("--artifact-path", default="stdout:AgentPodDeploymentPlan") render_receipt.set_defaults(func=cmd_render_receipt) - render_quadlet = render_subcommands.add_parser("quadlet", help="Render local Quadlet .container") render_quadlet.add_argument("agentpod_json", type=Path) render_quadlet.add_argument("--compare", type=Path) render_quadlet.set_defaults(func=cmd_render_quadlet) - render_k8s = render_subcommands.add_parser("k8s", help="Render Kubernetes YAML") render_k8s.add_argument("agentpod_json", type=Path) render_k8s.add_argument("--compare", type=Path) render_k8s.set_defaults(func=cmd_render_k8s) - policy = subcommands.add_parser("policy", help="Resolve Policy Fabric admission artifacts") policy_subcommands = policy.add_subparsers(dest="policy_command", required=True) policy_resolve = policy_subcommands.add_parser("resolve", help="Resolve a PolicyAdmission from local files/stores") @@ -430,7 +333,6 @@ def build_parser() -> argparse.ArgumentParser: policy_resolve.add_argument("--decided-at", default="1970-01-01T00:00:00Z") policy_resolve.add_argument("--pretty", action="store_true") policy_resolve.set_defaults(func=cmd_policy_resolve) - activate = subcommands.add_parser("activate", help="Evaluate activation readiness") activate_subcommands = activate.add_subparsers(dest="activate_command", required=True) activate_evaluate = activate_subcommands.add_parser("evaluate", 
help="Evaluate AgentPod activation decision") @@ -452,33 +354,36 @@ def build_parser() -> argparse.ArgumentParser: activate_evaluate.add_argument("--decision-id") activate_evaluate.add_argument("--pretty", action="store_true") activate_evaluate.set_defaults(func=cmd_activate_evaluate) - steer = subcommands.add_parser("steer", help="Inspect or serve local steering endpoints") steer_subcommands = steer.add_subparsers(dest="steer_command", required=True) - stub_response = steer_subcommands.add_parser("stub-response", help="Render a Noetica-compatible steering stub response") stub_response.add_argument("request_json", type=Path) stub_response.add_argument("--status", choices=["not_configured", "noop"], default="not_configured") stub_response.add_argument("--pretty", action="store_true") stub_response.set_defaults(func=cmd_steer_stub_response) - serve_stub = steer_subcommands.add_parser("serve-stub", help="Serve local POST /steer contract stub") serve_stub.add_argument("--host", default="127.0.0.1") serve_stub.add_argument("--port", type=int, default=8080) serve_stub.add_argument("--status", choices=["not_configured", "noop"], default="not_configured") serve_stub.set_defaults(func=cmd_steer_serve_stub) - preflight = steer_subcommands.add_parser("preflight", help="Inspect readiness for a registered steering sourceset") preflight.add_argument("--sourceset", required=True) preflight.add_argument("--pretty", action="store_true") preflight.set_defaults(func=cmd_steer_preflight) - serve = steer_subcommands.add_parser("serve", help="Serve sourceset-aware local /steer endpoint in fail-closed mode") serve.add_argument("--sourceset", required=True) serve.add_argument("--host", default="127.0.0.1") serve.add_argument("--port", type=int, default=8080) serve.set_defaults(func=cmd_steer_serve) - + resolve_artifacts = steer_subcommands.add_parser("resolve-artifacts", help="Resolve steering artifacts and emit a receipt") + resolve_artifacts.add_argument("--sourceset", required=True) + 
resolve_artifacts.add_argument("--local-dir", type=Path, required=True) + resolve_artifacts.add_argument("--receipt-out", type=Path, required=True) + resolve_artifacts.add_argument("--revision", default="main") + resolve_artifacts.add_argument("--allow-network", action="store_true") + resolve_artifacts.add_argument("--dry-run", action="store_true") + resolve_artifacts.add_argument("--pretty", action="store_true") + resolve_artifacts.set_defaults(func=cmd_steer_resolve_artifacts) return parser diff --git a/src/agent_machine/steering_artifacts.py b/src/agent_machine/steering_artifacts.py new file mode 100644 index 0000000..01fe0a3 --- /dev/null +++ b/src/agent_machine/steering_artifacts.py @@ -0,0 +1,259 @@ +"""Artifact resolution for local SAE steering. + +This module resolves model/tokenizer/SAE files into an operator-controlled local +artifact directory and emits a SteeringArtifactReceipt. It does not load models, +load SAEs, run inference, or perform activation injection. +""" + +from __future__ import annotations + +import hashlib +import json +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +from agent_machine.contracts import load_json, validate_by_kind +from agent_machine.paths import repo_root_from_file +from agent_machine.steering_runtime import SteeringRuntimeError, load_sourceset + +REPO_ROOT = repo_root_from_file(__file__) + +GPT2_MODEL_FILES = [ + "config.json", + "generation_config.json", + "merges.txt", + "model.safetensors", + "tokenizer.json", + "tokenizer_config.json", + "vocab.json", +] + +GPT2_RES_JB_SAE_FILES = [ + "blocks.6.hook_resid_pre/cfg.json", + "blocks.6.hook_resid_pre/sae_weights.safetensors", + "blocks.6.hook_resid_pre/sparsity.safetensors", +] + + +def resolve_steering_artifacts( + sourceset_id: str, + local_dir: Path, + receipt_out: Path, + *, + allow_network: bool = False, + dry_run: bool = False, + revision: str = "main", +) -> dict[str, Any]: + """Resolve registered steering artifacts 
and emit a receipt. + + `dry_run=True` emits a pending receipt shape without contacting external + services. `allow_network=True` is required for real Hugging Face resolution + and download. + """ + sourceset = load_sourceset(sourceset_id) + if sourceset_id != "gpt2-small.res-jb": + raise SteeringRuntimeError("artifact resolution currently supports only gpt2-small.res-jb") + + local_dir = Path(local_dir) + receipt_out = Path(receipt_out) + + if dry_run: + receipt = build_pending_receipt(sourceset_id) + else: + if not allow_network: + raise SteeringRuntimeError("real artifact resolution requires --allow-network") + receipt = resolve_gpt2_small_res_jb(sourceset, local_dir, revision=revision) + + receipt_out.parent.mkdir(parents=True, exist_ok=True) + receipt_out.write_text(json.dumps(receipt, indent=2, sort_keys=True) + "\n", encoding="utf-8") + validate_by_kind(receipt_out, REPO_ROOT) + return receipt + + +def resolve_gpt2_small_res_jb(sourceset: dict[str, Any], local_dir: Path, *, revision: str) -> dict[str, Any]: + try: + from huggingface_hub import HfApi, hf_hub_download + except ImportError as exc: + raise SteeringRuntimeError( + "missing optional dependency: huggingface_hub. Install requirements-steering.txt on the operator machine." 
+ ) from exc + + api = HfApi() + generated_at = utc_now() + artifact_records: list[dict[str, Any]] = [] + + model_repo = require_repo(sourceset, "model") + model_revision = resolved_revision(api, model_repo, revision) + model_root = local_dir / sourceset["sourcesetId"] / safe_repo_name(model_repo) + for filename in GPT2_MODEL_FILES: + path = Path( + hf_hub_download( + repo_id=model_repo, + filename=filename, + revision=model_revision, + local_dir=str(model_root), + ) + ) + artifact_records.append( + artifact_record( + role=role_for_model_file(filename), + repo=model_repo, + file_path=filename, + resolved_revision_value=model_revision, + local_path=path, + ) + ) + + sae_repo = require_repo(sourceset, "sae") + sae_revision = resolved_revision(api, sae_repo, revision) + sae_root = local_dir / sourceset["sourcesetId"] / safe_repo_name(sae_repo) + for filename in GPT2_RES_JB_SAE_FILES: + path = Path( + hf_hub_download( + repo_id=sae_repo, + filename=filename, + revision=sae_revision, + local_dir=str(sae_root), + ) + ) + artifact_records.append( + artifact_record( + role="sae-config" if filename.endswith("cfg.json") else "sae-artifact", + repo=sae_repo, + file_path=filename, + resolved_revision_value=sae_revision, + local_path=path, + ) + ) + + return { + "specVersion": "0.1.0", + "id": f"urn:srcos:agent-machine:steering-artifact-receipt:{sourceset['sourcesetId']}.{receipt_stamp(generated_at)}", + "kind": "SteeringArtifactReceipt", + "sourcesetId": sourceset["sourcesetId"], + "status": "complete", + "generatedAt": generated_at, + "activationIssue": "active-steering-work", + "artifactRecords": artifact_records, + "missing": [], + "storageReceiptRefs": [], + "policyRefs": [], + "agentRegistryGrantRefs": [], + "receiptSafety": { + "includeRawArtifacts": False, + "includeAuthMaterial": False, + }, + "notes": [ + "This receipt records resolved artifact metadata only.", + "It does not load the model, load the SAE, run inference, or perform activation injection.", + "A separate 
storage receipt, policy admission, and grant record are still required before applied steering can be accepted.", + ], + } + + +def build_pending_receipt(sourceset_id: str) -> dict[str, Any]: + generated_at = utc_now() + return { + "specVersion": "0.1.0", + "id": f"urn:srcos:agent-machine:steering-artifact-receipt:{sourceset_id}.{receipt_stamp(generated_at)}.dryrun", + "kind": "SteeringArtifactReceipt", + "sourcesetId": sourceset_id, + "status": "pending", + "generatedAt": generated_at, + "activationIssue": "active-steering-work", + "artifactRecords": [], + "missing": [ + "network resolution not performed", + "artifact files not downloaded", + "artifact revisions not resolved", + "artifact sha256 digests not computed", + "storage receipts not emitted", + ], + "storageReceiptRefs": [], + "policyRefs": [], + "agentRegistryGrantRefs": [], + "receiptSafety": { + "includeRawArtifacts": False, + "includeAuthMaterial": False, + }, + "notes": [ + "Dry run receipt for validation only.", + "Run with --allow-network on an operator machine to produce a complete receipt.", + ], + } + + +def artifact_record( + *, + role: str, + repo: str, + file_path: str, + resolved_revision_value: str, + local_path: Path, +) -> dict[str, Any]: + return { + "role": role, + "source": { + "type": "huggingface", + "repo": repo, + "filePath": file_path, + "resolvedRevision": resolved_revision_value, + "url": f"https://huggingface.co/{repo}/blob/{resolved_revision_value}/{file_path}", + }, + "storage": { + "localPath": str(local_path), + "sizeBytes": local_path.stat().st_size, + "storageReceiptRef": None, + }, + "digest": { + "algorithm": "sha256", + "sha256": sha256_file(local_path), + "verified": True, + }, + } + + +def resolved_revision(api: Any, repo: str, revision: str) -> str: + info = api.model_info(repo_id=repo, revision=revision) + sha = getattr(info, "sha", None) + if not isinstance(sha, str) or not sha: + raise SteeringRuntimeError(f"could not resolve immutable revision for 
{repo}@{revision}") + return sha + + +def sha256_file(path: Path) -> str: + hasher = hashlib.sha256() + with path.open("rb") as handle: + for chunk in iter(lambda: handle.read(1024 * 1024), b""): + hasher.update(chunk) + return hasher.hexdigest() + + +def require_repo(sourceset: dict[str, Any], section: str) -> str: + value = sourceset.get(section, {}).get("source", {}).get("repo") + if not isinstance(value, str) or not value: + raise SteeringRuntimeError(f"sourceset missing {section}.source.repo") + return value + + +def role_for_model_file(filename: str) -> str: + if filename in {"config.json", "generation_config.json"}: + return "model-config" + if filename in {"tokenizer.json", "tokenizer_config.json", "vocab.json", "merges.txt"}: + return "tokenizer" + if filename.endswith(".safetensors"): + return "model-weight" + return "other" + + +def safe_repo_name(repo: str) -> str: + return repo.replace("/", "__") + + +def utc_now() -> str: + return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z") + + +def receipt_stamp(timestamp: str) -> str: + return timestamp.lower().replace("-", "").replace(":", "").replace("+", "").replace(".", "").replace("z", "z")