Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ validate-policy-fabric:

validate-activation:
$(PYTHON) scripts/validate-activation.py
$(PYTHON) scripts/evaluate-activation.py $(LOCAL_AGENTPOD) $(READY_POLICY) $(READY_GRANT) --deployment-receipt-id $(DEPLOYMENT_RECEIPT_ID) --storage-receipt-dir $(RECEIPT_DIR) --decided-at $(DECIDED_AT) --decision-id urn:srcos:agent-machine:activation-decision:local-llama-cpp-allowed --pretty >/tmp/agent-machine-evaluate-activation-allowed.json
$(PYTHON) scripts/evaluate-activation.py $(LOCAL_AGENTPOD) $(READY_POLICY) $(READY_GRANT) --deployment-receipt-id $(DEPLOYMENT_RECEIPT_ID) --storage-receipt-dir examples --decided-at $(DECIDED_AT) --decision-id urn:srcos:agent-machine:activation-decision:local-llama-cpp-allowed --pretty >/tmp/agent-machine-evaluate-activation-allowed.json
$(PYCLI) activate evaluate $(LOCAL_AGENTPOD) $(FAIL_POLICY) $(FAIL_GRANT) --deployment-receipt-id $(DEPLOYMENT_RECEIPT_ID) --storage-receipt-dir $(RECEIPT_DIR) --decided-at $(DECIDED_AT) --decision-id urn:srcos:agent-machine:activation-decision:local-llama-cpp-fail-closed --pretty >/tmp/agent-machine-pycli-evaluate-activation-fail-closed.json
$(PYCLI) activate evaluate $(LOCAL_AGENTPOD) $(READY_GRANT) --policy-dir $(POLICY_DIR) --expected-status allowed --deployment-receipt-id $(DEPLOYMENT_RECEIPT_ID) --agent-machine-id urn:srcos:agent-machine:m2-asahi-local --provider-id urn:srcos:agent-machine:inference-provider:asahi-llama-cpp --storage-receipt-dir $(RECEIPT_DIR) --decided-at $(DECIDED_AT) --decision-id urn:srcos:agent-machine:activation-decision:local-llama-cpp-allowed --pretty >/tmp/agent-machine-pycli-resolved-policy-activation-allowed.json
$(BOOTSTRAP_CLI) activate evaluate $(LOCAL_AGENTPOD) $(READY_POLICY) $(READY_GRANT) --deployment-receipt-id $(DEPLOYMENT_RECEIPT_ID) --storage-receipt-dir $(RECEIPT_DIR) --decided-at $(DECIDED_AT) --decision-id urn:srcos:agent-machine:activation-decision:local-llama-cpp-allowed --pretty >/tmp/agent-machine-bootstrap-evaluate-activation-allowed.json
Expand All @@ -85,6 +85,9 @@ validate-cli:
$(BOOTSTRAP_CLI) paths
$(BOOTSTRAP_CLI) doctor --format json
$(BOOTSTRAP_CLI) probe --format json
printf '%s\n' '{"prompt":"Write one short sentence about Paris.","model_id":"gpt2-small","steering":{"feature_id":"10200","layer":"6-res-jb","strength":5}}' >/tmp/agent-machine-steer-request.json
$(PYCLI) steer stub-response /tmp/agent-machine-steer-request.json --pretty >/tmp/agent-machine-pycli-steer-stub-response.json
$(BOOTSTRAP_CLI) steer stub-response /tmp/agent-machine-steer-request.json --pretty >/tmp/agent-machine-bootstrap-steer-stub-response.json
$(PYCLI) version
$(PYCLI) paths --format json
$(PYCLI) doctor --format json
Expand Down
6 changes: 6 additions & 0 deletions bin/agent-machine
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ Usage:
agent-machine render k8s <agentpod.json> [--compare <file>]
agent-machine policy resolve <agentpod.json> --policy-dir <dir> --deployment-receipt-id <id> [--expected-status allowed]
agent-machine activate evaluate <agentpod.json> [policy.json] <grant.json> --deployment-receipt-id <id> [--policy-dir <dir>] [--storage-receipt-dir <dir>] [--pretty]
agent-machine steer stub-response <request.json> [--status not_configured|noop] [--pretty]
agent-machine steer serve-stub [--host 127.0.0.1] [--port 8080] [--status not_configured|noop]

This is the bootstrap CLI. It is intentionally conservative: it discovers host/runtime hints and never emits secrets, raw prompts, raw KV-cache contents, or credentials.
EOF
Expand Down Expand Up @@ -284,6 +286,10 @@ case "$COMMAND" in
shift || true
delegate_python_cli activate "$@"
;;
steer)
shift || true
delegate_python_cli steer "$@"
;;
*)
print_help
exit 2
Expand Down
1 change: 1 addition & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Agent Machine is a bootstrap runtime-control substrate for SourceOS agent worklo
| [Bootstrap MVP readiness](architecture/bootstrap-mvp-readiness.md) | Current maturity state, implemented surfaces, blockers, and recommended next work. |
| [World-class release gate](architecture/world-class-release-gate.md) | Release-blocking gates and production-readiness criteria. |
| [Local SAE steering inference readiness](inference-local-steering.md) | Inspection record for Neuronpedia-compatible local steering readiness and current gaps. |
| [Local /steer endpoint contract](local-steer-endpoint.md) | Noetica-compatible local steering endpoint contract and stub behavior. |

## Architecture

Expand Down
141 changes: 141 additions & 0 deletions docs/local-steer-endpoint.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# Local Neuronpedia-Compatible `/steer` Endpoint Contract

Status: Issue #32 contract and stub. This document defines the local endpoint shape that Noetica can call through `NEURONPEDIA_BASE_URL=http://localhost:<port>` without changing Noetica code.

This is not the real activation-injection implementation. It does not load model weights, load SAE artifacts, register sourcesets, or intercept a model forward pass.

## Client contract

Client references:

- `SocioProphet/Noetica:lib/providers/neuronpedia.ts`
- `SocioProphet/Noetica:app/api/steer/route.ts`
- `SocioProphet/Noetica:docs/adapter-contracts.md`

Noetica endpoint behavior:

- hosted base URL `https://www.neuronpedia.org` resolves to `/api/steer`
- local base URL `http://localhost:<port>` resolves to `/steer`

Agent Machine satisfies the local form in this contract.

## Endpoint

```text
POST /steer
Content-Type: application/json
```

Minimal request shape:

```json
{
"prompt": "Write one short sentence about Paris.",
"model_id": "gpt2-small",
"steering": {
"feature_id": "10200",
"layer": "6-res-jb",
"strength": 5,
"preset": "optional"
}
}
```

Required fields:

- `prompt`: non-empty string
- `model_id`: non-empty string
- `steering.feature_id`: non-empty string
- `steering.layer`: non-empty string
- `steering.strength`: number

Optional fields:

- `steering.preset`: string

Response shape compatible with Noetica `SteeringResult`:

```json
{
"status": "not_configured",
"baseline": "Write one short sentence about Paris.",
"steered": "Write one short sentence about Paris.",
"diff_summary": "Agent Machine local steering endpoint is not configured for activation. Sourceset/model readiness for gpt2-small is outside this Issue #32 stub.",
"feature_id": "10200",
"layer": "6-res-jb",
"strength": 5
}
```

Allowed statuses:

- `applied`: real activation steering was applied. This is not returned by the Issue #32 stub.
- `not_configured`: sourceset/backend/model/SAE artifacts are unavailable.
- `noop`: request shape was accepted but no runtime intervention was applied.

## Health endpoints

The stub server provides:

```text
GET /health
GET /ready
```

Both return a secret-free JSON readiness payload indicating that the endpoint is stubbed and activation is not implemented.

## Stub commands

Render a response from a request JSON file:

```bash
agent-machine steer stub-response /tmp/steer-request.json --pretty
```

Serve the local contract stub:

```bash
agent-machine steer serve-stub --host 127.0.0.1 --port 8080 --status not_configured
```

Noetica can then be pointed at the stub:

```bash
NEURONPEDIA_BASE_URL=http://localhost:8080
```

No credentials are required for the stub unless Noetica itself enforces `NEURONPEDIA_API_KEY` before dispatch. The endpoint does not inspect or store credentials.

## Implementation posture

The Issue #32 endpoint is implemented as a native Agent Machine CLI stub using Python's standard library HTTP server.

It is not:

- an AgentPod workload
- a production inference provider
- a model loader
- an SAE artifact loader
- an activation-injection path

Future work:

- Issue #33 registers sourcesets such as `gpt2-small.res-jb`.
- Issue #34 implements controlled activation/injection behind policy and grant gates.

## Failure behavior

Invalid payloads return HTTP 400 with:

```json
{
"error": "invalid_steer_request",
"message": "..."
}
```

When a sourceset is unavailable, the endpoint must still return a valid `SteeringResult` with `status: not_configured` rather than crashing or returning an error response.

## Boundary

This contract exists so Noetica and Agent Machine do not drift. It allows Noetica to validate local endpoint routing, UI rendering, and evidence-chain behavior before real local SAE steering is available.
31 changes: 31 additions & 0 deletions src/agent_machine/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,22 @@ def cmd_activate_evaluate(args: argparse.Namespace) -> int:
return 0


def cmd_steer_stub_response(args: argparse.Namespace) -> int:
    """Print the stub SteeringResult for the request file named on the command line.

    Reads ``args.request_json``, ``args.status`` and ``args.pretty``. The
    result is written to stdout as JSON with sorted keys: indented by two
    spaces when ``--pretty`` is set, otherwise in compact form with no
    whitespace after separators. Always returns 0; errors raised by the
    steering stub helpers propagate to the caller.
    """
    # The stub module is imported at call time rather than at module import.
    stub_module = __import__("agent_machine.steering_stub", fromlist=["_unused"])
    steer_request = stub_module.load_steer_request(str(args.request_json))
    rendered = stub_module.build_stub_steer_result(steer_request, status=args.status)
    # Only the layout options differ between the two output modes.
    layout = {"indent": 2} if args.pretty else {"separators": (",", ":")}
    print(json.dumps(rendered, sort_keys=True, **layout))
    return 0


def cmd_steer_serve_stub(args: argparse.Namespace) -> int:
    """Run the local steering stub server using ``args.host``, ``args.port`` and ``args.status``.

    Returns the value produced by ``serve_stub`` coerced to ``int`` so it can
    be used as the process exit code.
    """
    # The stub module is imported at call time rather than at module import.
    stub_module = __import__("agent_machine.steering_stub", fromlist=["_unused"])
    exit_code = stub_module.serve_stub(host=args.host, port=args.port, status=args.status)
    return int(exit_code)


def build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description="Agent Machine Python CLI")
subcommands = parser.add_subparsers(dest="command", required=True)
Expand Down Expand Up @@ -443,6 +459,21 @@ def build_parser() -> argparse.ArgumentParser:
activate_evaluate.add_argument("--pretty", action="store_true")
activate_evaluate.set_defaults(func=cmd_activate_evaluate)

steer = subcommands.add_parser("steer", help="Inspect or serve local steering endpoint stubs")
steer_subcommands = steer.add_subparsers(dest="steer_command", required=True)

stub_response = steer_subcommands.add_parser("stub-response", help="Render a Noetica-compatible steering stub response")
stub_response.add_argument("request_json", type=Path)
stub_response.add_argument("--status", choices=["not_configured", "noop"], default="not_configured")
stub_response.add_argument("--pretty", action="store_true")
stub_response.set_defaults(func=cmd_steer_stub_response)

serve_stub = steer_subcommands.add_parser("serve-stub", help="Serve local POST /steer contract stub")
serve_stub.add_argument("--host", default="127.0.0.1")
serve_stub.add_argument("--port", type=int, default=8080)
serve_stub.add_argument("--status", choices=["not_configured", "noop"], default="not_configured")
serve_stub.set_defaults(func=cmd_steer_serve_stub)

return parser


Expand Down
158 changes: 158 additions & 0 deletions src/agent_machine/steering_stub.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
"""Neuronpedia-compatible local steering endpoint stub.

This module intentionally does not perform activation steering, model loading, SAE
artifact loading, or provider activation. It exists so Noetica can exercise the
local endpoint shape before the real controlled activation path exists.
"""

from __future__ import annotations

import json
import sys
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from typing import Any, Literal

SteerStubStatus = Literal["not_configured", "noop"]
_ALLOWED_STUB_STATUSES: set[str] = {"not_configured", "noop"}


class SteeringStubError(AssertionError):
    """Contract violation in a local steering request or stub configuration.

    Raised by the request loader, the field validators, and the stub result
    builder. The HTTP handler reports it to clients as an HTTP 400
    ``invalid_steer_request`` response.
    """


def load_steer_request(path: str) -> dict[str, Any]:
    """Read a steer request from ``path`` and return it as a dict.

    Passing ``"-"`` reads the JSON document from standard input instead of a
    file. Files are opened as UTF-8 text.

    Raises:
        SteeringStubError: the parsed JSON document is not an object.
    """
    if path != "-":
        with open(path, "r", encoding="utf-8") as source:
            document = json.load(source)
    else:
        document = json.load(sys.stdin)
    if isinstance(document, dict):
        return document
    raise SteeringStubError("steer request root must be a JSON object")


def build_stub_steer_result(payload: dict[str, Any], status: SteerStubStatus = "not_configured") -> dict[str, Any]:
    """Validate a steer request and build the stub SteeringResult for it.

    No steering is performed: the prompt is echoed back unchanged as both
    ``baseline`` and ``steered``, and ``diff_summary`` explains why nothing
    was applied for the chosen ``status``.

    Args:
        payload: parsed request object; must carry ``prompt``, ``model_id``
            and a ``steering`` object with ``feature_id``, ``layer`` and
            ``strength``.
        status: stub status to report, one of the allowed stub statuses.

    Raises:
        SteeringStubError: the status is unsupported or a required field is
            missing or has the wrong type.
    """
    if status not in _ALLOWED_STUB_STATUSES:
        raise SteeringStubError(f"unsupported stub status: {status}")

    # Fields are validated in a fixed order so the first problem reported is stable.
    echoed_prompt = require_string(payload, "prompt")
    model_id = require_string(payload, "model_id")
    steering_block = require_object(payload, "steering")
    feature_id = require_string(steering_block, "feature_id")
    layer = require_string(steering_block, "layer")
    strength = require_number(steering_block, "strength")

    if status != "noop":
        explanation = (
            "Agent Machine local steering endpoint is not configured for activation. "
            f"Sourceset/model readiness for {model_id} is outside this Issue #32 stub."
        )
    else:
        explanation = (
            "Agent Machine local steering stub accepted the request shape but deliberately applied no runtime intervention. "
            "No model, sourceset, or SAE artifact was loaded."
        )

    result: dict[str, Any] = {"status": status}
    result["baseline"] = echoed_prompt
    result["steered"] = echoed_prompt
    result["diff_summary"] = explanation
    result["feature_id"] = feature_id
    result["layer"] = layer
    result["strength"] = strength
    return result


def serve_stub(host: str = "127.0.0.1", port: int = 8080, status: SteerStubStatus = "not_configured") -> int:
    """Serve a minimal local HTTP endpoint for contract testing.

    Routes:
        GET /health, GET /ready: a fixed JSON readiness payload.
        POST /steer: the stub SteeringResult for the posted request.
        Any other path: HTTP 404 with ``{"error": "not_found"}``.

    Invalid POST bodies are answered with HTTP 400 and an
    ``invalid_steer_request`` error object. That covers a missing, malformed
    or negative Content-Length, a body over 1 MiB, undecodable or unparsable
    JSON, and contract violations.

    Args:
        host: interface to bind.
        port: TCP port to bind.
        status: stub status reported in every SteeringResult.

    Returns:
        0 once the server has stopped (for example after Ctrl-C).

    Raises:
        SteeringStubError: ``status`` is not an allowed stub status.
    """
    if status not in _ALLOWED_STUB_STATUSES:
        raise SteeringStubError(f"unsupported stub status: {status}")

    class Handler(BaseHTTPRequestHandler):
        server_version = "AgentMachineSteerStub/0.1"

        def do_GET(self) -> None:  # noqa: N802 - BaseHTTPRequestHandler API
            if self.path not in {"/health", "/ready"}:
                self.send_json({"error": "not_found"}, status_code=404)
                return
            self.send_json(
                {
                    "ok": True,
                    "kind": "NeuronpediaCompatibleLocalSteerStub",
                    "status": "stubbed",
                    "endpoint": "/steer",
                    "activationImplemented": False,
                    "modelWeightsLoaded": False,
                    "saeArtifactsLoaded": False,
                }
            )

        def do_POST(self) -> None:  # noqa: N802 - BaseHTTPRequestHandler API
            if self.path != "/steer":
                self.send_json({"error": "not_found"}, status_code=404)
                return
            try:
                payload = self.read_json()
                result = build_stub_steer_result(payload, status=status)
            except (json.JSONDecodeError, UnicodeDecodeError, RecursionError, SteeringStubError) as exc:
                # RecursionError covers pathologically nested JSON, which would
                # otherwise escape the handler instead of producing a 400.
                self.send_json({"error": "invalid_steer_request", "message": str(exc)}, status_code=400)
                return
            self.send_json(result)

        def read_json(self) -> dict[str, Any]:
            """Read and parse the request body, enforcing the size and shape limits."""
            length_header = self.headers.get("content-length")
            if not length_header:
                raise SteeringStubError("missing content-length")
            try:
                length = int(length_header)
            except ValueError as exc:
                # A bare ValueError is not in do_POST's except tuple, so convert
                # it into a contract error that maps to HTTP 400.
                raise SteeringStubError("content-length must be an integer") from exc
            if length < 0:
                # rfile.read(-1) would block until the client closes the socket.
                raise SteeringStubError("content-length must not be negative")
            if length > 1_048_576:
                raise SteeringStubError("request body exceeds 1 MiB")
            raw = self.rfile.read(length)
            payload = json.loads(raw.decode("utf-8"))
            if not isinstance(payload, dict):
                raise SteeringStubError("steer request root must be a JSON object")
            return payload

        def send_json(self, payload: dict[str, Any], status_code: int = 200) -> None:
            """Write ``payload`` as a JSON response with an explicit content-length."""
            body = json.dumps(payload, sort_keys=True).encode("utf-8")
            self.send_response(status_code)
            self.send_header("content-type", "application/json")
            self.send_header("content-length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

        def log_message(self, format: str, *args: Any) -> None:  # noqa: A002 - inherited name
            # Route access logs to stderr with a stub-specific prefix.
            print(f"agent-machine steer stub: {self.address_string()} - {format % args}", file=sys.stderr)

    server = ThreadingHTTPServer((host, port), Handler)
    print(f"agent-machine steer stub serving http://{host}:{port}/steer status={status}", file=sys.stderr)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("agent-machine steer stub stopped", file=sys.stderr)
    finally:
        # Release the listening socket however serve_forever exits.
        server.server_close()
    return 0


def require_string(payload: dict[str, Any], key: str) -> str:
    """Return ``payload[key]`` when it is a string with non-whitespace content.

    The value is returned exactly as supplied (not stripped).

    Raises:
        SteeringStubError: the key is absent, the value is not a string, or
            the string is empty or whitespace-only.
    """
    candidate = payload.get(key)
    if isinstance(candidate, str) and candidate.strip():
        return candidate
    raise SteeringStubError(f"missing non-empty string field: {key}")


def require_object(payload: dict[str, Any], key: str) -> dict[str, Any]:
    """Return ``payload[key]`` when it is a JSON object (a ``dict``).

    Raises:
        SteeringStubError: the key is absent or the value is not a dict.
    """
    candidate = payload.get(key)
    if isinstance(candidate, dict):
        return candidate
    raise SteeringStubError(f"missing object field: {key}")


def require_number(payload: dict[str, Any], key: str) -> int | float:
    """Return ``payload[key]`` when it is a finite JSON number.

    Booleans are rejected even though ``bool`` is a subclass of ``int``.
    Non-finite floats (NaN, +/-Infinity) are rejected as well: Python's JSON
    parser accepts them, but echoing them back would serialize as ``NaN`` or
    ``Infinity``, which strict JSON parsers on the client side cannot read.

    Raises:
        SteeringStubError: the key is absent, the value is not an int or
            float, or the value is a non-finite float.
    """
    import math  # local import: only this validator needs it

    value = payload.get(key)
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        raise SteeringStubError(f"missing numeric field: {key}")
    # Only floats can be non-finite; skipping ints also avoids an
    # OverflowError from math.isfinite on very large integers.
    if isinstance(value, float) and not math.isfinite(value):
        raise SteeringStubError(f"non-finite numeric field: {key}")
    return value
Loading