From 38a3376d7c95c722f12eb99e6f3f9cfb89e85e9d Mon Sep 17 00:00:00 2001 From: Alexander Date: Fri, 29 May 2026 01:56:40 -0400 Subject: [PATCH] feat(agents,budget): per-persona spending-cap primitive (OpenRouter Phase 0) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DA must-fix #3 from the OpenRouter integration analysis: without a spending-cap envelope, fusion (4.4x cost) + recursing Hermes loops could drain $200/credit overnight. Land the primitive BEFORE adding any paid-surface provider (V1 OpenRouter upstream, V2 hal0-fusion MCP). Adds `[persona.budget]` TOML block + a pure-Python BudgetLedger (append-only JSONL at /var/lib/hal0/agents/{id}/personas/{pid}/spend.jsonl) + a check/charge API + a UI editor. API: - GET /api/agents/{id}/personas/{pid}/budget - PUT /api/agents/{id}/personas/{pid}/budget - POST /api/agents/{id}/personas/{pid}/budget/check (dry-run pre-call gate) - POST /api/agents/{id}/personas/{pid}/budget/charge (post-response record) Scope decision (PLANNING.md §5 Q2): per-persona only for v0.3. Per-agent + platform caps are containing scopes deferred to v0.4. Race-tolerance: append-only ledger + read-then-check is eventual consistency; periodic over-spend within one window is acceptable. No provider charges to this primitive yet. V1 (OpenRouter as Hermes upstream) will wire it in as a pre-call gate + post-response record. Refs openrouter-research-2026-05-28/PLANNING.md §3 Phase 0 + §5 Q2. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 43 ++ docs/agents/hermes/CONFIG.md | 50 ++ src/hal0/agents/budget.py | 565 ++++++++++++++++++ src/hal0/agents/personas.py | 19 + src/hal0/api/__init__.py | 14 + src/hal0/api/agents/budget.py | 371 ++++++++++++ tests/agents/test_budget_module.py | 396 ++++++++++++ .../agents/test_personas_budget_roundtrip.py | 108 ++++ tests/api/test_agents_budget.py | 306 ++++++++++ ui/src/api/endpoints.ts | 28 +- ui/src/api/hooks/useBudget.ts | 188 ++++++ .../dash/agents/persona-budget-hook-bridge.ts | 24 + ui/src/dash/agents/persona-budget-panel.jsx | 224 +++++++ ui/src/dash/agents/personas-tab.jsx | 14 + ui/src/main.tsx | 7 + 15 files changed, 2347 insertions(+), 10 deletions(-) create mode 100644 src/hal0/agents/budget.py create mode 100644 src/hal0/api/agents/budget.py create mode 100644 tests/agents/test_budget_module.py create mode 100644 tests/agents/test_personas_budget_roundtrip.py create mode 100644 tests/api/test_agents_budget.py create mode 100644 ui/src/api/hooks/useBudget.ts create mode 100644 ui/src/dash/agents/persona-budget-hook-bridge.ts create mode 100644 ui/src/dash/agents/persona-budget-panel.jsx diff --git a/CHANGELOG.md b/CHANGELOG.md index 12ddd9ed..9f3fc8ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,49 @@ Tags older than v0.2.0 ship release notes inside the GitHub release page; this CHANGELOG starts at v0.2.0 (the Lemonade migration cut). For ADR-level architecture context see `docs/internal/adr/`. +## Unreleased — Phase 0 OpenRouter prereq + v0.3 MCP completion + +### Added + +- **Per-persona spending-cap primitive (Phase 0 OpenRouter prereq)**. + Lands the `[persona.budget]` TOML sub-table + a pure-Python budget + enforcement layer BEFORE the V1 OpenRouter upstream provider and V2 + `hal0-fusion` MCP server. 
DA review of the OpenRouter integration + plan flagged this as P0 must-fix #3 — without a spending-cap + envelope, fusion (4.4x cost vs single-model) plus a recursing + Hermes loop could drain a $200/credit pool overnight. + - New `src/hal0/agents/budget.py` — `Budget` dataclass, append-only + `BudgetLedger`, pure `check_budget` / `record_charge` functions, + daily / monthly / lifetime aggregation windows + per-call max. + - New REST surface (mounted under `/api/agents/{id}/personas/{pid}`): + - `GET .../budget` — current caps + running spend stats + + per-window remaining headroom. + - `PUT .../budget` — replace the budget block; preserves every + other persona field on round-trip. + - `POST .../budget/check` — dry-run pre-call gate; takes + `{"estimated_cost_usd": float}`, returns `allowed` + `reason` + + `remaining_usd`. V1 OpenRouter provider calls this BEFORE + issuing the upstream request. + - `POST .../budget/charge` — post-response recorder; appends + `{ts, persona_id, surface, model, cost_usd, request_id}` to the + ledger. + - Ledger location: + `/var/lib/hal0/agents/{agent_id}/personas/{persona_id}/spend.jsonl`. + Append-only JSON-lines (no SQLite dependency); fsync after every + write; operator-inspectable via `tail -f` + `jq`. + - New dashboard editor — `PersonaBudgetPanel` mounts under the + Personas tab beneath the persona cards. Empty-state CTA reads + "no budget set — set caps to enable cloud providers". + - Persona seed (hermes + coder) keeps an empty budget block by + default; operators opt in by editing the TOML or PUT-ing through + the API. `hal0 agent reprovision hermes` preserves operator-set + budgets (idempotent persona seed with `overwrite=False`). + - **Scope decision (PLANNING.md §5 Q2):** per-persona only for v0.3. + Per-agent + platform-wide containing scopes are deferred to v0.4. + - **PREREQ — no provider charges to this primitive yet.** V1 + (OpenRouter as a Hermes upstream) wires it in as a pre-call gate + + post-response record. 
+ ## Unreleased — v0.3 MCP completion + memory-map redesign End-to-end completion of the `hal0-admin` + `hal0-memory` bundled MCP diff --git a/docs/agents/hermes/CONFIG.md b/docs/agents/hermes/CONFIG.md index 3bd8d315..260cea31 100644 --- a/docs/agents/hermes/CONFIG.md +++ b/docs/agents/hermes/CONFIG.md @@ -74,8 +74,58 @@ require_approval = ["files.*", "shell.*"] # glob list [persona.model] preferred_upstream = "hal0" preferred_model = "" # empty = first available + +[persona.budget] +# Per-persona spending caps (Phase 0 OpenRouter prereq). Each USD cap is +# optional; the omitted ones leave that window uncapped. An explicit +# ``0.0`` blocks every paid request. ``hard_cap`` enforces (default); +# set to ``false`` for warn-only mode (allowed=true, reason logged). +daily_usd = 5.0 # rolls over at 00:00 UTC +monthly_usd = 50.0 # rolls over on the 1st UTC +lifetime_usd = 500.0 # never resets +per_call_max_usd = 0.10 # rejects any single request over this +hard_cap = true # block (true) vs warn-only (false) ``` +**Budget block (Phase 0 OpenRouter prereq):** + +The `[persona.budget]` sub-table arms the per-persona spending-cap +primitive. Every paid surface (V1 OpenRouter as a Hermes upstream, V2 +the `hal0-fusion` MCP) consults this block via two endpoints: + +| Endpoint | Direction | Effect | +|---|---|---| +| `POST /api/agents/{id}/personas/{pid}/budget/check` | Caller → hal0 | Dry-run pre-call gate; returns `allowed=false` with a `reason` if the estimated cost would breach a cap. | +| `POST /api/agents/{id}/personas/{pid}/budget/charge` | Caller → hal0 | Records a real charge into the append-only ledger after the upstream response lands. | + +The ledger lives at +`/var/lib/hal0/agents/{agent_id}/personas/{persona_id}/spend.jsonl` +(one JSON object per line, append-only, fsync after every write). +Operator-inspectable with `tail -f` + `jq`. 
Hard-cap semantics: + +- `hard_cap = true` (default) — `check` returns `allowed=false` when + the estimate would push spend past any configured cap; the caller is + expected to short-circuit the request. +- `hard_cap = false` — `check` always returns `allowed=true`, but + `reason` is populated so the caller can log a warning. Useful for + audit-only deployments where the operator wants visibility without + enforcement. + +**Race tolerance:** the check-then-record pattern is NOT serialised. +Two concurrent paid requests from the same persona can both pass +`check` (they read the same ledger state) before either records a +charge — periodic over-spend within a single window is tolerated. A +real lock + daemon-side enforcer is v0.4+ work; the JSONL layout +migrates cleanly. + +**Idempotency:** running `hal0 agent reprovision hermes` after the +operator PUTs a budget preserves the caps. `_phase_persona_seed` +calls `seed_default_personas(overwrite=False)` which skips existing +files; only `--repair` re-writes the seeds back to canonical empty. + +**Scope:** per-persona only for v0.3. Per-agent + platform-wide +containing scopes are deferred to v0.4 (PLANNING.md §5 Q2 default). + **Change effect:** The next bootstrap render (or `hal0 agent reprovision hermes`) picks up the new prompt. `hal0 agent personas activate ` switches the active persona AND sends a best-effort diff --git a/src/hal0/agents/budget.py b/src/hal0/agents/budget.py new file mode 100644 index 00000000..b2e7a8c2 --- /dev/null +++ b/src/hal0/agents/budget.py @@ -0,0 +1,565 @@ +"""Per-persona spending-cap primitive (OpenRouter Phase 0). + +Lands BEFORE the v0.3.x V1 OpenRouter upstream provider + the V2 +``hal0-fusion`` MCP server. The DA review of the OpenRouter integration +plan flagged this as the P0 must-fix #3: without a spending-cap +envelope, fusion (4.4x cost vs single-model) combined with a recursing +Hermes loop could drain a $200/credit pool overnight. 
We need a budget +gate every paid surface can consult BEFORE it makes a call and a +post-response charge recorder so the next gate sees the real number. + +Scope decision (PLANNING.md §5 Q2): per-persona only for v0.3. Per-agent +and platform-wide containing scopes are deferred to v0.4 — both are +strict supersets of this primitive and can wrap it without rewriting +the Budget dataclass shape. + +Architecture +============ + +* :class:`Budget` is the configuration dataclass. Every cap is + ``float | None``; ``None`` means "no cap on this window"; an explicit + ``0.0`` means "blocked". ``hard_cap=True`` (the default) is "deny + requests that would overshoot"; ``hard_cap=False`` is "log + allow + (operator wants visibility, not enforcement)". +* :class:`BudgetLedger` is an append-only JSON-lines log at + ``/var/lib/hal0/agents/{agent_id}/personas/{persona_id}/spend.jsonl``. + One row per recorded charge — operator-inspectable with ``tail -f`` + + ``jq``, easy to migrate to SQLite later if we ever need indexed + queries. No daemons, no cleanup cron, no lock files: append-only is + the entire mutation model. +* :func:`check_budget` is a PURE function — aggregates the ledger's + spend over each configured window and compares to caps. The caller + (V1's OpenRouter provider) supplies the estimated cost; this module + does not estimate. Decoupling estimation lets every paid surface + pick its own estimator (token-count x price, or a fixed per-request + fee, or whatever). +* :func:`record_charge` appends to the ledger with ``fsync`` so a + crashed process between check + record loses at most one in-flight + charge. + +Eventual consistency +==================== + +The check-then-record pattern is NOT serialised. Two concurrent calls +from the same persona can both pass :func:`check_budget` (they read +the same ledger state), both make their requests, and both later +:func:`record_charge` for sums that together exceed the cap. 
This is +acceptable for v0.3: we tolerate periodic over-spend within a single +window in exchange for keeping the primitive lock-free and the ledger +shape trivially auditable. A real lock + a daemon-style enforcer is +v0.4+ work; the JSONL layout migrates cleanly. + +Operator inspection +=================== + +The ledger format is one JSON object per line, sorted oldest-first by +append order. Each row carries:: + + { + "ts": "2026-05-29T00:00:00.000000+00:00", + "persona_id": "hermes", + "surface": "openrouter", + "model": "anthropic/claude-3.7-sonnet", + "cost_usd": 0.0421, + "request_id": "req_abc123" + } + +``tail -f`` shows live charges; ``jq -s 'map(.cost_usd) | add'`` totals +the lifetime sum; ``jq -r 'select(.ts | startswith("2026-05-29")) +.cost_usd'`` slices a day. Operators get a debuggable surface without +hal0 needing a query engine. +""" + +from __future__ import annotations + +import contextlib +import enum +import json +import os +from dataclasses import dataclass, field +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +import structlog + +log = structlog.get_logger(__name__) + + +# Canonical on-disk store root for per-persona spend ledgers. Mirrors the +# personas store layout (``/var/lib/hal0/agents/{agent_id}/personas/...``) +# so a single operator backup picks up both knobs and history. Tests +# point at a tmp_path via the ``root`` keyword arg on every helper. +AGENTS_ROOT = Path("/var/lib/hal0/agents") +SPEND_LEDGER_FILENAME = "spend.jsonl" + + +class BudgetWindow(enum.StrEnum): + """Aggregation windows the budget primitive understands. + + * ``daily`` — sums charges since 00:00 UTC of the current day. + * ``monthly`` — sums charges since 00:00 UTC of the 1st of the + current calendar month. + * ``lifetime`` — sums every recorded charge. 
def is_empty(self) -> bool:
    """Report whether this budget configures no caps at all.

    Only the four USD caps are inspected; ``hard_cap`` is ignored, so a
    budget that merely toggles enforcement still counts as empty (the
    seed-stub shape).
    """
    caps = (
        self.daily_usd,
        self.monthly_usd,
        self.lifetime_usd,
        self.per_call_max_usd,
    )
    return all(cap is None for cap in caps)
+ """ + out: dict[str, Any] = {} + if self.daily_usd is not None: + out["daily_usd"] = float(self.daily_usd) + if self.monthly_usd is not None: + out["monthly_usd"] = float(self.monthly_usd) + if self.lifetime_usd is not None: + out["lifetime_usd"] = float(self.lifetime_usd) + if self.per_call_max_usd is not None: + out["per_call_max_usd"] = float(self.per_call_max_usd) + out["hard_cap"] = bool(self.hard_cap) + return out + + +@dataclass +class BudgetCheck: + """Outcome of :func:`check_budget`. + + * ``allowed`` — whether the caller should proceed. For + ``hard_cap=False`` budgets, ``allowed`` is ``True`` even when a + cap would have been breached; ``reason`` carries the would-be + block message so the caller can log it. + * ``reason`` — human-readable explanation, or ``None`` when the + request is squarely within every configured cap. Set when a cap + blocks (or would have blocked) the call. + * ``remaining_usd`` — per-window remaining headroom. Keys are the + window names from :class:`BudgetWindow`; values are + ``cap - spent`` (clamped at ``0.0``). Windows with no cap are + omitted from the dict — callers can iterate without checking for + ``None``. + """ + + allowed: bool + reason: str | None + remaining_usd: dict[str, float] = field(default_factory=dict) + + +# ── parse helpers ────────────────────────────────────────────────────────── + + +def _parse_optional_float(value: Any, field_name: str) -> float | None: + """Coerce a TOML scalar into ``float | None``. + + TOML emits ints + floats as distinct types; both are valid budget + amounts. ``None`` (missing key) stays ``None``. Anything else is a + :class:`ValueError` so the API + CLI can surface a structured 400 + instead of letting a bad TOML crash the agent loop later. + """ + if value is None: + return None + if isinstance(value, bool): + # bool is a subclass of int in Python; reject explicitly so an + # operator who writes ``daily_usd = true`` gets a clear error. 
+ raise ValueError(f"[persona.budget].{field_name} must be a number, got bool") + if isinstance(value, int | float): + coerced = float(value) + if coerced < 0: + raise ValueError(f"[persona.budget].{field_name} must be >= 0") + return coerced + raise ValueError(f"[persona.budget].{field_name} must be a number, got {type(value).__name__}") + + +def parse_budget(toml_section: dict[str, Any] | None) -> Budget: + """Build a :class:`Budget` from the parsed ``[persona.budget]`` table. + + Accepts ``None`` / missing section as "no budget configured" — the + returned :class:`Budget` is empty (every cap ``None``, + ``hard_cap=True``). Unknown extra keys are silently ignored so a + future v0.4 superset (per-agent caps) can land additional knobs + without breaking v0.3 personas. + + Raises :class:`ValueError` (caller wraps in ``PersonaError``) when + a known field is the wrong type or negative. + """ + if toml_section is None: + return Budget() + if not isinstance(toml_section, dict): + raise ValueError("[persona.budget] must be a table") + + daily = _parse_optional_float(toml_section.get("daily_usd"), "daily_usd") + monthly = _parse_optional_float(toml_section.get("monthly_usd"), "monthly_usd") + lifetime = _parse_optional_float(toml_section.get("lifetime_usd"), "lifetime_usd") + per_call = _parse_optional_float(toml_section.get("per_call_max_usd"), "per_call_max_usd") + + hard_cap_raw = toml_section.get("hard_cap", True) + if not isinstance(hard_cap_raw, bool): + raise ValueError("[persona.budget].hard_cap must be a bool") + + return Budget( + daily_usd=daily, + monthly_usd=monthly, + lifetime_usd=lifetime, + per_call_max_usd=per_call, + hard_cap=hard_cap_raw, + ) + + +# ── ledger ───────────────────────────────────────────────────────────────── + + +def _ledger_path(agent_id: str, persona_id: str, *, root: Path | None = None) -> Path: + """Resolve the spend ledger path for one (agent, persona).""" + base = root if root is not None else AGENTS_ROOT + return base / agent_id 
/ "personas" / persona_id / SPEND_LEDGER_FILENAME + + +@dataclass(frozen=True) +class SpendRow: + """One charge entry — what gets serialised to one JSONL line.""" + + ts: datetime + persona_id: str + surface: str + model: str + cost_usd: float + request_id: str + + def to_json(self) -> str: + return json.dumps( + { + "ts": self.ts.astimezone(UTC).isoformat(), + "persona_id": self.persona_id, + "surface": self.surface, + "model": self.model, + "cost_usd": float(self.cost_usd), + "request_id": self.request_id, + }, + separators=(",", ":"), + ) + + @classmethod + def from_json(cls, line: str) -> SpendRow: + body = json.loads(line) + ts_raw = body["ts"] + # ``fromisoformat`` accepts the offset suffix in 3.11+; tolerate + # the ``Z`` shorthand a future writer might emit. + if ts_raw.endswith("Z"): + ts_raw = ts_raw[:-1] + "+00:00" + return cls( + ts=datetime.fromisoformat(ts_raw), + persona_id=str(body["persona_id"]), + surface=str(body["surface"]), + model=str(body["model"]), + cost_usd=float(body["cost_usd"]), + request_id=str(body["request_id"]), + ) + + +class BudgetLedger: + """Append-only JSON-lines spend log for one (agent, persona). + + Construct with the resolved path (call sites typically use + :func:`ledger_for` to derive it). :meth:`append` writes one row + + ``fsync``; :meth:`iter_rows` streams everything back. The ledger + deliberately has NO compaction / rotation in v0.3 — the row size + is on the order of 200 bytes and a heavy-use persona doing 1000 + paid calls a day still only writes ~70 MB/yr. Rotation lands when + we ship per-agent scope in v0.4. + """ + + def __init__(self, path: Path) -> None: + self.path = path + + def ensure_parent(self) -> None: + """Create the agent/persona dir tree if missing. + + Called from :meth:`append` before the first write so a fresh + install (no operator-set budget yet) creates the directory + lazily. Separate method so tests can pre-create the dir to + inspect mode bits without triggering an actual append. 
def iter_rows(self) -> list[SpendRow]:
    """Read every recorded row, oldest first.

    Returns an empty list when the ledger doesn't exist yet — that's
    the "no charges yet" state, not an error. Blank lines are ignored.
    Lines that fail to parse are skipped with a structured warning so
    one bad write (for example a half-flushed row from a crashed
    process) doesn't blind every subsequent check.

    Returns:
        The parsed rows in file (append) order.
    """
    out: list[SpendRow] = []
    try:
        # ``errors="replace"`` keeps a torn or hand-edited line with
        # invalid UTF-8 from raising out of the file iterator, where
        # the per-line handler below cannot catch it.
        fh = open(self.path, encoding="utf-8", errors="replace")
    except FileNotFoundError:
        # Opening directly (instead of exists()-then-open) avoids a
        # crash if the file vanishes between the two calls.
        return out
    with fh:
        for lineno, raw in enumerate(fh, start=1):
            line = raw.strip()
            if not line:
                continue
            try:
                out.append(SpendRow.from_json(line))
            except (ValueError, KeyError, TypeError, AttributeError) as exc:
                # TypeError / AttributeError cover valid JSON of the
                # wrong shape (``[]``, a numeric ``ts``, a null cost).
                # json.JSONDecodeError is a ValueError subclass.
                log.warning(
                    "budget.ledger.skip_malformed",
                    path=str(self.path),
                    lineno=lineno,
                    error=str(exc),
                )
    return out
Convenience wrapper.""" + return BudgetLedger(_ledger_path(agent_id, persona_id, root=root)) + + +# ── window aggregation ───────────────────────────────────────────────────── + + +def _day_start(now: datetime) -> datetime: + return now.astimezone(UTC).replace(hour=0, minute=0, second=0, microsecond=0) + + +def _month_start(now: datetime) -> datetime: + return now.astimezone(UTC).replace(day=1, hour=0, minute=0, second=0, microsecond=0) + + +@dataclass(frozen=True) +class SpendStats: + """Aggregated spend across the canonical windows. + + Returned by :func:`spend_stats` so API responses (and the UI editor) + can render today / mtd / lifetime totals from one read pass over + the ledger. + """ + + today_usd: float + mtd_usd: float + lifetime_usd: float + + +def spend_stats(ledger: BudgetLedger, now: datetime | None = None) -> SpendStats: + """Aggregate the ledger over (today, month-to-date, lifetime). + + One pass over :meth:`BudgetLedger.iter_rows`. Cheap enough at v0.3 + volumes (kbs of rows) that we don't bother caching; if a heavy + persona starts to thrash this we'll add a per-day cumulative + sidecar in v0.4. + """ + moment = now if now is not None else datetime.now(UTC) + today_floor = _day_start(moment) + month_floor = _month_start(moment) + today = mtd = lifetime = 0.0 + for row in ledger.iter_rows(): + cost = float(row.cost_usd) + lifetime += cost + if row.ts >= today_floor: + today += cost + if row.ts >= month_floor: + mtd += cost + return SpendStats(today_usd=today, mtd_usd=mtd, lifetime_usd=lifetime) + + +# ── check + record ───────────────────────────────────────────────────────── + + +def check_budget( + budget: Budget, + ledger: BudgetLedger, + estimated_cost_usd: float, + *, + now: datetime | None = None, +) -> BudgetCheck: + """Pure check — does ``estimated_cost_usd`` fit inside the budget? + + Pre-call gate the V1 OpenRouter provider calls before issuing the + upstream request. 
Returns :class:`BudgetCheck` with ``allowed`` + + a structured ``reason`` when a cap blocks the call. The + ``remaining_usd`` map carries each configured window's headroom + AFTER subtracting the estimated cost — callers can surface "$X.YZ + left today" to the operator without re-aggregating themselves. + + Most-restrictive-wins ordering: per-call cap → daily → monthly → + lifetime. The first breached cap wins the reason string; we don't + enumerate every breach since the operator only sees one toast. + + ``hard_cap=False`` still computes the reason but keeps ``allowed`` + ``True`` — the caller is expected to log + proceed. + """ + moment = now if now is not None else datetime.now(UTC) + estimated = float(estimated_cost_usd) + if estimated < 0: + raise ValueError("estimated_cost_usd must be >= 0") + + stats = spend_stats(ledger, now=moment) + remaining: dict[str, float] = {} + reason: str | None = None + + # Per-call doesn't get a remaining entry — the cap is the call, + # not a window. + if budget.per_call_max_usd is not None and estimated > budget.per_call_max_usd: + reason = ( + f"per-call cap ${budget.per_call_max_usd:.4f} exceeded by estimate ${estimated:.4f}" + ) + + if budget.daily_usd is not None: + headroom = max(0.0, budget.daily_usd - stats.today_usd) + # Remaining reflects PRE-call headroom — the operator UI surfaces + # "you have $X.YZ left today", not "you'd have left if this call + # went through". The check's allowed bool is the gate; remaining + # is purely informational. 
+ remaining[BudgetWindow.DAILY.value] = headroom + if reason is None and estimated > headroom: + reason = ( + f"daily cap ${budget.daily_usd:.4f} would be exceeded — " + f"spent ${stats.today_usd:.4f}, estimate ${estimated:.4f}" + ) + + if budget.monthly_usd is not None: + headroom = max(0.0, budget.monthly_usd - stats.mtd_usd) + remaining[BudgetWindow.MONTHLY.value] = headroom + if reason is None and estimated > headroom: + reason = ( + f"monthly cap ${budget.monthly_usd:.4f} would be exceeded — " + f"spent ${stats.mtd_usd:.4f}, estimate ${estimated:.4f}" + ) + + if budget.lifetime_usd is not None: + headroom = max(0.0, budget.lifetime_usd - stats.lifetime_usd) + remaining[BudgetWindow.LIFETIME.value] = headroom + if reason is None and estimated > headroom: + reason = ( + f"lifetime cap ${budget.lifetime_usd:.4f} would be exceeded — " + f"spent ${stats.lifetime_usd:.4f}, estimate ${estimated:.4f}" + ) + + if reason is None: + return BudgetCheck(allowed=True, reason=None, remaining_usd=remaining) + # Reason set: hard_cap decides whether we actually block. + return BudgetCheck( + allowed=not budget.hard_cap, + reason=reason, + remaining_usd=remaining, + ) + + +def record_charge( + ledger: BudgetLedger, + *, + persona_id: str, + surface: str, + model: str, + cost_usd: float, + request_id: str, + now: datetime | None = None, +) -> SpendRow: + """Append a charge to the ledger; return the row that was written. + + Caller (the OpenRouter provider in V1) computes the real cost from + the upstream's ``usage`` block + the model's posted price, then + calls this once per response. The recorded row is the source of + truth for the next :func:`check_budget` call. 
+ """ + moment = now if now is not None else datetime.now(UTC) + row = SpendRow( + ts=moment, + persona_id=persona_id, + surface=surface, + model=model, + cost_usd=float(cost_usd), + request_id=request_id, + ) + ledger.append(row) + return row + + +__all__ = [ + "AGENTS_ROOT", + "Budget", + "BudgetCheck", + "BudgetLedger", + "BudgetWindow", + "SpendRow", + "SpendStats", + "check_budget", + "ledger_for", + "parse_budget", + "record_charge", + "spend_stats", +] + +# Anti-circular-import hint: also re-exposed as ``ledger_path`` for +# routes that need to surface the on-disk path without instantiating a +# ledger (CHANGELOG / debug response). +ledger_path = _ledger_path diff --git a/src/hal0/agents/personas.py b/src/hal0/agents/personas.py index 89307cee..2dfacaf6 100644 --- a/src/hal0/agents/personas.py +++ b/src/hal0/agents/personas.py @@ -28,6 +28,8 @@ import structlog +from hal0.agents.budget import Budget, parse_budget + log = structlog.get_logger(__name__) # Canonical on-disk store. State-dir, not config-dir, so a fresh install @@ -83,6 +85,11 @@ class Persona: approval: PersonaApproval = field(default_factory=PersonaApproval) preferred_upstream: str = "hal0" preferred_model: str = "" + # Phase 0 OpenRouter prereq: per-persona spending caps. Empty Budget + # means "no caps configured" — the round-trip preserves explicit + # zeros (which translate to "block every paid request"). See + # :mod:`hal0.agents.budget` for the dataclass shape + semantics. 
+ budget: Budget = field(default_factory=Budget) @classmethod def from_dict(cls, data: dict[str, Any]) -> Persona: @@ -111,6 +118,7 @@ def from_dict(cls, data: dict[str, Any]) -> Persona: memory = persona.get("memory") or {} approval_raw = persona.get("approval") or {} model = persona.get("model") or {} + budget_raw = persona.get("budget") allowed = tools.get("allowed", ["*"]) if isinstance(allowed, str): @@ -134,6 +142,11 @@ def from_dict(cls, data: dict[str, Any]) -> Persona: f"ask/auto-approve/never; got {default_policy!r}" ) + try: + budget = parse_budget(budget_raw) + except ValueError as exc: + raise PersonaError(str(exc)) from exc + return cls( id=pid.strip(), display_name=str(persona.get("display_name") or pid).strip(), @@ -148,6 +161,7 @@ def from_dict(cls, data: dict[str, Any]) -> Persona: ), preferred_upstream=str(model.get("preferred_upstream") or "hal0").strip(), preferred_model=str(model.get("preferred_model") or "").strip(), + budget=budget, ) def to_dict(self) -> dict[str, Any]: @@ -174,6 +188,11 @@ def to_dict(self) -> dict[str, Any]: "preferred_upstream": self.preferred_upstream, "preferred_model": self.preferred_model, }, + # Always emit the budget table — round-trip preserves + # operator-set caps + the explicit hard_cap toggle. An + # empty budget renders as just ``hard_cap = true`` so + # the seed persona file still documents the knob. + "budget": self.budget.to_dict(), } } diff --git a/src/hal0/api/__init__.py b/src/hal0/api/__init__.py index b8b96a61..55edc79b 100644 --- a/src/hal0/api/__init__.py +++ b/src/hal0/api/__init__.py @@ -23,6 +23,9 @@ from hal0.lemonade.metrics_shim import MetricsShim from hal0 import __version__ +from hal0.api.agents import ( + budget as agents_budget_routes, +) from hal0.api.agents import ( memory_stats as agents_memory_stats_routes, ) @@ -969,6 +972,17 @@ def create_app() -> FastAPI: tags=["agents", "personas"], ) + # Per-persona spending-cap primitive (Phase 0 OpenRouter prereq). 
+ # GET/PUT the budget block + check/charge endpoints so the V1 + # OpenRouter provider has a gate from day 1. Same /api/agents + # prefix as the personas router so the dashboard's persona editor + # can call both without juggling base URLs. + app.include_router( + agents_budget_routes.router, + prefix="/api/agents", + tags=["agents", "personas", "budget"], + ) + # Agent service restart (v0.3 PR-11). Wraps systemctl restart of the # hal0-agent@.service template unit. Flagged as missing during # PR-6/PR-8/PR-10 integration: the sidecar agent block + the diff --git a/src/hal0/api/agents/budget.py b/src/hal0/api/agents/budget.py new file mode 100644 index 00000000..b0d47e0c --- /dev/null +++ b/src/hal0/api/agents/budget.py @@ -0,0 +1,371 @@ +"""Per-persona budget endpoints (OpenRouter Phase 0 prereq). + +REST surface around :mod:`hal0.agents.budget`. Reads / writes the +``[persona.budget]`` block inside the persona TOML, surfaces the +ledger-derived spend stats, and exposes ``check`` (dry-run) + +``charge`` (post-response record) so the V1 OpenRouter provider has a +budget gate from day 1 without re-inventing this layer. + +Route shape mirrors :mod:`hal0.api.agents.personas` — every endpoint is +parameterized by ``agent_id`` so v0.4's pi-coder unlock + the per-agent +containing scope (deferred per PLANNING.md §5 Q2) light up by adding +registry rows, not rewriting the route table. 
+ +Mounted from :mod:`hal0.api` at prefix ``/api/agents`` so the realized +routes are: + + GET /api/agents/{agent_id}/personas/{persona_id}/budget + PUT /api/agents/{agent_id}/personas/{persona_id}/budget + POST /api/agents/{agent_id}/personas/{persona_id}/budget/check + POST /api/agents/{agent_id}/personas/{persona_id}/budget/charge +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +import structlog +from fastapi import APIRouter, Request + +from hal0.agents import budget as budget_mod +from hal0.agents import personas as personas_mod +from hal0.agents.budget import AGENTS_ROOT, Budget +from hal0.api.agents.personas import _AGENT_PERSONAS_ROOTS, _safe_error_message +from hal0.errors import BadRequest, Hal0Error, NotFound + +log = structlog.get_logger(__name__) + +router = APIRouter() + + +# ── agent ledger root registry ───────────────────────────────────────────── +# +# Mirrors :data:`hal0.api.agents.personas._AGENT_PERSONAS_ROOTS` but +# resolves the AGENT-LEVEL state directory (the personas/ subdir lives +# beneath it). Per ADR-0004 §2 + the personas module's seed-default +# convention, the canonical layout is:: +# +# /var/lib/hal0/agents/{agent_id}/personas/{persona_id}.toml +# /var/lib/hal0/agents/{agent_id}/personas/{persona_id}/spend.jsonl +# +# Tests rewrite this dict + ``_AGENT_PERSONAS_ROOTS`` to point at a +# tmp_path; in production the dict carries the canonical AGENTS_ROOT +# entry. + +_AGENT_LEDGER_ROOTS: dict[str, Path] = { + "hermes": AGENTS_ROOT, +} + + +def _resolve_agent_personas_root(agent_id: str) -> Path: + """Resolve the personas store root, raising NotFound for unknown agents. + + Reuses the same registry the personas module's CRUD routes use so a + test that monkey-patches one rewires the other automatically as long + as both registries are kept in sync (the test fixture below does so). 
def _load_persona_or_404(
    personas_root: Path,
    agent_id: str,
    persona_id: str,
) -> personas_mod.Persona:
    """Fetch one persona from ``personas_root`` or raise an API-shaped error.

    Delegates to ``personas_mod.load_persona`` and translates its two
    expected failures:

    * ``FileNotFoundError`` becomes :class:`NotFound` with code
      ``persona.not_found``.
    * ``personas_mod.PersonaError`` becomes :class:`BadRequest` with code
      ``persona.malformed``; the message is routed through
      ``_safe_error_message`` before it is returned to the client.

    Both errors carry ``agent_id`` and ``persona_id`` in ``details``. Any
    other exception is left to propagate unchanged.
    """
    try:
        loaded = personas_mod.load_persona(persona_id, root=personas_root)
    except FileNotFoundError as missing:
        raise NotFound(
            f"persona {persona_id!r} not found",
            code="persona.not_found",
            details={"agent_id": agent_id, "persona_id": persona_id},
        ) from missing
    except personas_mod.PersonaError as malformed:
        message = _safe_error_message(malformed)
        raise BadRequest(
            message,
            code="persona.malformed",
            details={"agent_id": agent_id, "persona_id": persona_id},
        ) from malformed
    else:
        return loaded
async def _read_json_body(request: Request) -> Any:
    """Read + JSON-decode the request body, rejecting non-finite numbers.

    We accept-Any-validate-ourselves because FastAPI's typed body
    declaration would reject an array body with a pydantic 422 envelope
    that doesn't match our ``budget.invalid_body`` 400 contract.

    Returns:
        The decoded JSON value. An empty body, or a literal ``null``,
        yields ``None`` so the handler's own validation can produce a
        structured 400.

    Raises:
        BadRequest: (code ``budget.invalid_body``) when the body is not
            valid JSON, or when it contains a number that is not finite.

    Python's ``json`` module accepts the non-standard tokens ``NaN``,
    ``Infinity`` and ``-Infinity`` by default, and turns overflowing
    literals such as ``1e999`` into ``inf``. Every ``value < 0`` check is
    ``False`` for NaN and for ``+inf``, so such a value would pass the
    handlers' range validation and reach the budget check / spend ledger.
    They are refused here, at the decoding boundary shared by all routes.
    """
    import json as _json
    import math as _math

    def _reject_constant(token: str) -> Any:
        # Invoked by json.loads for the bare tokens NaN / Infinity / -Infinity.
        raise ValueError(f"non-finite number {token} is not allowed")

    def _finite_float(token: str) -> float:
        # Syntactically valid literals (e.g. 1e999) can still overflow to inf.
        value = float(token)
        if not _math.isfinite(value):
            raise ValueError(f"number {token} is out of range")
        return value

    raw = await request.body()
    if not raw:
        return None
    try:
        return _json.loads(
            raw,
            parse_constant=_reject_constant,
            parse_float=_finite_float,
        )
    except ValueError as exc:
        raise BadRequest(
            f"request body must be valid JSON: {exc}",
            code="budget.invalid_body",
        ) from exc
@router.post("/{agent_id}/personas/{persona_id}/budget/check")
async def check_persona_budget(
    agent_id: str,
    persona_id: str,
    request: Request,
) -> dict[str, Any]:
    """Dry-run pre-call gate: does ``estimated_cost_usd`` fit in budget?

    Body shape::

        {"estimated_cost_usd": float}

    Returns a mapping with:

    * ``allowed`` (bool) and ``reason`` (string or null), copied from the
      result of ``budget_mod.check_budget``;
    * ``remaining_usd``, the per-window headroom map from that same
      result. The module's unit tests pin this as the PRE-call headroom
      (cap minus what is already in the ledger); the estimate is not
      subtracted from it;
    * ``hard_cap``, the persona's configured enforcement toggle.

    The endpoint only reads the ledger; it never appends to it. It is
    intended to be called before the upstream request is issued, so an
    ``allowed`` of ``False`` lets the caller short-circuit.

    Raises:
        BadRequest: ``budget.invalid_body`` when the body is not a JSON
            object; ``budget.invalid_estimate`` when the estimate is
            missing, not a number, a bool, not finite, or negative.
        NotFound: for an unknown agent or persona (raised by the
            resolver / loader helpers).
    """
    import math

    body = await _read_json_body(request)
    if not isinstance(body, dict):
        raise BadRequest(
            "request body must be a JSON object",
            code="budget.invalid_body",
        )
    raw = body.get("estimated_cost_usd")
    # bool is a subclass of int, so it has to be excluded explicitly.
    if not isinstance(raw, int | float) or isinstance(raw, bool):
        raise BadRequest(
            "estimated_cost_usd must be a number",
            code="budget.invalid_estimate",
        )
    try:
        estimate = float(raw)
    except OverflowError as exc:
        # A huge JSON integer decodes to a Python int that float() cannot hold.
        raise BadRequest(
            "estimated_cost_usd must be a finite number",
            code="budget.invalid_estimate",
        ) from exc
    # NaN and +inf both evade the ``< 0`` test below (NaN compares False to
    # everything), so they would otherwise reach the budget comparison.
    if not math.isfinite(estimate):
        raise BadRequest(
            "estimated_cost_usd must be a finite number",
            code="budget.invalid_estimate",
        )
    if estimate < 0:
        raise BadRequest(
            "estimated_cost_usd must be >= 0",
            code="budget.invalid_estimate",
        )

    personas_root = _resolve_agent_personas_root(agent_id)
    ledger_root = _resolve_ledger_root(agent_id)
    persona = _load_persona_or_404(personas_root, agent_id, persona_id)
    ledger = budget_mod.ledger_for(agent_id, persona_id, root=ledger_root)
    result = budget_mod.check_budget(persona.budget, ledger, estimate)
    return {
        "allowed": result.allowed,
        "reason": result.reason,
        "remaining_usd": result.remaining_usd,
        "hard_cap": persona.budget.hard_cap,
    }
+ """ + body = await _read_json_body(request) + if not isinstance(body, dict): + raise BadRequest( + "request body must be a JSON object", + code="budget.invalid_body", + ) + surface = body.get("surface") + model = body.get("model") + cost_raw = body.get("cost_usd") + request_id = body.get("request_id") + + if not isinstance(surface, str) or not surface.strip(): + raise BadRequest("'surface' must be a non-empty string", code="budget.invalid_charge") + if not isinstance(model, str) or not model.strip(): + raise BadRequest("'model' must be a non-empty string", code="budget.invalid_charge") + if not isinstance(cost_raw, int | float) or isinstance(cost_raw, bool): + raise BadRequest("'cost_usd' must be a number", code="budget.invalid_charge") + cost = float(cost_raw) + if cost < 0: + raise BadRequest("'cost_usd' must be >= 0", code="budget.invalid_charge") + if not isinstance(request_id, str) or not request_id.strip(): + raise BadRequest("'request_id' must be a non-empty string", code="budget.invalid_charge") + + personas_root = _resolve_agent_personas_root(agent_id) + ledger_root = _resolve_ledger_root(agent_id) + persona = _load_persona_or_404(personas_root, agent_id, persona_id) + ledger = budget_mod.ledger_for(agent_id, persona_id, root=ledger_root) + row = budget_mod.record_charge( + ledger, + persona_id=persona.id, + surface=surface, + model=model, + cost_usd=cost, + request_id=request_id, + ) + response = _budget_response(persona, ledger) + response["recorded"] = True + response["row"] = { + "ts": row.ts.isoformat(), + "surface": row.surface, + "model": row.model, + "cost_usd": row.cost_usd, + "request_id": row.request_id, + } + return response diff --git a/tests/agents/test_budget_module.py b/tests/agents/test_budget_module.py new file mode 100644 index 00000000..1d1a5e41 --- /dev/null +++ b/tests/agents/test_budget_module.py @@ -0,0 +1,396 @@ +"""Pure-Python unit tests for :mod:`hal0.agents.budget`. 
+ +Pins the dataclass + ledger + check semantics every paid surface (V1 +OpenRouter, V2 fusion MCP) is going to lean on. Test ordering matches +the module's section layout: dataclass round-trip, parse helper, +ledger I/O, check matrix. +""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta +from pathlib import Path + +import pytest + +from hal0.agents.budget import ( + Budget, + BudgetCheck, + BudgetLedger, + BudgetWindow, + SpendRow, + SpendStats, + check_budget, + ledger_for, + parse_budget, + record_charge, + spend_stats, +) + +# ── Budget dataclass ──────────────────────────────────────────────────────── + + +def test_budget_default_is_empty() -> None: + """A fresh Budget has every cap None and hard_cap defaulting to True.""" + b = Budget() + assert b.daily_usd is None + assert b.monthly_usd is None + assert b.lifetime_usd is None + assert b.per_call_max_usd is None + assert b.hard_cap is True + assert b.is_empty() is True + + +def test_budget_to_dict_omits_unset_caps() -> None: + """Unset caps are skipped — the rendered TOML stays small + readable.""" + b = Budget(daily_usd=5.0, hard_cap=False) + out = b.to_dict() + assert out == {"daily_usd": 5.0, "hard_cap": False} + assert "monthly_usd" not in out + assert "lifetime_usd" not in out + + +def test_budget_to_dict_preserves_explicit_zero() -> None: + """Explicit 0.0 ("block everything") survives round-trip.""" + b = Budget(daily_usd=0.0) + out = b.to_dict() + assert out["daily_usd"] == 0.0 + + +# ── parse_budget ──────────────────────────────────────────────────────────── + + +def test_parse_budget_none_returns_empty_budget() -> None: + """Missing [persona.budget] sub-table is a no-op, not an error.""" + b = parse_budget(None) + assert b == Budget() + + +def test_parse_budget_full_table() -> None: + raw = { + "daily_usd": 1.0, + "monthly_usd": 10.0, + "lifetime_usd": 100.0, + "per_call_max_usd": 0.5, + "hard_cap": False, + } + b = parse_budget(raw) + assert b == Budget( + 
daily_usd=1.0, + monthly_usd=10.0, + lifetime_usd=100.0, + per_call_max_usd=0.5, + hard_cap=False, + ) + + +def test_parse_budget_rejects_negative() -> None: + with pytest.raises(ValueError, match=">= 0"): + parse_budget({"daily_usd": -1.0}) + + +def test_parse_budget_rejects_non_number() -> None: + with pytest.raises(ValueError, match="must be a number"): + parse_budget({"daily_usd": "abc"}) + + +def test_parse_budget_rejects_bool_for_number() -> None: + """``daily_usd = true`` is a TOML mistake; reject it explicitly.""" + with pytest.raises(ValueError, match="bool"): + parse_budget({"daily_usd": True}) + + +def test_parse_budget_rejects_non_bool_hard_cap() -> None: + with pytest.raises(ValueError, match="hard_cap"): + parse_budget({"hard_cap": "yes"}) + + +def test_parse_budget_ignores_unknown_keys() -> None: + """Forward-compat: a v0.4 superset's extra knobs don't trip v0.3.""" + b = parse_budget({"daily_usd": 1.0, "weekly_usd": 9.99, "future_field": "x"}) + assert b == Budget(daily_usd=1.0) + + +def test_parse_budget_accepts_int_as_float() -> None: + """TOML ints are valid amounts (``daily_usd = 5`` not just ``5.0``).""" + b = parse_budget({"daily_usd": 5}) + assert b.daily_usd == 5.0 + + +# ── SpendRow JSON round-trip ──────────────────────────────────────────────── + + +def test_spend_row_round_trip() -> None: + ts = datetime(2026, 5, 29, 12, 34, 56, tzinfo=UTC) + row = SpendRow( + ts=ts, + persona_id="hermes", + surface="openrouter", + model="claude-3.7", + cost_usd=0.0421, + request_id="req-1", + ) + decoded = SpendRow.from_json(row.to_json()) + assert decoded == row + + +def test_spend_row_tolerates_z_suffix() -> None: + """``Z`` shorthand round-trips even if a future writer emits it.""" + line = '{"ts":"2026-05-29T12:00:00Z","persona_id":"h","surface":"or","model":"x","cost_usd":0.01,"request_id":"r1"}' + row = SpendRow.from_json(line) + assert row.ts.tzinfo is not None + assert row.persona_id == "h" + + +# ── BudgetLedger round-trip 
───────────────────────────────────────────────── + + +def test_ledger_round_trip_appends_and_reads(tmp_path: Path) -> None: + ledger = BudgetLedger(tmp_path / "spend.jsonl") + assert ledger.iter_rows() == [] + row = SpendRow( + ts=datetime(2026, 5, 29, 0, 0, tzinfo=UTC), + persona_id="hermes", + surface="openrouter", + model="m1", + cost_usd=0.1, + request_id="r-1", + ) + ledger.append(row) + rows = ledger.iter_rows() + assert len(rows) == 1 + assert rows[0] == row + + +def test_ledger_skips_malformed_lines(tmp_path: Path) -> None: + path = tmp_path / "spend.jsonl" + path.write_text( + "not json\n" + '{"ts":"2026-05-29T00:00:00+00:00","persona_id":"h","surface":"or","model":"m","cost_usd":0.5,"request_id":"r1"}\n' + "\n" + "{}\n", + encoding="utf-8", + ) + ledger = BudgetLedger(path) + rows = ledger.iter_rows() + assert len(rows) == 1 + assert rows[0].cost_usd == 0.5 + + +def test_ledger_creates_parent_dirs_on_append(tmp_path: Path) -> None: + """Fresh install: nobody has written the ledger dir yet — append seeds it.""" + deep = tmp_path / "a" / "b" / "c" / "spend.jsonl" + ledger = BudgetLedger(deep) + ledger.append( + SpendRow( + ts=datetime.now(UTC), + persona_id="h", + surface="or", + model="m", + cost_usd=0.01, + request_id="r1", + ) + ) + assert deep.exists() + assert deep.parent.is_dir() + + +def test_ledger_for_resolves_canonical_layout(tmp_path: Path) -> None: + ledger = ledger_for("hermes-agent", "hermes", root=tmp_path) + assert ledger.path == tmp_path / "hermes-agent" / "personas" / "hermes" / "spend.jsonl" + + +# ── spend_stats ───────────────────────────────────────────────────────────── + + +def _row( + *, + ts: datetime, + cost: float = 0.1, + request_id: str = "r", +) -> SpendRow: + return SpendRow( + ts=ts, + persona_id="hermes", + surface="openrouter", + model="m", + cost_usd=cost, + request_id=request_id, + ) + + +def test_spend_stats_aggregates_windows(tmp_path: Path) -> None: + ledger = BudgetLedger(tmp_path / "spend.jsonl") + now = 
datetime(2026, 5, 29, 12, 0, tzinfo=UTC) + # Today: 0.30 + ledger.append(_row(ts=now.replace(hour=1), cost=0.1, request_id="t1")) + ledger.append(_row(ts=now.replace(hour=2), cost=0.2, request_id="t2")) + # Yesterday but same month: 0.50 + ledger.append(_row(ts=now - timedelta(days=1), cost=0.5, request_id="y1")) + # Previous month: 1.00 + ledger.append(_row(ts=now - timedelta(days=40), cost=1.0, request_id="m1")) + stats = spend_stats(ledger, now=now) + assert stats.today_usd == pytest.approx(0.3) + assert stats.mtd_usd == pytest.approx(0.8) + assert stats.lifetime_usd == pytest.approx(1.8) + + +def test_spend_stats_empty_ledger_zeroes(tmp_path: Path) -> None: + ledger = BudgetLedger(tmp_path / "spend.jsonl") + stats = spend_stats(ledger) + assert stats == SpendStats(0.0, 0.0, 0.0) + + +# ── check_budget edge cases ───────────────────────────────────────────────── + + +def test_check_no_cap_allows_any_estimate(tmp_path: Path) -> None: + """Empty Budget → unconditional allow + empty remaining.""" + ledger = BudgetLedger(tmp_path / "spend.jsonl") + result = check_budget(Budget(), ledger, 5.0) + assert result.allowed is True + assert result.reason is None + assert result.remaining_usd == {} + + +def test_check_daily_cap_blocks(tmp_path: Path) -> None: + ledger = BudgetLedger(tmp_path / "spend.jsonl") + budget = Budget(daily_usd=1.0) + now = datetime(2026, 5, 29, 12, 0, tzinfo=UTC) + # Already spent 0.7 today + ledger.append(_row(ts=now.replace(hour=1), cost=0.7, request_id="t1")) + # Estimate 0.5 would push us to 1.2 — blocked. + result = check_budget(budget, ledger, 0.5, now=now) + assert result.allowed is False + assert result.reason is not None + assert "daily cap" in result.reason + # Remaining headroom snapshot still reflects the pre-call total. 
+ assert result.remaining_usd[BudgetWindow.DAILY.value] == pytest.approx(0.3) + + +def test_check_per_call_blocks_independent_of_window(tmp_path: Path) -> None: + """A single oversized call breaches per_call_max even with empty ledger.""" + ledger = BudgetLedger(tmp_path / "spend.jsonl") + budget = Budget(per_call_max_usd=0.05, daily_usd=100.0) + result = check_budget(budget, ledger, 0.10) + assert result.allowed is False + assert "per-call cap" in (result.reason or "") + + +def test_check_hard_cap_false_warns_but_allows(tmp_path: Path) -> None: + """``hard_cap=False`` → ``allowed`` stays True even when a cap would block.""" + ledger = BudgetLedger(tmp_path / "spend.jsonl") + budget = Budget(daily_usd=1.0, hard_cap=False) + now = datetime(2026, 5, 29, 12, 0, tzinfo=UTC) + ledger.append(_row(ts=now.replace(hour=1), cost=0.9, request_id="t1")) + result = check_budget(budget, ledger, 0.5, now=now) + assert result.allowed is True + # …but the reason is populated so the caller can log a warning. + assert result.reason is not None + assert "daily cap" in result.reason + + +def test_check_most_restrictive_wins(tmp_path: Path) -> None: + """When daily + lifetime are both set, the FIRST breached wins the reason.""" + ledger = BudgetLedger(tmp_path / "spend.jsonl") + budget = Budget(daily_usd=1.0, lifetime_usd=2.0) + now = datetime(2026, 5, 29, 12, 0, tzinfo=UTC) + ledger.append(_row(ts=now.replace(hour=1), cost=0.9, request_id="t1")) + result = check_budget(budget, ledger, 0.5, now=now) + # Daily breach fires before lifetime is even checked. 
def test_check_day_boundary_aggregation(tmp_path: Path) -> None:
    """Spend recorded before today's midnight is excluded from the daily window."""
    noon = datetime(2026, 5, 29, 12, 0, tzinfo=UTC)
    # 23 hours before noon is 13:00 on the previous day. That is earlier
    # than today's 00:00 floor, so the 10.0 charge must NOT count toward
    # today's total even though it is less than 24 hours old.
    previous_day = noon - timedelta(hours=23)
    spend_log = BudgetLedger(tmp_path / "spend.jsonl")
    spend_log.append(_row(ts=previous_day, cost=10.0, request_id="y"))

    verdict = check_budget(Budget(daily_usd=1.0), spend_log, 0.5, now=noon)

    assert verdict.allowed is True
    # Nothing was spent today, so the full daily cap is still available.
    assert verdict.remaining_usd[BudgetWindow.DAILY.value] == pytest.approx(1.0)
+ ledger.append(_row(ts=now - timedelta(days=70), cost=9.5, request_id="old")) + result = check_budget(budget, ledger, 1.0, now=now) + assert result.allowed is False + assert "lifetime cap" in (result.reason or "") + + +def test_check_rejects_negative_estimate(tmp_path: Path) -> None: + ledger = BudgetLedger(tmp_path / "spend.jsonl") + with pytest.raises(ValueError): + check_budget(Budget(), ledger, -0.01) + + +def test_check_returns_remaining_pre_call_headroom( + tmp_path: Path, +) -> None: + """remaining_usd is the operator-facing pre-call headroom per window.""" + ledger = BudgetLedger(tmp_path / "spend.jsonl") + budget = Budget(daily_usd=10.0, monthly_usd=100.0) + now = datetime(2026, 5, 29, tzinfo=UTC) + ledger.append(_row(ts=now.replace(hour=1), cost=1.0, request_id="t1")) + result = check_budget(budget, ledger, 2.0, now=now) + assert result.allowed is True + # Pre-call headroom: daily 10 - 1 spent = 9; monthly 100 - 1 = 99. + assert result.remaining_usd[BudgetWindow.DAILY.value] == pytest.approx(9.0) + assert result.remaining_usd[BudgetWindow.MONTHLY.value] == pytest.approx(99.0) + + +# ── record_charge ────────────────────────────────────────────────────────── + + +def test_record_charge_appends_with_resolved_timestamp(tmp_path: Path) -> None: + ledger = BudgetLedger(tmp_path / "spend.jsonl") + fixed = datetime(2026, 5, 29, 12, 0, tzinfo=UTC) + row = record_charge( + ledger, + persona_id="hermes", + surface="openrouter", + model="claude-3.7", + cost_usd=0.05, + request_id="r1", + now=fixed, + ) + assert row.ts == fixed + saved = ledger.iter_rows() + assert saved == [row] + + +def test_record_charge_round_trip_preserves_metadata(tmp_path: Path) -> None: + ledger = BudgetLedger(tmp_path / "spend.jsonl") + record_charge( + ledger, + persona_id="hermes", + surface="fusion", + model="meta/llama-3.1-405b", + cost_usd=0.42, + request_id="req-meaning-of-life", + ) + rows = ledger.iter_rows() + assert len(rows) == 1 + assert rows[0].surface == "fusion" + assert 
rows[0].model == "meta/llama-3.1-405b" + assert rows[0].cost_usd == pytest.approx(0.42) + + +# ── BudgetCheck shape ────────────────────────────────────────────────────── + + +def test_budget_check_dataclass_defaults() -> None: + """remaining_usd defaults to empty dict — callers always get a mapping.""" + bc = BudgetCheck(allowed=True, reason=None) + assert bc.remaining_usd == {} diff --git a/tests/agents/test_personas_budget_roundtrip.py b/tests/agents/test_personas_budget_roundtrip.py new file mode 100644 index 00000000..f5a4b759 --- /dev/null +++ b/tests/agents/test_personas_budget_roundtrip.py @@ -0,0 +1,108 @@ +"""Round-trip tests — persona TOML preserves the budget block. + +PR-3's :func:`save_persona` + :func:`load_persona` now carry the +``[persona.budget]`` sub-table through the dataclass without dropping +operator-set caps. These tests pin that contract so a future refactor +of the writer can't silently elide budgets. +""" + +from __future__ import annotations + +from pathlib import Path + +from hal0.agents import personas as personas_mod +from hal0.agents.budget import Budget + + +def test_persona_with_budget_round_trips(tmp_path: Path) -> None: + persona = personas_mod.Persona( + id="example", + display_name="Example", + budget=Budget( + daily_usd=2.50, + monthly_usd=25.0, + lifetime_usd=200.0, + per_call_max_usd=0.10, + hard_cap=True, + ), + ) + personas_mod.save_persona(persona, root=tmp_path) + loaded = personas_mod.load_persona("example", root=tmp_path) + assert loaded.budget == persona.budget + + +def test_persona_empty_budget_round_trips(tmp_path: Path) -> None: + """Default Budget (no caps configured) survives save+load.""" + persona = personas_mod.Persona(id="ex", display_name="Ex") + personas_mod.save_persona(persona, root=tmp_path) + loaded = personas_mod.load_persona("ex", root=tmp_path) + assert loaded.budget == Budget() + assert loaded.budget.is_empty() + + +def test_persona_hard_cap_false_round_trips(tmp_path: Path) -> None: + 
"""hard_cap=False (warn-only mode) survives the round-trip.""" + persona = personas_mod.Persona( + id="warn", + display_name="Warn", + budget=Budget(daily_usd=1.0, hard_cap=False), + ) + personas_mod.save_persona(persona, root=tmp_path) + loaded = personas_mod.load_persona("warn", root=tmp_path) + assert loaded.budget.hard_cap is False + assert loaded.budget.daily_usd == 1.0 + + +def test_persona_explicit_zero_budget_round_trips(tmp_path: Path) -> None: + """Explicit 0.0 (=block every paid call) survives — distinguished from None.""" + persona = personas_mod.Persona( + id="fenced", + display_name="Fenced", + budget=Budget(daily_usd=0.0), + ) + personas_mod.save_persona(persona, root=tmp_path) + loaded = personas_mod.load_persona("fenced", root=tmp_path) + assert loaded.budget.daily_usd == 0.0 + + +def test_seed_personas_have_empty_budget_by_default(tmp_path: Path) -> None: + """Default seeds ship an empty budget — operator opts in.""" + personas_mod.seed_default_personas(agent_id="hermes-agent", root=tmp_path) + hermes = personas_mod.load_persona("hermes", root=tmp_path) + coder = personas_mod.load_persona("coder", root=tmp_path) + assert hermes.budget.is_empty() + assert coder.budget.is_empty() + # …but hard_cap defaults to True so a later operator edit doesn't have + # to remember to flip it. + assert hermes.budget.hard_cap is True + assert coder.budget.hard_cap is True + + +def test_persona_with_budget_preserves_other_fields(tmp_path: Path) -> None: + """Mutating budget doesn't lose system prompt / approval / tools state.""" + persona = personas_mod.Persona( + id="full", + display_name="Full", + summary="A persona with everything set", + system_prompt="You are Full. 
Be terse.", + tools_allowed=("memory.*",), + memory_namespace="private:full", + budget=Budget(daily_usd=5.0), + ) + personas_mod.save_persona(persona, root=tmp_path) + loaded = personas_mod.load_persona("full", root=tmp_path) + assert loaded == persona + + +def test_malformed_budget_in_toml_raises_persona_error(tmp_path: Path) -> None: + """A bad budget field surfaces as PersonaError so load_persona's contract holds.""" + (tmp_path / "bad.toml").write_text( + '[persona]\nid = "bad"\ndisplay_name = "Bad"\n[persona.budget]\ndaily_usd = -1.0\n', + encoding="utf-8", + ) + try: + personas_mod.load_persona("bad", root=tmp_path) + except personas_mod.PersonaError as exc: + assert ">= 0" in str(exc) + else: + raise AssertionError("expected PersonaError") diff --git a/tests/api/test_agents_budget.py b/tests/api/test_agents_budget.py new file mode 100644 index 00000000..46ccf683 --- /dev/null +++ b/tests/api/test_agents_budget.py @@ -0,0 +1,306 @@ +"""HTTP tests for ``/api/agents/{agent_id}/personas/{persona_id}/budget``. + +Pins the REST shape the dashboard editor + the V1 OpenRouter provider +depend on. Wires the personas + ledger roots at a tmp_path so the test +runner doesn't write to ``/var/lib/hal0/``. +""" + +from __future__ import annotations + +from collections.abc import Iterator +from pathlib import Path + +import pytest +from fastapi.testclient import TestClient + +from hal0.agents import personas as personas_mod +from hal0.agents.budget import ledger_for +from hal0.api.agents import budget as budget_route +from hal0.api.agents import personas as personas_route + + +@pytest.fixture +def state_root( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> Iterator[Path]: + """Redirect personas store + ledger root to a tmp dir. 
+ + Layout under tmp_path mirrors the production tree exactly: + + tmp_path/hermes-agent/personas/.toml + tmp_path/hermes-agent/personas//spend.jsonl + """ + agent_root = tmp_path / "hermes-agent" + personas_dir = agent_root / "personas" + personas_dir.mkdir(parents=True) + monkeypatch.setattr(personas_mod, "PERSONAS_ROOT", personas_dir) + monkeypatch.setitem(personas_route._AGENT_PERSONAS_ROOTS, "hermes", personas_dir) + monkeypatch.setitem(budget_route._AGENT_PERSONAS_ROOTS, "hermes", personas_dir) + monkeypatch.setitem(budget_route._AGENT_LEDGER_ROOTS, "hermes", tmp_path) + yield tmp_path + + +@pytest.fixture +def seeded(state_root: Path) -> Path: + """Seed the hermes + coder personas at the redirected root.""" + personas_dir = state_root / "hermes-agent" / "personas" + personas_mod.seed_default_personas(agent_id="hermes-agent", root=personas_dir) + return state_root + + +# ── GET ───────────────────────────────────────────────────────────────────── + + +def test_get_returns_empty_budget_for_seeded_persona(client: TestClient, seeded: Path) -> None: + r = client.get("/api/agents/hermes/personas/hermes/budget") + assert r.status_code == 200, r.text + body = r.json() + # Seeded persona has an empty budget — only hard_cap shows up. 
+ assert body["budget"] == {"hard_cap": True} + assert body["spend"] == {"today_usd": 0.0, "mtd_usd": 0.0, "lifetime_usd": 0.0} + assert body["remaining"] == {} + + +def test_get_unknown_agent_returns_404(client: TestClient, state_root: Path) -> None: + r = client.get("/api/agents/pi-coder/personas/anything/budget") + assert r.status_code == 404 + assert r.json()["error"]["code"] == "agent.unknown" + + +def test_get_unknown_persona_returns_404(client: TestClient, seeded: Path) -> None: + r = client.get("/api/agents/hermes/personas/ghost/budget") + assert r.status_code == 404 + assert r.json()["error"]["code"] == "persona.not_found" + + +# ── PUT ───────────────────────────────────────────────────────────────────── + + +def test_put_sets_budget_and_returns_updated_state(client: TestClient, seeded: Path) -> None: + payload = { + "daily_usd": 2.50, + "monthly_usd": 25.00, + "lifetime_usd": 250.00, + "per_call_max_usd": 0.10, + "hard_cap": True, + } + r = client.put("/api/agents/hermes/personas/hermes/budget", json=payload) + assert r.status_code == 200, r.text + body = r.json() + assert body["budget"]["daily_usd"] == 2.50 + assert body["budget"]["monthly_usd"] == 25.00 + assert body["budget"]["lifetime_usd"] == 250.00 + assert body["budget"]["per_call_max_usd"] == 0.10 + assert body["budget"]["hard_cap"] is True + # Persisted on disk. + personas_dir = seeded / "hermes-agent" / "personas" + loaded = personas_mod.load_persona("hermes", root=personas_dir) + assert loaded.budget.daily_usd == 2.50 + # Other persona fields untouched. + assert loaded.display_name == "Hermes" + assert loaded.system_prompt.startswith("You are Hermes") + # Remaining headroom snapshot is consistent. 
+ assert body["remaining"]["daily_usd"] == pytest.approx(2.50) + + +def test_put_with_warn_only_persists_hard_cap_false(client: TestClient, seeded: Path) -> None: + payload = {"daily_usd": 1.0, "hard_cap": False} + r = client.put("/api/agents/hermes/personas/hermes/budget", json=payload) + assert r.status_code == 200 + body = r.json() + assert body["budget"]["hard_cap"] is False + + +def test_put_invalid_estimate_returns_400(client: TestClient, seeded: Path) -> None: + # NOTE(review): despite "estimate" in the name, this PUTs a negative cap + # (daily_usd=-5.0) and expects the budget.invalid error code. + r = client.put( + "/api/agents/hermes/personas/hermes/budget", + json={"daily_usd": -5.0}, + ) + assert r.status_code == 400 + assert r.json()["error"]["code"] == "budget.invalid" + + +def test_put_unknown_persona_returns_404(client: TestClient, seeded: Path) -> None: + r = client.put("/api/agents/hermes/personas/ghost/budget", json={"daily_usd": 1.0}) + assert r.status_code == 404 + assert r.json()["error"]["code"] == "persona.not_found" + + +def test_put_unknown_agent_returns_404(client: TestClient, state_root: Path) -> None: + r = client.put("/api/agents/pi-coder/personas/x/budget", json={"daily_usd": 1.0}) + assert r.status_code == 404 + assert r.json()["error"]["code"] == "agent.unknown" + + +def test_put_non_object_body_returns_400(client: TestClient, seeded: Path) -> None: + """A JSON array body is malformed shape — 400, not 500.""" + r = client.put( + "/api/agents/hermes/personas/hermes/budget", + json=[1, 2, 3], + ) + assert r.status_code == 400 + assert r.json()["error"]["code"] == "budget.invalid_body" + + +# ── POST /check ───────────────────────────────────────────────────────────── + + +def test_check_with_no_budget_allows_any_estimate(client: TestClient, seeded: Path) -> None: + r = client.post( + "/api/agents/hermes/personas/hermes/budget/check", + json={"estimated_cost_usd": 5.0}, + ) + assert r.status_code == 200, r.text + body = r.json() + assert body["allowed"] is True + assert body["reason"] is None + + +def test_check_blocks_when_estimate_exceeds_per_call(client: TestClient, 
seeded: Path) -> None: + client.put( + "/api/agents/hermes/personas/hermes/budget", + json={"per_call_max_usd": 0.05}, + ) + r = client.post( + "/api/agents/hermes/personas/hermes/budget/check", + json={"estimated_cost_usd": 0.10}, + ) + assert r.status_code == 200 + body = r.json() + assert body["allowed"] is False + assert "per-call cap" in body["reason"] + + +def test_check_invalid_estimate_returns_400(client: TestClient, seeded: Path) -> None: + r = client.post( + "/api/agents/hermes/personas/hermes/budget/check", + json={"estimated_cost_usd": -1.0}, + ) + assert r.status_code == 400 + assert r.json()["error"]["code"] == "budget.invalid_estimate" + + +def test_check_missing_body_returns_400(client: TestClient, seeded: Path) -> None: + # NOTE(review): the request carries an empty JSON object, so this covers a + # missing estimated_cost_usd field rather than an absent request body. + r = client.post("/api/agents/hermes/personas/hermes/budget/check", json={}) + assert r.status_code == 400 + assert r.json()["error"]["code"] == "budget.invalid_estimate" + + +def test_check_unknown_persona_returns_404(client: TestClient, seeded: Path) -> None: + # NOTE(review): only the status is checked here; the PUT and charge + # unknown-persona tests also assert the persona.not_found error code. + r = client.post( + "/api/agents/hermes/personas/ghost/budget/check", + json={"estimated_cost_usd": 0.01}, + ) + assert r.status_code == 404 + + +# ── POST /charge ──────────────────────────────────────────────────────────── + + +def test_charge_records_to_ledger(client: TestClient, seeded: Path) -> None: + # Set a daily cap so we can verify remaining headroom shrinks. 
+ client.put( + "/api/agents/hermes/personas/hermes/budget", + json={"daily_usd": 10.0}, + ) + r = client.post( + "/api/agents/hermes/personas/hermes/budget/charge", + json={ + "surface": "openrouter", + "model": "anthropic/claude-3.7-sonnet", + "cost_usd": 0.42, + "request_id": "req-1", + }, + ) + assert r.status_code == 200, r.text + body = r.json() + assert body["recorded"] is True + assert body["row"]["model"] == "anthropic/claude-3.7-sonnet" + assert body["spend"]["today_usd"] == pytest.approx(0.42) + assert body["spend"]["lifetime_usd"] == pytest.approx(0.42) + assert body["remaining"]["daily_usd"] == pytest.approx(9.58) + + # Ledger file actually exists on disk + carries the row. + ledger = ledger_for("hermes", "hermes", root=seeded) + rows = ledger.iter_rows() + assert len(rows) == 1 + assert rows[0].cost_usd == pytest.approx(0.42) + assert rows[0].request_id == "req-1" + + +def test_charge_missing_required_field_returns_400(client: TestClient, seeded: Path) -> None: + r = client.post( + "/api/agents/hermes/personas/hermes/budget/charge", + json={"surface": "openrouter", "model": "m", "cost_usd": 0.01}, + ) + assert r.status_code == 400 + assert r.json()["error"]["code"] == "budget.invalid_charge" + + +def test_charge_negative_cost_returns_400(client: TestClient, seeded: Path) -> None: + r = client.post( + "/api/agents/hermes/personas/hermes/budget/charge", + json={ + "surface": "openrouter", + "model": "m", + "cost_usd": -0.01, + "request_id": "r", + }, + ) + assert r.status_code == 400 + assert r.json()["error"]["code"] == "budget.invalid_charge" + + +def test_charge_empty_surface_returns_400(client: TestClient, seeded: Path) -> None: + r = client.post( + "/api/agents/hermes/personas/hermes/budget/charge", + json={"surface": "", "model": "m", "cost_usd": 0.01, "request_id": "r"}, + ) + assert r.status_code == 400 + + +def test_charge_unknown_persona_returns_404(client: TestClient, seeded: Path) -> None: + r = client.post( + 
"/api/agents/hermes/personas/ghost/budget/charge", + json={ + "surface": "openrouter", + "model": "m", + "cost_usd": 0.01, + "request_id": "r", + }, + ) + assert r.status_code == 404 + assert r.json()["error"]["code"] == "persona.not_found" + + +def test_charge_then_check_reflects_new_spend(client: TestClient, seeded: Path) -> None: + client.put( + "/api/agents/hermes/personas/hermes/budget", + json={"daily_usd": 1.0}, + ) + # Charge 0.6 — should still allow another 0.3 but block 0.5. + client.post( + "/api/agents/hermes/personas/hermes/budget/charge", + json={ + "surface": "openrouter", + "model": "m", + "cost_usd": 0.6, + "request_id": "r1", + }, + ) + ok = client.post( + "/api/agents/hermes/personas/hermes/budget/check", + json={"estimated_cost_usd": 0.3}, + ) + assert ok.status_code == 200 + assert ok.json()["allowed"] is True + + block = client.post( + "/api/agents/hermes/personas/hermes/budget/check", + json={"estimated_cost_usd": 0.5}, + ) + assert block.status_code == 200 + blocked = block.json() + assert blocked["allowed"] is False + assert "daily cap" in blocked["reason"] diff --git a/ui/src/api/endpoints.ts b/ui/src/api/endpoints.ts index a73b1176..a8a924f8 100644 --- a/ui/src/api/endpoints.ts +++ b/ui/src/api/endpoints.ts @@ -72,22 +72,30 @@ export const ENDPOINTS = { `/api/agents/mcp/clients/${encodeURIComponent(name)}`, // ── Agents — bundled lifecycle + sidebar rollup (v0.3 PR-6) ────── - // `agents` lists installed bundled agents (ADR-0004 §2). The - // remaining endpoints under this block are surfaces the SidebarAgentBlock - // calls — most are NEW in v0.3 and may 404 against an older hal0-api; - // the consuming hooks fall back to "—" and console.warn once when a - // particular path returns 404 / network error so the sidebar - // degrades gracefully on partial deployments. - agents: '/api/agents', + // `agents` lives in the catalogue block above (one entry, used by + // both the bundled-list and sidebar surfaces). 
The remaining + // endpoints under this block are surfaces the SidebarAgentBlock + // calls — most are NEW in v0.3 and may 404 against an older + // hal0-api; the consuming hooks fall back to "—" and console.warn + // once when a particular path returns 404 / network error so the + // sidebar degrades gracefully on partial deployments. agentPersonas: (id: string) => `/api/agents/${encodeURIComponent(id)}/personas`, + // Per-persona spending-cap primitive (Phase 0 OpenRouter prereq). + // GET/PUT/check/charge — the V1 OpenRouter upstream + V2 fusion MCP + // both call ``check`` pre-flight and ``charge`` post-response. + agentPersonaBudget: (id: string, pid: string) => + `/api/agents/${encodeURIComponent(id)}/personas/${encodeURIComponent(pid)}/budget`, + agentPersonaBudgetCheck: (id: string, pid: string) => + `/api/agents/${encodeURIComponent(id)}/personas/${encodeURIComponent(pid)}/budget/check`, + agentPersonaBudgetCharge: (id: string, pid: string) => + `/api/agents/${encodeURIComponent(id)}/personas/${encodeURIComponent(pid)}/budget/charge`, agentActivity: (id: string) => `/api/agents/${encodeURIComponent(id)}/activity`, agentApprovals: '/api/agent/approvals', - // The two paths below DO NOT exist yet in any merged backend PR (the + // The path below DOES NOT exist yet in any merged backend PR (the // sidebar component degrades gracefully with "—" + warn). Recorded - // here so the wiring is single-place when the routes land. - agentSkills: '/api/agents/skills', + // here so the wiring is single-place when the route lands. agentMemoryStats: '/api/agents/hermes/memory/stats', // ── MCP host introspection (issue #206) ────────────────────────── diff --git a/ui/src/api/hooks/useBudget.ts b/ui/src/api/hooks/useBudget.ts new file mode 100644 index 00000000..a48fc632 --- /dev/null +++ b/ui/src/api/hooks/useBudget.ts @@ -0,0 +1,188 @@ +// hal0 v3 dashboard — per-persona budget hooks (Phase 0 OpenRouter prereq). 
+// +// TanStack Query wiring for the new ``/api/agents/{id}/personas/{pid}/ +// budget`` REST surface. The persona editor under personas-tab.jsx mounts +// a panel that: +// +// - GET-polls the current budget + running spend stats +// - PUT-mutates the budget block (other persona fields untouched) +// +// The check + charge endpoints are NOT consumed from the dashboard — V1's +// OpenRouter provider calls them server-side. We expose typed helpers +// here anyway so a future "what if I spent $X" preview surface can +// reuse them without re-fetching this hook's data. + +import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query' +import type { UseMutationResult, UseQueryResult } from '@tanstack/react-query' + +import { apiGet, apiPost, apiPut } from '../client' + +// ── Types ────────────────────────────────────────────────────────── + +/** + * Persona budget block — matches the server's :class:`Budget` shape. + * + * ``null`` on any cap field means "no cap configured"; an explicit + * ``0`` means "block every paid call". ``hard_cap=true`` (default) + * enforces; ``false`` is warn-only. 
+ */ +export interface PersonaBudget { + daily_usd?: number | null + monthly_usd?: number | null + lifetime_usd?: number | null + per_call_max_usd?: number | null + hard_cap: boolean +} + +export interface PersonaSpendStats { + today_usd: number + mtd_usd: number + lifetime_usd: number +} + +export interface PersonaRemaining { + daily_usd?: number + monthly_usd?: number + lifetime_usd?: number +} + +export interface PersonaBudgetResponse { + budget: PersonaBudget + spend: PersonaSpendStats + remaining: PersonaRemaining +} + +export interface BudgetCheckRequest { + estimated_cost_usd: number +} + +export interface BudgetCheckResponse { + allowed: boolean + reason: string | null + remaining_usd: Record + hard_cap: boolean +} + +export interface BudgetChargeRequest { + surface: string + model: string + cost_usd: number + request_id: string +} + +// ── Polling cadence ──────────────────────────────────────────────── +// +// Budget data doesn't change as fast as slot state — operator edits +// it once, the OpenRouter provider records charges asynchronously. +// 15s is a sensible default; the editor panel always invalidates on +// PUT so the operator sees their own change immediately. + +const BUDGET_POLL_MS = 15_000 + +// ── Hooks ────────────────────────────────────────────────────────── + +/** + * Read the budget + spend snapshot for one persona. + * + * Pass ``null`` / ``undefined`` for either id to short-circuit the + * fetch — useful while the persona detail loads and the active id + * isn't known yet. 
+ */ +export function usePersonaBudget( + agentId: string | null | undefined, + personaId: string | null | undefined, +): UseQueryResult { + return useQuery({ + queryKey: ['agents', 'persona', 'budget', agentId, personaId], + queryFn: async () => { + if (!agentId || !personaId) { + return { + budget: { hard_cap: true }, + spend: { today_usd: 0, mtd_usd: 0, lifetime_usd: 0 }, + remaining: {}, + } + } + return apiGet( + `/api/agents/${encodeURIComponent(agentId)}/personas/${encodeURIComponent(personaId)}/budget`, + ) + }, + enabled: !!agentId && !!personaId, + refetchInterval: BUDGET_POLL_MS, + refetchOnWindowFocus: true, + }) +} + +/** + * Mutation: replace the persona's budget block. + * + * Invalidates the read cache once the PUT succeeds so the panel refetches + * and shows the new caps (no optimistic cache write is performed). Throws + * when either id is missing. Other persona fields (system + * prompt, tool gating, approval policy) are NOT touched by this PUT + * — the server preserves them on round-trip. + */ +export function usePutPersonaBudget( + agentId: string | null | undefined, + personaId: string | null | undefined, +): UseMutationResult { + const qc = useQueryClient() + return useMutation({ + mutationFn: async (budget: PersonaBudget) => { + if (!agentId || !personaId) { + throw new Error('agentId + personaId required to PUT a budget') + } + return apiPut( + `/api/agents/${encodeURIComponent(agentId)}/personas/${encodeURIComponent(personaId)}/budget`, + budget as unknown as Record, + ) + }, + onSuccess: () => { + qc.invalidateQueries({ + queryKey: ['agents', 'persona', 'budget', agentId, personaId], + }) + }, + }) +} + +/** + * Convenience selector: pluck just the spend totals for surfacing in a + * sidebar / footer pill without rendering the whole budget panel. + */ +export function usePersonaSpendStats( + agentId: string | null | undefined, + personaId: string | null | undefined, +): PersonaSpendStats { + const q = usePersonaBudget(agentId, personaId) + return q.data?.spend ?? 
{ today_usd: 0, mtd_usd: 0, lifetime_usd: 0 } +} + +/** + * Dry-run pre-call gate. Not used by the dashboard today (V1's + * OpenRouter provider calls this server-side) but exposed for a future + * "preview cost" surface in the composer. + */ +export async function checkPersonaBudget( + agentId: string, + personaId: string, + body: BudgetCheckRequest, +): Promise { + return apiPost( + `/api/agents/${encodeURIComponent(agentId)}/personas/${encodeURIComponent(personaId)}/budget/check`, + body as unknown as Record, + ) +} + +/** + * Post-response charge recorder. Same audience as ``checkPersonaBudget`` + * — exposed for completeness; the OpenRouter provider is the canonical + * caller. + */ +export async function chargePersonaBudget( + agentId: string, + personaId: string, + body: BudgetChargeRequest, +): Promise { + return apiPost( + `/api/agents/${encodeURIComponent(agentId)}/personas/${encodeURIComponent(personaId)}/budget/charge`, + body as unknown as Record, + ) +} diff --git a/ui/src/dash/agents/persona-budget-hook-bridge.ts b/ui/src/dash/agents/persona-budget-hook-bridge.ts new file mode 100644 index 00000000..c423b45d --- /dev/null +++ b/ui/src/dash/agents/persona-budget-hook-bridge.ts @@ -0,0 +1,24 @@ +// hal0 dashboard — window-globals bridge for persona-budget hooks. +// +// PersonaBudgetPanel is a .jsx prototype file (no ES imports across +// dash/*). This bridge republishes the TanStack Query hooks as +// `window.__hal0UsePersonaBudget` + `window.__hal0PutPersonaBudget` +// so the panel finds them the same way PersonasTab finds +// useAgentPersonas (see personas-tab-hook-bridge.ts). +// +// IMPORTED FROM main.tsx BEFORE persona-budget-panel.jsx evaluates. 
+ +import { usePersonaBudget, usePutPersonaBudget } from '@/api/hooks/useBudget' + +;( + window as unknown as { + __hal0UsePersonaBudget?: typeof usePersonaBudget + __hal0PutPersonaBudget?: typeof usePutPersonaBudget + } +).__hal0UsePersonaBudget = usePersonaBudget +;( + window as unknown as { + __hal0UsePersonaBudget?: typeof usePersonaBudget + __hal0PutPersonaBudget?: typeof usePutPersonaBudget + } +).__hal0PutPersonaBudget = usePutPersonaBudget diff --git a/ui/src/dash/agents/persona-budget-panel.jsx b/ui/src/dash/agents/persona-budget-panel.jsx new file mode 100644 index 00000000..b3950016 --- /dev/null +++ b/ui/src/dash/agents/persona-budget-panel.jsx @@ -0,0 +1,224 @@ +// hal0 v0.3 Phase 0 — PersonaBudgetPanel. +// +// Per-persona spending caps editor. Mounts inside personas-tab.jsx +// when the operator opens a persona detail. Reads budget + running +// spend via `window.__hal0UsePersonaBudget`, mutates via +// `window.__hal0PutPersonaBudget` (TanStack bridge, same window-globals +// pattern as PersonasTab — see persona-budget-hook-bridge.ts). +// +// Empty-state copy is the CTA: "no budget set — set caps to enable +// cloud providers". That's the v0.3 line connecting this primitive to +// the V1 OpenRouter provider; once V1 ships, the empty-state message +// rewrites to "OpenRouter inactive — set a daily cap to enable". + +const { useEffect: useEffectPBP, useState: useStatePBP } = React + +function _fmtUsd(value) { + if (value === null || value === undefined || Number.isNaN(value)) return "—"; + const n = Number(value); + if (Number.isNaN(n)) return "—"; + return "$" + n.toFixed(4).replace(/0+$/, "").replace(/\.$/, ".00"); +} + +function _toForm(budget) { + // Convert API budget shape → editable form strings. ``null`` / missing + // caps render as empty strings so the operator sees the empty box + // (not a literal "null"); we only PUT back the fields the user typed. + const b = budget || { hard_cap: true }; + return { + daily_usd: b.daily_usd != null ? 
String(b.daily_usd) : "", + monthly_usd: b.monthly_usd != null ? String(b.monthly_usd) : "", + lifetime_usd: b.lifetime_usd != null ? String(b.lifetime_usd) : "", + per_call_max_usd: b.per_call_max_usd != null ? String(b.per_call_max_usd) : "", + hard_cap: b.hard_cap !== false, + }; +} + +function _formToPayload(form) { + const out = { hard_cap: !!form.hard_cap }; + const numeric = ["daily_usd", "monthly_usd", "lifetime_usd", "per_call_max_usd"]; + for (const k of numeric) { + const raw = (form[k] ?? "").trim(); + if (raw === "") continue; + const n = Number(raw); + if (!Number.isFinite(n) || n < 0) continue; + out[k] = n; + } + return out; +} + +function PersonaBudgetPanel({ agentId, personaId } = {}) { + const useBudget = window.__hal0UsePersonaBudget; + const usePut = window.__hal0PutPersonaBudget; + const query = useBudget ? useBudget(agentId, personaId) : { data: null, isLoading: false, isError: false }; + const mutation = usePut ? usePut(agentId, personaId) : { mutate: () => {}, isPending: false, error: null }; + + const [form, setForm] = useStatePBP(() => _toForm(query.data && query.data.budget)); + const [dirty, setDirty] = useStatePBP(false); + const [err, setErr] = useStatePBP(null); + + // Re-seed the form from the server snapshot when the persona changes + // OR the server-side budget changes due to a non-UI mutation (e.g. + // a /api/agents/.../budget/charge from V1's OpenRouter provider). We + // intentionally don't re-seed while the form is dirty — that would + // wipe the operator's pending edits during the 15s poll. + useEffectPBP(() => { + if (dirty) return; + setForm(_toForm(query.data && query.data.budget)); + }, [query.data, personaId, agentId, dirty]); + + const change = (key) => (event) => { + const value = event && event.target ? (key === "hard_cap" ? 
event.target.checked : event.target.value) : event; + setForm((prev) => ({ ...prev, [key]: value })); + setDirty(true); + setErr(null); + }; + + const save = async () => { + setErr(null); + const payload = _formToPayload(form); + try { + await mutation.mutateAsync(payload); + setDirty(false); + if (window.__hal0Toast) window.__hal0Toast("Budget saved", "ok"); + } catch (exc) { + setErr((exc && exc.message) || String(exc)); + } + }; + + const reset = () => { + setForm(_toForm(query.data && query.data.budget)); + setDirty(false); + setErr(null); + }; + + const budget = (query.data && query.data.budget) || { hard_cap: true }; + const spend = (query.data && query.data.spend) || { today_usd: 0, mtd_usd: 0, lifetime_usd: 0 }; + const remaining = (query.data && query.data.remaining) || {}; + const isEmpty = !budget.daily_usd && !budget.monthly_usd && !budget.lifetime_usd && !budget.per_call_max_usd; + + return ( +
+
+
+ Spending cap (persona-scoped) +
+ {isEmpty && ( + + no cap set + + )} + {!isEmpty && budget.hard_cap === false && ( + + warn-only + + )} +
+ + {isEmpty && ( +

+ No budget set — set caps to enable cloud providers (OpenRouter, fusion). + Without a cap a single recursing agent loop can drain a credit pool + overnight; that's why hal0 won't enable paid surfaces until at least + a daily limit is configured. +

+ )} + +
+
+
spent today
+
{_fmtUsd(spend.today_usd)}
+ {remaining.daily_usd != null && ( +
remaining {_fmtUsd(remaining.daily_usd)}
+ )} +
+
+
spent MTD
+
{_fmtUsd(spend.mtd_usd)}
+ {remaining.monthly_usd != null && ( +
remaining {_fmtUsd(remaining.monthly_usd)}
+ )} +
+
+
spent lifetime
+
{_fmtUsd(spend.lifetime_usd)}
+ {remaining.lifetime_usd != null && ( +
remaining {_fmtUsd(remaining.lifetime_usd)}
+ )} +
+
+ +
+ {[ + ["daily_usd", "Daily cap (USD)"], + ["monthly_usd", "Monthly cap (USD)"], + ["lifetime_usd", "Lifetime cap (USD)"], + ["per_call_max_usd", "Per-call max (USD)"], + ].map(([key, label]) => ( + + ))} +
+ + + + {err && ( +
+ {err} +
+ )} + +
+ + +
+
+ ); +} + +Object.assign(window, { PersonaBudgetPanel }); diff --git a/ui/src/dash/agents/personas-tab.jsx b/ui/src/dash/agents/personas-tab.jsx index 97ea00bb..d5bdbd40 100644 --- a/ui/src/dash/agents/personas-tab.jsx +++ b/ui/src/dash/agents/personas-tab.jsx @@ -82,6 +82,20 @@ function PersonasTab({ onEdit } = {}) { /api/agents/hermes/personas unreachable — showing fallback list. )} + {/* Phase 0 OpenRouter prereq — per-persona spending caps. + Mounted full-width beneath the persona cards; resolves the + active persona id from the live `/api/agents/{id}/personas` + response so the operator-set budget tracks whichever persona + the dashboard currently shows as active. Falls back to the + first card's id when the active pointer hasn't seeded yet. */} + {window.PersonaBudgetPanel && ( +
+ +
+ )} ); } diff --git a/ui/src/main.tsx b/ui/src/main.tsx index 8fb88c1a..b1f4fe08 100644 --- a/ui/src/main.tsx +++ b/ui/src/main.tsx @@ -74,6 +74,7 @@ import './dash/extras.jsx' // symbol the old monolith would have left behind (defence-in-depth — // the old monolith is already removed). import './dash/agents/personas-tab-hook-bridge' +import './dash/agents/persona-budget-hook-bridge' import './dash/agents/memory-tab-hook-bridge' // v0.3 PR-10: HermesChat surface — composer + transcript + sidecar over // the WS proxy from PR-9 (master plan §4 PR-10). The session store @@ -96,6 +97,12 @@ import './dash/agents/chat/transcript.jsx' import './dash/agents/chat/composer.jsx' import './dash/agents/chat/hermes-sidecar.jsx' import './dash/agents/hermes-chat-tab.jsx' +// Phase 0 OpenRouter prereq: PersonaBudgetPanel publishes itself on +// window via Object.assign + is read by personas-tab.jsx at render +// time. Load order: bridge already imported above, panel here BEFORE +// personas-tab.jsx so the symbol is registered before the tab's JSX +// references it. +import './dash/agents/persona-budget-panel.jsx' import './dash/agents/personas-tab.jsx' import './dash/agents/skills-tab.jsx' import './dash/agents/memory-tab.jsx'