From a5bf1e2fb5d6855a4e683e634d7fd960a1284ad3 Mon Sep 17 00:00:00 2001 From: anvil Date: Thu, 23 Apr 2026 23:27:57 +0000 Subject: [PATCH] fix(gateway): enforce agent-bound runtime tokens --- ax_cli/commands/gateway.py | 17 ++-- ax_cli/gateway.py | 62 ++++++++++++--- docs/gateway-agent-runtimes.md | 10 +++ tests/test_gateway_commands.py | 141 +++++++++++++++++++++++++++++++-- 4 files changed, 203 insertions(+), 27 deletions(-) diff --git a/ax_cli/commands/gateway.py b/ax_cli/commands/gateway.py index 49ad317..90b82f8 100644 --- a/ax_cli/commands/gateway.py +++ b/ax_cli/commands/gateway.py @@ -55,10 +55,12 @@ evaluate_runtime_attestation, find_agent_entry, gateway_dir, + gateway_environment, get_gateway_approval, hermes_setup_status, infer_asset_descriptor, list_gateway_approvals, + load_gateway_managed_agent_token, load_gateway_registry, load_gateway_session, load_recent_gateway_activity, @@ -172,13 +174,10 @@ def _load_managed_agent_or_exit(name: str) -> dict: def _load_managed_agent_client(entry: dict) -> AxClient: - token_file = Path(str(entry.get("token_file") or "")).expanduser() - if not token_file.exists(): - err_console.print(f"[red]Managed agent token is missing:[/red] {token_file}") - raise typer.Exit(1) - token = token_file.read_text().strip() - if not token: - err_console.print(f"[red]Managed agent token file is empty:[/red] {token_file}") + try: + token = load_gateway_managed_agent_token(entry) + except ValueError as exc: + err_console.print(f"[red]{exc}[/red]") raise typer.Exit(1) return AxClient( base_url=str(entry.get("base_url") or ""), @@ -650,6 +649,7 @@ def _status_payload(*, activity_limit: int = 10) -> dict: gateway["pid"] = None payload = { "gateway_dir": str(gateway_dir()), + "gateway_environment": gateway_environment(), "connected": bool(session), "base_url": session.get("base_url") if session else None, "space_id": session.get("space_id") if session else None, @@ -1401,7 +1401,8 @@ def _render_gateway_overview(payload: dict) -> Panel: ) 
grid.add_row("User", str(payload.get("user") or "-"), "Base URL", str(payload.get("base_url") or "-")) space_label = str(payload.get("space_name") or payload.get("space_id") or "-") - grid.add_row("Space", space_label, "PID", str(payload["daemon"].get("pid") or "-")) + grid.add_row("Space", space_label, "Environment", str(payload.get("gateway_environment") or "default")) + grid.add_row("PID", str(payload["daemon"].get("pid") or "-"), "State Dir", str(payload.get("gateway_dir") or "-")) grid.add_row("UI", str(ui.get("url") or "-"), "UI PID", str(ui.get("pid") or "-")) grid.add_row( "Session", diff --git a/ax_cli/gateway.py b/ax_cli/gateway.py index 20625c5..6b7da5f 100644 --- a/ax_cli/gateway.py +++ b/ax_cli/gateway.py @@ -1389,12 +1389,12 @@ def _normalize_allowed_spaces_payload(payload: object) -> list[dict[str, Any]]: def _fetch_allowed_spaces_for_entry(entry: dict[str, Any]) -> list[dict[str, Any]] | None: - token_file = Path(str(entry.get("token_file") or "")).expanduser() base_url = _normalized_base_url(entry.get("base_url")) - if not token_file.exists() or not base_url: + if not base_url: return None - token = token_file.read_text().strip() - if not token: + try: + token = load_gateway_managed_agent_token(entry) + except ValueError: return None client = AxClient( base_url=base_url, @@ -2189,12 +2189,32 @@ def annotate_runtime_health( def gateway_dir() -> Path: - path = _global_config_dir() / "gateway" + explicit = str(os.environ.get("AX_GATEWAY_DIR") or "").strip() + if explicit: + path = Path(explicit).expanduser() + else: + root = _global_config_dir() / "gateway" + env_name = gateway_environment() + path = root if env_name is None else root / "envs" / env_name path.mkdir(parents=True, exist_ok=True) path.chmod(0o700) return path +def gateway_environment() -> str | None: + raw = ( + str(os.environ.get("AX_GATEWAY_ENV") or "").strip() + or str(os.environ.get("AX_USER_ENV") or "").strip() + or str(os.environ.get("AX_ENV") or "").strip() + ) + if not raw: + 
return None + normalized = re.sub(r"[^a-z0-9_.-]+", "-", raw.lower()).strip(".-") + if not normalized or normalized in {"default", "user"}: + return None + return normalized + + def gateway_agents_dir() -> Path: path = gateway_dir() / "agents" path.mkdir(parents=True, exist_ok=True) @@ -2241,6 +2261,24 @@ def agent_token_path(name: str) -> Path: return agent_dir(name) / "token" +def load_gateway_managed_agent_token(entry: dict[str, Any]) -> str: + """Read the entry's runtime token; raise ValueError if missing, empty, a user bootstrap PAT, or unbound.""" + token_file = Path(str(entry.get("token_file") or "")).expanduser() + if not token_file.is_file(): + raise ValueError(f"Gateway-managed token file is missing: {token_file}") + token = token_file.read_text().strip() + if not token: + raise ValueError(f"Gateway-managed token file is empty: {token_file}") + if token.startswith("axp_u_"): + raise ValueError( + "Gateway-managed agents require an agent-bound token. " + f"Refusing to use a user bootstrap PAT from {token_file}." 
+ ) + if not str(entry.get("agent_id") or "").strip(): + raise ValueError("Gateway-managed agents require a bound agent_id before runtime use.") + return token + + def agent_pending_queue_path(name: str) -> Path: return agent_dir(name) / "pending.json" @@ -2394,6 +2432,8 @@ def daemon_status() -> dict[str, Any]: return { "pid": pid, "running": running, + "gateway_dir": str(gateway_dir()), + "gateway_environment": gateway_environment(), "registry_path": str(registry_path()), "session_path": str(session_path()), "registry": registry, @@ -2896,8 +2936,7 @@ def _build_hermes_sentinel_cmd(entry: dict[str, Any]) -> list[str]: def _build_hermes_sentinel_env(entry: dict[str, Any]) -> dict[str, str]: env = {k: v for k, v in os.environ.items() if k not in ENV_DENYLIST} - token_file = Path(str(entry.get("token_file") or "")).expanduser() - token = token_file.read_text().strip() if token_file.exists() else "" + token = load_gateway_managed_agent_token(entry) workdir = _hermes_sentinel_workdir(entry) agents_dir = _agents_dir_for_entry(entry) hermes_repo = str(entry.get("hermes_repo_path") or "").strip() or "/home/ax-agent/shared/repos/hermes-agent" @@ -3149,7 +3188,7 @@ def _log(self, message: str) -> None: self.logger(f"{self.name}: {message}") def _token(self) -> str: - return self.token_file.read_text().strip() + return load_gateway_managed_agent_token(self.entry) def _new_client(self): return self.client_factory( @@ -3313,7 +3352,6 @@ def _hermes_sentinel_log_path(self) -> Path: def _start_hermes_sentinel_process(self, *, runtime_instance_id: str) -> None: workdir = _hermes_sentinel_workdir(self.entry) script = _hermes_sentinel_script(self.entry) - token_file = Path(str(self.entry.get("token_file") or "")).expanduser() if not script.exists(): error = f"Hermes sentinel script not found: {script}" self._update_state( @@ -3321,8 +3359,10 @@ def _start_hermes_sentinel_process(self, *, runtime_instance_id: str) -> None: ) record_gateway_activity("runtime_error", 
entry=self.entry, error=error) return - if not token_file.exists() or not token_file.read_text().strip(): - error = f"Gateway-managed token file is missing or empty: {token_file}" + try: + load_gateway_managed_agent_token(self.entry) + except ValueError as exc: + error = str(exc) self._update_state( effective_state="error", current_status="error", current_activity=error, last_error=error ) diff --git a/docs/gateway-agent-runtimes.md b/docs/gateway-agent-runtimes.md index 587cf29..dc419d6 100644 --- a/docs/gateway-agent-runtimes.md +++ b/docs/gateway-agent-runtimes.md @@ -22,6 +22,12 @@ Gateway keeps those pieces, but moves operator management into one place: - Show liveness, queue state, activity, and tool signals. - Provide a single CLI/UI for dev, staging, and production operators. +Use separate Gateway state per environment. `AX_GATEWAY_ENV=dev/staging` stores +state under `~/.ax/gateway/envs/dev-staging`, while `AX_GATEWAY_ENV=prod` +stores a separate registry, session, PID file, UI state, queues, and agent token +files. `AX_GATEWAY_DIR=/path/to/gateway-state` is available when a deployment +needs an explicit state root. + ## Current PR88 State PR88 has enough Gateway plumbing to register agents, mint tokens, show status, @@ -80,6 +86,10 @@ owns the Hermes session, runtime plugin, message queue, and tool callbacks. The Gateway owns the credentials, process lifecycle, binding verification, and operator status. +Runtime token files must contain an agent-bound credential for the managed +agent. Gateway rejects user bootstrap PATs before sends or runtime launch so a +copied user token cannot become an agent runtime identity. + Do not treat the one-shot `examples/hermes_sentinel/hermes_bridge.py` demo as the production sentinel pattern. 
It is useful for proving that a Gateway command bridge can call Hermes, but it creates a fresh agent per message and does not diff --git a/tests/test_gateway_commands.py b/tests/test_gateway_commands.py index b2abc77..511ebe9 100644 --- a/tests/test_gateway_commands.py +++ b/tests/test_gateway_commands.py @@ -175,6 +175,28 @@ def test_gateway_login_saves_gateway_session(monkeypatch, tmp_path): assert recent[-1]["username"] == "madtank" +def test_gateway_state_dir_isolated_by_environment(monkeypatch, tmp_path): + config_dir = tmp_path / "config" + monkeypatch.setenv("AX_CONFIG_DIR", str(config_dir)) + monkeypatch.setenv("AX_GATEWAY_ENV", "dev/staging") + + assert gateway_core.gateway_environment() == "dev-staging" + assert gateway_core.gateway_dir() == config_dir / "gateway" / "envs" / "dev-staging" + assert gateway_core.session_path() == config_dir / "gateway" / "envs" / "dev-staging" / "session.json" + + +def test_gateway_state_dir_allows_explicit_override(monkeypatch, tmp_path): + config_dir = tmp_path / "config" + custom_dir = tmp_path / "custom-gateway" + monkeypatch.setenv("AX_CONFIG_DIR", str(config_dir)) + monkeypatch.setenv("AX_GATEWAY_ENV", "prod") + monkeypatch.setenv("AX_GATEWAY_DIR", str(custom_dir)) + + assert gateway_core.gateway_environment() == "prod" + assert gateway_core.gateway_dir() == custom_dir + assert gateway_core.registry_path() == custom_dir / "registry.json" + + def test_gateway_run_refuses_second_live_daemon(monkeypatch, tmp_path): config_dir = tmp_path / "config" monkeypatch.setenv("AX_CONFIG_DIR", str(config_dir)) @@ -427,7 +449,9 @@ def fake_spawn(command, *, log_path): monkeypatch.setattr(gateway_cmd, "active_gateway_pid", lambda: state["daemon_pid"]) monkeypatch.setattr(gateway_cmd, "active_gateway_ui_pid", lambda: state["ui_pid"]) monkeypatch.setattr(gateway_cmd, "_tail_log_lines", lambda path, lines=12: "address already in use") - monkeypatch.setattr(gateway_cmd, "_terminate_pids", lambda pids, timeout=3.0: 
(terminated.append(list(pids)) or (list(pids), []))) + monkeypatch.setattr( + gateway_cmd, "_terminate_pids", lambda pids, timeout=3.0: terminated.append(list(pids)) or (list(pids), []) + ) monkeypatch.setattr(gateway_core, "clear_gateway_pid", lambda pid=None: cleared.append(pid)) result = runner.invoke(app, ["gateway", "start", "--no-open"]) @@ -709,7 +733,9 @@ def test_gateway_approvals_approve_updates_binding(monkeypatch, tmp_path): attestation = gateway_core.evaluate_runtime_attestation(registry, drifted) gateway_core.save_gateway_registry(registry) - result = runner.invoke(app, ["gateway", "approvals", "approve", attestation["approval_id"], "--scope", "gateway", "--json"]) + result = runner.invoke( + app, ["gateway", "approvals", "approve", attestation["approval_id"], "--scope", "gateway", "--json"] + ) assert result.exit_code == 0, result.output payload = json.loads(result.stdout) @@ -768,6 +794,51 @@ def test_sanitize_exec_env_strips_ax_credentials(monkeypatch): assert env["OPENAI_API_KEY"] == "keep-me" +def test_gateway_managed_token_loader_rejects_user_bootstrap_pat(tmp_path): + token_file = tmp_path / "token" + token_file.write_text("axp_u_user.secret") + + with pytest.raises(ValueError, match="agent-bound token"): + gateway_core.load_gateway_managed_agent_token( + { + "name": "echo-bot", + "agent_id": "agent-1", + "token_file": str(token_file), + } + ) + + +def test_gateway_managed_token_loader_requires_bound_agent_id(tmp_path): + token_file = tmp_path / "token" + token_file.write_text("axp_a_agent.secret") + + with pytest.raises(ValueError, match="bound agent_id"): + gateway_core.load_gateway_managed_agent_token( + { + "name": "echo-bot", + "token_file": str(token_file), + } + ) + + +def test_hermes_sentinel_env_rejects_user_bootstrap_pat(tmp_path): + token_file = tmp_path / "token" + token_file.write_text("axp_u_user.secret") + + with pytest.raises(ValueError, match="agent-bound token"): + gateway_core._build_hermes_sentinel_env( + { + "name": 
"dev_sentinel", + "agent_id": "agent-1", + "space_id": "space-1", + "base_url": "https://paxai.app", + "runtime_type": "hermes_sentinel", + "token_file": str(token_file), + "workdir": str(tmp_path / "dev_sentinel"), + } + ) + + def test_managed_echo_runtime_processes_message(tmp_path, monkeypatch): config_dir = tmp_path / "config" config_dir.mkdir() @@ -1182,7 +1253,9 @@ def test_passive_runtime_snapshot_rehydrates_manual_queue_updates(tmp_path, monk def test_annotate_runtime_health_marks_stale_after_missed_heartbeat(): - old_seen = (datetime.now(timezone.utc) - timedelta(seconds=gateway_core.RUNTIME_STALE_AFTER_SECONDS + 5)).isoformat() + old_seen = ( + datetime.now(timezone.utc) - timedelta(seconds=gateway_core.RUNTIME_STALE_AFTER_SECONDS + 5) + ).isoformat() snapshot = gateway_core.annotate_runtime_health( { @@ -1226,7 +1299,9 @@ def test_annotate_runtime_health_derives_identity_space_snapshot(monkeypatch, tm "install_id": "inst-identity-1", } ] - gateway_core.ensure_gateway_identity_binding(registry, registry["agents"][0], session=gateway_core.load_gateway_session()) + gateway_core.ensure_gateway_identity_binding( + registry, registry["agents"][0], session=gateway_core.load_gateway_session() + ) snapshot = gateway_core.annotate_runtime_health(registry["agents"][0], registry=registry) @@ -1469,7 +1544,11 @@ def json(self) -> dict[str, object]: { "name": "nemotron-3-nano:latest", "modified_at": "2025-12-16T14:03:52.946489046-08:00", - "details": {"family": "nemotron_h_moe", "families": ["nemotron_h_moe"], "parameter_size": "31.6B"}, + "details": { + "family": "nemotron_h_moe", + "families": ["nemotron_h_moe"], + "parameter_size": "31.6B", + }, }, { "name": "gemma4:latest", @@ -1891,7 +1970,10 @@ def test_gateway_ui_handler_supports_agent_mutations(monkeypatch, tmp_path): assert tested_payload["target_agent"] == "ui-bot" assert tested_payload["author"] == "agent" assert tested_payload["sender_agent"].startswith("switchboard-") - assert tested_payload["content"] == 
"@ui-bot Reply with exactly: Gateway test OK. Then mention which local model answered." + assert ( + tested_payload["content"] + == "@ui-bot Reply with exactly: Gateway test OK. Then mention which local model answered." + ) doctored = client.post("/api/agents/ui-bot/doctor", json={}) assert doctored.status_code == 201 @@ -2156,6 +2238,45 @@ def test_gateway_agents_send_uses_managed_identity(monkeypatch, tmp_path): assert recent[-1]["event"] == "manual_message_sent" +def test_gateway_agents_send_rejects_user_bootstrap_pat(monkeypatch, tmp_path): + config_dir = tmp_path / "config" + monkeypatch.setenv("AX_CONFIG_DIR", str(config_dir)) + gateway_core.save_gateway_session( + { + "token": "axp_u_test.token", + "base_url": "https://paxai.app", + "space_id": "space-1", + "username": "codex", + } + ) + token_file = tmp_path / "sender.token" + token_file.write_text("axp_u_user.secret") + registry = gateway_core.load_gateway_registry() + registry["agents"] = [ + { + "name": "sender-bot", + "agent_id": "agent-1", + "space_id": "space-1", + "base_url": "https://paxai.app", + "runtime_type": "inbox", + "desired_state": "running", + "effective_state": "running", + "token_file": str(token_file), + "transport": "gateway", + "credential_source": "gateway", + } + ] + gateway_core.save_gateway_registry(registry) + monkeypatch.setattr(gateway_cmd, "AxClient", _FakeManagedSendClient) + + result = runner.invoke(app, ["gateway", "agents", "send", "sender-bot", "hello there", "--to", "codex"]) + + assert result.exit_code == 1, result.output + assert "agent-bound token" in result.output + assert "user" in result.output + assert "bootstrap PAT" in result.output + + def test_gateway_agents_send_acknowledges_pending_inbox_message(monkeypatch, tmp_path): config_dir = tmp_path / "config" monkeypatch.setenv("AX_CONFIG_DIR", str(config_dir)) @@ -2263,7 +2384,9 @@ def test_gateway_agents_send_blocks_identity_mismatch(monkeypatch, tmp_path): "install_id": "inst-sender-1", } ] - 
gateway_core.ensure_gateway_identity_binding(registry, registry["agents"][0], session=gateway_core.load_gateway_session()) + gateway_core.ensure_gateway_identity_binding( + registry, registry["agents"][0], session=gateway_core.load_gateway_session() + ) registry["identity_bindings"][0]["acting_identity"]["agent_name"] = "night_owl" gateway_core.save_gateway_registry(registry) monkeypatch.setattr(gateway_cmd, "AxClient", _FakeManagedSendClient) @@ -2431,7 +2554,9 @@ def test_gateway_status_payload_surfaces_alerts(monkeypatch, tmp_path): "runtime_type": "exec", "desired_state": "running", "effective_state": "running", - "last_seen_at": (datetime.now(timezone.utc) - timedelta(seconds=gateway_core.RUNTIME_STALE_AFTER_SECONDS + 5)).isoformat(), + "last_seen_at": ( + datetime.now(timezone.utc) - timedelta(seconds=gateway_core.RUNTIME_STALE_AFTER_SECONDS + 5) + ).isoformat(), "backlog_depth": 2, "last_error": None, "token_file": "/tmp/stale-token",