Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions areal/api/cli_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -1507,6 +1507,8 @@ def build_args(
host: str | None = None,
port: int | None = None,
dist_init_addr: str | None = None,
n_nodes: int = 1,
node_rank: int = 0,
):
args: dict = conf_as_dict(vllm_config)
args = dict(
Expand All @@ -1522,6 +1524,18 @@ def build_args(
args["port"] = port
if host is not None:
args["host"] = host
# Multi-node support
if n_nodes > 1:
args["nnodes"] = n_nodes
args["node_rank"] = node_rank
if dist_init_addr is not None:
from areal.utils.network import split_hostport

master_host, master_port = split_hostport(dist_init_addr)
args["master_addr"] = master_host
args["master_port"] = str(master_port)
if node_rank > 0:
args["headless"] = True
return args

@staticmethod
Expand All @@ -1536,6 +1550,8 @@ def build_cmd(
host: str | None = None,
port: int | None = None,
dist_init_addr: str | None = None,
n_nodes: int = 1,
node_rank: int = 0,
):
args = vLLMConfig.build_args(
vllm_config=vllm_config,
Expand All @@ -1544,6 +1560,8 @@ def build_cmd(
host=host,
port=port,
dist_init_addr=dist_init_addr,
n_nodes=n_nodes,
node_rank=node_rank,
)
return vLLMConfig.build_cmd_from_args(args)

Expand Down
34 changes: 25 additions & 9 deletions areal/experimental/agent_service/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,29 +153,45 @@ Turn 2:

```
areal/experimental/agent_service/
├── __init__.py # Public exports
├── __init__.py # Public exports (AgentRequest, AgentResponse, etc.)
├── README.md # This document
├── auth.py # Admin key auth helpers (hmac-safe comparison)
├── protocol.py # Gateway protocol frame types
├── types.py # AgentRequest, AgentResponse, EventEmitter, AgentRunnable
├── controller/
│ ├── __init__.py # AgentServiceController, AgentServiceControllerConfig
│ ├── config.py # AgentServiceControllerConfig dataclass
│ └── controller.py # AgentServiceController orchestrator
├── guard/
│ ├── __init__.py # Module docstring
│ ├── __main__.py # python -m areal.experimental.agent_service.guard
│ └── app.py # Guard Flask app (pass-through to areal.infra.rpc.guard)
├── gateway/
│ ├── __init__.py # Public exports
│ ├── __main__.py # python -m areal.experimental.agent_service.gateway
│ ├── app.py # create_gateway_app()
│ └── bridge.py # OpenResponsesBridge, mount_bridge()
│ ├── bridge.py # OpenResponsesBridge, mount_bridge()
│ └── config.py # GatewayConfig dataclass
├── router/
│ ├── __init__.py # Public exports
│ ├── __main__.py # python -m areal.experimental.agent_service.router
│ ├── app.py # create_router_app()
│ └── client.py # RouterClient
│ ├── client.py # RouterClient
│ └── config.py # RouterConfig dataclass
├── data_proxy/
│ ├── __init__.py # Public exports
│ ├── __main__.py # python -m areal.experimental.agent_service.data_proxy
│ ├── app.py # create_data_proxy_app()
│ └── client.py # DataProxyClient
│ ├── client.py # DataProxyClient
│ └── config.py # DataProxyConfig dataclass
└── worker/
├── __init__.py # Public exports
├── __main__.py # python -m areal.experimental.agent_service.worker
└── app.py # create_worker_app()
├── app.py # create_worker_app()
└── config.py # WorkerConfig dataclass

examples/agent_service/
├── agent.py # Tau2Agent (PydanticAI)
├── config.yaml # Demo configuration
├── run_demo.py # One-click demo
└── README.md # Example documentation
├── agent.py # ClaudeAgent (Claude Agent SDK)
├── run_agent_service.py # Controller-based launcher + interactive demo
└── README.md # Example documentation
```
79 changes: 9 additions & 70 deletions areal/experimental/agent_service/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,83 +5,22 @@
Exposes complete agent sessions (autonomous multi-step reasoning, tool use,
memory) via independent HTTP microservices: Gateway, Router, DataProxy,
and Worker.
"""

from __future__ import annotations

import importlib
from typing import TYPE_CHECKING
Submodules
----------
- ``controller`` — :class:`AgentServiceController` orchestrator
- ``gateway`` — public HTTP/WebSocket entry point
- ``router`` — session-affine routing
- ``data_proxy`` — stateful session proxy
- ``worker`` — stateless agent execution
- ``protocol`` — WebSocket frame types and helpers
"""

from .protocol import (
EventFrame,
Frame,
FrameType,
QueueMode,
RequestFrame,
RequestMethod,
ResponseFrame,
RunStatus,
generate_run_id,
make_complete_response,
make_delta_event,
make_failed_response,
make_tool_call_event,
parse_frame,
serialize_frame,
)
from .types import AgentRequest, AgentResponse, AgentRunnable, EventEmitter

if TYPE_CHECKING:
from .data_proxy import DataProxyClient, create_data_proxy_app
from .gateway import OpenResponsesBridge, create_gateway_app, mount_bridge
from .router import RouterClient, create_router_app
from .worker import create_worker_app

_LAZY_IMPORTS: dict[str, str] = {
"DataProxyClient": ".data_proxy",
"OpenResponsesBridge": ".gateway",
"RouterClient": ".router",
"create_data_proxy_app": ".data_proxy",
"create_gateway_app": ".gateway",
"create_router_app": ".router",
"create_worker_app": ".worker",
"mount_bridge": ".gateway",
}


def __getattr__(name: str):
if name in _LAZY_IMPORTS:
module = importlib.import_module(_LAZY_IMPORTS[name], __package__)
return getattr(module, name)
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


__all__ = [
"AgentRequest",
"AgentResponse",
"AgentRunnable",
"DataProxyClient",
"EventEmitter",
"EventFrame",
"Frame",
"FrameType",
"OpenResponsesBridge",
"QueueMode",
"RequestFrame",
"RequestMethod",
"ResponseFrame",
"RouterClient",
"RunStatus",
"create_data_proxy_app",
"create_gateway_app",
"create_router_app",
"create_worker_app",
"generate_run_id",
"make_complete_response",
"make_delta_event",
"make_failed_response",
"make_tool_call_event",
"mount_bridge",
"parse_frame",
"serialize_frame",
]
14 changes: 8 additions & 6 deletions areal/experimental/agent_service/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

from __future__ import annotations

import hmac

from fastapi import Header, HTTPException

DEFAULT_ADMIN_KEY = "areal-agent-admin"
DEFAULT_ADMIN_API_KEY = "areal-agent-admin"


async def verify_admin_key(
Expand All @@ -15,16 +17,16 @@ async def verify_admin_key(
expected_key: str,
) -> None:
expected = f"Bearer {expected_key}"
if authorization != expected:
if not hmac.compare_digest(authorization, expected):
raise HTTPException(status_code=401, detail="Invalid admin key")


def make_admin_dependency(admin_key: str):
def make_admin_dependency(admin_api_key: str):
async def _dep(authorization: str = Header(alias="Authorization")) -> None:
await verify_admin_key(authorization, expected_key=admin_key)
await verify_admin_key(authorization, expected_key=admin_api_key)

return _dep


def admin_headers(admin_key: str) -> dict[str, str]:
return {"Authorization": f"Bearer {admin_key}"}
def admin_headers(admin_api_key: str) -> dict[str, str]:
return {"Authorization": f"Bearer {admin_api_key}"}
11 changes: 11 additions & 0 deletions areal/experimental/agent_service/controller/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# SPDX-License-Identifier: Apache-2.0

"""Agent Service Controller — orchestrator for agent micro-services."""

from .config import AgentServiceControllerConfig
from .controller import AgentServiceController

__all__ = [
"AgentServiceController",
"AgentServiceControllerConfig",
]
63 changes: 63 additions & 0 deletions areal/experimental/agent_service/controller/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# SPDX-License-Identifier: Apache-2.0

"""Configuration for the AgentServiceController."""

from __future__ import annotations

from dataclasses import dataclass, field

from ..auth import DEFAULT_ADMIN_API_KEY


@dataclass
class AgentServiceControllerConfig:
"""Unified configuration for AgentServiceController.

Consolidates settings for the guard, router, gateway, worker, and
data proxy micro-services launched by the controller.
"""

# -- Agent class -------------------------------------------------------
agent_cls_path: str = ""
"""Fully-qualified import path for the ``AgentRunnable`` implementation
(e.g. ``examples.agent_service.agent.Tau2Agent``)."""

# -- Authentication ----------------------------------------------------
admin_api_key: str = DEFAULT_ADMIN_API_KEY
"""Shared admin API key for inter-service Bearer auth."""

# -- Scaling -----------------------------------------------------------
num_pairs: int = 1
"""Number of Worker+DataProxy pairs to launch on initialize."""

# -- Timeouts ----------------------------------------------------------
setup_timeout: float = 120.0
"""Timeout (seconds) waiting for each service to become healthy."""

health_poll_interval: float = 5.0
"""Seconds between health polls for crash detection (0 = disabled)."""

drain_timeout: float = 30.0
"""Seconds to wait for active sessions to drain before force-killing a pair."""

# -- Log level ---------------------------------------------------------
log_level: str = "info"
"""Log level for spawned micro-services."""

# -- Environment -------------------------------------------------------
env: dict[str, str] = field(default_factory=dict)
"""Extra environment variables to pass to all forked child processes."""

def __post_init__(self) -> None:
if not self.agent_cls_path:
raise ValueError("agent_cls_path must be a non-empty import path")
if self.num_pairs < 0:
raise ValueError(f"num_pairs must be non-negative, got {self.num_pairs}")
if self.setup_timeout <= 0:
raise ValueError(
f"setup_timeout must be positive, got {self.setup_timeout}"
)
if self.drain_timeout < 0:
raise ValueError(
f"drain_timeout must be non-negative, got {self.drain_timeout}"
)
Loading
Loading