From 6c769a9abc891c3984be2a675e5d437b315f979c Mon Sep 17 00:00:00 2001 From: Reiase Date: Sun, 15 Feb 2026 12:46:38 +0800 Subject: [PATCH 01/15] Enhance Pulsing Python API and Ray integration - Updated the documentation for the Python API, improving clarity and consistency in naming conventions and examples. - Introduced new functions for mounting and unmounting Python objects to the Pulsing network, facilitating better integration with Ray. - Enhanced the `resolve` function to support a timeout parameter, allowing for retries until an actor name appears or the timeout expires. - Added a new `pulsing.examples` module with built-in examples, showcasing the capabilities of Pulsing in a distributed environment. - Implemented tests for the new Ray integration features, ensuring robust functionality and error handling across various scenarios. --- crates/pulsing-py/src/actor.rs | 41 ++- llms.binding.md | 302 ++++++++++++--------- python/pulsing/__init__.py | 30 +++ python/pulsing/actor/__init__.py | 4 + python/pulsing/actor/remote.py | 217 ++++++++++++++- python/pulsing/cli/__main__.py | 60 +++++ python/pulsing/examples/__init__.py | 33 +++ python/pulsing/examples/counting_game.py | 121 +++++++++ python/pulsing/ray.py | 163 ++++++++++++ tests/python/test_ray_init.py | 321 +++++++++++++++++++++++ 10 files changed, 1156 insertions(+), 136 deletions(-) create mode 100644 python/pulsing/examples/__init__.py create mode 100644 python/pulsing/examples/counting_game.py create mode 100644 python/pulsing/ray.py create mode 100644 tests/python/test_ray_init.py diff --git a/crates/pulsing-py/src/actor.rs b/crates/pulsing-py/src/actor.rs index 7a7fc638b..72a8997ec 100644 --- a/crates/pulsing-py/src/actor.rs +++ b/crates/pulsing-py/src/actor.rs @@ -1600,12 +1600,16 @@ impl PyActorSystem { } /// Resolve a named actor (selects one instance using load balancing) - #[pyo3(signature = (name, node_id=None))] + /// + /// When `timeout` is provided, retries resolution until the name appears + /// or the timeout expires (useful for waiting on gossip propagation). + #[pyo3(signature = (name, node_id=None, timeout=None))] fn resolve_named<'py>( &self, py: Python<'py>, name: String, node_id: Option, + timeout: Option, ) -> PyResult> { let system = self.inner.clone(); @@ -1623,23 +1627,44 @@ impl PyActorSystem { ActorPath::new(&name).map_err(to_py_value_err)? }; let node = node_id.map(NodeId::new); - let actor_ref = system - .resolve_named(&path, node.as_ref()) - .await - .map_err(to_pyerr)?; - Ok(PyActorRef { inner: actor_ref }) + + match timeout { + None => { + // 无 timeout:找不到立刻报错(原有行为) + let actor_ref = system + .resolve_named(&path, node.as_ref()) + .await + .map_err(to_pyerr)?; + Ok(PyActorRef { inner: actor_ref }) + } + Some(secs) => { + // 带 timeout:重试直到名字出现或超时 + let deadline = + tokio::time::Instant::now() + std::time::Duration::from_secs_f64(secs); + let mut last_err = None; + while tokio::time::Instant::now() < deadline { + match system.resolve_named(&path, node.as_ref()).await { + Ok(actor_ref) => return Ok(PyActorRef { inner: actor_ref }), + Err(e) => last_err = Some(e), + } + tokio::time::sleep(std::time::Duration::from_millis(200)).await; + } + Err(to_pyerr(last_err.unwrap())) + } + } }) } /// Alias for resolve_named - resolve actor by name - #[pyo3(signature = (name, *, node_id=None))] + #[pyo3(signature = (name, *, node_id=None, timeout=None))] fn resolve<'py>( &self, py: Python<'py>, name: String, node_id: Option, + timeout: Option, ) -> PyResult> { - self.resolve_named(py, name, node_id) + self.resolve_named(py, name, node_id, timeout) } fn stop<'py>(&self, py: Python<'py>, actor_name: String) -> PyResult> { diff --git a/llms.binding.md b/llms.binding.md index 87b4720b2..c5359c1d6 100644 --- a/llms.binding.md +++ b/llms.binding.md @@ -4,11 +4,11 @@ `Pulsing` is a distributed actor framework that provides a communication backbone for building distributed systems, with specialized support for AI applications. -## Python 接口 +## Python API -### Actor System风格接口 +### Actor System Style -```Python +```python import pulsing as pul system = await pul.actor_system( @@ -50,19 +50,19 @@ await actorref.tell(msg: Any) -> None @pul.remote class Counter: - # 同步处理函数 + # Synchronous handler def incr(self): ... - # 异步处理函数 + # Asynchronous handler async def desc(self): ... -# 使用 +# Usage counter = await Counter.spawn(name="counter") -result = await counter.incr() # 返回 ActorProxy,直接调用方法 +result = await counter.incr() # Returns ActorProxy, call methods directly -# 队列接口 +# Queue API writer = await system.queue.write( topic: str, *, @@ -88,7 +88,7 @@ reader = await system.queue.read( records = await reader.get(limit: int = 100, wait: bool = False) -> list[dict] -# 队列使用示例 +# Queue usage example writer = await system.queue.write("my_queue", bucket_column="user_id") await writer.put({"user_id": "u1", "data": "hello"}) @@ -96,12 +96,12 @@ reader = await system.queue.read("my_queue") records = await reader.get(limit=10) ``` -### Ray风格异步接口 +### Async API with Global System ```python import pulsing as pul -# 初始化全局系统 +# Initialize global system await pul.init( addr: str | None = None, *, @@ -111,7 +111,7 @@ await pul.init( await pul.shutdown() -# 生成 Actor(使用全局系统) +# Spawn actor (using global system) actorref = await pul.spawn( actor: Actor, *, @@ -123,53 +123,113 @@ actorref = await pul.spawn( max_backoff: float = 30.0 ) -> ActorRef -# 通过 ActorId 获取引用(使用全局系统) +# Get reference by ActorId (using global system) actorref = await pul.refer(actorid: ActorId | str) -> ActorRef -# 通过名称解析 Actor(使用全局系统) -actorref = await pul.resolve( +# Resolve actor by name (using global system) +ref = await pul.resolve( name: str, *, - node_id: int | None = None -) -> ActorRef + node_id: int | None = None, + timeout: float | None = None # Seconds to wait for name to appear (gossip convergence) +) -> ActorRefView -# 发送消息并等待响应 -response = await actorref.ask(request: Any) -> Any +# Send message and wait for response +response = await ref.ask(request: Any) -> Any -# 发送消息(不等待响应) -await actorref.tell(msg: Any) -> None +# Send message without waiting (fire-and-forget) +await ref.tell(msg: Any) -> None + +# ── Proxy Generation ── + +# Untyped proxy (no need to know actor type, call any method by name) +ref = await pul.resolve("service_name") +proxy = ref.as_any() # Untyped proxy +result = await proxy.any_method(args) # Call any remote method +value = await proxy.some_attr # Read remote attribute (no parentheses) -# 将 ActorRef 绑定到类型,生成 ActorProxy -proxy = Counter.resolve(name) +# Typed proxy (generated from local class definition, with method signature validation) +ref = await pul.resolve("counter", timeout=30) +proxy = ref.as_type(Counter) # Bind to Counter type +result = await proxy.incr() # With type checking + +# @pul.remote classes can also resolve directly to a typed proxy +typed_proxy = await Counter.resolve("counter") +any_proxy = typed_proxy.as_any() # typed → untyped conversion @pul.remote class Counter: def __init__(self, init=0): self.value = init - # 同步处理函数 + # Synchronous handler def incr(self): ... - # 异步处理函数 + # Asynchronous handler async def desc(self): ... -# 使用方式1:通过 spawn 创建 +# Usage 1: Create via spawn counter = await Counter.spawn(name="counter") -result = await counter.incr() # 返回 ActorProxy,直接调用方法 +result = await counter.incr() # Returns ActorProxy, call methods directly -# 使用方式2:通过 resolve 解析已有 actor +# Usage 2: Resolve an existing actor proxy = await Counter.resolve("counter") result = await proxy.incr() ``` -### Ray风格兼容接口 +### Ray Integration + +`pul.mount` registers any Python object as a Pulsing actor, enabling tight integration between Ray actors and Pulsing. + +```python +import pulsing as pul + +# Mount object onto Pulsing network (sync, can be called in __init__) +pul.mount( + instance: Any, # Object to mount + *, + name: str, # Pulsing name, used for resolve discovery + public: bool = True, # Whether discoverable by other cluster nodes +) -> None +# Internally: +# 1. Initialize Pulsing (if not yet initialized in this process) +# 2. Wrap instance as a Pulsing actor +# 3. Register on Pulsing network, gossip broadcasts the name + +# Unmount (call when actor is destroyed) +pul.unmount(name: str) -> None +``` +Example: Ray handles process scheduling, Pulsing handles inter-actor communication. + +```python +import ray, pulsing as pul + +@ray.remote +class Worker: + def __init__(self, name): + pul.mount(self, name=name) # One line to join Pulsing + + async def call_peer(self, peer_name, msg): + proxy = (await pul.resolve(peer_name, timeout=30)).as_type(Worker) + return await proxy.greet(msg) # Cross-process Pulsing call + + async def greet(self, msg): + return f"hello from {self.name}: {msg}" + +ray.init() +workers = [Worker.remote(f"w{i}") for i in range(3)] +ray.get(workers[0].call_peer.remote("w1", "hi")) # => "hello from w1: hi" +pul.cleanup_ray() +``` + +### Ray-Compatible API ```python from pulsing.compat import ray -# 初始化(同步接口,内部使用异步) +# Initialize (sync interface, async internally) ray.init( address: str | None = None, *, @@ -177,35 +237,35 @@ ray.init( **kwargs ) -> None -# 关闭系统 +# Shutdown ray.shutdown() -> None -# 检查是否已初始化 +# Check initialization status ray.is_initialized() -> bool -# 装饰器:将类转换为 Actor +# Decorator: convert class to Actor @ray.remote class MyActor: def __init__(self, ...): ... def method(self, ...): ... -# 创建 Actor(同步接口) +# Create Actor (sync interface) actor_handle = MyActor.remote(...) -> _ActorHandle -# 调用方法(返回 ObjectRef) +# Call method (returns ObjectRef) result_ref = actor_handle.method.remote(...) -> ObjectRef -# 获取结果(同步接口,支持单个或列表) +# Get result (sync, supports single or list) result = ray.get( refs: ObjectRef | list[ObjectRef], *, timeout: float | None = None ) -> Any | list[Any] -# 将值包装为 ObjectRef(用于 API 兼容) +# Wrap value as ObjectRef (for API compatibility) ref = ray.put(value: Any) -> ObjectRef -# 等待多个 ObjectRef 完成 +# Wait for multiple ObjectRefs ready, remaining = ray.wait( refs: list[ObjectRef], *, @@ -214,36 +274,36 @@ ready, remaining = ray.wait( ) -> tuple[list[ObjectRef], list[ObjectRef]] ``` -### Actor 行为 +### Actor Behavior -#### 基础 Actor(使用 `receive` 方法) +#### Basic Actor (using `receive` method) ```python from pulsing.actor import Actor class EchoActor(Actor): - """receive 方法 - 同步或异步均可,框架自动检测""" + """receive method - sync or async, framework auto-detects""" - # 方式1:同步方法 + # Option 1: Synchronous def receive(self, msg): return msg - # 方式2:异步方法(需要 await 时使用) + # Option 2: Asynchronous (use when you need await) async def receive(self, msg): result = await some_async_operation() return result class FireAndForget(Actor): - """无返回值(适合 tell 调用)""" + """No return value (suitable for tell calls)""" def receive(self, msg): print(f"Received: {msg}") - # 无返回值 + # No return value ``` -**注意:** `receive` 方法可以是 `def` 或 `async def`,Pulsing 会自动检测并正确处理。 -只有当方法内部需要 `await` 其他协程时,才需要使用 `async def`。 +**Note:** `receive` can be `def` or `async def`, Pulsing auto-detects and handles both correctly. +Only use `async def` when the method body needs to `await` other coroutines. -#### @pul.remote 装饰器(推荐) +#### @pul.remote Decorator (Recommended) ```python import pulsing as pul @@ -253,110 +313,110 @@ class Counter: def __init__(self, init=0): self.value = init - # 同步方法 - 阻塞处理,请求按顺序执行 - # 适合:快速计算、状态修改 + # Sync method - blocks actor, requests execute sequentially + # Best for: fast computation, state mutation def incr(self): self.value += 1 return self.value - # 异步方法 - 非阻塞,可并发处理多个请求 - # 适合:IO 密集型操作(网络请求、数据库查询) + # Async method - non-blocking, can handle other requests during await + # Best for: IO-bound operations (network, database) async def fetch_and_add(self, url): - data = await http_get(url) # 等待期间可处理其他请求 + data = await http_get(url) # Other requests served during await self.value += data return self.value - # 无返回值方法 - 适合 tell() 调用 + # No return value - suitable for tell() calls def reset(self): self.value = 0 -# 同步 vs 异步方法的并发行为: -# - def method(): 阻塞 Actor,请求排队顺序执行 -# - async def method(): 非阻塞,await 期间可处理其他请求(并发) +# Sync vs async concurrency behavior: +# - def method(): Blocks actor, requests queued sequentially +# - async def method(): Non-blocking, concurrent during await -# 使用 +# Usage counter = await Counter.spawn(name="counter") -result = await counter.incr() # ask 模式,等待返回 -await counter.reset() # 无返回值,但仍等待完成 +result = await counter.incr() # ask mode, waits for return +await counter.reset() # No return value, but still waits for completion ``` -#### 消息传递模式 +#### Message Passing Patterns ```python -# ask - 发送消息并等待响应 +# ask - send message and wait for response response = await actorref.ask({"action": "get"}) -# tell - 发送消息,不等待响应(fire-and-forget) +# tell - send message, don't wait (fire-and-forget) await actorref.tell({"action": "log", "data": "hello"}) ``` -#### Actor 生命周期 +#### Actor Lifecycle ```python from pulsing.actor import Actor, ActorId class MyActor(Actor): def on_start(self, actor_id: ActorId): - """Actor 启动时调用""" + """Called when actor starts""" print(f"Started: {actor_id}") def on_stop(self): - """Actor 停止时调用""" + """Called when actor stops""" print("Stopping...") def metadata(self) -> dict[str, str]: - """返回 Actor 元数据(用于诊断)""" + """Return actor metadata (for diagnostics)""" return {"type": "worker", "version": "1.0"} async def receive(self, msg): return msg ``` -#### 监督与重启策略 +#### Supervision and Restart Policies ```python @pul.remote( restart_policy="on_failure", # "never" | "on_failure" | "always" - max_restarts=3, # 最大重启次数 - min_backoff=0.1, # 最小退避时间(秒) - max_backoff=30.0, # 最大退避时间(秒) + max_restarts=3, # Maximum restart attempts + min_backoff=0.1, # Minimum backoff time (seconds) + max_backoff=30.0, # Maximum backoff time (seconds) ) class ResilientWorker: def process(self, data): - # 如果抛出异常,Actor 会自动重启 + # Actor auto-restarts on exception return heavy_computation(data) ``` -#### 流式响应 +#### Streaming Responses ```python @pul.remote class StreamingService: - # 直接返回 generator,Pulsing 自动处理为流式响应 + # Return a generator, Pulsing auto-handles as streaming response async def generate_stream(self, n): for i in range(n): yield f"chunk_{i}" - # 同步 generator 也支持 + # Sync generators also supported def sync_stream(self, n): for i in range(n): yield f"item_{i}" -# 使用 +# Usage service = await StreamingService.spawn() -# 客户端消费流 +# Client consumes stream async for chunk in service.generate_stream(10): print(chunk) # chunk_0, chunk_1, ... ``` -**注意:** 对于 `@pul.remote` 类,直接返回 generator(同步或异步)即可,Pulsing 会自动检测并按流式响应处理。 +**Note:** For `@pul.remote` classes, simply return a generator (sync or async) and Pulsing auto-detects and handles it as a streaming response. -## Rust 接口 +## Rust API -Rust API 通过 trait 定义契约,分为三层: +Rust API defines contracts via traits, organized in three layers: -### 快速入门 +### Quick Start ```rust use pulsing_actor::prelude::*; @@ -381,11 +441,11 @@ impl Actor for Echo { async fn main() -> anyhow::Result<()> { let system = ActorSystem::builder().build().await?; - // 命名 actor(可通过 resolve 发现,使用 namespace/name 格式) + // Named actor (discoverable via resolve, uses namespace/name format) let actor = system.spawn_named("services/echo", Echo).await?; let Pong(x): Pong = actor.ask(Ping(1)).await?; - // 匿名 actor(仅通过 ActorRef 访问) + // Anonymous actor (accessible only via ActorRef) let worker = system.spawn(Worker::new()).await?; system.shutdown().await?; @@ -393,55 +453,55 @@ async fn main() -> anyhow::Result<()> { } ``` -### Trait 分层 +### Trait Layers -#### ActorSystemCoreExt(主路径,prelude 自动导入) +#### ActorSystemCoreExt (Main path, auto-imported via prelude) -核心 spawn 与 resolve 能力: +Core spawn and resolve capabilities: ```rust -// Spawn - 简洁 API -system.spawn(actor).await?; // 匿名 actor(不可 resolve) -system.spawn_named(name, actor).await?; // 命名 actor(可 resolve) +// Spawn - Simple API +system.spawn(actor).await?; // Anonymous actor (not resolvable) +system.spawn_named(name, actor).await?; // Named actor (resolvable) -// Spawn - Builder 模式(高级配置) +// Spawn - Builder pattern (advanced configuration) system.spawning() - .name("services/counter") // 可选:有 name = 可 resolve + .name("services/counter") // Optional: named = resolvable .supervision(SupervisionSpec::on_failure().max_restarts(3)) .mailbox_capacity(256) .spawn(actor).await?; -// Resolve - 简单方式 -system.actor_ref(&actor_id).await?; // 按 ActorId 获取 -system.resolve(name).await?; // 按名称解析 +// Resolve - Simple +system.actor_ref(&actor_id).await?; // By ActorId +system.resolve(name).await?; // By name -// Resolve - Builder 模式(高级配置) +// Resolve - Builder pattern (advanced configuration) system.resolving() - .node(node_id) // 可选:指定目标节点 - .policy(RoundRobinPolicy::new()) // 可选:负载均衡策略 - .filter_alive(true) // 可选:只选存活节点 - .resolve(name).await?; // 解析单个 + .node(node_id) // Optional: target node + .policy(RoundRobinPolicy::new()) // Optional: load balancing + .filter_alive(true) // Optional: alive nodes only + .resolve(name).await?; // Resolve single system.resolving() - .list(name).await?; // 获取所有实例 + .list(name).await?; // Get all instances system.resolving() - .lazy(name)?; // 懒解析 + .lazy(name)?; // Lazy resolve ``` -#### ActorSystemAdvancedExt(高级:可重启 supervision) +#### ActorSystemAdvancedExt (Advanced: restartable supervision) -Factory 模式 spawn,支持 supervision 重启(仅命名 actor): +Factory-pattern spawn with supervision restart (named actors only): ```rust -// 命名 actor + factory(可重启 + 可 resolve) -// 注意:匿名 actor 不支持 supervision,因为无法重新解析 +// Named actor + factory (restartable + resolvable) +// Note: anonymous actors don't support supervision (cannot re-resolve) system.spawn_named_factory(name, || Ok(Service::new()), options).await?; ``` -#### ActorSystemOpsExt(运维/诊断/生命周期) +#### ActorSystemOpsExt (Operations / Diagnostics / Lifecycle) -系统信息、集群成员、停止/关闭等: +System info, cluster membership, stop/shutdown: ```rust system.node_id(); @@ -452,22 +512,22 @@ system.stop(name).await?; system.shutdown().await?; ``` -### 关键约定 +### Key Conventions -- **消息编码**:`Message::pack(&T)` 使用 bincode + `type_name::()`;跨版本协议建议 `Message::single("TypeV1", bytes)`。 -- **命名与解析**: - - `spawn_named(name, actor)`:创建可发现 actor,name 即为解析路径 - - `resolve(name)`:一次性解析(迁移后可能 stale) - - `resolve_lazy(name)`:懒解析 + 自动刷新(~5s TTL) -- **流式**:返回 `Message::Stream`,取消语义 best-effort。 -- **监督**:只有 `spawn_named_factory` 支持失败重启,匿名 actor 不支持 supervision。 +- **Message encoding**: `Message::pack(&T)` uses bincode + `type_name::()`; for cross-version protocols use `Message::single("TypeV1", bytes)`. +- **Naming and resolution**: + - `spawn_named(name, actor)`: Creates a discoverable actor, name is the resolution path + - `resolve(name)`: One-shot resolve (may become stale after migration) + - `resolve_lazy(name)`: Lazy resolve + auto-refresh (~5s TTL) +- **Streaming**: Return `Message::Stream`, cancellation is best-effort. +- **Supervision**: Only `spawn_named_factory` supports failure restart; anonymous actors do not support supervision. -### Behavior(类型安全,Akka Typed 风格) +### Behavior (Type-safe, Akka Typed style) -- **核心**:`Behavior` + `TypedRef` + `BehaviorAction (Same/Become/Stop)` -- **约定**:`TypedRef` 要求 `M: Serialize + DeserializeOwned + Send + 'static` +- **Core**: `Behavior` + `TypedRef` + `BehaviorAction (Same/Become/Stop)` +- **Constraint**: `TypedRef` requires `M: Serialize + DeserializeOwned + Send + 'static` -除了定义时候使用函数语法以外,其他与 Actor 完全相同: +Defined using function syntax, otherwise identical to Actor: ```rust fn counter(init: i32) -> Behavior { @@ -477,8 +537,8 @@ fn counter(init: i32) -> Behavior { }) } -// Behavior 实现 IntoActor trait,可以直接传给 spawn/spawn_named -// 无需手动包装,系统会自动转换 +// Behavior implements IntoActor, can be passed directly to spawn/spawn_named +// No manual wrapping needed, system converts automatically let counter = system.spawn(counter(0)).await?; let counter = system.spawn_named("actors/counter", counter(0)).await?; ``` diff --git a/python/pulsing/__init__.py b/python/pulsing/__init__.py index d6a55eddf..b21b9b155 100644 --- a/python/pulsing/__init__.py +++ b/python/pulsing/__init__.py @@ -70,6 +70,9 @@ def incr(self): self.value += 1; return self.value # Resolve function resolve, as_any, + # Mount (attach existing object to Pulsing network) + mount, + unmount, # Types Actor, ActorSystem as _ActorSystem, @@ -85,6 +88,27 @@ def incr(self): self.value += 1; return self.value PYTHON_ACTOR_SERVICE_NAME, ) + +# Ray integration (lazy import — 仅在 Ray 环境下可用) +def init_inside_ray(): + """在 Ray worker 中初始化 Pulsing 并加入集群(async 版本)。 + + 用法:: + + await pul.init_inside_ray() + """ + from pulsing.ray import async_init_in_ray + + return async_init_in_ray() + + +def cleanup_ray(): + """清理 Pulsing 在 Ray KV store 中的状态""" + from pulsing.ray import cleanup + + return cleanup() + + # Import exceptions from pulsing.exceptions import ( PulsingError, @@ -280,6 +304,12 @@ async def refer(actorid: ActorId | str) -> ActorRef: "is_initialized", # Decorator "remote", + # Mount (attach existing object to Pulsing network) + "mount", + "unmount", + # Ray integration + "init_inside_ray", + "cleanup_ray", # Types "Actor", "ActorSystem", diff --git a/python/pulsing/actor/__init__.py b/python/pulsing/actor/__init__.py index bea6a7dad..7405373fe 100644 --- a/python/pulsing/actor/__init__.py +++ b/python/pulsing/actor/__init__.py @@ -201,6 +201,8 @@ async def tell_with_timeout( get_system_actor, health_check, list_actors, + mount, + unmount, ping, remote, resolve, @@ -222,6 +224,8 @@ async def tell_with_timeout( "shutdown", "remote", "resolve", + "mount", + "unmount", "get_system", "get_system_actor", "is_initialized", diff --git a/python/pulsing/actor/remote.py b/python/pulsing/actor/remote.py index 4b07a9be9..0047a9742 100644 --- a/python/pulsing/actor/remote.py +++ b/python/pulsing/actor/remote.py @@ -301,12 +301,40 @@ def get_actor_metadata(name: str) -> dict[str, str] | None: return _actor_metadata_registry.get(name) +def _extract_methods(cls: type) -> tuple[list[str], set[str]]: + """Extract public method names and async method set from a class. + + Handles Ray-wrapped classes by unwrapping to the original class first. + """ + # 如果是 Ray ActorClass,提取原始类 + try: + from ray.actor import ActorClass as RayActorClass + + if isinstance(cls, RayActorClass): + # Ray ActorClass 的 __ray_metadata__ 有原始类引用 + if hasattr(cls, "__ray_metadata__"): + meta = cls.__ray_metadata__ + if hasattr(meta, "modified_class"): + cls = meta.modified_class + except ImportError: + pass + + methods = [] + async_methods = set() + for name, method in inspect.getmembers(cls, predicate=inspect.isfunction): + if name.startswith("_"): + continue + methods.append(name) + if inspect.iscoroutinefunction(method) or inspect.isasyncgenfunction(method): + async_methods.add(name) + return methods, async_methods + + class ActorRefView: - """Wrapper around ActorRef that adds .as_any() for an untyped proxy. + """Wrapper around ActorRef that adds .as_any() / .as_type() for proxy generation. Returned by resolve(name). Delegates .ask(), .tell(), and other - ActorRef attributes to the underlying ref. Use .as_any() to get - a proxy that forwards any method call to the remote actor. + ActorRef attributes to the underlying ref. """ __slots__ = ("_ref",) @@ -318,6 +346,22 @@ def as_any(self) -> "ActorProxy": """Return an untyped proxy that forwards any method call to the remote actor.""" return ActorProxy(self._ref, method_names=None, async_methods=None) + def as_type(self, cls: type) -> "ActorProxy": + """Return a typed proxy based on the given class definition. + + Inspects ``cls`` for public methods and generates a proxy with + method name validation and correct sync/async detection. + Type info comes from the local class definition, not from the network. + + Example:: + + ref = await pul.resolve("counter", timeout=30) + proxy = ref.as_type(Counter) + await proxy.incr() + """ + methods, async_methods = _extract_methods(cls) + return ActorProxy(self._ref, methods, async_methods) + def __getattr__(self, name: str): return getattr(self._ref, name) @@ -369,7 +413,10 @@ def from_ref( class _MethodCaller: - """Method caller.""" + """Method caller. 支持两种用法: + - await proxy.method(args) — 方法调用 + - await proxy.attr — 属性读取(无参调用) + """ def __init__(self, actor_ref: ActorRef, method_name: str, is_async: bool = False): self._ref = actor_ref @@ -382,6 +429,10 @@ def __call__(self, *args, **kwargs): else: return self._sync_call(*args, **kwargs) + def __await__(self): + """支持 await proxy.attr 直接读取属性""" + return self().__await__() + async def _sync_call(self, *args, **kwargs) -> Any: """Synchronous method call.""" # Use configured protocol version (default v1) @@ -651,13 +702,22 @@ async def receive(self, msg) -> Any: return _wrap_response_v2(error=error_msg) return _wrap_response_v1(error=error_msg) - func = getattr(self._instance, method, None) - if func is None or not callable(func): + _MISSING = object() + attr = getattr(self._instance, method, _MISSING) + if attr is _MISSING: error_msg = f"Not found: {method}" if version == 2: return _wrap_response_v2(error=error_msg) return _wrap_response_v1(error=error_msg) + if not callable(attr): + # 属性读取:直接返回值 + if version == 2: + return _wrap_response_v2(result=attr) + return _wrap_response_v1(result=attr) + + func = attr + # Detect if it's an async method (including async generators) is_async_method = ( inspect.iscoroutinefunction(func) @@ -921,6 +981,19 @@ class ActorClass: counter = await Counter.local(system, init=10) """ + @staticmethod + def _unwrap_ray_class(cls): + """如果 cls 是 Ray ActorClass,提取原始用户类""" + try: + from ray.actor import ActorClass as RayActorClass + except ImportError: + return cls + if isinstance(cls, RayActorClass): + for base in type(cls).__bases__: + if base is not RayActorClass and base.__name__ != "Generic": + return base + return cls + def __init__( self, cls: type, @@ -929,6 +1002,10 @@ def __init__( min_backoff: float = 0.1, max_backoff: float = 30.0, ): + unwrapped = self._unwrap_ray_class(cls) + # 保留 Ray handle,使 .remote() 可用 + self._ray_cls = cls if unwrapped is not cls else None + cls = unwrapped self._cls = cls self._class_name = f"{cls.__module__}.{cls.__name__}" self._restart_policy = restart_policy @@ -953,6 +1030,10 @@ def __init__( # Register class _actor_class_registry[self._class_name] = cls + # 如果原始类被 @ray.remote 装饰,用 Ray 的 .remote() 覆盖实例方法 + if self._ray_cls is not None: + self.remote = self._ray_cls.remote + async def spawn( self, *args, @@ -1484,6 +1565,7 @@ async def resolve( name: str, *, node_id: int | None = None, + timeout: float | None = None, ): """Resolve a named actor by name. @@ -1495,6 +1577,8 @@ async def resolve( Args: name: Actor name node_id: Target node ID, searches in cluster if not provided + timeout: 等待名字出现的超时秒数。None 表示不等待(找不到立刻报错)。 + 设置后内部在 Rust 层重试,等待 gossip 收敛。 Returns: ActorRefView: Ref-like object with .as_any() for untyped proxy. @@ -1509,6 +1593,9 @@ async def resolve( proxy = ref.as_any() await proxy.send_text(chat_id, content) + # 等待名字出现(gossip 收敛) + ref = await resolve("peer_node", timeout=30) + # Low-level ask ref = await resolve("my_counter") result = await ref.ask({"__call__": "increment", "args": [], "kwargs": {}}) @@ -1519,7 +1606,7 @@ async def resolve( raise RuntimeError("Actor system not initialized. Call 'await init()' first.") try: - ref = await _global_system.resolve(name, node_id=node_id) + ref = await _global_system.resolve(name, node_id=node_id, timeout=timeout) return ActorRefView(ref) except RuntimeError as e: raise _convert_rust_error(e) from e @@ -1544,6 +1631,122 @@ def as_any(ref: ActorRef | ActorRefView) -> ActorProxy: return ActorProxy(ref, method_names=None, async_methods=None) +def mount(instance: Any, *, name: str, public: bool = True) -> None: + """将已有 Python 对象挂载到 Pulsing 通信网络。 + + 同步接口,可在 ``__init__`` 中调用。内部自动完成: + 1. 初始化 Pulsing(如果当前进程还没有,自动检测 Ray 环境) + 2. 将 instance 包装为 Pulsing actor + 3. 注册到 Pulsing 网络,其他节点可通过 ``pul.resolve(name)`` 发现 + + Args: + instance: 要挂载的对象(任意 Python 实例) + name: Pulsing 名称,其他节点通过此名字 resolve + public: 是否可被集群其他节点发现(默认 True) + + Example:: + + @ray.remote + class Counter: + def __init__(self, name, peers): + self.name = name + self.peers = sorted(peers) + pul.mount(self, name=name) + + async def greet(self, msg): + return f"Hello from {self.name}: {msg}" + """ + from . import _global_system + + # 自动初始化 Pulsing + if _global_system is None: + _auto_init_pulsing() + + from . import _global_system as system + + if system is None: + raise RuntimeError( + "Pulsing 初始化失败。请确保已调用 pul.init() 或在 Ray 环境中运行。" + ) + + actor_name = name if "/" in name else f"actors/{name}" + wrapped = _WrappedActor(instance) + + async def _do_mount(): + ref = await system.spawn(wrapped, name=actor_name, public=public) + return ref + + actor_ref = _run_sync_on_pulsing_loop(_do_mount()) + wrapped._inject_delayed(actor_ref) + _register_actor_metadata(actor_name, type(instance)) + + +def unmount(name: str) -> None: + """从 Pulsing 网络卸载一个已挂载的 actor。 + + Args: + name: 挂载时使用的名称 + """ + from . import _global_system + + if _global_system is None: + return + + actor_name = name if "/" in name else f"actors/{name}" + + async def _do_unmount(): + await _global_system.stop(actor_name) + + _run_sync_on_pulsing_loop(_do_unmount()) + + +def _auto_init_pulsing(): + """自动检测环境并初始化 Pulsing。""" + try: + import ray + + if ray.is_initialized(): + from pulsing.ray import init_in_ray + + init_in_ray() + return + except ImportError: + pass + + raise RuntimeError( + "Pulsing 未初始化。请先调用 await pul.init() 或确保在 Ray 环境中运行。" + ) + + +def _run_sync_on_pulsing_loop(coro): + """在 Pulsing 的后台事件循环上同步执行协程。""" + import asyncio + import concurrent.futures + + # 尝试使用 pulsing.ray 的后台 loop(Ray 环境) + try: + from pulsing.ray import _loop + + if _loop is not None: + fut = asyncio.run_coroutine_threadsafe(coro, _loop) + return fut.result(timeout=30) + except ImportError: + pass + + # 非 Ray 环境:尝试在当前线程创建新 loop + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = None + + if loop is None: + return asyncio.run(coro) + + # 已有 running loop(比如 async context),在新线程运行 + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: + return pool.submit(asyncio.run, coro).result(timeout=30) + + RemoteClass = ActorClass # Keep old name as alias (backward compatibility) SystemActor = PythonActorService diff --git a/python/pulsing/cli/__main__.py b/python/pulsing/cli/__main__.py index 1b32f4514..6529ffe59 100644 --- a/python/pulsing/cli/__main__.py +++ b/python/pulsing/cli/__main__.py @@ -257,7 +257,67 @@ def bench( ) +@hp.param("examples") +def examples(name: str | None = None): + """ + 列出或查看 Pulsing 内置示例。 + + 不带参数时列出所有可用示例;指定名称时显示该示例的详细说明、 + 运行方式和源码路径。 + + Args: + name: 示例名称(可选)。留空则列出所有示例。 + + Examples: + # 列出所有示例 + pulsing examples + + # 查看某个示例的详情 + pulsing examples counting_game + """ + from pulsing.examples import get_example_detail, list_examples + + if name is None: + all_examples = list_examples() + if not all_examples: + print("暂无可用示例。") + return + print("可用示例:\n") + max_name_len = max(len(n) for n, _, _ in all_examples) + for n, summary, filepath in all_examples: + print(f" {n:<{max_name_len}} {summary}") + print("\n使用 'pulsing examples <名称>' 查看详情。") + return + + detail = get_example_detail(name) + if detail is None: + print(f"未知示例: '{name}'") + print("使用 'pulsing examples' 查看所有可用示例。") + return + + summary, docstring, filepath = detail + print(f"{'=' * 60}") + print(f" {summary}") + print(f"{'=' * 60}\n") + if docstring: + print(docstring) + print() + print(f"源码路径:\n {filepath}\n") + print(f"快速运行:\n python -m pulsing.examples.{name}") + + def main(): + import sys + + # 让 `pulsing examples ` 以位置参数方式工作 + # hp 框架把有默认值的参数当 --name 选项,这里做一层转换 + if ( + len(sys.argv) >= 3 + and sys.argv[1] == "examples" + and not sys.argv[2].startswith("-") + ): + sys.argv = [sys.argv[0], "examples", "--name", sys.argv[2]] + sys.argv[3:] + hp.launch() diff --git a/python/pulsing/examples/__init__.py b/python/pulsing/examples/__init__.py new file mode 100644 index 000000000..662418015 --- /dev/null +++ b/python/pulsing/examples/__init__.py @@ -0,0 +1,33 @@ +""" +pulsing.examples — Pulsing 内置示例集 + +每个子模块都是一个可独立运行的示例,同时也可被测试导入复用。 +""" + +import importlib +import inspect +from pathlib import Path + +# 注册所有 example:模块名 → 一句话摘要 +_EXAMPLES = { + "counting_game": "Pulsing + Ray 分布式报数游戏", +} + + +def list_examples(): + """返回 [(name, summary, module_path)] 列表""" + result = [] + examples_dir = Path(__file__).parent + for name, summary in _EXAMPLES.items(): + filepath = examples_dir / f"{name}.py" + result.append((name, summary, str(filepath))) + return result + + +def get_example_detail(name): + """返回 (summary, docstring, filepath),找不到则返回 None""" + if name not in _EXAMPLES: + return None + mod = importlib.import_module(f"pulsing.examples.{name}") + filepath = inspect.getfile(mod) + return (_EXAMPLES[name], (mod.__doc__ or "").strip(), filepath) diff --git a/python/pulsing/examples/counting_game.py b/python/pulsing/examples/counting_game.py new file mode 100644 index 000000000..b4bea4cbb --- /dev/null +++ b/python/pulsing/examples/counting_game.py @@ -0,0 +1,121 @@ +"""Counting Game — Pulsing 分布式报数游戏 + +20 个节点依次报数并广播,演示 Pulsing 的 actor 消息通信能力。 +Ray 仅用于启动多进程,报数逻辑完全由 Pulsing actor 完成。 + +运行: + python -m pulsing.examples.counting_game + python -m pulsing.examples.counting_game --num-workers 10 +""" + +import os +import time + +import ray + +import pulsing as pul + + +# ── 报数 Actor ─────────────────────────────────────────── + + +@ray.remote +class Counter: + """每个节点持有名字、有序节点列表、报数日志。""" + + def __init__(self, name, peers): + self.name = name + self.peers = sorted(peers) + self.log = [] + pul.mount(self, name=name) # 一行接入 Pulsing 网络 + + async def yield_number(self): + """报数:广播自己的编号给所有节点""" + num = self.peers.index(self.name) + 1 + for peer in self.peers: + proxy = (await pul.resolve(peer, timeout=30)).as_type(Counter) + await proxy.on_number(num, self.name) + + async def on_number(self, num, from_who): + """收到报数:记录,前序节点报完则接力""" + self.log.append({"number": num, "from": from_who}) + idx = self.peers.index(self.name) + if idx > 0 and from_who == self.peers[idx - 1]: + await self.yield_number() + + def get_pid(self): + return os.getpid() + + def get_log(self): + return list(self.log) + + +# ── 运行 ───────────────────────────────────────────────── + + +def run(num_workers=20): + """运行报数游戏(需要 Ray 已初始化)。返回各节点日志,失败抛异常。""" + names = [f"node_{i:02d}" for i in range(num_workers)] + t0 = time.time() + + # 1) 创建 Ray actor(__init__ 中自动 pul.mount 接入 Pulsing) + print(f"[counting_game] 启动 {num_workers} 个节点 ...") + actors = [Counter.remote(name, names) for name in names] + pids = ray.get([a.get_pid.remote() for a in actors]) + assert len(set(pids)) == num_workers, "worker 进程数不足" + print(f"[counting_game] {num_workers} 节点就绪 ({time.time()-t0:.1f}s)") + + # 2) node_00 报数 → 自动接力至 node_19 + print("[counting_game] node_00 开始报数 ...") + ray.get(actors[0].yield_number.remote()) + + # 3) 等待所有节点收齐日志 + deadline = time.time() + 30 + while time.time() < deadline: + logs = ray.get([a.get_log.remote() for a in actors]) + done = sum(1 for lg in logs if len(lg) == num_workers) + print(f"\r[counting_game] 收集日志 {done}/{num_workers}", end="", flush=True) + if done == num_workers: + break + time.sleep(0.5) + else: + raise TimeoutError("报数超时") + print() + + # 4) 验证:每条日志的 from 应与报数序号对应 + for entries in logs: + for e in entries: + assert e["from"] == f"node_{e['number']-1:02d}" + + # 5) 打印结果 + order = " → ".join(f"{i+1}:{names[i]}" for i in range(min(5, num_workers))) + if num_workers > 5: + order += f" → ... → {num_workers}:{names[-1]}" + elapsed = time.time() - t0 + print(f"[counting_game] 报数顺序: {order}") + print( + f"[counting_game] 通过! {num_workers}x{num_workers}={num_workers**2} 条消息, {elapsed:.1f}s" + ) + pul.cleanup_ray() + return logs + + +# ── CLI ────────────────────────────────────────────────── + + +def main(): + import argparse + + p = argparse.ArgumentParser(description="Pulsing 分布式报数游戏") + p.add_argument("--num-workers", type=int, default=20) + args = p.parse_args() + + ray.init(num_cpus=args.num_workers + 1) + try: + run(args.num_workers) + finally: + ray.shutdown() + + +if __name__ == "__main__": + main() diff --git a/python/pulsing/ray.py b/python/pulsing/ray.py new file mode 100644 index 000000000..9187910d5 --- /dev/null +++ b/python/pulsing/ray.py @@ -0,0 +1,163 @@ +""" +pulsing.ray - 在 Ray 集群中初始化 Pulsing + +每个 Ray worker 进程调用 init_in_ray() 即可启动 Pulsing 并自动加入集群。 +通过 Ray 的 internal KV store 协调 seed 节点发现。 + +推荐用法: + import ray + from pulsing.ray import init_in_ray + + ray.init(runtime_env={"worker_process_setup_hook": init_in_ray}) + init_in_ray() # driver 进程也需要初始化 +""" + +try: + import ray + from ray.experimental.internal_kv import ( + _internal_kv_get, + _internal_kv_put, + _internal_kv_del, + ) +except ImportError: + raise ImportError( + "pulsing.ray requires Ray. Install with: pip install 'ray[default]'" + ) + +import asyncio +import threading + +_SEED_KEY = "pulsing:seed_addr" + +# 后台事件循环(供 sync init 使用) +_loop = None +_thread = None + + +def _get_node_ip(): + """获取当前 Ray 节点 IP""" + ctx = ray.get_runtime_context() + node_id = ctx.get_node_id() + for node in ray.nodes(): + if node["NodeID"] == node_id and node["Alive"]: + return node["NodeManagerAddress"] + raise RuntimeError("无法获取当前 Ray 节点 IP") + + +def _start_background_loop(): + """启动后台事件循环线程""" + global _loop, _thread + if _thread is not None: + return + + ready = threading.Event() + + def _run(): + global _loop + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + _loop = loop + ready.set() + loop.run_forever() + + _thread = threading.Thread(target=_run, daemon=True, name="pulsing-event-loop") + _thread.start() + ready.wait() + + +def _run_sync(coro): + """在后台事件循环中同步执行协程""" + fut = asyncio.run_coroutine_threadsafe(coro, _loop) + return fut.result(timeout=30) + + +async def _do_init(addr, seeds=None): + from pulsing.actor import init + + return await init(addr=addr, seeds=seeds) + + +async def _do_shutdown(): + from pulsing.actor import shutdown + + await shutdown() + + +def _get_seed(): + """从 Ray KV store 获取 seed 地址""" + data = _internal_kv_get(_SEED_KEY) + return data.decode() if data else None + + +def _try_set_seed(addr): + """原子写入 seed 地址,返回 True 表示写入成功(我是 seed)。 + + _internal_kv_put(overwrite=False) 返回值语义: + False = key 不存在,已写入(成功) + True = key 已存在,未覆盖(失败) + """ + already_exists = _internal_kv_put(_SEED_KEY, addr.encode(), overwrite=False) + return not already_exists + + +def init_in_ray(): + """在当前进程初始化 Pulsing 并加入集群。 + + 可直接调用,也可作为 Ray worker_process_setup_hook: + + ray.init(runtime_env={"worker_process_setup_hook": init_in_ray}) + init_in_ray() # driver 也需要 + """ + if not ray.is_initialized(): + raise RuntimeError("Ray 未初始化,请先调用 ray.init()") + + node_ip = _get_node_ip() + _start_background_loop() + + # 已有 seed → 直接加入 + seed_addr = _get_seed() + if seed_addr is not None: + return _run_sync(_do_init(f"{node_ip}:0", seeds=[seed_addr])) + + # 启动为潜在 seed + system = _run_sync(_do_init(f"{node_ip}:0")) + my_addr = str(system.addr) + + if _try_set_seed(my_addr): + return system # 写入成功,我是 seed + + # 竞争失败(极罕见),重新加入实际 seed + _run_sync(_do_shutdown()) + return _run_sync(_do_init(f"{node_ip}:0", seeds=[_get_seed()])) + + +async def async_init_in_ray(): + """在当前进程初始化 Pulsing 并加入集群(异步版本)。 + + 适用于 async Ray actor。 + """ + if not ray.is_initialized(): + raise RuntimeError("Ray 未初始化,请先调用 ray.init()") + + node_ip = _get_node_ip() + + seed_addr = _get_seed() + if seed_addr is not None: + return await _do_init(f"{node_ip}:0", seeds=[seed_addr]) + + system = await _do_init(f"{node_ip}:0") + my_addr = str(system.addr) + + if _try_set_seed(my_addr): + return system + + await _do_shutdown() + return await _do_init(f"{node_ip}:0", seeds=[_get_seed()]) + + +def cleanup(): + """清理 Pulsing 在 Ray KV store 中的状态""" + _internal_kv_del(_SEED_KEY) + + +__all__ = ["init_in_ray", "async_init_in_ray", "cleanup"] diff --git a/tests/python/test_ray_init.py b/tests/python/test_ray_init.py new file mode 100644 index 000000000..092a73e0b --- /dev/null +++ b/tests/python/test_ray_init.py @@ -0,0 +1,321 @@ +""" +Tests for pulsing.ray - Pulsing initialization in Ray cluster + +Tests: +- init_in_ray() basic behavior +- Seed registration via Ray KV store +- Multi-actor cluster formation +- async_init_in_ray() +- cleanup() +- Error cases +""" + +import pytest + +# Skip all tests if ray is not installed +ray = pytest.importorskip("ray") + + +def _reset_pulsing_state(): + """Reset all Pulsing module state (system, background loop, KV).""" + import pulsing.actor as pa + import pulsing.ray as pr + + # Shutdown Pulsing system via background loop + if pa._global_system is not None and pr._loop is not None: + try: + pr._run_sync(pr._do_shutdown()) + except Exception: + pass + + # Force clear global system (safety net) + pa._global_system = None + + # Stop background event loop + if pr._loop is not None: + try: + pr._loop.call_soon_threadsafe(pr._loop.stop) + except Exception: + pass + if pr._thread is not None: + try: + pr._thread.join(timeout=5) + except Exception: + pass + pr._loop = None + pr._thread = None + + # Clean KV store + try: + pr.cleanup() + except Exception: + pass + + +NUM_WORKERS = 20 + + +@pytest.fixture +def ray_env(): + """Initialize local Ray cluster with clean Pulsing state.""" + ray.init(num_cpus=NUM_WORKERS + 1) + _reset_pulsing_state() # ensure clean state before test + yield + _reset_pulsing_state() # cleanup after test + ray.shutdown() + + +# ============================================================================ +# Test: init_in_ray() basic +# ============================================================================ + + +def test_init_returns_system(ray_env): + """init_in_ray() returns a Pulsing ActorSystem.""" + from pulsing.ray import init_in_ray + + system = init_in_ray() + assert system is not None + assert system.addr is not None + + +def test_init_stores_seed_in_kv(ray_env): + """First caller's address is stored as seed in Ray KV.""" + from pulsing.ray import _get_seed, init_in_ray + + system = init_in_ray() + seed_addr = _get_seed() + assert seed_addr is not None + assert seed_addr == str(system.addr) + + +def test_init_sets_global_system(ray_env): + """init_in_ray() sets pulsing.actor global system.""" + from pulsing.actor import is_initialized + from pulsing.ray import init_in_ray + + assert not is_initialized() + init_in_ray() + assert is_initialized() + + +# ============================================================================ +# Test: error cases +# ============================================================================ + + +def test_init_raises_without_ray(): + """init_in_ray() raises when Ray is not initialized.""" + from pulsing.ray import init_in_ray + + with pytest.raises(RuntimeError, match="Ray 未初始化"): + init_in_ray() + + +async def test_async_init_raises_without_ray(): + """async_init_in_ray() raises when Ray is not initialized.""" + from pulsing.ray import async_init_in_ray + + with pytest.raises(RuntimeError, match="Ray 未初始化"): + await async_init_in_ray() + + +# ============================================================================ +# Test: cleanup() +# ============================================================================ + + +def test_cleanup_clears_kv(ray_env): + """cleanup() removes seed from KV store.""" + from pulsing.ray import _get_seed, cleanup, init_in_ray + + init_in_ray() + assert _get_seed() is not None + + cleanup() + assert _get_seed() is None + + +# ============================================================================ +# Test: Ray actor integration +# ============================================================================ + + +def test_init_in_ray_actor(ray_env): + """init_in_ray() works inside a Ray actor.""" + + @ray.remote + class Worker: + def setup(self): + from pulsing.ray import init_in_ray + + system = init_in_ray() + return str(system.addr) + + def ping(self): + return "pong" + + worker = Worker.remote() + addr = ray.get(worker.setup.remote()) + assert addr is not None + assert ":" in addr + + result = ray.get(worker.ping.remote()) + assert result == "pong" + + +def test_multi_actor_same_seed(ray_env): + """All workers in separate processes discover the same seed.""" + import os + + from pulsing.ray import _get_seed, init_in_ray + + driver_pid = os.getpid() + + # Driver becomes seed + init_in_ray() + seed_addr = _get_seed() + + @ray.remote + class Worker: + def setup(self): + import os + + from pulsing.ray import init_in_ray + + init_in_ray() + return os.getpid() + + def get_seed(self): + from pulsing.ray import _get_seed + + return _get_seed() + + workers = [Worker.remote() for _ in range(NUM_WORKERS)] + pids = ray.get([w.setup.remote() for w in workers]) + + # Verify multi-process: all PIDs different from driver + assert all( + pid != driver_pid for pid in pids + ), "Workers should run in separate processes" + + # Verify multi-process: workers are in distinct processes + unique_pids = set(pids) + assert ( + len(unique_pids) == NUM_WORKERS + ), f"Expected {NUM_WORKERS} distinct processes, got {len(unique_pids)}" + + # All workers see the same seed + seeds = ray.get([w.get_seed.remote() for w in workers]) + assert all(s == seed_addr for s in seeds) + + +def test_concurrent_init_without_driver(ray_env): + """20 processes concurrently call init_in_ray(), exactly one becomes seed.""" + import os + + @ray.remote + class Worker: + def setup(self): + import os + + from pulsing.ray import init_in_ray + + system = init_in_ray() + return os.getpid(), str(system.addr) + + def get_seed(self): + from pulsing.ray import _get_seed + + return _get_seed() + + # Launch all workers at once — they race to become seed + workers = [Worker.remote() for _ in range(NUM_WORKERS)] + results = ray.get([w.setup.remote() for w in workers]) + pids = [r[0] for r in results] + addrs = [r[1] for r in results] + + # Verify multi-process: all workers in distinct processes + unique_pids = set(pids) + assert ( + len(unique_pids) == NUM_WORKERS + ), f"Expected {NUM_WORKERS} distinct processes, got {len(unique_pids)}" + # None should be the driver + assert os.getpid() not in unique_pids + + # All workers got a valid address + assert len(addrs) == NUM_WORKERS + assert all(a and ":" in a for a in addrs) + + # All workers see the same seed + seeds = ray.get([w.get_seed.remote() for w in workers]) + unique_seeds = set(seeds) + assert ( + len(unique_seeds) == 1 + ), f"Expected 1 seed, got {len(unique_seeds)}: {unique_seeds}" + + # The seed must be one of the workers' addresses + assert unique_seeds.pop() in addrs + + +def test_actor_becomes_seed_without_driver(ray_env): + """When driver doesn't init, first actor becomes seed.""" + + @ray.remote + class Worker: + def setup(self): + from pulsing.ray import init_in_ray + + system = init_in_ray() + return str(system.addr) + + def get_seed(self): + from pulsing.ray import _get_seed + + return _get_seed() + + # First actor becomes seed + w1 = Worker.remote() + addr1 = ray.get(w1.setup.remote()) + seed = ray.get(w1.get_seed.remote()) + assert seed == addr1 + + # Second actor joins + w2 = Worker.remote() + ray.get(w2.setup.remote()) + seed2 = ray.get(w2.get_seed.remote()) + assert seed2 == seed + + +# ============================================================================ +# Test: async_init_in_ray() +# ============================================================================ + + +async def test_async_init_returns_system(ray_env): + """async_init_in_ray() returns a system.""" + from pulsing.ray import async_init_in_ray + + system = await async_init_in_ray() + assert system is not None + assert system.addr is not None + + +async def test_async_init_stores_seed(ray_env): + """async_init_in_ray() stores seed in KV.""" + from pulsing.ray import _get_seed, async_init_in_ray + + system = await async_init_in_ray() + assert _get_seed() == str(system.addr) + + +# ============================================================================ +# Test: counting game (end-to-end Pulsing messaging across Ray workers) +# ============================================================================ + + +def test_counting_game(ray_env): + """20 个进程通过 Pulsing actor 玩报数游戏(复用 pulsing.examples)。""" + from pulsing.examples.counting_game import run + + run(num_workers=NUM_WORKERS) From a31671779e1e70c5682682e65631645321792166 Mon Sep 17 00:00:00 2001 From: Reiase Date: Sun, 15 Feb 2026 18:12:07 +0800 Subject: [PATCH 02/15] Refactor Pulsing Python API and documentation - Streamlined the Python API by consolidating actor initialization and lifecycle management, emphasizing the use of `await pul.init()` and `await pul.shutdown()`. - Updated examples and documentation to reflect the new API structure, enhancing clarity and usability for users transitioning from Ray. - Introduced typed and untyped proxy methods (`as_type` and `as_any`) for better actor reference handling. - Removed deprecated Ray-compatible API references, focusing on the native async API for improved performance and simplicity. - Enhanced examples to demonstrate the new features and best practices in actor management and communication. --- crates/pulsing-py/src/actor.rs | 20 ++ docs/src/agent/native.md | 36 ++- docs/src/agent/native.zh.md | 36 ++- docs/src/api/overview.md | 21 +- docs/src/api/overview.zh.md | 21 +- docs/src/api_reference.md | 50 ++-- docs/src/api_reference.zh.md | 50 ++-- docs/src/guide/actors.md | 40 +-- docs/src/guide/actors.zh.md | 40 +-- docs/src/guide/remote_actors.md | 7 + docs/src/guide/remote_actors.zh.md | 7 + docs/src/quickstart/migrate_from_ray.md | 148 +++------ docs/src/quickstart/migrate_from_ray.zh.md | 152 +++------- examples/agent/pulsing/mbti_discussion.py | 52 ++-- .../agent/pulsing/parallel_ideas_async.py | 19 +- .../pulsing/runtime_lifecycle_example.py | 68 ++--- examples/inspect/demo_service.py | 98 +++--- examples/python/cluster.py | 113 ++++--- examples/python/distributed_queue.py | 8 +- examples/python/message_patterns.py | 86 ++---- examples/python/named_actors.py | 38 +-- examples/python/native_async_example.py | 19 +- examples/python/ping_pong.py | 23 +- examples/python/ray_compat_example.py | 131 -------- examples/python/remote_actor_example.py | 12 +- examples/python/sync_queue_example.py | 8 +- examples/quickstart/ai_chat_room.py | 12 +- examples/quickstart/chaos_proof.py | 10 +- examples/quickstart/function_to_fleet.py | 10 +- examples/quickstart/hello_agent.py | 10 +- llms.binding.md | 283 +++++++----------- python/pulsing/__init__.py | 2 - python/pulsing/actor/__init__.py | 2 - python/pulsing/actor/remote.py | 77 ++--- .../python/apis/actor/test_actor_behavior.py | 64 ++++ tests/python/test_resolve_as_any.py | 91 +++++- 36 files changed, 745 insertions(+), 1119 deletions(-) delete mode 100644 examples/python/ray_compat_example.py diff --git a/crates/pulsing-py/src/actor.rs b/crates/pulsing-py/src/actor.rs index 72a8997ec..13c3dc130 100644 --- a/crates/pulsing-py/src/actor.rs +++ b/crates/pulsing-py/src/actor.rs @@ -713,6 +713,26 @@ impl PyActorRef { }) } + /// Return an untyped proxy that forwards any method call to the remote actor. + fn as_any(&self, py: Python<'_>) -> PyResult { + let remote = py.import("pulsing.actor.remote")?; + let proxy_cls = remote.getattr("ActorProxy")?; + let proxy = proxy_cls.call1((self.clone(), py.None(), py.None()))?; + Ok(proxy.unbind()) + } + + /// Return a typed proxy based on the given class definition. + fn as_type(&self, py: Python<'_>, cls: PyObject) -> PyResult { + let remote = py.import("pulsing.actor.remote")?; + let extract_fn = remote.getattr("_extract_methods")?; + let result = extract_fn.call1((&cls,))?; + let methods = result.get_item(0)?; + let async_methods = result.get_item(1)?; + let proxy_cls = remote.getattr("ActorProxy")?; + let proxy = proxy_cls.call1((self.clone(), methods, async_methods))?; + Ok(proxy.unbind()) + } + fn __repr__(&self) -> String { format!( "ActorRef(id={}, local={})", diff --git a/docs/src/agent/native.md b/docs/src/agent/native.md index c37cb9c8b..faade31d5 100644 --- a/docs/src/agent/native.md +++ b/docs/src/agent/native.md @@ -18,7 +18,7 @@ The `@agent` decorator is equivalent to `@remote`, but attaches metadata for vis ```python import pulsing as pul -from pulsing.agent import agent, runtime, llm, get_agent_meta, list_agents +from pulsing.agent import agent, llm, get_agent_meta, list_agents # @pul.remote: Basic Actor @pul.remote @@ -38,7 +38,8 @@ class Researcher: ### Metadata Access ```python -async with runtime(): +await pul.init() +try: r = await Researcher.spawn(name="researcher") # Get metadata by name @@ -50,6 +51,8 @@ async with runtime(): # List all agents for name, meta in list_agents().items(): print(f"{name}: {meta.role}") +finally: + await pul.shutdown() ``` ### `@pul.remote` vs `@agent` @@ -64,28 +67,36 @@ async with runtime(): ## Runtime Management ```python -from pulsing.agent import runtime, cleanup +import pulsing as pul -async with runtime(): +await pul.init() +try: # Create and use agents agent = await MyAgent.spawn(name="agent") await agent.work() - -# Optional: cleanup global state -cleanup() +finally: + await pul.shutdown() ``` +`runtime()` is still available as a convenience context manager, but the primary recommended style is explicit `await pul.init()` / `await pul.shutdown()`. + ### Distributed Mode ```python # Node A -async with runtime(addr="0.0.0.0:8001"): +await pul.init(addr="0.0.0.0:8001") +try: await JudgeActor.spawn(name="judge") +finally: + await pul.shutdown() # Node B (auto-discovers Node A) -async with runtime(addr="0.0.0.0:8002", seeds=["node_a:8001"]): +await pul.init(addr="0.0.0.0:8002", seeds=["node_a:8001"]) +try: judge = await JudgeActor.resolve("judge") # Cross-node transparent call await judge.submit(idea) +finally: + await pul.shutdown() ``` ## LLM Integration @@ -125,7 +136,7 @@ value = extract_field(response, "answer", default="unknown") ```python import asyncio import pulsing as pul -from pulsing.agent import agent, runtime, llm, parse_json, get_agent_meta, list_agents +from pulsing.agent import agent, llm, parse_json, get_agent_meta, list_agents @pul.remote class Moderator: @@ -165,7 +176,8 @@ class Analyst: return opinion async def main(): - async with runtime(): + await pul.init() + try: # Create moderator moderator = await Moderator.spawn(topic="AI Trends", name="moderator") @@ -192,6 +204,8 @@ async def main(): # Get summary result = await moderator.summarize() print(f"Summary: {result}") + finally: + await pul.shutdown() if __name__ == "__main__": asyncio.run(main()) diff --git a/docs/src/agent/native.zh.md b/docs/src/agent/native.zh.md index 1f7ecee77..655b7b5fb 100644 --- a/docs/src/agent/native.zh.md +++ b/docs/src/agent/native.zh.md @@ -18,7 +18,7 @@ Pulsing 提供轻量的原生 Agent 工具箱,用于构建多智能体应用 ```python import pulsing as pul -from pulsing.agent import agent, runtime, llm, get_agent_meta, list_agents +from pulsing.agent import agent, llm, get_agent_meta, list_agents # @pul.remote: 基础 Actor @pul.remote @@ -38,7 +38,8 @@ class Researcher: ### 元信息访问 ```python -async with runtime(): +await pul.init() +try: r = await Researcher.spawn(name="researcher") # 通过名称获取元信息 @@ -50,6 +51,8 @@ async with runtime(): # 列出所有 Agent for name, meta in list_agents().items(): print(f"{name}: {meta.role}") +finally: + await pul.shutdown() ``` ### `@pul.remote` vs `@agent` @@ -64,28 +67,36 @@ async with runtime(): ## 运行时管理 ```python -from pulsing.agent import runtime, cleanup +import pulsing as pul -async with runtime(): +await pul.init() +try: # 创建和使用 Agent agent = await MyAgent.spawn(name="agent") await agent.work() - -# 可选:清理全局状态 -cleanup() +finally: + await pul.shutdown() ``` +`runtime()` 仍可作为便捷 context manager 使用,但推荐主路径是显式 `await pul.init()` / `await pul.shutdown()`。 + ### 分布式模式 ```python # 节点 A -async with runtime(addr="0.0.0.0:8001"): +await pul.init(addr="0.0.0.0:8001") +try: await JudgeActor.spawn(name="judge") +finally: + await pul.shutdown() # 节点 B(自动发现节点 A) -async with runtime(addr="0.0.0.0:8002", seeds=["node_a:8001"]): +await pul.init(addr="0.0.0.0:8002", seeds=["node_a:8001"]) +try: judge = await JudgeActor.resolve("judge") # 跨节点透明调用 await judge.submit(idea) +finally: + await pul.shutdown() ``` ## LLM 集成 @@ -125,7 +136,7 @@ value = extract_field(response, "answer", default="unknown") ```python import asyncio import pulsing as pul -from pulsing.agent import agent, runtime, llm, parse_json, list_agents +from pulsing.agent import agent, llm, parse_json, list_agents @pul.remote class Moderator: @@ -165,7 +176,8 @@ class Analyst: return opinion async def main(): - async with runtime(): + await pul.init() + try: # 创建协调者 moderator = await Moderator.spawn(topic="AI 趋势", name="moderator") @@ -192,6 +204,8 @@ async def main(): # 获取总结 result = await moderator.summarize() print(f"总结: {result}") + finally: + await pul.shutdown() if __name__ == "__main__": asyncio.run(main()) diff --git a/docs/src/api/overview.md b/docs/src/api/overview.md index 16ce1b1f5..6816b30dc 100644 --- a/docs/src/api/overview.md +++ b/docs/src/api/overview.md @@ -61,25 +61,6 @@ response = await actor.ask({"message": "hello"}) await pul.shutdown() ``` -#### 3. Ray-Compatible API (Migration) - -```python -from pulsing.compat import ray - -# Ray-compatible API for easy migration -ray.init(address="0.0.0.0:8000") - -@ray.remote -class MyActor: - def process(self, data): - return f"Processed: {data}" - -actor = MyActor.remote() -result = ray.get(actor.process.remote("hello")) - -ray.shutdown() -``` - ### Actor Patterns #### Remote Decorator (Recommended) @@ -174,7 +155,7 @@ class ResilientWorker: return risky_computation(data) ``` -### Distributed Queues +### Under the Hood: Distributed Queues Pulsing includes a distributed queue system for data pipelines: diff --git a/docs/src/api/overview.zh.md b/docs/src/api/overview.zh.md index ac5134537..e6c943588 100644 --- a/docs/src/api/overview.zh.md +++ b/docs/src/api/overview.zh.md @@ -61,25 +61,6 @@ response = await actor.ask({"message": "hello"}) await pul.shutdown() ``` -#### 3. Ray 兼容 API(迁移) - -```python -from pulsing.compat import ray - -# Ray 兼容 API,方便迁移 -ray.init(address="0.0.0.0:8000") - -@ray.remote -class MyActor: - def process(self, data): - return f"Processed: {data}" - -actor = MyActor.remote() -result = ray.get(actor.process.remote("hello")) - -ray.shutdown() -``` - ### Actor 模式 #### Remote 装饰器(推荐) @@ -174,7 +155,7 @@ class ResilientWorker: return risky_computation(data) ``` -### 分布式队列 +### Under the Hood:分布式队列 Pulsing 包含分布式队列系统,用于数据管道: diff --git a/docs/src/api_reference.md b/docs/src/api_reference.md index 07a28c654..78226dcb2 100644 --- a/docs/src/api_reference.md +++ b/docs/src/api_reference.md @@ -174,23 +174,26 @@ if __name__ == "__main__": ### pul.init / pul.shutdown -Global system initialization (Ray-style async API). +Global system initialization (recommended primary API). ```python import asyncio import pulsing as pul -class MyActor: - async def receive(self, msg): - return f"echo: {msg}" - async def main(): # Initialize global system await pul.init(addr=None, seeds=None, passphrase=None) # Use global system - actor = await pul.spawn(MyActor()) - ref = await pul.resolve("actor_name") + @pul.remote + class Counter: + def __init__(self): self.value = 0 + def incr(self): self.value += 1; return self.value + + counter = await Counter.spawn(name="counter") + ref = await pul.resolve("counter") + proxy = ref.as_type(Counter) + await proxy.incr() # Shutdown await pul.shutdown() @@ -262,6 +265,14 @@ class ActorRef: async def tell(self, msg): """Send a message without waiting for response (fire-and-forget).""" pass + + def as_any(self): + """Get untyped ActorProxy when remote class is unknown.""" + pass + + def as_type(self, cls): + """Get typed ActorProxy bound to class metadata.""" + pass ``` ### ActorProxy @@ -336,7 +347,7 @@ class ResilientWorker: def work(self, data): ... ``` -## Base Actor +## Under the Hood: Base Actor For low-level control, inherit from Actor base class. @@ -383,29 +394,6 @@ reader = await system.queue.read("my_queue") records = await reader.get(limit=100) ``` -## Ray Compatibility - -Drop-in replacement for Ray. - -```python -from pulsing.compat import ray - -ray.init() - -@ray.remote -class Counter: - def __init__(self): - self.value = 0 - def incr(self): - self.value += 1 - return self.value - -counter = Counter.remote() -result = ray.get(counter.incr.remote()) - -ray.shutdown() -``` - ## Rust API The Rust API is organized into three trait layers (all re-exported in `pulsing_actor::prelude::*`): diff --git a/docs/src/api_reference.zh.md b/docs/src/api_reference.zh.md index 036f9a9ae..2ce23112b 100644 --- a/docs/src/api_reference.zh.md +++ b/docs/src/api_reference.zh.md @@ -175,23 +175,26 @@ if __name__ == "__main__": ### pul.init / pul.shutdown -全局系统初始化(Ray 风格异步 API)。 +全局系统初始化(推荐主 API)。 ```python import asyncio import pulsing as pul -class MyActor: - async def receive(self, msg): - return f"echo: {msg}" - async def main(): # 初始化全局系统 await pul.init(addr=None, seeds=None, passphrase=None) # 使用全局系统 - actor = await pul.spawn(MyActor()) - ref = await pul.resolve("actor_name") + @pul.remote + class Counter: + def __init__(self): self.value = 0 + def incr(self): self.value += 1; return self.value + + counter = await Counter.spawn(name="counter") + ref = await pul.resolve("counter") + proxy = ref.as_type(Counter) + await proxy.incr() # 关闭 await pul.shutdown() @@ -285,6 +288,14 @@ class ActorRef: async def tell(self, msg): """发送消息但不等待响应(fire-and-forget)。""" pass + + def as_any(self): + """在未知远端类型时,返回无类型 ActorProxy。""" + pass + + def as_type(self, cls): + """绑定类元数据并返回有类型 ActorProxy。""" + pass ``` ### ActorProxy @@ -359,7 +370,7 @@ class ResilientWorker: def work(self, data): ... ``` -## 基础 Actor +## Under the Hood:基础 Actor 需要底层控制时,可使用基础 Actor 类。 @@ -406,29 +417,6 @@ reader = await system.queue.read("my_queue") records = await reader.get(limit=100) ``` -## Ray 兼容 - -Ray 的直接替换。 - -```python -from pulsing.compat import ray - -ray.init() - -@ray.remote -class Counter: - def __init__(self): - self.value = 0 - def incr(self): - self.value += 1 - return self.value - -counter = Counter.remote() -result = ray.get(counter.incr.remote()) - -ray.shutdown() -``` - ## Rust API Rust API 通过三层 trait 组织(均在 `pulsing_actor::prelude::*` 中 re-export): diff --git a/docs/src/guide/actors.md b/docs/src/guide/actors.md index 02d688c9b..0dda9b5a6 100644 --- a/docs/src/guide/actors.md +++ b/docs/src/guide/actors.md @@ -62,7 +62,6 @@ Pulsing follows the **classical Actor model** (like Erlang/Akka): | API | Import | Style | Best For | |-----|--------|-------|----------| | **Native Async** | `import pulsing as pul` | `async/await` | New projects, maximum performance | -| **Ray-Compatible** | `from pulsing.compat import ray` | Synchronous | Migrating from Ray, quick prototyping | ### Native Async API (Recommended) @@ -85,34 +84,6 @@ async def main(): await pul.shutdown() ``` -### Ray-Compatible API - -```python -from pulsing.compat import ray - -ray.init() - -@ray.remote -class Calculator: - def __init__(self, initial_value: int = 0): - self.value = initial_value - - def add(self, n: int) -> int: - self.value += n - return self.value - -calc = Calculator.remote(initial_value=100) -result = ray.get(calc.add.remote(50)) # 150 -ray.shutdown() -``` - -**Migration from Ray** — just change the import: - -```python -# Before: import ray -# After: from pulsing.compat import ray -``` - --- ## Message Patterns @@ -353,14 +324,7 @@ class ResilientActor: ```python import pulsing as pul -# Create system -system = await pul.actor_system() - -# Spawn named actor (discoverable via resolve) -actor = await system.spawn(MyActor(), name="my_actor") - -# Call method -result = await actor.ask({"action": "do_something"}) +await pul.init() # Using @pul.remote decorator (recommended) @pul.remote @@ -374,7 +338,7 @@ result = await service.process("hello") proxy = await MyService.resolve("service") # Shutdown -await system.shutdown() +await pul.shutdown() ``` --- diff --git a/docs/src/guide/actors.zh.md b/docs/src/guide/actors.zh.md index 84ea1e1eb..62ad1434e 100644 --- a/docs/src/guide/actors.zh.md +++ b/docs/src/guide/actors.zh.md @@ -62,7 +62,6 @@ Pulsing 遵循**经典 Actor 模型**(类似 Erlang/Akka): | API | 导入方式 | 风格 | 适用场景 | |-----|---------|------|----------| | **原生异步** | `import pulsing as pul` | `async/await` | 新项目,追求极致性能 | -| **Ray 兼容** | `from pulsing.compat import ray` | 同步调用 | 从 Ray 迁移,快速原型 | ### 原生异步 API(推荐) @@ -85,34 +84,6 @@ async def main(): await pul.shutdown() ``` -### Ray 兼容 API - -```python -from pulsing.compat import ray - -ray.init() - -@ray.remote -class Calculator: - def __init__(self, initial_value: int = 0): - self.value = initial_value - - def add(self, n: int) -> int: - self.value += n - return self.value - -calc = Calculator.remote(initial_value=100) -result = ray.get(calc.add.remote(50)) # 150 -ray.shutdown() -``` - -**从 Ray 迁移** — 只需修改导入: - -```python -# 之前: import ray -# 之后: from pulsing.compat import ray -``` - --- ## 消息模式 @@ -353,14 +324,7 @@ class ResilientActor: ```python import pulsing as pul -# 创建系统 -system = await pul.actor_system() - -# 生成命名 actor(可通过 resolve 发现) -actor = await system.spawn(MyActor(), name="my_actor") - -# 调用方法 -result = await actor.ask({"action": "do_something"}) +await pul.init() # 使用 @remote 装饰器(推荐) @pul.remote @@ -374,7 +338,7 @@ result = await service.process("hello") proxy = await MyService.resolve("service") # 关闭 -await system.shutdown() +await pul.shutdown() ``` --- diff --git a/docs/src/guide/remote_actors.md b/docs/src/guide/remote_actors.md index cb1be0f8b..c005ccba5 100644 --- a/docs/src/guide/remote_actors.md +++ b/docs/src/guide/remote_actors.md @@ -37,6 +37,10 @@ await asyncio.sleep(1.0) # Find actor by name (searches entire cluster) remote_ref = await system.resolve("worker") response = await remote_ref.ask({"action": "process", "data": "hello"}) + +# Convert ActorRef to proxy +any_proxy = remote_ref.as_any() # Unspecified/unknown type +typed_proxy = remote_ref.as_type(Worker) # Typed proxy when class is known ``` ### Using @remote Class.resolve() @@ -51,6 +55,9 @@ worker = await Worker.resolve("worker") result = await worker.process("hello") # Direct method call ``` +!!! note + For new code, prefer `Class.resolve(name)` (typed proxy). Use `system.resolve(name)` when you only have a runtime name and then call `.as_type()` / `.as_any()` on the returned `ActorRef`. + ## Named vs Anonymous Actors ### Named Actors (Discoverable) diff --git a/docs/src/guide/remote_actors.zh.md b/docs/src/guide/remote_actors.zh.md index 6eb1b8cc9..d674221d5 100644 --- a/docs/src/guide/remote_actors.zh.md +++ b/docs/src/guide/remote_actors.zh.md @@ -37,6 +37,10 @@ await asyncio.sleep(1.0) # 按名称查找 actor(搜索整个集群) remote_ref = await system.resolve("worker") response = await remote_ref.ask({"action": "process", "data": "hello"}) + +# 将 ActorRef 转换为代理 +any_proxy = remote_ref.as_any() # 未知类型时使用 +typed_proxy = remote_ref.as_type(Worker) # 已知类型时使用 ``` ### 使用 @remote 类的 resolve() @@ -51,6 +55,9 @@ worker = await Worker.resolve("worker") result = await worker.process("hello") # 直接调用方法 ``` +!!! note + 新代码优先使用 `Class.resolve(name)`(typed proxy)。仅在只有运行时名称时使用 `system.resolve(name)`,随后对返回的 `ActorRef` 调用 `.as_type()` / `.as_any()`。 + ## 命名 vs 匿名 Actor ### 命名 Actor(可发现) diff --git a/docs/src/quickstart/migrate_from_ray.md b/docs/src/quickstart/migrate_from_ray.md index df909833b..71f0e100c 100644 --- a/docs/src/quickstart/migrate_from_ray.md +++ b/docs/src/quickstart/migrate_from_ray.md @@ -1,36 +1,35 @@ # Tutorial: Migrate from Ray -Replace Ray with Pulsing in **5 minutes**. One import change, zero external dependencies. +Migrate Ray actor code to Pulsing's native async API. --- -## Why Migrate? +## Why This Migration Changed -| | Ray | Pulsing | -|---|-----|---------| -| **Dependencies** | Ray cluster, Redis, GCS | None | -| **Startup time** | Seconds | Milliseconds | -| **Memory overhead** | High | Low | -| **Actor model** | Stateful remote objects | Classical (mailbox, FIFO) | -| **Streaming** | Manual | Native | +This project no longer recommends the Ray-compatible layer (`pulsing.compat.ray`). +Use Pulsing's primary API directly: ---- - -## Step 1: Change the Import +- `import pulsing as pul` +- `@pul.remote` +- `await pul.init()` / `await pul.shutdown()` +- `await Class.spawn()` / `await Class.resolve()` -```python -# Before (Ray) -import ray +--- -# After (Pulsing) -from pulsing.compat import ray -``` +## API Mapping (Ray -> Pulsing) -**That's it.** Your existing code works. +| Ray | Pulsing | +|---|---| +| `ray.init()` | `await pul.init()` | +| `ray.shutdown()` | `await pul.shutdown()` | +| `@ray.remote` | `@pul.remote` | +| `Actor.remote(args...)` | `await Actor.spawn(args...)` | +| `ray.get(actor.method.remote(args...))` | `await actor.method(args...)` | +| `ray.get_actor(name)` | `await Actor.resolve(name)` or `await pul.resolve(name)` | --- -## Step 2: Run Your Code +## Minimal Example ### Before (Ray) @@ -43,136 +42,69 @@ ray.init() class Counter: def __init__(self): self.value = 0 - def inc(self): self.value += 1 return self.value counter = Counter.remote() -print(ray.get(counter.inc.remote())) # 1 -print(ray.get(counter.inc.remote())) # 2 - +print(ray.get(counter.inc.remote())) ray.shutdown() ``` ### After (Pulsing) ```python -from pulsing.compat import ray # ← only this line changed - -ray.init() +import pulsing as pul -@ray.remote +@pul.remote class Counter: def __init__(self): self.value = 0 - def inc(self): self.value += 1 return self.value -counter = Counter.remote() -print(ray.get(counter.inc.remote())) # 1 -print(ray.get(counter.inc.remote())) # 2 - -ray.shutdown() +async def main(): + await pul.init() + counter = await Counter.spawn(name="counter") + print(await counter.inc()) + await pul.shutdown() ``` --- -## Supported APIs - -| API | Status | -|-----|--------| -| `ray.init()` | ✅ | -| `ray.shutdown()` | ✅ | -| `@ray.remote` (class) | ✅ | -| `@ray.remote` (function) | ✅ | -| `ray.get()` | ✅ | -| `ray.put()` | ✅ | -| `ray.wait()` | ✅ | -| `ActorClass.remote()` | ✅ | -| `actor.method.remote()` | ✅ | - ---- - -## Distributed Mode - -Ray requires a cluster. Pulsing just needs `--addr` and `--seeds`: +## Distributed Mode Mapping ### Node 1 (seed) ```python -from pulsing.compat import ray - -ray.init(address="0.0.0.0:8000") +import pulsing as pul -@ray.remote +@pul.remote class Worker: - def process(self, data): + def process(self, data: str) -> str: return f"processed: {data}" -worker = Worker.remote() -# Keep running... -``` - -### Node 2 (join) - -```python -from pulsing.compat import ray - -ray.init(address="0.0.0.0:8001", seeds=["192.168.1.1:8000"]) - -# Find remote actor -worker = ray.get_actor("Worker") -result = ray.get(worker.process.remote("hello")) +await pul.init(addr="0.0.0.0:8000") +await Worker.spawn(name="worker") ``` ---- - -## Native Async API (Optional) - -For new code, consider the native async API: +### Node 2 (join + resolve) ```python import pulsing as pul -@pul.remote -class Counter: - def __init__(self): - self.value = 0 - - def inc(self): - self.value += 1 - return self.value - -async def main(): - await pul.init() - counter = await Counter.spawn() - print(await counter.inc()) # 1 - await pul.shutdown() +await pul.init(addr="0.0.0.0:8001", seeds=["192.168.1.1:8000"]) +worker = await Worker.resolve("worker") +result = await worker.process("hello") ``` -**Benefits:** - -- Cleaner `async/await` syntax -- No `ray.get()` boilerplate -- IDE autocompletion works -- Access to streaming messages - --- -## Limitations - -The Ray-compatible API does not support: - -- Ray Serve -- Ray Tune -- Ray Data -- Object Store (large objects) -- Placement Groups +## Notes -For these features, continue using Ray. Pulsing focuses on the Actor model. +- Prefer typed proxy: `await Class.resolve(name)`. +- If only a runtime name is available: `ref = await pul.resolve(name)` then `ref.as_type(Class)` / `ref.as_any()`. --- diff --git a/docs/src/quickstart/migrate_from_ray.zh.md b/docs/src/quickstart/migrate_from_ray.zh.md index c6083b156..442946642 100644 --- a/docs/src/quickstart/migrate_from_ray.zh.md +++ b/docs/src/quickstart/migrate_from_ray.zh.md @@ -1,38 +1,37 @@ # 教程:从 Ray 迁移 -**5 分钟**内用 Pulsing 替换 Ray。一行导入改动,零外部依赖。 +将 Ray Actor 代码迁移到 Pulsing 原生异步 API。 --- -## 为什么迁移? +## 为什么这篇迁移说明改了 -| | Ray | Pulsing | -|---|-----|---------| -| **依赖** | Ray 集群、Redis、GCS | 无 | -| **启动时间** | 秒级 | 毫秒级 | -| **内存开销** | 高 | 低 | -| **Actor 模型** | 带状态的远程对象 | 经典模型(邮箱、FIFO) | -| **流式消息** | 手动实现 | 原生支持 | +当前项目不再推荐 Ray 兼容层(`pulsing.compat.ray`)。 +请直接使用 Pulsing 主 API: ---- - -## 步骤 1:修改导入 +- `import pulsing as pul` +- `@pul.remote` +- `await pul.init()` / `await pul.shutdown()` +- `await Class.spawn()` / `await Class.resolve()` -```python -# 之前 (Ray) -import ray +--- -# 之后 (Pulsing) -from pulsing.compat import ray -``` +## API 对照表(Ray -> Pulsing) -**完成了。** 现有代码直接可用。 +| Ray | Pulsing | +|---|---| +| `ray.init()` | `await pul.init()` | +| `ray.shutdown()` | `await pul.shutdown()` | +| `@ray.remote` | `@pul.remote` | +| `Actor.remote(args...)` | `await Actor.spawn(args...)` | +| `ray.get(actor.method.remote(args...))` | `await actor.method(args...)` | +| `ray.get_actor(name)` | `await Actor.resolve(name)` 或 `await pul.resolve(name)` | --- -## 步骤 2:运行代码 +## 最小迁移示例 -### 之前 (Ray) +### 之前(Ray) ```python import ray @@ -43,136 +42,69 @@ ray.init() class Counter: def __init__(self): self.value = 0 - def inc(self): self.value += 1 return self.value counter = Counter.remote() -print(ray.get(counter.inc.remote())) # 1 -print(ray.get(counter.inc.remote())) # 2 - +print(ray.get(counter.inc.remote())) ray.shutdown() ``` -### 之后 (Pulsing) +### 之后(Pulsing) ```python -from pulsing.compat import ray # ← 只改了这一行 - -ray.init() +import pulsing as pul -@ray.remote +@pul.remote class Counter: def __init__(self): self.value = 0 - def inc(self): self.value += 1 return self.value -counter = Counter.remote() -print(ray.get(counter.inc.remote())) # 1 -print(ray.get(counter.inc.remote())) # 2 - -ray.shutdown() +async def main(): + await pul.init() + counter = await Counter.spawn(name="counter") + print(await counter.inc()) + await pul.shutdown() ``` --- -## 支持的 API - -| API | 状态 | -|-----|------| -| `ray.init()` | ✅ | -| `ray.shutdown()` | ✅ | -| `@ray.remote` (类) | ✅ | -| `@ray.remote` (函数) | ✅ | -| `ray.get()` | ✅ | -| `ray.put()` | ✅ | -| `ray.wait()` | ✅ | -| `ActorClass.remote()` | ✅ | -| `actor.method.remote()` | ✅ | - ---- - -## 分布式模式 - -Ray 需要集群。Pulsing 只需要 `--addr` 和 `--seeds`: +## 分布式模式对照 ### 节点 1(种子) ```python -from pulsing.compat import ray - -ray.init(address="0.0.0.0:8000") +import pulsing as pul -@ray.remote +@pul.remote class Worker: - def process(self, data): + def process(self, data: str) -> str: return f"processed: {data}" -worker = Worker.remote() -# 保持运行... -``` - -### 节点 2(加入) - -```python -from pulsing.compat import ray - -ray.init(address="0.0.0.0:8001", seeds=["192.168.1.1:8000"]) - -# 查找远程 Actor -worker = ray.get_actor("Worker") -result = ray.get(worker.process.remote("hello")) +await pul.init(addr="0.0.0.0:8000") +await Worker.spawn(name="worker") ``` ---- - -## 原生异步 API(可选) - -新代码建议使用原生异步 API: +### 节点 2(加入 + 解析) ```python import pulsing as pul -@pul.remote -class Counter: - def __init__(self): - self.value = 0 - - def inc(self): - self.value += 1 - return self.value - -async def main(): - await pul.init() - counter = await Counter.spawn() - print(await counter.inc()) # 1 - await pul.shutdown() +await pul.init(addr="0.0.0.0:8001", seeds=["192.168.1.1:8000"]) +worker = await Worker.resolve("worker") +result = await worker.process("hello") ``` -**优势:** - -- 更简洁的 `async/await` 语法 -- 无需 `ray.get()` 样板代码 -- IDE 自动补全正常工作 -- 可使用流式消息 - --- -## 限制 - -Ray 兼容 API 不支持: - -- Ray Serve -- Ray Tune -- Ray Data -- Object Store(大对象) -- Placement Groups +## 说明 -这些功能请继续使用 Ray。Pulsing 专注于 Actor 模型。 +- 优先使用 typed proxy:`await Class.resolve(name)`。 +- 若只有运行时名称:`ref = await pul.resolve(name)`,再使用 `ref.as_type(Class)` / `ref.as_any()`。 --- diff --git a/examples/agent/pulsing/mbti_discussion.py b/examples/agent/pulsing/mbti_discussion.py index 638b2ab30..ae2af3b83 100644 --- a/examples/agent/pulsing/mbti_discussion.py +++ b/examples/agent/pulsing/mbti_discussion.py @@ -1,13 +1,7 @@ """ Multi-Agent Discussion and Voting Example Based on MBTI Personality Types -Demonstrates the difference between @remote and @agent: - - @remote: Basic Actor decorator - - @agent: Actor with metadata (for visualization/debugging) - -In this example: - - ModeratorActor: Uses @remote (regular Actor) - - MBTIAgent: Uses @agent (with MBTI role metadata) +This example uses `@pul.remote` for all actors. Usage: python mbti_discussion.py --mock --topic "Remote work vs On-site work" @@ -21,8 +15,8 @@ import random from collections import Counter -from pulsing.actor import remote, resolve -from pulsing.agent import agent, runtime, llm, parse_json, list_agents +import pulsing as pul +from pulsing.agent import llm, parse_json # ============================================================================ # MBTI Personality Configuration @@ -154,13 +148,13 @@ def sample_mbti_group(size: int) -> list[str]: # ============================================================================ -# Moderator - Uses @remote (Regular Actor, no metadata) +# Moderator Actor # ============================================================================ -@remote +@pul.remote class ModeratorActor: - """Moderator Actor: Coordinates the entire discussion process (uses @remote)""" + """Moderator Actor: Coordinates the entire discussion process.""" def __init__(self, topic: str, rounds: int, debate_time: float, mock: bool): self.topic = topic @@ -200,7 +194,7 @@ async def start_discussion(self) -> dict: print(f"{'=' * 60}") for agent_info in self.agents: - proxy = await resolve(agent_info["name"]) + proxy = await MBTIAgent.resolve(agent_info["name"]) await proxy.form_opinion(self.opinions[-10:]) print(f"\n{'=' * 60}") @@ -228,7 +222,7 @@ async def start_discussion(self) -> dict: continue target = random.choice(opponents) - proxy = await resolve(agent_info["name"]) + proxy = await MBTIAgent.resolve(agent_info["name"]) result = await proxy.debate(target) if result.get("success"): @@ -244,7 +238,7 @@ async def start_discussion(self) -> dict: print(f"{'=' * 60}") for agent_info in self.agents: - proxy = await resolve(agent_info["name"]) + proxy = await MBTIAgent.resolve(agent_info["name"]) await proxy.vote() return self._summarize() @@ -277,17 +271,13 @@ def _summarize(self) -> dict: # ============================================================================ -# MBTI Agent - Uses @agent (with metadata, can be used for visualization) +# MBTI Agent # ============================================================================ -@agent( - role="MBTI Participant", - goal="Participate in discussion based on personality traits", - backstory="Express views according to MBTI personality type", -) +@pul.remote class MBTIAgent: - """MBTI Agent: Autonomous Actor participating in discussion (uses @agent, with metadata)""" + """MBTI Agent: Autonomous actor participating in discussion.""" def __init__( self, agent_name: str, mbti: str, topic: str, moderator: str, mock: bool @@ -336,7 +326,7 @@ async def form_opinion(self, others: list[dict]) -> dict: self.stance = data.get("stance", "Neutral") self.argument = data.get("argument", "Needs discussion") - moderator = await resolve(self.moderator_name) + moderator = await ModeratorActor.resolve(self.moderator_name) await moderator.submit_opinion(self.name, self.mbti, self.stance, self.argument) return {"mbti": self.mbti, "stance": self.stance} @@ -390,7 +380,7 @@ async def debate(self, target: dict) -> dict: async def vote(self) -> dict: if self.mock: await asyncio.sleep(random.uniform(0.02, 0.05)) - moderator = await resolve(self.moderator_name) + moderator = await ModeratorActor.resolve(self.moderator_name) await moderator.submit_vote(self.mbti, self.stance or "Abstain") return {"mbti": self.mbti, "vote": self.stance} @@ -416,14 +406,15 @@ async def run( ) print(f"Mode: {'Mock' if mock else 'LLM'}") - async with runtime(): + await pul.init() + try: mbti_group = sample_mbti_group(group_size) dist = Counter(mbti_group) print("\nGroup:") for mbti, count in sorted(dist.items(), key=lambda x: -x[1]): print(f" {mbti} ({MBTI_TYPES[mbti]['name']}): {count}") - # Create moderator (@remote) + # Create moderator moderator = await ModeratorActor.spawn( topic=topic, rounds=rounds, @@ -432,7 +423,7 @@ async def run( name="moderator", ) - # Create participants (@agent, with metadata) + # Create participants for i, mbti in enumerate(mbti_group): agent_name = f"agent_{i}_{mbti}" await MBTIAgent.spawn( @@ -445,14 +436,13 @@ async def run( ) await moderator.register_agent(agent_name, mbti) - # Show @agent metadata functionality - print("\nRegistered Agents (via metadata):") - for name, meta in list_agents().items(): - print(f" {name}: {meta.role}") + print(f"\nRegistered Agents: {len(mbti_group)}") # Start discussion result = await moderator.start_discussion() return result + finally: + await pul.shutdown() if __name__ == "__main__": diff --git a/examples/agent/pulsing/parallel_ideas_async.py b/examples/agent/pulsing/parallel_ideas_async.py index 5afc1415d..336a4c78a 100644 --- a/examples/agent/pulsing/parallel_ideas_async.py +++ b/examples/agent/pulsing/parallel_ideas_async.py @@ -15,8 +15,8 @@ import json import random import time -from pulsing.actor import remote, resolve -from pulsing.agent import runtime, llm, parse_json +import pulsing as pul +from pulsing.agent import llm, parse_json # ============================================================================ # Configuration @@ -86,7 +86,7 @@ async def get_llm(): # ============================================================================ -@remote +@pul.remote class JudgeActor: def __init__(self, timeout: float, mock: bool): self.timeout = timeout @@ -132,7 +132,7 @@ async def _timer(self): # Stop all agents for name in self._agents: try: - agent = await resolve(name) + agent = await IdeaAgent.resolve(name) await agent.stop() except Exception as e: print(f"[Judge] Error stopping agent '{name}': {e}") @@ -173,7 +173,7 @@ async def get_result(self) -> dict: # ============================================================================ -@remote +@pul.remote class IdeaAgent: def __init__( self, @@ -293,7 +293,7 @@ async def _run(self) -> dict: elapsed = time.time() - start print(f" [{self.persona}] 📤 Submitted ({elapsed:.1f}s)") - judge = await resolve(self.judge_name) + judge = await JudgeActor.resolve(self.judge_name) result = await judge.submit(self.idea, self.iterations) return { @@ -460,7 +460,7 @@ async def _collaborate(self, experts: list[str]) -> list[dict]: print(f" [{self.persona}] 🤝 Requesting [{expert}]") try: - peer = await asyncio.wait_for(resolve(peer_name), timeout=5) + peer = await asyncio.wait_for(IdeaAgent.resolve(peer_name), timeout=5) resp = await asyncio.wait_for( peer.assist(from_agent=self.persona, context={"idea": self.idea}), timeout=10, @@ -511,7 +511,8 @@ async def run( ) print("=" * 50) - async with runtime(): + await pul.init() + try: # Create Judge judge = await JudgeActor.spawn(timeout=timeout, mock=mock, name="judge") @@ -563,6 +564,8 @@ async def run( ) return {"final": final, "agents": results} + finally: + await pul.shutdown() if __name__ == "__main__": diff --git a/examples/agent/pulsing/runtime_lifecycle_example.py b/examples/agent/pulsing/runtime_lifecycle_example.py index 687364875..b11fb3207 100644 --- a/examples/agent/pulsing/runtime_lifecycle_example.py +++ b/examples/agent/pulsing/runtime_lifecycle_example.py @@ -6,10 +6,10 @@ import asyncio -from pulsing.agent import agent, cleanup, runtime +import pulsing as pul -@agent(role="Counter", goal="Accumulate numbers") +@pul.remote class Counter: def __init__(self, initial: int = 0): self.value = initial @@ -25,11 +25,14 @@ async def get_value(self) -> int: async def example_simple(): """Example 1: Simple scenario (no cleanup needed)""" print("\n=== Example 1: Simple Scenario ===") - async with runtime(): + await pul.init() + try: counter = await Counter.spawn(name="counter", initial=0) for _ in range(5): value = await counter.increment() print(f"Current value: {value}") + finally: + await pul.shutdown() async def example_repeated_with_cleanup(): @@ -38,31 +41,29 @@ async def example_repeated_with_cleanup(): for i in range(3): try: - async with runtime(): - counter = await Counter.spawn(name=f"counter_{i}", initial=i * 10) - value = await counter.increment() - print(f"Task {i}: result = {value}") + await pul.init() + counter = await Counter.spawn(name=f"counter_{i}", initial=i * 10) + value = await counter.increment() + print(f"Task {i}: result = {value}") finally: - cleanup() # ⭐ Ensure cleanup each time + await pul.shutdown() print(f"Task {i}: cleaned up") async def example_batch_processing(): """Example 3: Batch processing (shared runtime)""" print("\n=== Example 3: Batch Processing (shared runtime) ===") + await pul.init() try: - async with runtime(): - # Create multiple counters - counters = [] - for i in range(5): - counter = await Counter.spawn(name=f"counter_{i}", initial=i) - counters.append(counter) - - # Concurrent processing - results = await asyncio.gather(*[c.increment() for c in counters]) - print(f"Results: {results}") + counters = [] + for i in range(5): + counter = await Counter.spawn(name=f"counter_{i}", initial=i) + counters.append(counter) + + results = await asyncio.gather(*[c.increment() for c in counters]) + print(f"Results: {results}") finally: - cleanup() + await pul.shutdown() async def example_error_handling(): @@ -71,19 +72,18 @@ async def example_error_handling(): for i in range(2): try: - async with runtime(): - counter = await Counter.spawn(name=f"counter_{i}", initial=i) - await counter.increment() + await pul.init() + counter = await Counter.spawn(name=f"counter_{i}", initial=i) + await counter.increment() - if i == 0: - # Simulate error - raise ValueError("Simulated error") + if i == 0: + raise ValueError("Simulated error") - print(f"Task {i} succeeded") + print(f"Task {i} succeeded") except ValueError as e: print(f"Task {i} failed: {e}") finally: - cleanup() # ⭐ Clean up even on error + await pul.shutdown() print(f"Task {i} cleaned up") @@ -92,15 +92,15 @@ async def example_helper_pattern(): print("\n=== Example 5: Helper Function Pattern ===") async def run_counter_task(task_id: int, increments: int) -> int: - """Encapsulated task function (auto cleanup)""" + """Encapsulated task function""" try: - async with runtime(): - counter = await Counter.spawn(name=f"task_{task_id}", initial=0) - for _ in range(increments): - await counter.increment() - return await counter.get_value() + await pul.init() + counter = await Counter.spawn(name=f"task_{task_id}", initial=0) + for _ in range(increments): + await counter.increment() + return await counter.get_value() finally: - cleanup() + await pul.shutdown() # Run multiple tasks tasks = [run_counter_task(i, i + 1) for i in range(3)] diff --git a/examples/inspect/demo_service.py b/examples/inspect/demo_service.py index 4a56651eb..3c8be3129 100644 --- a/examples/inspect/demo_service.py +++ b/examples/inspect/demo_service.py @@ -24,6 +24,7 @@ import pulsing as pul +@pul.remote class WorkerActor: """A simple worker actor that processes tasks""" @@ -31,25 +32,17 @@ def __init__(self, worker_id: str): self.worker_id = worker_id self.tasks_processed = 0 - def on_start(self, actor_id): - print(f"[Worker {self.worker_id}] Started") + async def process(self, task: str) -> dict[str, str | int]: + self.tasks_processed += 1 + result = f"Processed: {task} (total: {self.tasks_processed})" + print(f"[Worker {self.worker_id}] {result}") + return {"result": result, "worker": self.worker_id} - async def receive(self, msg): - action = msg.get("action") if isinstance(msg, dict) else None - - if action == "process": - task = msg.get("task", "") - self.tasks_processed += 1 - result = f"Processed: {task} (total: {self.tasks_processed})" - print(f"[Worker {self.worker_id}] {result}") - return {"result": result, "worker": self.worker_id} - - if action == "stats": - return {"worker_id": self.worker_id, "tasks": self.tasks_processed} - - return {"error": "unknown action"} + def stats(self) -> dict[str, str | int]: + return {"worker_id": self.worker_id, "tasks": self.tasks_processed} +@pul.remote class DispatcherActor: """A dispatcher actor that distributes tasks to workers (for demo purposes)""" @@ -57,56 +50,36 @@ def __init__(self): self.workers = [] self.tasks_dispatched = 0 - def on_start(self, actor_id): - print("[Dispatcher] Started") - - async def receive(self, msg): - action = msg.get("action") if isinstance(msg, dict) else None - - if action == "route": - self.tasks_dispatched += 1 - task = msg.get("task", "") - # Simulate routing logic - worker_id = f"worker-{random.randint(1, 3)}" - return { - "task": task, - "worker": worker_id, - "dispatched": self.tasks_dispatched, - } + def route(self, task: str) -> dict[str, str | int | bool]: + self.tasks_dispatched += 1 + worker_id = f"worker-{random.randint(1, 3)}" + return { + "task": task, + "worker": worker_id, + "dispatched": self.tasks_dispatched, + } - if action == "stats": - return {"dispatcher": True, "tasks_dispatched": self.tasks_dispatched} - - return {"error": "unknown action"} + def stats(self) -> dict[str, int | bool]: + return {"dispatcher": True, "tasks_dispatched": self.tasks_dispatched} +@pul.remote class CacheActor: """A cache actor that stores key-value pairs""" def __init__(self): self.cache = {} - def on_start(self, actor_id): - print("[Cache] Started") - - async def receive(self, msg): - action = msg.get("action") if isinstance(msg, dict) else None - - if action == "get": - key = msg.get("key", "") - value = self.cache.get(key, None) - return {"key": key, "value": value, "found": value is not None} - - if action == "set": - key = msg.get("key", "") - value = msg.get("value", "") - self.cache[key] = value - return {"key": key, "success": True} + def get(self, key: str) -> dict[str, object]: + value = self.cache.get(key, None) + return {"key": key, "value": value, "found": value is not None} - if action == "stats": - return {"cache_size": len(self.cache)} + def set(self, key: str, value: object) -> dict[str, str | bool]: + self.cache[key] = value + return {"key": key, "success": True} - return {"error": "unknown action"} + def stats(self) -> dict[str, int]: + return {"cache_size": len(self.cache)} async def run_node(port: int, seed: str | None): @@ -118,8 +91,9 @@ async def run_node(port: int, seed: str | None): addr = f"127.0.0.1:{port}" seeds = [seed] if seed else None - system = await pul.actor_system(addr, seeds=seeds) - print(f"✓ System started: {system.node_id} @ {system.addr}") + await pul.init(addr=addr, seeds=seeds) + system = pul.ActorSystem(pul.get_system()) + print(f"✓ System started: {addr}") if seed: print(f" Joined via: {seed}") print() @@ -128,12 +102,12 @@ async def run_node(port: int, seed: str | None): if seed is None: # Node 1: Create dispatcher and some workers print("Creating actors on node 1...") - await system.spawn(DispatcherActor(), name="dispatcher") + await DispatcherActor.spawn(name="dispatcher") print(" ✓ actors/dispatcher") for i in range(1, 3): worker_name = f"worker-{i}" - await system.spawn(WorkerActor(worker_name), name=worker_name) + await WorkerActor.spawn(worker_name, name=worker_name) print(f" ✓ actors/{worker_name}") print("\n✓ Node 1 ready!") @@ -156,7 +130,7 @@ async def run_node(port: int, seed: str | None): print("Creating actors on node 2...") for i in range(3, 5): worker_name = f"worker-{i}" - await system.spawn(WorkerActor(worker_name), name=worker_name) + await WorkerActor.spawn(worker_name, name=worker_name) print(f" ✓ actors/{worker_name}") print("\n✓ Node 2 ready!") @@ -164,7 +138,7 @@ async def run_node(port: int, seed: str | None): # Node 3: Add cache await asyncio.sleep(1) print("Creating actors on node 3...") - await system.spawn(CacheActor(), name="cache") + await CacheActor.spawn(name="cache") print(" ✓ actors/cache") print("\n✓ Node 3 ready!") @@ -182,7 +156,7 @@ async def run_node(port: int, seed: str | None): except KeyboardInterrupt: print("\n\nShutting down...") finally: - await system.shutdown() + await pul.shutdown() print("✓ Shutdown complete") diff --git a/examples/python/cluster.py b/examples/python/cluster.py index 1c04a96b4..f219be4e8 100644 --- a/examples/python/cluster.py +++ b/examples/python/cluster.py @@ -15,23 +15,19 @@ import pulsing as pul +@pul.remote class SharedCounter: def __init__(self, node_id: str): self.count = 0 self.node_id = node_id - def on_start(self, actor_id): - print(f"[{actor_id}] Started on {self.node_id}") + def get(self) -> dict[str, int | str]: + return {"count": self.count, "from_node": self.node_id} - async def receive(self, msg): - if msg.get("action") == "get": - return {"count": self.count, "from_node": self.node_id} - elif msg.get("action") == "incr": - n = msg.get("n", 1) - self.count += n - print(f"[{self.node_id}] +{n} -> {self.count}") - return {"count": self.count, "from_node": self.node_id} - return {"error": "unknown action"} + def incr(self, n: int = 1) -> dict[str, int | str]: + self.count += n + print(f"[{self.node_id}] +{n} -> {self.count}") + return {"count": self.count, "from_node": self.node_id} async def run_node(port: int, seed: str | None): @@ -40,59 +36,52 @@ async def run_node(port: int, seed: str | None): addr = f"127.0.0.1:{port}" seeds = [seed] if seed else None - system = await pul.actor_system(addr, seeds=seeds) - print(f"✓ Started: {system.node_id} @ {system.addr}") - if seed: - print(f" Joined via: {seed}") - print() - - if seed is None: - # Node 1: Create actor - await system.spawn( - SharedCounter(str(system.node_id)), - name="counter", - ) - print("✓ Created: counter") - print("Start node 2: python cluster.py --port 8001 --seed 127.0.0.1:8000\n") - - try: - while True: - await asyncio.sleep(5) - members = await system.members() - print(f"Cluster: {len(members)} members") - except asyncio.CancelledError: - pass - await system.shutdown() - else: - # Node 2: Join and interact - await asyncio.sleep(2) - - # Resolve remote actor - actor = None - for _ in range(10): - try: - actor = await system.resolve("counter") - break - except Exception: - print(".", end="", flush=True) - await asyncio.sleep(0.5) - - if not actor: - print("\n✗ Failed to resolve actor") - return - - print("✓ Resolved\n") - - # Interact using simple Python dicts - resp = await actor.ask({"action": "get"}) - print(f"Initial: {resp['count']} (from {resp['from_node']})") + await pul.init(addr=addr, seeds=seeds) + try: + print(f"✓ Started: {addr}") + if seed: + print(f" Joined via: {seed}") + print() - for i in range(1, 4): - resp = await actor.ask({"action": "incr", "n": i * 10}) - print(f"After +{i * 10}: {resp['count']} (from {resp['from_node']})") + if seed is None: + await SharedCounter.spawn(str(port), name="counter") + print("✓ Created: counter") + print("Start node 2: python cluster.py --port 8001 --seed 127.0.0.1:8000\n") - print("\n✓ Done!") - await system.shutdown() + try: + while True: + await asyncio.sleep(5) + print("Cluster running... (press Ctrl+C to stop)") + except asyncio.CancelledError: + pass + else: + await asyncio.sleep(2) + + actor = None + for _ in range(10): + try: + actor = await SharedCounter.resolve("counter") + break + except Exception: + print(".", end="", flush=True) + await asyncio.sleep(0.5) + + if not actor: + print("\n✗ Failed to resolve actor") + return + + print("✓ Resolved\n") + + resp = await actor.get() + print(f"Initial: {resp['count']} (from {resp['from_node']})") + + for i in range(1, 4): + resp = await actor.incr(i * 10) + print(f"After +{i * 10}: {resp['count']} (from {resp['from_node']})") + + print("\n✓ Done!") + finally: + await pul.shutdown() def main(): diff --git a/examples/python/distributed_queue.py b/examples/python/distributed_queue.py index f666dd6b1..0ef27494b 100644 --- a/examples/python/distributed_queue.py +++ b/examples/python/distributed_queue.py @@ -23,9 +23,9 @@ async def main(): """Main function""" logger.info("=== Distributed Memory Queue Example ===\n") - # Create Actor system - system = await pul.actor_system() - logger.info("✓ Actor system started\n") + await pul.init() + system = pul.ActorSystem(pul.get_system()) + logger.info("✓ Global system initialized\n") try: # Producer: open queue for writing @@ -66,7 +66,7 @@ async def main(): logger.info("✓ Example completed!") finally: - await system.shutdown() + await pul.shutdown() logger.info("System shutdown") diff --git a/examples/python/message_patterns.py b/examples/python/message_patterns.py index 80c6fac16..373e675c7 100644 --- a/examples/python/message_patterns.py +++ b/examples/python/message_patterns.py @@ -10,42 +10,13 @@ import pulsing as pul -class PatternDemo: - """Base Actor with various message patterns.""" - - def __init__(self): - self.value = 0 - - async def receive(self, msg): - # Pattern 1: Simple object messaging (dict, list, string, etc.) - if isinstance(msg, dict): - if msg.get("action") == "add": - self.value += msg.get("n", 1) - return {"value": self.value} - if msg.get("action") == "get": - return {"value": self.value} - - # Pattern 2: Streaming response - just return a generator! - if msg == "stream": - - async def generate(): - for token in ["Hello", " ", "World", "!"]: - yield {"token": token} - await asyncio.sleep(0.1) - - return generate() - - return f"unknown: {msg}" - - @pul.remote -class RemotePatternDemo: - """@pul.remote Actor with cleaner API (recommended).""" +class PatternDemo: + """Actor with various message patterns.""" def __init__(self): self.value = 0 - # Sync method - simple request/response def add(self, n: int = 1) -> dict: self.value += n return {"value": self.value} @@ -61,44 +32,35 @@ async def stream(self): async def main(): - system = await pul.actor_system() - - print("=" * 50) - print("Pattern 1: Base Actor with dict messages") - print("=" * 50) - - actor = await system.spawn(PatternDemo(), name="demo") - - print(await actor.ask({"action": "add", "n": 10})) # {'value': 10} - print(await actor.ask({"action": "add", "n": 5})) # {'value': 15} - print(await actor.ask({"action": "get"})) # {'value': 15} + await pul.init() + try: + print("=" * 50) + print("Pattern 1: Simple method calls") + print("=" * 50) - print("\n" + "=" * 50) - print("Pattern 2: Base Actor streaming (return generator)") - print("=" * 50) + demo = await PatternDemo.spawn(name="demo") - response = await actor.ask("stream") - async for chunk in response.stream_reader(): - print(chunk["token"], end="") - print() + print(await demo.add(10)) # {'value': 10} + print(await demo.add(5)) # {'value': 15} + print(await demo.get()) # {'value': 15} - print("\n" + "=" * 50) - print("Pattern 3: @pul.remote (recommended)") - print("=" * 50) + print("\n" + "=" * 50) + print("Pattern 2: Typed resolve") + print("=" * 50) - service = await RemotePatternDemo.local(system) + resolved = await PatternDemo.resolve("demo") + print(await resolved.get()) - # Direct method calls - no need for ask/tell! - print(await service.add(10)) # {'value': 10} - print(await service.add(5)) # {'value': 15} - print(await service.get()) # {'value': 15} + print("\n" + "=" * 50) + print("Pattern 3: Async generator streaming") + print("=" * 50) - print("\n--- Async generator streaming ---") - async for chunk in service.stream(): - print(chunk["token"], end="") - print() + async for chunk in demo.stream(): + print(chunk["token"], end="") + print() + finally: + await pul.shutdown() - await system.shutdown() print("\n✓ Done!") diff --git a/examples/python/named_actors.py b/examples/python/named_actors.py index d47af1274..1c66d99cb 100644 --- a/examples/python/named_actors.py +++ b/examples/python/named_actors.py @@ -13,15 +13,11 @@ import pulsing as pul +@pul.remote class EchoActor: """Simple echo actor that can be discovered by name.""" - def on_start(self, actor_id): - print(f"[{actor_id}] Started") - - async def receive(self, msg): - # Accept dict messages - message = msg.get("message", "") if isinstance(msg, dict) else str(msg) + def echo(self, message: str) -> dict[str, str]: print(f"[Echo] {message}") return {"echo": message} @@ -29,27 +25,21 @@ async def receive(self, msg): async def main(): print("=== Pulsing Named Actors ===\n") - system = await pul.actor_system() - print(f"✓ System started: {system.node_id}\n") - - # Create named actor (named actors are discoverable via resolve) - await system.spawn(EchoActor(), name="echo") - print("✓ Created: echo (named, discoverable)\n") + await pul.init() + try: + print("✓ System started\n") - # Resolve by name - print("--- Resolve by name ---") - actor = await system.resolve("echo") - resp = await actor.ask({"message": "Hello!"}) - print(f"Response: {resp['echo']}\n") + await EchoActor.spawn(name="echo") + print("✓ Created: echo (named, discoverable)\n") - # List instances - instances = await system.get_named_instances("actors/echo") - print(f"Instances of 'actors/echo': {len(instances)}") - for i in instances: - print(f" {i['node_id']} @ {i['addr']} ({i['status']})") + print("--- Resolve by name ---") + actor = await EchoActor.resolve("echo") + resp = await actor.echo("Hello!") + print(f"Response: {resp['echo']}\n") - print("\n✓ Done!") - await system.shutdown() + print("✓ Done!") + finally: + await pul.shutdown() if __name__ == "__main__": diff --git a/examples/python/native_async_example.py b/examples/python/native_async_example.py index c9ebc6399..fc5801f5d 100644 --- a/examples/python/native_async_example.py +++ b/examples/python/native_async_example.py @@ -9,11 +9,10 @@ import asyncio -# Pulsing native API -from pulsing.actor import init, shutdown, remote +import pulsing as pul -@remote +@pul.remote class Counter: """Distributed counter""" @@ -28,7 +27,7 @@ def increment(self, n: int = 1) -> int: return self.value -@remote +@pul.remote class Calculator: """Distributed calculator""" @@ -39,7 +38,7 @@ def multiply(self, a: int, b: int) -> int: return a * b -@remote +@pul.remote class AsyncWorker: """Async Worker""" @@ -64,7 +63,7 @@ async def main(): print("=" * 60) # Initialize (simple!) - await init() + await pul.init() print("✓ Pulsing initialized") # --- Counter --- @@ -99,7 +98,7 @@ async def main(): print(f"Process result: {result}") # --- Shutdown --- - await shutdown() + await pul.shutdown() print("\n✓ Done!") @@ -113,12 +112,12 @@ async def main(): # # | Operation | Pulsing Native (async) | Ray Compat Layer (sync) | # |----------------|-----------------------------|-----------------------------| -# | Initialize | await init() | ray.init() | -# | Decorator | @remote | @ray.remote | +# | Initialize | await pul.init() | ray.init() | +# | Decorator | @pul.remote | @ray.remote | # | Create actor | await Counter.spawn() | Counter.remote() | # | Call method | await counter.incr() | counter.incr.remote() | # | Get result | Direct return | ray.get(ref) | -# | Shutdown | await shutdown() | ray.shutdown() | +# | Shutdown | await pul.shutdown() | ray.shutdown() | # # Recommended to use native API: # - More Pythonic (standard async/await) diff --git a/examples/python/ping_pong.py b/examples/python/ping_pong.py index 12085be6d..cef44f1b1 100644 --- a/examples/python/ping_pong.py +++ b/examples/python/ping_pong.py @@ -9,23 +9,26 @@ import pulsing as pul +@pul.remote class PingPong: - async def receive(self, msg): - if msg == "ping": - return "pong" + def ping(self) -> str: + return "pong" + + def echo(self, msg: str) -> str: return f"echo: {msg}" async def main(): - system = await pul.actor_system() - actor = await system.spawn(PingPong()) + await pul.init() + try: + actor = await PingPong.spawn() - # Simple string message - print(await actor.ask("ping")) # -> pong - print(await actor.ask("hello")) # -> echo: hello + print(await actor.ping()) # -> pong + print(await actor.echo("hello")) # -> echo: hello - await asyncio.sleep(1) # Allow background tasks to complete - await system.shutdown() + await asyncio.sleep(1) # Allow background tasks to complete + finally: + await pul.shutdown() if __name__ == "__main__": diff --git a/examples/python/ray_compat_example.py b/examples/python/ray_compat_example.py deleted file mode 100644 index ea38a857f..000000000 --- a/examples/python/ray_compat_example.py +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env python3 -""" -Ray Compatibility Layer Example (for Migration) - -Demonstrates how to use pulsing.compat.ray to migrate from Ray to Pulsing. -Migration only requires changing one import line! - -Usage: python examples/python/ray_compat_example.py -""" - -# ============================================ -# Migrate from Ray: Just change this line! -# ============================================ -# Before: import ray -# After: -from pulsing.compat import ray - - -@ray.remote -class Counter: - """Distributed counter (Ray style)""" - - def __init__(self, init_value: int = 0): - self.value = init_value - - def get(self) -> int: - return self.value - - def increment(self, n: int = 1) -> int: - self.value += n - return self.value - - -@ray.remote -class Calculator: - """Distributed calculator (Ray style)""" - - def add(self, a: int, b: int) -> int: - return a + b - - def multiply(self, a: int, b: int) -> int: - return a * b - - -def main(): - print("=" * 60) - print("Ray Compatibility Layer Example (from pulsing.compat import ray)") - print("=" * 60) - - # Initialize (Ray style) - ray.init() - print("✓ Pulsing (Ray compat) initialized") - - # --- Counter --- - print("\n--- Counter ---") - counter = Counter.remote(init_value=10) - - # Ray style calls - print(f"Initial value: {ray.get(counter.get.remote())}") - print(f"increment(5): {ray.get(counter.increment.remote(5))}") - print(f"Final value: {ray.get(counter.get.remote())}") - - # --- Calculator --- - print("\n--- Calculator ---") - calc = Calculator.remote() - - print(f"add(10, 20): {ray.get(calc.add.remote(10, 20))}") - print(f"multiply(5, 6): {ray.get(calc.multiply.remote(5, 6))}") - - # --- Batch get --- - print("\n--- Batch Get ---") - refs = [ - calc.add.remote(1, 2), - calc.add.remote(3, 4), - calc.multiply.remote(5, 6), - ] - results = ray.get(refs) - print(f"Batch results: {results}") - - # --- Object Store --- - print("\n--- put/get ---") - ref = ray.put({"message": "Hello from pulsing.compat.ray!"}) - print(f"Result: {ray.get(ref)}") - - # Shutdown (Ray style) - ray.shutdown() - print("\n✓ Done!") - - -if __name__ == "__main__": - main() - - -# ============================================================================= -# Migration Guide -# ============================================================================= -# -# Step 1: Change import -# ------------------- -# Before: -# import ray -# -# After: -# from pulsing.compat import ray -# -# Step 2: Rest of the code remains unchanged! -# ------------------------- -# ray.init() -# @ray.remote -# Counter.remote() -# counter.incr.remote() -# ray.get(ref) -# ray.shutdown() -# -# ============================================================================= -# Next Step: Migrate to Native API (Optional, Better Performance) -# ============================================================================= -# -# from pulsing.actor import init, shutdown, remote -# -# await init() -# -# @remote -# class Counter: -# ... -# -# counter = await Counter.spawn() -# result = await counter.incr() # No need for .remote() + get()! -# -# await shutdown() -# diff --git a/examples/python/remote_actor_example.py b/examples/python/remote_actor_example.py index 3e9b0a013..11d1d5970 100644 --- a/examples/python/remote_actor_example.py +++ b/examples/python/remote_actor_example.py @@ -14,13 +14,13 @@ import asyncio import logging -from pulsing.actor import init, shutdown, remote +import pulsing as pul logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -@remote +@pul.remote class Counter: """Distributed counter""" @@ -39,7 +39,7 @@ def decrement(self, n: int = 1) -> int: return self.value -@remote +@pul.remote class KeyValueStore: """Distributed key-value store""" @@ -56,7 +56,7 @@ def keys(self) -> list: return list(self.store.keys()) -@remote +@pul.remote class AsyncWorker: """Supports async methods""" @@ -79,7 +79,7 @@ async def main(): print("=" * 60) # Simple initialization - await init() + await pul.init() # --- Counter --- print("\n--- Counter ---") @@ -125,7 +125,7 @@ async def main(): print("\n✓ Done!") # Shutdown - await shutdown() + await pul.shutdown() if __name__ == "__main__": diff --git a/examples/python/sync_queue_example.py b/examples/python/sync_queue_example.py index 79d451af8..5b19ba2bf 100644 --- a/examples/python/sync_queue_example.py +++ b/examples/python/sync_queue_example.py @@ -23,9 +23,9 @@ async def main(): """Main function""" logger.info("=== Distributed Memory Queue Example (Synchronous Version) ===\n") - # Create Actor system - system = await pul.actor_system() - logger.info("✓ Actor system started\n") + await pul.init() + system = pul.ActorSystem(pul.get_system()) + logger.info("✓ Global system initialized\n") try: # Producer: open queue for writing, get synchronous wrapper @@ -68,7 +68,7 @@ async def main(): logger.info("✓ Example completed!") finally: - await system.shutdown() + await pul.shutdown() logger.info("System shutdown") diff --git a/examples/quickstart/ai_chat_room.py b/examples/quickstart/ai_chat_room.py index 003f37f06..e08fe4a29 100644 --- a/examples/quickstart/ai_chat_room.py +++ b/examples/quickstart/ai_chat_room.py @@ -13,8 +13,7 @@ import argparse import asyncio import random -from pulsing.actor import remote -from pulsing.agent import runtime +import pulsing as pul # AI persona configuration AI_PERSONAS = { @@ -57,7 +56,7 @@ } -@remote +@pul.remote class ChatRoom: """Chat room - coordinates agent conversations""" @@ -94,7 +93,7 @@ def get_history(self) -> list[dict]: return self.messages -@remote +@pul.remote class ChatAgent: """AI agent in the chat room""" @@ -141,7 +140,8 @@ async def main(topic: str, rounds: int): print(f"🔄 Discussion rounds: {rounds}") print("\n--- Participants entering ---\n") - async with runtime(): + await pul.init() + try: # Create chat room room = await ChatRoom.spawn(topic=topic, name="chat_room") @@ -181,6 +181,8 @@ async def main(topic: str, rounds: int): print(f" Total messages: {len(history)}") print(f" Participants: {len(agents)} AIs") print(f" Discussion rounds: {rounds}") + finally: + await pul.shutdown() print("\n" + "=" * 60) print("✅ Chat ended!") diff --git a/examples/quickstart/chaos_proof.py b/examples/quickstart/chaos_proof.py index 3cee8a892..d3c550980 100644 --- a/examples/quickstart/chaos_proof.py +++ b/examples/quickstart/chaos_proof.py @@ -4,11 +4,10 @@ import asyncio import random -from pulsing.actor import remote -from pulsing.agent import runtime +import pulsing as pul -@remote(restart_policy="on_failure", max_restarts=50) +@pul.remote(restart_policy="on_failure", max_restarts=50) class FlakyWorker: def __init__(self): self.call_count = 0 @@ -21,7 +20,8 @@ def work(self, x: int) -> int: async def main(): - async with runtime(): + await pul.init() + try: w = await FlakyWorker.spawn(name="flaky") results, retries = [], 0 @@ -50,6 +50,8 @@ async def main(): else: print(f"⚠️ {50 - ok} tasks failed") print("=" * 50 + "\n") + finally: + await pul.shutdown() if __name__ == "__main__": diff --git a/examples/quickstart/function_to_fleet.py b/examples/quickstart/function_to_fleet.py index cac61c78a..eb799e1a6 100644 --- a/examples/quickstart/function_to_fleet.py +++ b/examples/quickstart/function_to_fleet.py @@ -1,11 +1,10 @@ import asyncio import os import time -from pulsing.actor import remote -from pulsing.agent import runtime +import pulsing as pul -@remote +@pul.remote class Worker: async def run(self, x: int) -> int: await asyncio.sleep(0.02) # simulate I/O @@ -15,7 +14,8 @@ async def run(self, x: int) -> int: async def main(): n = int(os.getenv("WORKERS", "8")) m = int(os.getenv("ITEMS", "200")) - async with runtime(): + await pul.init() + try: ws = [await Worker.spawn(name=f"w{i}") for i in range(n)] t0 = time.perf_counter() res = await asyncio.gather(*(ws[i % n].run(i) for i in range(m))) @@ -30,6 +30,8 @@ async def main(): print("=" * 50) print("✅ Same code, more workers = higher throughput") print("=" * 50 + "\n") + finally: + await pul.shutdown() if __name__ == "__main__": diff --git a/examples/quickstart/hello_agent.py b/examples/quickstart/hello_agent.py index e3e1098ab..1196a47e9 100644 --- a/examples/quickstart/hello_agent.py +++ b/examples/quickstart/hello_agent.py @@ -7,11 +7,10 @@ """ import asyncio -from pulsing.actor import remote -from pulsing.agent import runtime +import pulsing as pul -@remote +@pul.remote class Greeter: """A simple greeting agent""" @@ -39,7 +38,8 @@ async def main(): print("🎉 Pulsing Multi-Agent Quick Start") print("=" * 50) - async with runtime(): + await pul.init() + try: # Create two agents alice = await Greeter.spawn(display_name="Alice", name="alice") bob = await Greeter.spawn(display_name="Bob", name="bob") @@ -53,6 +53,8 @@ async def main(): # Bob greets Alice await bob.say_hello_to("alice") + finally: + await pul.shutdown() print("\n" + "=" * 50) print("✅ Done! You've created your first Multi-Agent application") diff --git a/llms.binding.md b/llms.binding.md index c5359c1d6..9181700f4 100644 --- a/llms.binding.md +++ b/llms.binding.md @@ -4,185 +4,93 @@ `Pulsing` is a distributed actor framework that provides a communication backbone for building distributed systems, with specialized support for AI applications. -## Python API - -### Actor System Style +## Quick Start ```python import pulsing as pul -system = await pul.actor_system( - addr: str | None = None, - *, - seeds: list[str] | None = None, - passphrase: str | None = None -) -> ActorSystem - -await system.shutdown() - -class MyActor: - async def receive(self, msg: Any) -> Any: - ... - -actorref = await system.spawn( - actor: Actor, # MyActor() - *, - name: str | None = None, - public: bool = False, - restart_policy: str = "never", - max_restarts: int = 3, - min_backoff: float = 0.1, - max_backoff: float = 30.0 -) -> ActorRef - -actorref = await system.refer(actorid: ActorId | str) -> ActorRef - -actorref = await system.resolve( - name: str, - *, - node_id: int | None = None -) -> ActorRef - -response = await actorref.ask(request: Any) -> Any - -await actorref.tell(msg: Any) -> None - +await pul.init() @pul.remote class Counter: - # Synchronous handler - def incr(self): - ... - - # Asynchronous handler - async def desc(self): - ... + def __init__(self): self.value = 0 + def incr(self): self.value += 1; return self.value -# Usage +# Create actor counter = await Counter.spawn(name="counter") -result = await counter.incr() # Returns ActorProxy, call methods directly - -# Queue API -writer = await system.queue.write( - topic: str, - *, - bucket_column: str = "id", - num_buckets: int = 4, - batch_size: int = 100, - storage_path: str | None = None, - backend: str = "memory", -) -> QueueWriter +print(await counter.incr()) # 1 -await writer.put(record: dict | list[dict]) -> None -await writer.flush() -> None - -reader = await system.queue.read( - topic: str, - *, - bucket_id: int | None = None, - bucket_ids: list[int] | None = None, - rank: int | None = None, - world_size: int | None = None, - num_buckets: int = 4, -) -> QueueReader - -records = await reader.get(limit: int = 100, wait: bool = False) -> list[dict] +# Resolve from another process / node +counter2 = await Counter.resolve("counter") +print(await counter2.incr()) # 2 -# Queue usage example -writer = await system.queue.write("my_queue", bucket_column="user_id") -await writer.put({"user_id": "u1", "data": "hello"}) - -reader = await system.queue.read("my_queue") -records = await reader.get(limit=10) +await pul.shutdown() ``` -### Async API with Global System +## Python API + +You must call `await pul.init()` before using `spawn`, `resolve`, or other APIs. ```python import pulsing as pul -# Initialize global system +# ── Lifecycle ── + await pul.init( addr: str | None = None, *, seeds: list[str] | None = None, passphrase: str | None = None -) -> ActorSystem +) await pul.shutdown() -# Spawn actor (using global system) -actorref = await pul.spawn( - actor: Actor, - *, - name: str | None = None, - public: bool = False, - restart_policy: str = "never", - max_restarts: int = 3, - min_backoff: float = 0.1, - max_backoff: float = 30.0 -) -> ActorRef - -# Get reference by ActorId (using global system) -actorref = await pul.refer(actorid: ActorId | str) -> ActorRef - -# Resolve actor by name (using global system) -ref = await pul.resolve( - name: str, - *, - node_id: int | None = None, - timeout: float | None = None # Seconds to wait for name to appear (gossip convergence) -) -> ActorRefView - -# Send message and wait for response -response = await ref.ask(request: Any) -> Any - -# Send message without waiting (fire-and-forget) -await ref.tell(msg: Any) -> None - -# ── Proxy Generation ── - -# Untyped proxy (no need to know actor type, call any method by name) -ref = await pul.resolve("service_name") -proxy = ref.as_any() # Untyped proxy -result = await proxy.any_method(args) # Call any remote method -value = await proxy.some_attr # Read remote attribute (no parentheses) - -# Typed proxy (generated from local class definition, with method signature validation) -ref = await pul.resolve("counter", timeout=30) -proxy = ref.as_type(Counter) # Bind to Counter type -result = await proxy.incr() # With type checking - -# @pul.remote classes can also resolve directly to a typed proxy -typed_proxy = await Counter.resolve("counter") -any_proxy = typed_proxy.as_any() # typed → untyped conversion +# ── Define actor with @pul.remote ── @pul.remote class Counter: def __init__(self, init=0): self.value = init - # Synchronous handler - def incr(self): - ... + def incr(self): # sync method + self.value += 1 + return self.value - # Asynchronous handler - async def desc(self): - ... + async def fetch_and_add(self, url): # async method + data = await http_get(url) + self.value += data + return self.value -# Usage 1: Create via spawn -counter = await Counter.spawn(name="counter") -result = await counter.incr() # Returns ActorProxy, call methods directly +# ── Create and call ── + +counter = await Counter.spawn(name="counter") # create actor, returns typed proxy +result = await counter.incr() # call method directly -# Usage 2: Resolve an existing actor +# ── Resolve existing actor (e.g. from another process / node) ── +# Prefer typed proxy via Counter.resolve() when you know the actor type. +# Fall back to ref.as_any() when the remote type is unknown. + +# 1. Typed proxy (recommended) proxy = await Counter.resolve("counter") result = await proxy.incr() +# 2. Typed proxy — manual bind +ref = await pul.resolve("counter", timeout=30) +proxy = ref.as_type(Counter) +result = await proxy.incr() + +# 3. Untyped proxy — when remote type is unknown +ref = await pul.resolve("service_name") +proxy = ref.as_any() +result = await proxy.any_method(args) + ``` ### Ray Integration `pul.mount` registers any Python object as a Pulsing actor, enabling tight integration between Ray actors and Pulsing. +**Running Pulsing in a Ray cluster:** Every process (driver and workers) must initialize Pulsing. Use `pulsing.ray.init_in_ray()` and pass it in `ray.init(runtime_env={"worker_process_setup_hook": init_in_ray})` so workers call it on startup; the driver must call `init_in_ray()` once in code. See the `pulsing.ray` module for details. + ```python import pulsing as pul @@ -200,6 +108,9 @@ pul.mount( # Unmount (call when actor is destroyed) pul.unmount(name: str) -> None + +# Cleanup Pulsing state in Ray environment (call before ray.shutdown()) +pul.cleanup_ray() -> None ``` Example: Ray handles process scheduling, Pulsing handles inter-actor communication. @@ -212,7 +123,7 @@ class Worker: pul.mount(self, name=name) # One line to join Pulsing async def call_peer(self, peer_name, msg): - proxy = (await pul.resolve(peer_name, timeout=30)).as_type(Worker) + proxy = (await pul.resolve(peer_name, timeout=30)).as_any() return await proxy.greet(msg) # Cross-process Pulsing call async def greet(self, msg): @@ -224,54 +135,82 @@ ray.get(workers[0].call_peer.remote("w1", "hi")) # => "hello from w1: hi" pul.cleanup_ray() ``` -### Ray-Compatible API +### Under the Hood: Actor System & Low-level APIs + +The global API is backed by an `ActorSystem` instance. You can create one explicitly when you need multiple systems or finer control. The low-level `spawn`/`refer`/`resolve` APIs operate on `ActorRef` (not typed proxy) and require actors to implement a `receive(self, msg)` method. ```python -from pulsing.compat import ray +import pulsing as pul + +# ── Explicit ActorSystem ── -# Initialize (sync interface, async internally) -ray.init( - address: str | None = None, +system = await pul.actor_system( + addr: str | None = None, *, - ignore_reinit_error: bool = False, - **kwargs -) -> None + seeds: list[str] | None = None, + passphrase: str | None = None +) -> ActorSystem -# Shutdown -ray.shutdown() -> None +await system.shutdown() -# Check initialization status -ray.is_initialized() -> bool +# ── Low-level spawn (actor must have receive method) ── -# Decorator: convert class to Actor -@ray.remote -class MyActor: - def __init__(self, ...): ... - def method(self, ...): ... +actorref = await pul.spawn( # global system + actor: Actor, + *, + name: str | None = None, + public: bool = False, + restart_policy: str = "never", + max_restarts: int = 3, + min_backoff: float = 0.1, + max_backoff: float = 30.0 +) -> ActorRef -# Create Actor (sync interface) -actor_handle = MyActor.remote(...) -> _ActorHandle +actorref = await system.spawn( # explicit system, same signature + actor: Actor, ... +) -> ActorRef -# Call method (returns ObjectRef) -result_ref = actor_handle.method.remote(...) -> ObjectRef +# ── Low-level resolve / refer ── -# Get result (sync, supports single or list) -result = ray.get( - refs: ObjectRef | list[ObjectRef], +actorref = await pul.refer(actorid: ActorId | str) -> ActorRef +actorref = await pul.resolve(name: str, *, node_id=None, timeout=None) -> ActorRef +actorref = await system.resolve(name: str, *, node_id=None) -> ActorRef + +# ── ActorRef message passing ── + +response = await actorref.ask(request: Any) -> Any +await actorref.tell(msg: Any) -> None + +# ── @pul.remote with explicit system ── + +counter = await Counter.local(system, name="counter") # spawn on explicit system +result = await counter.incr() + +# Queue API (on system) +writer = await system.queue.write( + topic: str, *, - timeout: float | None = None -) -> Any | list[Any] + bucket_column: str = "id", + num_buckets: int = 4, + batch_size: int = 100, + storage_path: str | None = None, + backend: str = "memory", +) -> QueueWriter -# Wrap value as ObjectRef (for API compatibility) -ref = ray.put(value: Any) -> ObjectRef +await writer.put(record: dict | list[dict]) -> None +await writer.flush() -> None -# Wait for multiple ObjectRefs -ready, remaining = ray.wait( - refs: list[ObjectRef], +reader = await system.queue.read( + topic: str, *, - num_returns: int = 1, - timeout: float | None = None -) -> tuple[list[ObjectRef], list[ObjectRef]] + bucket_id: int | None = None, + bucket_ids: list[int] | None = None, + rank: int | None = None, + world_size: int | None = None, + num_buckets: int = 4, +) -> QueueReader + +records = await reader.get(limit: int = 100, wait: bool = False) -> list[dict] ``` ### Actor Behavior diff --git a/python/pulsing/__init__.py b/python/pulsing/__init__.py index b21b9b155..637e6af48 100644 --- a/python/pulsing/__init__.py +++ b/python/pulsing/__init__.py @@ -77,7 +77,6 @@ def incr(self): self.value += 1; return self.value Actor, ActorSystem as _ActorSystem, ActorRef, - ActorRefView, ActorId, ActorProxy, Message, @@ -314,7 +313,6 @@ async def refer(actorid: ActorId | str) -> ActorRef: "Actor", "ActorSystem", "ActorRef", - "ActorRefView", "ActorId", "ActorProxy", "Message", diff --git a/python/pulsing/actor/__init__.py b/python/pulsing/actor/__init__.py index 7405373fe..5bc76c682 100644 --- a/python/pulsing/actor/__init__.py +++ b/python/pulsing/actor/__init__.py @@ -190,7 +190,6 @@ async def tell_with_timeout( PYTHON_ACTOR_SERVICE_NAME, ActorClass, ActorProxy, - ActorRefView, PythonActorService, PythonActorServiceProxy, SystemActorProxy, @@ -236,7 +235,6 @@ async def tell_with_timeout( "SystemConfig", "ActorSystem", "ActorRef", - "ActorRefView", "ActorId", "ActorProxy", "as_any", diff --git a/python/pulsing/actor/remote.py b/python/pulsing/actor/remote.py index 0047a9742..121989f1c 100644 --- a/python/pulsing/actor/remote.py +++ b/python/pulsing/actor/remote.py @@ -304,14 +304,17 @@ def get_actor_metadata(name: str) -> dict[str, str] | None: def _extract_methods(cls: type) -> tuple[list[str], set[str]]: """Extract public method names and async method set from a class. - Handles Ray-wrapped classes by unwrapping to the original class first. + Handles @pul.remote ActorClass and Ray-wrapped classes by unwrapping first. """ + # If it's an ActorClass (@pul.remote decorated), extract the original class + if isinstance(cls, ActorClass): + cls = cls._cls + # 如果是 Ray ActorClass,提取原始类 try: from ray.actor import ActorClass as RayActorClass if isinstance(cls, RayActorClass): - # Ray ActorClass 的 __ray_metadata__ 有原始类引用 if hasattr(cls, "__ray_metadata__"): meta = cls.__ray_metadata__ if hasattr(meta, "modified_class"): @@ -330,42 +333,6 @@ def _extract_methods(cls: type) -> tuple[list[str], set[str]]: return methods, async_methods -class ActorRefView: - """Wrapper around ActorRef that adds .as_any() / .as_type() for proxy generation. - - Returned by resolve(name). Delegates .ask(), .tell(), and other - ActorRef attributes to the underlying ref. - """ - - __slots__ = ("_ref",) - - def __init__(self, ref: ActorRef): - self._ref = ref - - def as_any(self) -> "ActorProxy": - """Return an untyped proxy that forwards any method call to the remote actor.""" - return ActorProxy(self._ref, method_names=None, async_methods=None) - - def as_type(self, cls: type) -> "ActorProxy": - """Return a typed proxy based on the given class definition. - - Inspects ``cls`` for public methods and generates a proxy with - method name validation and correct sync/async detection. - Type info comes from the local class definition, not from the network. - - Example:: - - ref = await pul.resolve("counter", timeout=30) - proxy = ref.as_type(Counter) - await proxy.incr() - """ - methods, async_methods = _extract_methods(cls) - return ActorProxy(self._ref, methods, async_methods) - - def __getattr__(self, name: str): - return getattr(self._ref, name) - - PYTHON_ACTOR_SERVICE_NAME = "system/python_actor_service" @@ -689,6 +656,11 @@ def on_stop(self) -> None: if hasattr(self._instance, "on_stop"): self._instance.on_stop() + def metadata(self) -> dict[str, str]: + if hasattr(self._instance, "metadata") and callable(self._instance.metadata): + return self._instance.metadata() + return {} + async def receive(self, msg) -> Any: # Handle dict-based call format (supporting both v1 and v2) if isinstance(msg, dict): @@ -1259,6 +1231,7 @@ async def resolve( *, system: ActorSystem | None = None, node_id: int | None = None, + timeout: float | None = None, ) -> ActorProxy: """Resolve actor by name, return typed ActorProxy @@ -1266,6 +1239,8 @@ async def resolve( name: Actor name system: ActorSystem instance, uses global system if not provided node_id: Target node ID, searches in cluster if not provided + timeout: Seconds to wait for the name to appear (gossip convergence). + None means no wait (error immediately if not found). Returns: ActorProxy: Proxy with method type information @@ -1298,7 +1273,7 @@ async def generate(self, prompt): ... # async method, streaming response ) system = _global_system - actor_ref = await system.resolve_named(name, node_id=node_id) + actor_ref = await system.resolve_named(name, node_id=node_id, timeout=timeout) return ActorProxy(actor_ref, self._methods, self._async_methods) @@ -1569,19 +1544,20 @@ async def resolve( ): """Resolve a named actor by name. - Returns an object that supports .ask(), .tell(), and .as_any(). + Returns an ActorRef that supports .ask(), .tell(), .as_any(), and .as_type(). Use .as_any() to get an untyped proxy that forwards any method call. + Use .as_type(Counter) to get a typed proxy with method validation. For typed ActorProxy with method calls, use Counter.resolve(name) instead. Args: name: Actor name node_id: Target node ID, searches in cluster if not provided - timeout: 等待名字出现的超时秒数。None 表示不等待(找不到立刻报错)。 - 设置后内部在 Rust 层重试,等待 gossip 收敛。 + timeout: Seconds to wait for the name to appear (gossip convergence). + None means no wait (error immediately if not found). Returns: - ActorRefView: Ref-like object with .as_any() for untyped proxy. + ActorRef: Actor reference with .as_any() / .as_type() for proxy generation. Example: from pulsing.actor import init, remote, resolve @@ -1606,29 +1582,26 @@ async def resolve( raise RuntimeError("Actor system not initialized. Call 'await init()' first.") try: - ref = await _global_system.resolve(name, node_id=node_id, timeout=timeout) - return ActorRefView(ref) + return await _global_system.resolve(name, node_id=node_id, timeout=timeout) except RuntimeError as e: raise _convert_rust_error(e) from e -def as_any(ref: ActorRef | ActorRefView) -> ActorProxy: +def as_any(ref: ActorRef) -> ActorProxy: """Return an untyped proxy that forwards any method call to the remote actor. - Use when you have an ActorRef (or ref from resolve()) and want to call - methods by name without the typed class. + Use when you have an ActorRef and want to call methods by name + without the typed class. Args: - ref: ActorRef from resolve(name), or raw ActorRef from system.resolve_named(). + ref: ActorRef from resolve(name). Example: ref = await resolve("channel.discord") proxy = as_any(ref) # or proxy = ref.as_any() await proxy.send_text(chat_id, content) """ - if isinstance(ref, ActorRefView): - return ref.as_any() - return ActorProxy(ref, method_names=None, async_methods=None) + return ref.as_any() def mount(instance: Any, *, name: str, public: bool = True) -> None: diff --git a/tests/python/apis/actor/test_actor_behavior.py b/tests/python/apis/actor/test_actor_behavior.py index 88c115942..52b2aed05 100644 --- a/tests/python/apis/actor/test_actor_behavior.py +++ b/tests/python/apis/actor/test_actor_behavior.py @@ -417,3 +417,67 @@ async def test_base_actor_async_generator_stream(system): items.append(response) assert len(items) >= 1 # At least one item + + +# ============================================================================ +# Test: on_stop lifecycle hook +# ============================================================================ + + +@pytest.mark.asyncio +async def test_actor_on_stop(): + """Test on_stop lifecycle hook is called when actor system shuts down.""" + # Use a separate system so shutdown doesn't affect other tests + sys = await pul.actor_system() + + LifecycleActor.stopped = False + await sys.spawn(LifecycleActor(), name="on_stop_actor") + await asyncio.sleep(0.1) + + await sys.shutdown() + await asyncio.sleep(0.1) + + assert LifecycleActor.stopped is True + + +# ============================================================================ +# Test: @pul.remote metadata() delegation via _WrappedActor +# ============================================================================ + + +@pul.remote +class _MetadataService: + """Remote service with custom metadata.""" + + def metadata(self) -> dict[str, str]: + return {"service": "metadata_test", "version": "2.0"} + + def ping(self): + return "pong" + + +@pytest.mark.asyncio +async def test_remote_metadata_delegation(): + """_WrappedActor delegates metadata() to user instance.""" + from pulsing.actor.remote import _WrappedActor + + # Create raw instance and wrap it + instance = object.__new__(_MetadataService._cls) + instance.__init__() + wrapped = _WrappedActor(instance) + meta = wrapped.metadata() + assert meta == {"service": "metadata_test", "version": "2.0"} + + +@pytest.mark.asyncio +async def test_remote_metadata_delegation_no_metadata(): + """_WrappedActor returns empty dict when user instance has no metadata().""" + from pulsing.actor.remote import _WrappedActor + + class _NoMeta: + def ping(self): + return "pong" + + wrapped = _WrappedActor(_NoMeta()) + meta = wrapped.metadata() + assert meta == {} diff --git a/tests/python/test_resolve_as_any.py b/tests/python/test_resolve_as_any.py index 858ae4d3b..a7dfb891d 100644 --- a/tests/python/test_resolve_as_any.py +++ b/tests/python/test_resolve_as_any.py @@ -1,9 +1,10 @@ """ -Tests for resolve().as_any() and as_any(ref): untyped proxy that forwards any method call. +Tests for resolve().as_any() / .as_type() and as_any(ref): proxy generation on ActorRef. Covers: -- resolve(name) returns an object with .as_any() -- ref.as_any() returns a proxy; await proxy.method(...) works without knowing the actor type +- resolve(name) returns ActorRef with .as_any() and .as_type() +- ref.as_any() returns an untyped proxy +- ref.as_type(cls) returns a typed proxy - as_any(ref) function works with ref from resolve() or raw ActorRef - typed_proxy.as_any() returns an any proxy with the same underlying ref - ref.ask() / ref.tell() still work (backward compatibility) @@ -14,7 +15,7 @@ import pytest import pulsing as pul -from pulsing.actor import Actor, ActorRefView, as_any, remote +from pulsing.actor import Actor, ActorRef, as_any, remote # ============================================================================ @@ -55,12 +56,14 @@ async def test_resolve_returns_ref_view_with_as_any(initialized_pul): @pytest.mark.asyncio -async def test_resolve_returns_actor_ref_view(initialized_pul): - """resolve(name) returns ActorRefView (or equivalent with .as_any()).""" +async def test_resolve_returns_actor_ref(initialized_pul): + """resolve(name) returns ActorRef with .as_any() and .as_type().""" await pul.spawn(_EchoActor(), name="ref_view_echo", public=True) ref = await pul.resolve("ref_view_echo") - assert isinstance(ref, ActorRefView) + assert isinstance(ref, ActorRef) + assert hasattr(ref, "as_any") + assert hasattr(ref, "as_type") # ============================================================================ @@ -204,7 +207,7 @@ async def test_typed_proxy_as_any(initialized_pul): @pytest.mark.asyncio async def test_resolve_ref_ask_still_works(initialized_pul): - """After resolve(), ref.ask(msg) still works (ActorRefView delegates to _ref).""" + """After resolve(), ref.ask(msg) still works.""" await pul.spawn(_EchoActor(), name="compat_ask_echo", public=True) ref = await pul.resolve("compat_ask_echo") @@ -234,3 +237,75 @@ async def receive(self, msg): await asyncio.sleep(0.05) result = await ref.ask("get") assert result == 3 + + +# ============================================================================ +# Test: ref.as_type(cls) — typed proxy from ActorRef +# ============================================================================ + + +@pytest.mark.asyncio +async def test_as_type_on_actor_ref(initialized_pul): + """ref.as_type(cls) returns a typed proxy with method validation.""" + await _ServiceWithMethods.spawn(name="as_type_svc", public=True) + + ref = await pul.resolve("as_type_svc") + proxy = ref.as_type(_ServiceWithMethods) + + result = await proxy.get_value() + assert result == 0 + + result = await proxy.set_value(99) + assert result == 99 + + result = await proxy.get_value() + assert result == 99 + + +@pytest.mark.asyncio +async def test_as_type_rejects_invalid_method(initialized_pul): + """Typed proxy from as_type() rejects methods not on the class.""" + await _ServiceWithMethods.spawn(name="as_type_reject_svc", public=True) + + ref = await pul.resolve("as_type_reject_svc") + proxy = ref.as_type(_ServiceWithMethods) + + with pytest.raises(AttributeError, match="No method"): + proxy.nonexistent_method # Access triggers __getattr__ validation + + +@pytest.mark.asyncio +async def test_as_type_async_method(initialized_pul): + """as_type() proxy correctly handles async methods.""" + await _ServiceWithMethods.spawn(name="as_type_async_svc", public=True) + + ref = await pul.resolve("as_type_async_svc") + proxy = ref.as_type(_ServiceWithMethods) + + result = await proxy.async_incr() + assert result == 1 + result = await proxy.async_incr() + assert result == 2 + + +# ============================================================================ +# Test: Counter.resolve(name, timeout=...) +# ============================================================================ + + +@pytest.mark.asyncio +async def test_counter_resolve_with_timeout(initialized_pul): + """Counter.resolve(name, timeout=...) passes timeout to underlying resolve.""" + await _ServiceWithMethods.spawn(name="timeout_svc", public=True) + + # Should succeed with timeout (actor already exists) + proxy = await _ServiceWithMethods.resolve("timeout_svc", timeout=5) + result = await proxy.get_value() + assert result == 0 + + +@pytest.mark.asyncio +async def test_counter_resolve_timeout_not_found(initialized_pul): + """Counter.resolve(name, timeout=...) raises after timeout if not found.""" + with pytest.raises(RuntimeError): + await _ServiceWithMethods.resolve("nonexistent_actor", timeout=0.3) From 5f2294069c31a526ec3506453dc3f12ba824c15e Mon Sep 17 00:00:00 2001 From: Reiase Date: Sun, 15 Feb 2026 19:39:01 +0800 Subject: [PATCH 03/15] Update documentation and API references in mkdocs - Added "Complete Reference" to the navigation structure in mkdocs for better accessibility to API documentation. - Updated the API reference section to include new global async API examples, emphasizing the use of `await pul.init()` and `await pul.shutdown()`. - Enhanced clarity in the documentation by refining the descriptions and examples related to actor management and communication. - Improved consistency in naming conventions across English and Chinese documentation to facilitate user understanding. --- docs/mkdocs.yml | 3 +- docs/src/agent/index.md | 8 +++-- docs/src/agent/index.zh.md | 8 +++-- docs/src/agent/native.md | 2 +- docs/src/agent/native.zh.md | 2 +- docs/src/api/overview.md | 39 ++++++++++++------------ docs/src/api/overview.zh.md | 39 ++++++++++++------------ docs/src/api/python.md | 4 --- docs/src/api/python.zh.md | 4 --- docs/src/api_reference.md | 57 +++++++---------------------------- docs/src/api_reference.zh.md | 58 +++++++----------------------------- docs/src/guide/actors.md | 8 ++--- docs/src/guide/actors.zh.md | 8 ++--- docs/src/guide/index.md | 3 +- docs/src/guide/index.zh.md | 3 +- docs/src/index.md | 5 ++-- docs/src/index.zh.md | 5 ++-- 17 files changed, 92 insertions(+), 164 deletions(-) diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index d46a09fc5..c96b0e9a7 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -82,10 +82,10 @@ plugins: build: true site_name: "Pulsing 文档" nav_translations: - Home: 首页 Home: 首页 Getting Started: 开始使用 Overview: 概述 + Complete Reference: 完整参考 LLM Inference: LLM 推理 Distributed Agents: 分布式 Agent Migrate from Ray: 从 Ray 迁移 @@ -163,6 +163,7 @@ nav: - Distributed Counter: examples/distributed_counter.md - API Reference: - Overview: api/overview.md + - Complete Reference: api_reference.md - Python: api/python.md - Rust: api/rust.md - FAQ: faq.md diff --git a/docs/src/agent/index.md b/docs/src/agent/index.md index 70384f545..d4d335ebb 100644 --- a/docs/src/agent/index.md +++ b/docs/src/agent/index.md @@ -23,7 +23,8 @@ Pulsing provides native support for popular agent frameworks, enabling seamless For building multi-agent applications from scratch, use Pulsing's native `@agent` decorator: ```python -from pulsing.agent import agent, runtime, llm, list_agents +import pulsing as pul +from pulsing.agent import agent, llm, list_agents @agent(role="Researcher", goal="Deep analysis") class Researcher: @@ -31,13 +32,16 @@ class Researcher: client = await llm() return await client.ainvoke(f"Analyze: {topic}") -async with runtime(): +await pul.init() +try: r = await Researcher.spawn(name="researcher") result = await r.analyze("AI trends") # Access metadata for visualization for name, meta in list_agents().items(): print(f"{name}: {meta.role}") +finally: + await pul.shutdown() ``` **Key features:** diff --git a/docs/src/agent/index.zh.md b/docs/src/agent/index.zh.md index 8f5fb27c7..6739ad82f 100644 --- a/docs/src/agent/index.zh.md +++ b/docs/src/agent/index.zh.md @@ -23,7 +23,8 @@ Pulsing 原生支持主流 Agent 框架,让您的应用轻松从单进程扩 从零构建多智能体应用时,使用 Pulsing 原生的 `@agent` 装饰器: ```python -from pulsing.agent import agent, runtime, llm, list_agents +import pulsing as pul +from pulsing.agent import agent, llm, list_agents @agent(role="研究员", goal="深入分析") class Researcher: @@ -31,13 +32,16 @@ class Researcher: client = await llm() return await client.ainvoke(f"分析: {topic}") -async with runtime(): +await pul.init() +try: r = await Researcher.spawn(name="researcher") result = await r.analyze("AI 趋势") # 访问元信息用于可视化 for name, meta in list_agents().items(): print(f"{name}: {meta.role}") +finally: + await pul.shutdown() ``` **核心特点:** diff --git a/docs/src/agent/native.md b/docs/src/agent/native.md index faade31d5..fc49091d3 100644 --- a/docs/src/agent/native.md +++ b/docs/src/agent/native.md @@ -78,7 +78,7 @@ finally: await pul.shutdown() ``` -`runtime()` is still available as a convenience context manager, but the primary recommended style is explicit `await pul.init()` / `await pul.shutdown()`. +`runtime()` is still available as a convenience context manager. This page uses explicit `await pul.init()` / `await pul.shutdown()`. ### Distributed Mode diff --git a/docs/src/agent/native.zh.md b/docs/src/agent/native.zh.md index 655b7b5fb..5abb8db64 100644 --- a/docs/src/agent/native.zh.md +++ b/docs/src/agent/native.zh.md @@ -78,7 +78,7 @@ finally: await pul.shutdown() ``` -`runtime()` 仍可作为便捷 context manager 使用,但推荐主路径是显式 `await pul.init()` / `await pul.shutdown()`。 +`runtime()` 仍可作为便捷 context manager 使用。本文统一使用显式 `await pul.init()` / `await pul.shutdown()`。 ### 分布式模式 diff --git a/docs/src/api/overview.md b/docs/src/api/overview.md index 6816b30dc..f3cb7f861 100644 --- a/docs/src/api/overview.md +++ b/docs/src/api/overview.md @@ -19,46 +19,47 @@ Pulsing is built around the [Actor Model](https://en.wikipedia.org/wiki/Actor_mo - **Streaming Support**: Native support for streaming requests/responses - **Multi-Language**: Python-first with Rust core, extensible to other languages -## API Styles +## Python API -### Python APIs - -Pulsing provides multiple API styles to fit different use cases: - -#### 1. Actor System Style (Explicit Management) +### Global Async API ```python import pulsing as pul -# Create and manage actor system explicitly -system = await pul.actor_system(addr="0.0.0.0:8000") +await pul.init(addr="0.0.0.0:8000") -# Spawn actors -actor = await system.spawn(MyActor(), name="my_actor") +@pul.remote +class MyActor: + def process(self, data): + return f"Processed: {data}" -# Communicate -response = await actor.ask({"message": "hello"}) +actor = await MyActor.spawn(name="my_actor") +response = await actor.process("hello") # Shutdown -await system.shutdown() +await pul.shutdown() ``` -#### 2. Ray-Style Global API (Convenience) +### Under the Hood: Actor System API (Explicit Management) ```python import pulsing as pul -# Initialize global system -await pul.init(addr="0.0.0.0:8000") +# Create and manage actor system explicitly +system = await pul.actor_system(addr="0.0.0.0:8000") -# Spawn actors using global system -actor = await pul.spawn(MyActor(), name="my_actor") +class MyActor: + async def receive(self, msg): + return f"echo: {msg}" + +# Spawn actor object directly (low-level) +actor = await system.spawn(MyActor(), name="my_actor") # Communicate response = await actor.ask({"message": "hello"}) # Shutdown -await pul.shutdown() +await system.shutdown() ``` ### Actor Patterns diff --git a/docs/src/api/overview.zh.md b/docs/src/api/overview.zh.md index e6c943588..1ded01150 100644 --- a/docs/src/api/overview.zh.md +++ b/docs/src/api/overview.zh.md @@ -19,46 +19,47 @@ Pulsing 基于[Actor 模型](https://en.wikipedia.org/wiki/Actor_model)构建, - **流式支持**:原生支持流式请求/响应 - **多语言**:Python 优先,Rust 核心,可扩展到其他语言 -## API 风格 +## Python API -### Python API - -Pulsing 提供多种 API 风格来适应不同用例: - -#### 1. Actor System 风格(显式管理) +### 全局异步 API ```python import pulsing as pul -# 显式创建和管理 actor 系统 -system = await pul.actor_system(addr="0.0.0.0:8000") +await pul.init(addr="0.0.0.0:8000") -# 生成 actor -actor = await system.spawn(MyActor(), name="my_actor") +@pul.remote +class MyActor: + def process(self, data): + return f"Processed: {data}" -# 通信 -response = await actor.ask({"message": "hello"}) +actor = await MyActor.spawn(name="my_actor") +response = await actor.process("hello") # 关闭 -await system.shutdown() +await pul.shutdown() ``` -#### 2. Ray 风格全局 API(便捷) +### Under the Hood:Actor System API(显式管理) ```python import pulsing as pul -# 初始化全局系统 -await pul.init(addr="0.0.0.0:8000") +# 显式创建和管理 actor 系统 +system = await pul.actor_system(addr="0.0.0.0:8000") -# 使用全局系统生成 actor -actor = await pul.spawn(MyActor(), name="my_actor") +class MyActor: + async def receive(self, msg): + return f"echo: {msg}" + +# 直接生成 actor 对象(低层) +actor = await system.spawn(MyActor(), name="my_actor") # 通信 response = await actor.ask({"message": "hello"}) # 关闭 -await pul.shutdown() +await system.shutdown() ``` ### Actor 模式 diff --git a/docs/src/api/python.md b/docs/src/api/python.md index 222d2480d..190c123ba 100644 --- a/docs/src/api/python.md +++ b/docs/src/api/python.md @@ -30,10 +30,6 @@ pip install -e . ::: pulsing.agent -## Compatibility Module - -::: pulsing.compat - ## Queue Module ::: pulsing.queue diff --git a/docs/src/api/python.zh.md b/docs/src/api/python.zh.md index 92a86aa1b..1a46bcac3 100644 --- a/docs/src/api/python.zh.md +++ b/docs/src/api/python.zh.md @@ -30,10 +30,6 @@ pip install -e . ::: pulsing.agent -## 兼容性模块 - -::: pulsing.compat - ## 队列模块 ::: pulsing.queue diff --git a/docs/src/api_reference.md b/docs/src/api_reference.md index 78226dcb2..7f8f87876 100644 --- a/docs/src/api_reference.md +++ b/docs/src/api_reference.md @@ -120,22 +120,17 @@ Note: Error type information is preserved for both local and remote calls. Remot ## Core Functions -### pul.actor_system +### pul.init / pul.shutdown -Create a new Actor System instance. +Global system initialization. ```python import asyncio import pulsing as pul async def example(): - system = await pul.actor_system( - addr=None, # Bind address, None for standalone - # Keyword-only arguments follow - seeds=None, # Seed nodes for cluster (list[str] or None) - passphrase=None, # TLS passphrase (str or None) - ) - return system + await pul.init(addr=None, seeds=None, passphrase=None) + await pul.shutdown() # Usage example if __name__ == "__main__": @@ -144,9 +139,14 @@ if __name__ == "__main__": pass ``` -**Returns:** `ActorSystem` instance +**Parameters:** +- `addr`: Bind address (str or None for standalone) +- `seeds`: Seed nodes to join cluster (list[str] or None) +- `passphrase`: TLS passphrase (str or None) + +### Under the Hood: pul.actor_system -**Example:** +Create a new explicit `ActorSystem` instance when you need low-level control. ```python import asyncio @@ -172,41 +172,6 @@ if __name__ == "__main__": asyncio.run(main()) ``` -### pul.init / pul.shutdown - -Global system initialization (recommended primary API). - -```python -import asyncio -import pulsing as pul - -async def main(): - # Initialize global system - await pul.init(addr=None, seeds=None, passphrase=None) - - # Use global system - @pul.remote - class Counter: - def __init__(self): self.value = 0 - def incr(self): self.value += 1; return self.value - - counter = await Counter.spawn(name="counter") - ref = await pul.resolve("counter") - proxy = ref.as_type(Counter) - await proxy.incr() - - # Shutdown - await pul.shutdown() - -if __name__ == "__main__": - asyncio.run(main()) -``` - -**Parameters:** -- `addr`: Bind address (str or None for standalone) -- `seeds`: Seed nodes to join cluster (list[str] or None) -- `passphrase`: TLS passphrase (str or None) - ## Core Classes ### ActorSystem diff --git a/docs/src/api_reference.zh.md b/docs/src/api_reference.zh.md index 2ce23112b..9bcb6b5f2 100644 --- a/docs/src/api_reference.zh.md +++ b/docs/src/api_reference.zh.md @@ -120,23 +120,17 @@ except PulsingRuntimeError as e: ## 核心函数 -### pul.actor_system +### pul.init / pul.shutdown -创建新的 Actor System 实例。 +全局系统初始化。 ```python import asyncio import pulsing as pul async def example(): - # 函数签名: actor_system(addr=None, *, seeds=None, passphrase=None) -> ActorSystem - system = await pul.actor_system( - addr=None, # 绑定地址(str 或 None 表示单机模式) -# # 关键字参数开始 # 关键字参数开始 - seeds=None, # 集群种子节点(list[str] 或 None) - passphrase=None, # TLS 密码短语(str 或 None) - ) - return system + await pul.init(addr=None, seeds=None, passphrase=None) + await pul.shutdown() # 使用示例 if __name__ == "__main__": @@ -145,9 +139,14 @@ if __name__ == "__main__": pass ``` -**返回:** `ActorSystem` 实例 +**参数:** +- `addr`: 绑定地址(str 或 None 表示单机模式) +- `seeds`: 加入集群的种子节点(list[str] 或 None) +- `passphrase`: TLS 密码短语(str 或 None) -**示例:** +### Under the Hood:pul.actor_system + +当需要低层控制时,创建显式 `ActorSystem` 实例。 ```python import asyncio @@ -173,41 +172,6 @@ if __name__ == "__main__": asyncio.run(main()) ``` -### pul.init / pul.shutdown - -全局系统初始化(推荐主 API)。 - -```python -import asyncio -import pulsing as pul - -async def main(): - # 初始化全局系统 - await pul.init(addr=None, seeds=None, passphrase=None) - - # 使用全局系统 - @pul.remote - class Counter: - def __init__(self): self.value = 0 - def incr(self): self.value += 1; return self.value - - counter = await Counter.spawn(name="counter") - ref = await pul.resolve("counter") - proxy = ref.as_type(Counter) - await proxy.incr() - - # 关闭 - await pul.shutdown() - -if __name__ == "__main__": - asyncio.run(main()) -``` - -**参数:** -- `addr`: 绑定地址(str 或 None 表示单机模式) -- `seeds`: 加入集群的种子节点(list[str] 或 None) -- `passphrase`: TLS 密码短语(str 或 None) - ## 核心类 ### ActorSystem diff --git a/docs/src/guide/actors.md b/docs/src/guide/actors.md index 0dda9b5a6..f0072daa9 100644 --- a/docs/src/guide/actors.md +++ b/docs/src/guide/actors.md @@ -57,13 +57,9 @@ Pulsing follows the **classical Actor model** (like Erlang/Akka): --- -## Two API Styles +## Python API -| API | Import | Style | Best For | -|-----|--------|-------|----------| -| **Native Async** | `import pulsing as pul` | `async/await` | New projects, maximum performance | - -### Native Async API (Recommended) +### Global Async API ```python import pulsing as pul diff --git a/docs/src/guide/actors.zh.md b/docs/src/guide/actors.zh.md index 62ad1434e..166b9c5ad 100644 --- a/docs/src/guide/actors.zh.md +++ b/docs/src/guide/actors.zh.md @@ -57,13 +57,9 @@ Pulsing 遵循**经典 Actor 模型**(类似 Erlang/Akka): --- -## 两种 API 风格 +## Python API -| API | 导入方式 | 风格 | 适用场景 | -|-----|---------|------|----------| -| **原生异步** | `import pulsing as pul` | `async/await` | 新项目,追求极致性能 | - -### 原生异步 API(推荐) +### 全局异步 API ```python import pulsing as pul diff --git a/docs/src/guide/index.md b/docs/src/guide/index.md index fd0883b2d..7810a901f 100644 --- a/docs/src/guide/index.md +++ b/docs/src/guide/index.md @@ -49,4 +49,5 @@ This guide covers **how to build** with Pulsing. For design rationale, see [Desi | Reliability patterns | [Reliability](reliability.md) | | Secure your cluster | [Security](security.md) | | Run LLM inference | [LLM Inference](../examples/llm_inference.md) | -| API details | [API Reference](../api_reference.md) | +| API details | [API Overview](../api/overview.md) | +| Full API contract | [Complete Reference](../api_reference.md) | diff --git a/docs/src/guide/index.zh.md b/docs/src/guide/index.zh.md index eb4cda1a4..0f253cc90 100644 --- a/docs/src/guide/index.zh.md +++ b/docs/src/guide/index.zh.md @@ -49,4 +49,5 @@ | 可靠性模式 | [可靠性](reliability.zh.md) | | 保护集群安全 | [安全](security.zh.md) | | 运行 LLM 推理 | [LLM 推理](../examples/llm_inference.zh.md) | -| API 详情 | [API 参考](../api_reference.md) | +| API 详情 | [API 概述](../api/overview.zh.md) | +| 完整 API 契约 | [完整参考](../api_reference.zh.md) | diff --git a/docs/src/index.md b/docs/src/index.md index 499196d24..c23d94925 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -67,7 +67,7 @@ A **distributed actor framework** that provides a communication backbone for bui --- - Drop-in compatible API. Migrate from Ray with one import change. + Migrate Ray actors to Pulsing's native async API. [:octicons-arrow-right-24: Migrate from Ray](quickstart/migrate_from_ray.md) @@ -116,7 +116,8 @@ asyncio.run(main()) | Build a cluster | [Guide: Remote Actors](guide/remote_actors.md) | | Operate your system | [Guide: CLI Operations](guide/operations.md) | | Deep dive into design | [Design Documents](design/architecture.md) | -| API details | [API Reference](api/overview.md) | +| API details | [API Overview](api/overview.md) | +| Full API contract | [Complete Reference](api_reference.md) | --- diff --git a/docs/src/index.zh.md b/docs/src/index.zh.md index a23275b83..305a5864a 100644 --- a/docs/src/index.zh.md +++ b/docs/src/index.zh.md @@ -67,7 +67,7 @@ hide: toc --- - 兼容 API,一行导入即可从 Ray 迁移。 + 将 Ray Actor 迁移到 Pulsing 原生异步 API。 [:octicons-arrow-right-24: 从 Ray 迁移](quickstart/migrate_from_ray.zh.md) @@ -116,7 +116,8 @@ asyncio.run(main()) | 构建集群 | [指南:远程 Actor](guide/remote_actors.zh.md) | | 运维系统 | [指南:CLI 操作](guide/operations.zh.md) | | 深入设计 | [设计文档](design/architecture.md) | -| API 详情 | [API 参考](api_reference.md) | +| API 详情 | [API 概述](api/overview.zh.md) | +| 完整 API 契约 | [完整参考](api_reference.zh.md) | --- From c6645fced7b9848cc6dc15e640141d21cd7e8a46 Mon Sep 17 00:00:00 2001 From: Reiase Date: Sun, 15 Feb 2026 19:55:18 +0800 Subject: [PATCH 04/15] Refactor Queue and Topic APIs in documentation and examples - Updated the documentation to reflect the new Queue and Topic APIs, emphasizing the use of `pul.queue` and `pul.topic` for data handling. - Simplified examples across various documentation files to demonstrate the new API structure, enhancing clarity for users. - Removed deprecated references to `system.queue` and `system.topic`, ensuring consistency in the API usage. - Improved the organization of the queue and topic sections in the guides and API references for better accessibility and understanding. --- docs/src/api/overview.md | 204 ++++++++++++------------ docs/src/api/overview.zh.md | 221 ++++++++++++-------------- docs/src/api_reference.md | 6 +- docs/src/api_reference.zh.md | 6 +- docs/src/guide/queue.md | 28 ++-- docs/src/guide/queue.zh.md | 28 ++-- examples/python/distributed_queue.py | 7 +- examples/python/sync_queue_example.py | 5 +- llms.binding.md | 91 ++++++++--- python/pulsing/__init__.py | 83 +++++----- python/pulsing/topic/__init__.py | 71 ++++++++- 11 files changed, 419 insertions(+), 331 deletions(-) diff --git a/docs/src/api/overview.md b/docs/src/api/overview.md index f3cb7f861..3a6f2bf3b 100644 --- a/docs/src/api/overview.md +++ b/docs/src/api/overview.md @@ -4,8 +4,6 @@ Pulsing is a distributed actor framework that provides a communication backbone ## Core Concepts -### Actor System - Pulsing is built around the [Actor Model](https://en.wikipedia.org/wiki/Actor_model), where actors are the fundamental units of computation. Actors communicate via asynchronous message passing, providing: - **Location Transparency**: Same API for local and remote actors @@ -19,112 +17,94 @@ Pulsing is built around the [Actor Model](https://en.wikipedia.org/wiki/Actor_mo - **Streaming Support**: Native support for streaming requests/responses - **Multi-Language**: Python-first with Rust core, extensible to other languages -## Python API - -### Global Async API +## Quick Start ```python import pulsing as pul -await pul.init(addr="0.0.0.0:8000") +await pul.init() @pul.remote -class MyActor: - def process(self, data): - return f"Processed: {data}" +class Counter: + def __init__(self): self.value = 0 + def incr(self): self.value += 1; return self.value + +counter = await Counter.spawn(name="counter") +print(await counter.incr()) # 1 -actor = await MyActor.spawn(name="my_actor") -response = await actor.process("hello") +counter2 = await Counter.resolve("counter") +print(await counter2.incr()) # 2 -# Shutdown await pul.shutdown() ``` -### Under the Hood: Actor System API (Explicit Management) - -```python -import pulsing as pul +## Python API -# Create and manage actor system explicitly -system = await pul.actor_system(addr="0.0.0.0:8000") +You must call `await pul.init()` before using `spawn`, `resolve`, or other APIs. -class MyActor: - async def receive(self, msg): - return f"echo: {msg}" +### Lifecycle -# Spawn actor object directly (low-level) -actor = await system.spawn(MyActor(), name="my_actor") +```python +import pulsing as pul -# Communicate -response = await actor.ask({"message": "hello"}) +await pul.init( + addr=None, # Bind address, None for standalone + seeds=None, # Seed nodes for cluster + passphrase=None, # TLS passphrase +) -# Shutdown -await system.shutdown() +await pul.shutdown() ``` -### Actor Patterns +### Define Actor -#### Remote Decorator (Recommended) +Use `@pul.remote` to turn any class into a distributed actor: ```python -import pulsing as pul - @pul.remote class Counter: def __init__(self, init=0): self.value = init - # Synchronous method - serial execution - def incr(self): + def incr(self): # sync method — serial execution self.value += 1 return self.value - # Asynchronous method - concurrent execution - async def fetch_and_add(self, url): + async def fetch_and_add(self, url): # async method — concurrent during await data = await http_get(url) self.value += data return self.value - -# Usage -counter = await Counter.spawn(name="counter") -result = await counter.incr() ``` -#### Base Actor Class +### Create and Call -```python -from pulsing.actor import Actor +`Class.spawn()` creates an actor and returns a typed proxy: -class MyActor(Actor): - async def receive(self, msg): - if msg.get("action") == "greet": - return f"Hello, {msg.get('name', 'World')}!" - return "Unknown action" - -# Usage -system = await pul.actor_system() -actor = await system.spawn(MyActor(), name="greeter") -response = await actor.ask({"action": "greet", "name": "Alice"}) +```python +counter = await Counter.spawn(name="counter", init=10) +result = await counter.incr() # direct method call ``` -### Message Passing - -#### Ask vs Tell - -- **`ask(msg)`**: Request/response pattern, waits for and returns a response -- **`tell(msg)`**: Fire-and-forget pattern, sends message without waiting +### Resolve Existing Actor ```python -# Ask - get response -response = await actor.ask({"action": "compute", "data": [1, 2, 3]}) - -# Tell - no response expected -await actor.tell({"action": "log", "level": "info", "message": "Event occurred"}) +# Typed proxy — when you know the class +proxy = await Counter.resolve("counter") +result = await proxy.incr() + +# Typed proxy — manual bind +ref = await pul.resolve("counter", timeout=30) +proxy = ref.as_type(Counter) + +# Untyped proxy — when remote type is unknown +ref = await pul.resolve("service_name") +proxy = ref.as_any() +result = await proxy.any_method(args) ``` ### Streaming -Pulsing supports streaming responses for large data or continuous generation: +Return a generator for streaming responses: ```python @pul.remote @@ -133,45 +113,78 @@ class StreamingService: for token in generate_tokens(prompt): yield token -# Usage service = await StreamingService.spawn() async for token in service.generate_tokens("Hello world"): print(token, end="") ``` -### Supervision & Fault Tolerance - -Actors can be configured with restart policies for fault tolerance: +### Supervision ```python @pul.remote( restart_policy="on_failure", # "never", "on_failure", "always" max_restarts=3, min_backoff=0.1, - max_backoff=30.0 + max_backoff=30.0, ) class ResilientWorker: def process(self, data): - # If this raises an exception, the actor will be restarted return risky_computation(data) ``` -### Under the Hood: Distributed Queues +### Queue -Pulsing includes a distributed queue system for data pipelines: +Distributed queue with bucket-based partitioning: ```python -# Writer -writer = await system.queue.write("my_topic", bucket_column="user_id") +writer = await pul.queue.write("my_queue", bucket_column="user_id") await writer.put({"user_id": "u1", "data": "hello"}) await writer.flush() -# Reader -reader = await system.queue.read("my_topic") +reader = await pul.queue.read("my_queue") records = await reader.get(limit=100) ``` -## Rust APIs +### Topic + +Lightweight pub/sub for real-time messaging: + +```python +writer = await pul.topic.write("events") +await writer.publish({"type": "user_login", "user": "alice"}) + +reader = await pul.topic.read("events") + +@reader.on_message +async def handle(msg): + print(f"Received: {msg}") + +await reader.start() +``` + +### Under the Hood + +#### ActorSystem (Explicit Management) + +```python +import pulsing as pul + +system = await pul.actor_system(addr="0.0.0.0:8000") + +class MyActor: + async def receive(self, msg): + return f"echo: {msg}" + +actor = await system.spawn(MyActor(), name="my_actor") +response = await actor.ask({"message": "hello"}) +await actor.tell({"event": "fire_and_forget"}) + +await system.shutdown() +``` + +--- + +## Rust API ### Core Traits @@ -200,7 +213,6 @@ struct MyActor; #[async_trait] impl Actor for MyActor { async fn receive(&mut self, msg: Message, _ctx: &mut ActorContext) -> anyhow::Result { - // Process message and return response Message::pack(&Pong(42)) } } @@ -218,26 +230,30 @@ fn counter(init: i32) -> Behavior { }) } -// Usage let counter = system.spawn(counter(0)).await?; ``` +--- + ## Error Handling ### Python ```python +from pulsing.exceptions import ( + PulsingBusinessError, + PulsingSystemError, + PulsingRuntimeError, +) + try: - response = await actor.ask({"action": "process", "data": data}) -except RuntimeError as e: - # Actor-side exceptions are wrapped as RuntimeError - print(f"Actor error: {e}") -except ConnectionError as e: - # Network errors - print(f"Connection error: {e}") -except asyncio.TimeoutError as e: - # Timeout errors - print(f"Timeout: {e}") + result = await service.process(data) +except PulsingBusinessError as e: + print(f"Business error [{e.code}]: {e.message}") +except PulsingSystemError as e: + print(f"System error: {e.error}, recoverable: {e.recoverable}") +except PulsingRuntimeError as e: + print(f"Framework error: {e}") ``` ### Rust @@ -253,20 +269,12 @@ match actor.ask(Ping(42)).await { ## Security Considerations -### Trust Boundaries - - **Pickle payloads** in Python-Python communication can lead to RCE if untrusted - Use TLS in production deployments - Treat the cluster as an authenticated trust boundary -### Network Security - ```python -# Enable TLS -system = await pul.actor_system( - addr="0.0.0.0:8000", - passphrase="your-secret-passphrase" -) +await pul.init(addr="0.0.0.0:8000", passphrase="your-secret-passphrase") ``` ## Performance Characteristics diff --git a/docs/src/api/overview.zh.md b/docs/src/api/overview.zh.md index 1ded01150..522ca5f79 100644 --- a/docs/src/api/overview.zh.md +++ b/docs/src/api/overview.zh.md @@ -4,9 +4,7 @@ Pulsing 是一个分布式 actor 框架,为构建分布式系统提供通信 ## 核心概念 -### Actor 系统 - -Pulsing 基于[Actor 模型](https://en.wikipedia.org/wiki/Actor_model)构建,其中 actor 是计算的基本单位。Actor 通过异步消息传递进行通信,提供: +Pulsing 基于 [Actor 模型](https://en.wikipedia.org/wiki/Actor_model)构建,actor 是计算的基本单位,通过异步消息传递通信: - **位置透明性**:本地和远程 actor 使用相同 API - **容错性**:Actor 可以独立失败,不会影响其他 actor @@ -19,112 +17,94 @@ Pulsing 基于[Actor 模型](https://en.wikipedia.org/wiki/Actor_model)构建, - **流式支持**:原生支持流式请求/响应 - **多语言**:Python 优先,Rust 核心,可扩展到其他语言 -## Python API - -### 全局异步 API +## Quick Start ```python import pulsing as pul -await pul.init(addr="0.0.0.0:8000") +await pul.init() @pul.remote -class MyActor: - def process(self, data): - return f"Processed: {data}" +class Counter: + def __init__(self): self.value = 0 + def incr(self): self.value += 1; return self.value + +counter = await Counter.spawn(name="counter") +print(await counter.incr()) # 1 -actor = await MyActor.spawn(name="my_actor") -response = await actor.process("hello") +counter2 = await Counter.resolve("counter") +print(await counter2.incr()) # 2 -# 关闭 await pul.shutdown() ``` -### Under the Hood:Actor System API(显式管理) - -```python -import pulsing as pul +## Python API -# 显式创建和管理 actor 系统 -system = await pul.actor_system(addr="0.0.0.0:8000") +使用任何 API 之前,必须先调用 `await pul.init()`。 -class MyActor: - async def receive(self, msg): - return f"echo: {msg}" +### 生命周期 -# 直接生成 actor 对象(低层) -actor = await system.spawn(MyActor(), name="my_actor") +```python +import pulsing as pul -# 通信 -response = await actor.ask({"message": "hello"}) +await pul.init( + addr=None, # 绑定地址,单机模式为 None + seeds=None, # 集群种子节点 + passphrase=None, # TLS 密码短语 +) -# 关闭 -await system.shutdown() +await pul.shutdown() ``` -### Actor 模式 +### 定义 Actor -#### Remote 装饰器(推荐) +使用 `@pul.remote` 将任意类变为分布式 actor: ```python -import pulsing as pul - @pul.remote class Counter: def __init__(self, init=0): self.value = init - # 同步方法 - 串行执行 - def incr(self): + def incr(self): # 同步方法 — 串行执行 self.value += 1 return self.value - # 异步方法 - 并发执行 - async def fetch_and_add(self, url): + async def fetch_and_add(self, url): # 异步方法 — await 期间可并发 data = await http_get(url) self.value += data return self.value - -# 使用 -counter = await Counter.spawn(name="counter") -result = await counter.incr() ``` -#### 基础 Actor 类 +### 创建与调用 -```python -from pulsing.actor import Actor +`Class.spawn()` 创建 actor 并返回类型化代理: -class MyActor(Actor): - async def receive(self, msg): - if msg.get("action") == "greet": - return f"Hello, {msg.get('name', 'World')}!" - return "Unknown action" - -# 使用 -system = await pul.actor_system() -actor = await system.spawn(MyActor(), name="greeter") -response = await actor.ask({"action": "greet", "name": "Alice"}) +```python +counter = await Counter.spawn(name="counter", init=10) +result = await counter.incr() # 直接方法调用 ``` -### 消息传递 - -#### Ask vs Tell - -- **`ask(msg)`**:请求/响应模式,等待并返回响应 -- **`tell(msg)`**:发射后不管模式,发送消息不等待 +### 解析已有 Actor ```python -# Ask - 获取响应 -response = await actor.ask({"action": "compute", "data": [1, 2, 3]}) - -# Tell - 无需响应 -await actor.tell({"action": "log", "level": "info", "message": "Event occurred"}) +# 类型化代理 — 已知 actor 类型时 +proxy = await Counter.resolve("counter") +result = await proxy.incr() + +# 类型化代理 — 手动绑定 +ref = await pul.resolve("counter", timeout=30) +proxy = ref.as_type(Counter) + +# 无类型代理 — 远端类型未知时 +ref = await pul.resolve("service_name") +proxy = ref.as_any() +result = await proxy.any_method(args) ``` ### 流式响应 -Pulsing 支持流式响应,用于大数据或持续生成: +远程方法返回生成器即可进行流式传输: ```python @pul.remote @@ -133,7 +113,6 @@ class StreamingService: for token in generate_tokens(prompt): yield token -# 使用 service = await StreamingService.spawn() async for token in service.generate_tokens("Hello world"): print(token, end="") @@ -141,36 +120,70 @@ async for token in service.generate_tokens("Hello world"): ### 监督与容错 -Actor 可以配置重启策略以实现容错: - ```python @pul.remote( restart_policy="on_failure", # "never", "on_failure", "always" max_restarts=3, min_backoff=0.1, - max_backoff=30.0 + max_backoff=30.0, ) class ResilientWorker: def process(self, data): - # 如果抛出异常,Actor 会自动重启 return risky_computation(data) ``` -### Under the Hood:分布式队列 +### 队列 (Queue) -Pulsing 包含分布式队列系统,用于数据管道: +分布式队列,支持 bucket 分区: ```python -# 写入 -writer = await system.queue.write("my_topic", bucket_column="user_id") +writer = await pul.queue.write("my_queue", bucket_column="user_id") await writer.put({"user_id": "u1", "data": "hello"}) await writer.flush() -# 读取 -reader = await system.queue.read("my_topic") +reader = await pul.queue.read("my_queue") records = await reader.get(limit=100) ``` +### 主题 (Topic) + +轻量级发布/订阅,用于实时消息分发: + +```python +writer = await pul.topic.write("events") +await writer.publish({"type": "user_login", "user": "alice"}) + +reader = await pul.topic.read("events") + +@reader.on_message +async def handle(msg): + print(f"Received: {msg}") + +await reader.start() +``` + +### Under the Hood + +#### ActorSystem(显式管理) + +```python +import pulsing as pul + +system = await pul.actor_system(addr="0.0.0.0:8000") + +class MyActor: + async def receive(self, msg): + return f"echo: {msg}" + +actor = await system.spawn(MyActor(), name="my_actor") +response = await actor.ask({"message": "hello"}) +await actor.tell({"event": "fire_and_forget"}) + +await system.shutdown() +``` + +--- + ## Rust API ### 核心 Trait @@ -197,7 +210,6 @@ Factory 模式生成,支持监督重启(仅命名 actor): let options = SpawnOptions::default() .supervision(SupervisionSpec::on_failure().max_restarts(3)); -// 仅命名 actor 支持 supervision system.spawn_named_factory("services/worker", || Ok(Worker::new()), options).await?; ``` @@ -219,56 +231,27 @@ system.shutdown().await?; - **核心**:`Behavior` + `TypedRef` + `BehaviorAction (Same/Become/Stop)` - **约定**:`TypedRef` 要求 `M: Serialize + DeserializeOwned + Send + 'static` +--- + ## 错误处理 ### Python -Pulsing 提供了统一的错误类型系统,区分框架错误和 Actor 执行错误: - ```python from pulsing.exceptions import ( - PulsingActorError, - PulsingRuntimeError, PulsingBusinessError, PulsingSystemError, + PulsingRuntimeError, ) try: - response = await actor.ask({"action": "process", "data": data}) + result = await service.process(data) except PulsingBusinessError as e: - # 业务错误:用户输入验证失败等 print(f"业务错误 [{e.code}]: {e.message}") except PulsingSystemError as e: - # 系统错误:内部处理失败(可能触发 Actor 重启) print(f"系统错误: {e.error}, 可恢复: {e.recoverable}") -except PulsingActorError as e: - # 其他 Actor 执行错误 - print(f"Actor 错误: {e}") except PulsingRuntimeError as e: - # 框架错误:网络、集群、Actor 系统等 print(f"框架错误: {e}") -except asyncio.TimeoutError as e: - # 超时错误(使用 ask_with_timeout 时) - print(f"超时: {e}") -``` - -#### 在 Actor 中抛出错误 - -```python -from pulsing.exceptions import PulsingBusinessError, PulsingSystemError - -@pul.remote -class Processor: - async def process(self, data: str) -> str: - if not data: - # 抛出业务错误 - raise PulsingBusinessError(400, "数据不能为空") - - try: - return expensive_operation(data) - except Exception as e: - # 抛出系统错误 - raise PulsingSystemError(f"处理失败: {e}", recoverable=True) ``` ### Rust @@ -284,20 +267,12 @@ match actor.ask(Ping(42)).await { ## 安全考虑 -### 信任边界 - - **Pickle 载荷**在 Python-Python 通信中可能导致 RCE - 生产环境使用 TLS - 将集群视为经过认证的信任边界 -### 网络安全 - ```python -# 启用 TLS -system = await pul.actor_system( - addr="0.0.0.0:8000", - passphrase="your-secret-passphrase" -) +await pul.init(addr="0.0.0.0:8000", passphrase="your-secret-passphrase") ``` ## 性能特性 @@ -309,7 +284,7 @@ system = await pul.actor_system( ## 后续步骤 -- **[Python API](python.md)**: Python 接口完整文档 -- **[Rust API](rust.md)**: Rust 接口完整文档 -- **[示例](../../examples/)**: 工作代码示例 -- **[指南](../../guide/)**: 深入指南和教程 +- **[Python API](python.md)**:Python 接口完整文档 +- **[Rust API](rust.md)**:Rust 接口完整文档 +- **[示例](../../examples/)**:工作代码示例 +- **[指南](../../guide/)**:深入指南和教程 diff --git a/docs/src/api_reference.md b/docs/src/api_reference.md index 7f8f87876..8a20e0248 100644 --- a/docs/src/api_reference.md +++ b/docs/src/api_reference.md @@ -346,8 +346,8 @@ Distributed queue for data pipelines. ```python # Write -writer = await system.queue.write( - topic="my_queue", +writer = await pul.queue.write( + "my_queue", bucket_column="user_id", num_buckets=4, ) @@ -355,7 +355,7 @@ await writer.put({"user_id": "u1", "data": "hello"}) await writer.flush() # Read -reader = await system.queue.read("my_queue") +reader = await pul.queue.read("my_queue") records = await reader.get(limit=100) ``` diff --git a/docs/src/api_reference.zh.md b/docs/src/api_reference.zh.md index 9bcb6b5f2..a4da617bb 100644 --- a/docs/src/api_reference.zh.md +++ b/docs/src/api_reference.zh.md @@ -368,8 +368,8 @@ response = await actor.ask({"action": "add", "n": 10}) ```python # 写入 -writer = await system.queue.write( - topic="my_queue", +writer = await pul.queue.write( + "my_queue", bucket_column="user_id", num_buckets=4, ) @@ -377,7 +377,7 @@ await writer.put({"user_id": "u1", "data": "hello"}) await writer.flush() # 读取 -reader = await system.queue.read("my_queue") +reader = await pul.queue.read("my_queue") records = await reader.get(limit=100) ``` diff --git a/docs/src/guide/queue.md b/docs/src/guide/queue.md index ce1019a62..70dccf6e8 100644 --- a/docs/src/guide/queue.md +++ b/docs/src/guide/queue.md @@ -65,15 +65,15 @@ import pulsing as pul async def main(): - system = await pul.actor_system() + await pul.init() try: - writer = await system.queue.write( + writer = await pul.queue.write( "my_queue", bucket_column="user_id", num_buckets=4, batch_size=10, ) - reader = await system.queue.read("my_queue") + reader = await pul.queue.read("my_queue") # write await writer.put({"user_id": "u1", "payload": "hello"}) @@ -85,7 +85,7 @@ async def main(): # persist buffered records await writer.flush() finally: - await system.shutdown() + await pul.shutdown() asyncio.run(main()) @@ -96,8 +96,8 @@ asyncio.run(main()) If you need a blocking API (e.g. called from a thread), use `.sync()`: ```python -writer = (await system.queue.write("my_queue")).sync() -reader = (await system.queue.read("my_queue")).sync() +writer = (await pul.queue.write("my_queue")).sync() +reader = (await pul.queue.read("my_queue")).sync() writer.put({"id": "1", "value": 100}) records = reader.get(limit=10) @@ -114,7 +114,7 @@ Note: don't call the sync wrapper **inside** an async function (it blocks). ## Reading modes -`system.queue.read()` supports: +`pul.queue.read()` supports: - **All buckets** (default): one reader iterates all buckets - **Specific buckets**: `bucket_id=` or `bucket_ids=` @@ -123,8 +123,8 @@ Note: don't call the sync wrapper **inside** an async function (it blocks). Example: ```python -reader0 = await system.queue.read("q", rank=0, world_size=2, num_buckets=4) # [0, 2] -reader1 = await system.queue.read("q", rank=1, world_size=2, num_buckets=4) # [1, 3] +reader0 = await pul.queue.read("q", rank=0, world_size=2, num_buckets=4) # [0, 2] +reader1 = await pul.queue.read("q", rank=1, world_size=2, num_buckets=4) # [1, 3] ``` ## Streaming & blocking reads @@ -167,7 +167,7 @@ flowchart LR The default `MemoryBackend` stores data in memory without persistence: ```python -writer = await system.queue.write( +writer = await pul.queue.write( "my_queue", backend="memory", # default, can be omitted ) @@ -186,17 +186,17 @@ import persisting as pst register_backend("lance", pst.queue.LanceBackend) register_backend("persisting", pst.queue.PersistingBackend) -system = await pul.actor_system() +await pul.init() # Use Lance backend for persistence -writer = await system.queue.write( +writer = await pul.queue.write( "my_queue", backend="lance", storage_path="/data/queues", ) # Or use enhanced Persisting backend with WAL -writer = await system.queue.write( +writer = await pul.queue.write( "my_queue", backend="persisting", storage_path="/data/queues", @@ -218,7 +218,7 @@ class MyBackend: # ... other methods register_backend("my_backend", MyBackend) -writer = await system.queue.write("topic", backend="my_backend") +writer = await pul.queue.write("topic", backend="my_backend") ``` ## Multi-consumer offsets: strategy & limitations diff --git a/docs/src/guide/queue.zh.md b/docs/src/guide/queue.zh.md index 1a12cb49e..823d5acfa 100644 --- a/docs/src/guide/queue.zh.md +++ b/docs/src/guide/queue.zh.md @@ -65,15 +65,15 @@ import pulsing as pul async def main(): - system = await pul.actor_system() + await pul.init() try: - writer = await system.queue.write( + writer = await pul.queue.write( "my_queue", bucket_column="user_id", num_buckets=4, batch_size=10, ) - reader = await system.queue.read("my_queue") + reader = await pul.queue.read("my_queue") # 写入 await writer.put({"user_id": "u1", "payload": "hello"}) @@ -85,7 +85,7 @@ async def main(): # 持久化缓冲区 await writer.flush() finally: - await system.shutdown() + await pul.shutdown() asyncio.run(main()) @@ -96,8 +96,8 @@ asyncio.run(main()) 如果你需要阻塞式 API(例如在线程里调用),用 `.sync()`: ```python -writer = (await system.queue.write("my_queue")).sync() -reader = (await system.queue.read("my_queue")).sync() +writer = (await pul.queue.write("my_queue")).sync() +reader = (await pul.queue.read("my_queue")).sync() writer.put({"id": "1", "value": 100}) records = reader.get(limit=10) @@ -114,7 +114,7 @@ writer.flush() ## 读取模式 -`system.queue.read()` 支持: +`pul.queue.read()` 支持: - **读取所有 bucket**(默认) - **读取指定 bucket**:`bucket_id=` / `bucket_ids=` @@ -123,8 +123,8 @@ writer.flush() 例子: ```python -reader0 = await system.queue.read("q", rank=0, world_size=2, num_buckets=4) # [0, 2] -reader1 = await system.queue.read("q", rank=1, world_size=2, num_buckets=4) # [1, 3] +reader0 = await pul.queue.read("q", rank=0, world_size=2, num_buckets=4) # [0, 2] +reader1 = await pul.queue.read("q", rank=1, world_size=2, num_buckets=4) # [1, 3] ``` ## 流式读取与阻塞等待 @@ -167,7 +167,7 @@ flowchart LR 默认的 `MemoryBackend` 将数据存储在内存中,无持久化: ```python -writer = await system.queue.write( +writer = await pul.queue.write( "my_queue", backend="memory", # 默认,可省略 ) @@ -186,17 +186,17 @@ import persisting as pst register_backend("lance", pst.queue.LanceBackend) register_backend("persisting", pst.queue.PersistingBackend) -system = await pul.actor_system() +await pul.init() # 使用 Lance 后端实现持久化 -writer = await system.queue.write( +writer = await pul.queue.write( "my_queue", backend="lance", storage_path="/data/queues", ) # 或使用增强版 Persisting 后端(支持 WAL) -writer = await system.queue.write( +writer = await pul.queue.write( "my_queue", backend="persisting", storage_path="/data/queues", @@ -218,7 +218,7 @@ class MyBackend: # ... 其他方法 register_backend("my_backend", MyBackend) -writer = await system.queue.write("topic", backend="my_backend") +writer = await pul.queue.write("topic", backend="my_backend") ``` ## 多消费者 offset:策略与局限 diff --git a/examples/python/distributed_queue.py b/examples/python/distributed_queue.py index 0ef27494b..236a8e4b0 100644 --- a/examples/python/distributed_queue.py +++ b/examples/python/distributed_queue.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Distributed memory queue example -Demonstrates how to use system.queue.write/read for basic data read/write operations. +Demonstrates how to use pul.queue.write/read for basic data read/write operations. Architecture features: - Each bucket corresponds to an independent BucketStorage Actor @@ -24,12 +24,11 @@ async def main(): logger.info("=== Distributed Memory Queue Example ===\n") await pul.init() - system = pul.ActorSystem(pul.get_system()) logger.info("✓ Global system initialized\n") try: # Producer: open queue for writing - writer = await system.queue.write( + writer = await pul.queue.write( "my_queue", bucket_column="user_id", # Bucket by user_id num_buckets=4, @@ -38,7 +37,7 @@ async def main(): logger.info("✓ Queue created (one Actor per bucket)\n") # Consumer: open queue for reading - reader = await system.queue.read("my_queue") + reader = await pul.queue.read("my_queue") logger.info("✓ Queue opened\n") # Write data (data immediately visible to consumers, no need to wait for persistence) diff --git a/examples/python/sync_queue_example.py b/examples/python/sync_queue_example.py index 5b19ba2bf..ba05d3ec8 100644 --- a/examples/python/sync_queue_example.py +++ b/examples/python/sync_queue_example.py @@ -24,13 +24,12 @@ async def main(): logger.info("=== Distributed Memory Queue Example (Synchronous Version) ===\n") await pul.init() - system = pul.ActorSystem(pul.get_system()) logger.info("✓ Global system initialized\n") try: # Producer: open queue for writing, get synchronous wrapper writer = ( - await system.queue.write( + await pul.queue.write( "my_queue", bucket_column="user_id", # Bucket by user_id num_buckets=4, @@ -40,7 +39,7 @@ async def main(): logger.info("✓ Queue created (synchronous writer)\n") # Consumer: open queue for reading, get synchronous wrapper - reader = (await system.queue.read("my_queue")).sync() + reader = (await pul.queue.read("my_queue")).sync() logger.info("✓ Queue opened (synchronous reader)\n") # Synchronously write data diff --git a/llms.binding.md b/llms.binding.md index 9181700f4..529a4f17e 100644 --- a/llms.binding.md +++ b/llms.binding.md @@ -186,31 +186,11 @@ await actorref.tell(msg: Any) -> None counter = await Counter.local(system, name="counter") # spawn on explicit system result = await counter.incr() -# Queue API (on system) -writer = await system.queue.write( - topic: str, - *, - bucket_column: str = "id", - num_buckets: int = 4, - batch_size: int = 100, - storage_path: str | None = None, - backend: str = "memory", -) -> QueueWriter - -await writer.put(record: dict | list[dict]) -> None -await writer.flush() -> None - -reader = await system.queue.read( - topic: str, - *, - bucket_id: int | None = None, - bucket_ids: list[int] | None = None, - rank: int | None = None, - world_size: int | None = None, - num_buckets: int = 4, -) -> QueueReader - -records = await reader.get(limit: int = 100, wait: bool = False) -> list[dict] +# Queue / Topic on explicit system (same API as pul.queue / pul.topic) +writer = await system.queue.write("my_queue") +reader = await system.queue.read("my_queue") +writer = await system.topic.write("events") +reader = await system.topic.read("events") ``` ### Actor Behavior @@ -351,6 +331,67 @@ async for chunk in service.generate_stream(10): **Note:** For `@pul.remote` classes, simply return a generator (sync or async) and Pulsing auto-detects and handles it as a streaming response. +### Queue API + +Distributed queue with bucket-based partitioning, for data pipelines: + +```python +import pulsing as pul + +await pul.init() + +# ── Write ── +writer = await pul.queue.write( + "my_queue", + *, + bucket_column: str = "id", # Column for partitioning + num_buckets: int = 4, + batch_size: int = 100, + storage_path: str | None = None, + backend: str = "memory", # Pluggable: "memory" or custom +) -> QueueWriter + +await writer.put({"id": "u1", "data": "hello"}) +await writer.put([{"id": "u1", "data": "a"}, {"id": "u2", "data": "b"}]) +await writer.flush() + +# ── Read ── +reader = await pul.queue.read( + "my_queue", + *, + bucket_id: int | None = None, + bucket_ids: list[int] | None = None, + rank: int | None = None, # For distributed consumption + world_size: int | None = None, + num_buckets: int = 4, +) -> QueueReader + +records = await reader.get(limit=100, wait=False) +``` + +### Topic API + +Lightweight pub/sub for real-time message distribution: + +```python +import pulsing as pul + +await pul.init() + +# ── Publish ── +writer = await pul.topic.write("events") +await writer.publish({"type": "user_login", "user": "alice"}) + +# ── Subscribe ── +reader = await pul.topic.read("events") + +@reader.on_message +async def handle(msg): + print(f"Received: {msg}") + +await reader.start() +``` + ## Rust API Rust API defines contracts via traits, organized in three layers: diff --git a/python/pulsing/__init__.py b/python/pulsing/__init__.py index 637e6af48..d97781074 100644 --- a/python/pulsing/__init__.py +++ b/python/pulsing/__init__.py @@ -1,24 +1,7 @@ """ Pulsing - Distributed Actor Framework -Two API styles: - -1. Actor System style (explicit system management): - import pulsing as pul - - system = await pul.actor_system() - - @pul.remote - class Counter: - def __init__(self, init=0): self.value = init - def incr(self): self.value += 1; return self.value - - counter = await Counter.spawn(name="counter") - result = await counter.incr() - - await system.shutdown() - -2. Ray-style async API (global system): +Usage: import pulsing as pul await pul.init() @@ -32,25 +15,6 @@ def incr(self): self.value += 1; return self.value result = await counter.incr() await pul.shutdown() - -3. Ray-compatible sync API (for migration): - from pulsing.compat import ray - - ray.init() - - @ray.remote - class Counter: - def __init__(self, init=0): self.value = init - def incr(self): self.value += 1; return self.value - - counter = Counter.remote(init=10) - result = ray.get(counter.incr.remote()) - - ray.shutdown() - -Submodules: -- pulsing.actor: Native async API (recommended) -- pulsing.compat.ray: Ray-compatible sync API (for migration) """ import asyncio @@ -121,17 +85,19 @@ def cleanup_ray(): class ActorSystem: - """ActorSystem wrapper with queue API + """ActorSystem wrapper with queue/topic API This wraps the Rust ActorSystem and adds Python-level extensions - like the queue API. + like queue and topic APIs. """ def __init__(self, inner: _ActorSystem): self._inner = inner from pulsing.queue import QueueAPI + from pulsing.topic import TopicAPI self.queue = QueueAPI(inner) + self.topic = TopicAPI(inner) async def refer(self, actorid: ActorId | str) -> ActorRef: """Get actor reference by ID @@ -286,6 +252,42 @@ async def refer(actorid: ActorId | str) -> ActorRef: return await system.refer(actorid) +class _GlobalQueueAPI: + """Lazy proxy for pul.queue that uses the global system.""" + + async def write(self, topic, **kwargs): + """Open queue for writing. See QueueAPI.write() for args.""" + from pulsing.queue import QueueAPI + + return await QueueAPI(get_system()).write(topic, **kwargs) + + async def read(self, topic, **kwargs): + """Open queue for reading. See QueueAPI.read() for args.""" + from pulsing.queue import QueueAPI + + return await QueueAPI(get_system()).read(topic, **kwargs) + + +class _GlobalTopicAPI: + """Lazy proxy for pul.topic that uses the global system.""" + + async def write(self, topic, **kwargs): + """Open topic for writing. See TopicAPI.write() for args.""" + from pulsing.topic import TopicAPI + + return await TopicAPI(get_system()).write(topic, **kwargs) + + async def read(self, topic, **kwargs): + """Open topic for reading. See TopicAPI.read() for args.""" + from pulsing.topic import TopicAPI + + return await TopicAPI(get_system()).read(topic, **kwargs) + + +queue = _GlobalQueueAPI() +topic = _GlobalTopicAPI() + + # Export all public APIs __all__ = [ # Version @@ -306,6 +308,9 @@ async def refer(actorid: ActorId | str) -> ActorRef: # Mount (attach existing object to Pulsing network) "mount", "unmount", + # Queue & Topic (global entry points) + "queue", + "topic", # Ray integration "init_inside_ray", "cleanup_ray", diff --git a/python/pulsing/topic/__init__.py b/python/pulsing/topic/__init__.py index 759aab6dd..30fdba9a1 100644 --- a/python/pulsing/topic/__init__.py +++ b/python/pulsing/topic/__init__.py @@ -4,14 +4,14 @@ ensuring only one broker per topic in the cluster. Usage: - from pulsing.topic import write_topic, read_topic + import pulsing as pul - # Publish message - writer = await write_topic(system, "events") + await pul.init() + + writer = await pul.topic.write("events") await writer.publish({"type": "user_login"}) - # Subscribe to messages - reader = await read_topic(system, "events") + reader = await pul.topic.read("events") @reader.on_message async def handle(msg): @@ -20,6 +20,8 @@ async def handle(msg): await reader.start() """ +from typing import TYPE_CHECKING + from pulsing.topic.topic import ( PublishMode, PublishResult, @@ -30,7 +32,66 @@ async def handle(msg): write_topic, ) +if TYPE_CHECKING: + from pulsing._core import ActorSystem + + +class TopicAPI: + """Topic API entry point via system.topic or pul.topic + + Example: + writer = await pul.topic.write("events") + await writer.publish({"type": "user_login"}) + + reader = await pul.topic.read("events") + """ + + def __init__(self, system: "ActorSystem"): + self._system = system + + async def write( + self, + topic: str, + *, + writer_id: str | None = None, + ) -> TopicWriter: + """Open topic for writing + + Args: + topic: Topic name + writer_id: Writer ID (optional) + + Returns: + TopicWriter for publish operations + """ + return await write_topic(self._system, topic, writer_id=writer_id) + + async def read( + self, + topic: str, + *, + reader_id: str | None = None, + auto_start: bool = False, + ) -> TopicReader: + """Open topic for reading + + Args: + topic: Topic name + reader_id: Reader ID (optional) + auto_start: Whether to automatically start receiving + + Returns: + TopicReader for subscribing to messages + """ + return await read_topic( + self._system, topic, reader_id=reader_id, auto_start=auto_start + ) + + __all__ = [ + # High-level API + "TopicAPI", + # Async API "write_topic", "read_topic", "subscribe_to_topic", From dac8f702a09d9d8b41a61d9ba37e85728957f792 Mon Sep 17 00:00:00 2001 From: Reiase Date: Sun, 15 Feb 2026 20:07:55 +0800 Subject: [PATCH 05/15] Add Cluster Networking documentation and update mkdocs navigation - Introduced a new document detailing the Cluster Networking features, explaining how to form and operate a Pulsing cluster with three distinct networking modes. - Updated mkdocs configuration to include the new Cluster Networking section in both English and Chinese, enhancing accessibility for users. - Improved navigation structure in mkdocs to reflect the addition of Cluster Networking, ensuring users can easily find relevant documentation. - Adjusted existing architecture documentation to include links to the new Cluster Networking document, providing a comprehensive overview of the system's design. --- docs/mkdocs.yml | 2 + docs/src/design/architecture.md | 9 +- docs/src/design/architecture.zh.md | 9 +- docs/src/design/cluster-networking.md | 279 +++++++++++++++++++++++ docs/src/design/cluster-networking.zh.md | 279 +++++++++++++++++++++++ 5 files changed, 570 insertions(+), 8 deletions(-) create mode 100644 docs/src/design/cluster-networking.md create mode 100644 docs/src/design/cluster-networking.zh.md diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index c96b0e9a7..e062f5eb9 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -114,6 +114,7 @@ plugins: Actor System: Actor 系统 Behavior API: Behavior API Node Discovery: 节点发现 + Cluster Networking: 集群组网 Actor Addressing: Actor 寻址 HTTP2 Transport: HTTP2 传输 Load Sync: 负载同步 @@ -172,6 +173,7 @@ nav: - Actor System: design/actor-system.md - Behavior API: design/behavior.md - Node Discovery: design/node-discovery.md + - Cluster Networking: design/cluster-networking.md - Actor Addressing: design/actor-addressing.md - HTTP2 Transport: design/http2-transport.md - Load Sync: design/load_sync.md diff --git a/docs/src/design/architecture.md b/docs/src/design/architecture.md index 7cf14b90d..e6c359db9 100644 --- a/docs/src/design/architecture.md +++ b/docs/src/design/architecture.md @@ -110,7 +110,8 @@ sequenceDiagram ## For More Details -- [Actor System Design](../design/actor-system.md) -- [Node Discovery](../design/node-discovery.md) -- [Actor Addressing](../design/actor-addressing.md) -- [HTTP2 Transport](../design/http2-transport.md) +- [Actor System Design](actor-system.md) +- [Node Discovery](node-discovery.md) +- [Cluster Networking](cluster-networking.md) +- [Actor Addressing](actor-addressing.md) +- [HTTP2 Transport](http2-transport.md) diff --git a/docs/src/design/architecture.zh.md b/docs/src/design/architecture.zh.md index 025d46c73..f9bc9c55b 100644 --- a/docs/src/design/architecture.zh.md +++ b/docs/src/design/architecture.zh.md @@ -110,7 +110,8 @@ sequenceDiagram ## 更多详情 -- [Actor 系统设计](../design/actor-system.md) -- [节点发现](../design/node-discovery.md) -- [Actor 寻址](../design/actor-addressing.md) -- [HTTP2 传输](../design/http2-transport.md) +- [Actor 系统设计](actor-system.md) +- [节点发现](node-discovery.md) +- [集群组网](cluster-networking.zh.md) +- [Actor 寻址](actor-addressing.md) +- [HTTP2 传输](http2-transport.md) diff --git a/docs/src/design/cluster-networking.md b/docs/src/design/cluster-networking.md new file mode 100644 index 000000000..d7a71c992 --- /dev/null +++ b/docs/src/design/cluster-networking.md @@ -0,0 +1,279 @@ +# Cluster Networking + +This document describes how to form and operate a Pulsing cluster. Pulsing supports three distinct ways to build a distributed network of nodes, each with different trade-offs and use cases. + +## Overview + +A Pulsing **cluster** is a set of nodes that share: + +- **Membership**: who is in the cluster and whether they are alive +- **Actor registry**: which named actors exist and on which node(s) they run + +All cluster traffic (membership, registry, and actor messages) uses a single HTTP/2 port per node. No external services like etcd, NATS, or Redis are required. + +The three supported **networking modes** are: + +| Mode | Description | Best for | +|------|-------------|----------| +| **1. Gossip + seed** | Nodes discover each other via a gossip protocol; you provide one or more seed addresses to join. | Kubernetes, bare metal, cloud VMs; flexible scaling; no single point of failure. | +| **2. Head node** | One node is the head; workers register with the head and get membership/registry from it. | Simple deployments; environments where a single coordinator is acceptable. | +| **3. Init in Ray** | Pulsing runs inside a Ray cluster; Ray’s internal KV store is used to discover the first seed, then gossip is used. | Existing Ray users; running Pulsing alongside Ray jobs. | + +The rest of this document explains each mode in detail, then compares them and gives practical guidance. + +--- + +## Mode 1: Gossip + Seed Nodes + +### How it works + +- You configure each node with a **bind address** and (for non-first nodes) one or more **seed** addresses. +- A node with seeds **joins** by sending a join request to each seed (and, if the seed is behind a load balancer, possibly multiple times to discover different peers). It receives a **Welcome** with the current member list. +- Once in the cluster, nodes run a **gossip loop**: they periodically exchange membership, failure information, and actor registry with a subset of peers (see [Node Discovery](node-discovery.md)). +- **SWIM**-style failure detection runs over the same transport; suspected/dead nodes are removed from the view. +- Optionally, nodes periodically **re-probe** the seed address(es) (e.g. every 15s). This helps with network partition recovery and discovering new nodes when the seed is a load-balanced endpoint (e.g. a Kubernetes Service). + +So: **seed nodes are only used to get an initial member list**; after that, the cluster is maintained by gossip. There is no permanent “master”; any node can serve as a seed for newcomers. + +### Configuration + +**Rust** + +```rust +use pulsing_actor::prelude::*; +use std::net::SocketAddr; + +// First node (no seeds) – can still bind for incoming connections +let config = SystemConfig::with_addr("0.0.0.0:8000".parse()?); +let system = ActorSystem::new(config).await?; + +// Later nodes – join via seeds +let config = SystemConfig::with_addr("0.0.0.0:8001".parse()?) + .with_seeds(vec!["192.168.1.10:8000".parse()?]); +let system = ActorSystem::new(config).await?; +``` + +**Python** + +```python +import pulsing as pul + +# First node +await pul.init(addr="0.0.0.0:8000") + +# Later nodes – join via seeds +await pul.init(addr="0.0.0.0:8001", seeds=["192.168.1.10:8000"]) +``` + +With multiple seeds (e.g. for HA or when the seed is a K8s Service), pass a list; the node will probe until it gets a member list. + +### Kubernetes-friendly usage + +When the seed is a **Kubernetes Service** (ClusterIP or headless), new pods use the Service name as the seed. The platform’s load balancer may send each probe to a different pod, so the new node can discover several members in a few probes. See [Node Discovery](node-discovery.md) for the recommended `seed_probe_count` and `seed_rejoin_interval` behavior. + +```yaml +# Example: pods use the service as seed +# seed_nodes: ["actor-cluster.default.svc.cluster.local:8080"] +``` + +### When to use + +- You want **no single point of failure** for discovery. +- You run on **Kubernetes**, **bare metal**, or **cloud VMs** and can expose one or more stable addresses (or a Service) as seeds. +- You are fine with **eventual consistency** of membership and actor registry (gossip propagates in a few hundred ms typically). + +### Summary + +- **Seeds**: only for initial join; then gossip maintains the cluster. +- **Single port**: actor RPC and gossip share the same HTTP/2 server. +- **No external store**: no etcd/NATS/Redis. + +--- + +## Mode 2: Head Node + +### How it works + +- One node is designated the **head**; all other nodes are **workers**. +- The **head** holds the authoritative membership and actor registry in memory. It does not run gossip; it only accepts worker registration and heartbeat. +- **Workers** at startup connect to the head’s address, register themselves, and then run **heartbeat** and **sync** loops (pull membership/registry from the head at intervals). +- Actor registration/deregistration from workers is sent to the head; the head updates its state and workers get it on the next sync. + +So: the head is a **central coordinator**. If the head is down, workers cannot discover each other or resolve actors until the head is back (or you reconfigure them to a new head). + +### Configuration + +**Rust** + +```rust +use pulsing_actor::prelude::*; +use std::net::SocketAddr; + +// Head node +let config = SystemConfig::with_addr("0.0.0.0:8000".parse()?) + .with_head_node(); +let system = ActorSystem::new(config).await?; + +// Worker nodes +let head_addr: SocketAddr = "192.168.1.10:8000".parse()?; +let config = SystemConfig::with_addr("0.0.0.0:8001".parse()?) + .with_head_addr(head_addr); +let system = ActorSystem::new(config).await?; +``` + +**Python** + +Head node mode is supported via the Rust `SystemConfig` (e.g. `with_head_node()` / `with_head_addr()`). The Python high-level `init(addr=..., seeds=...)` API currently only supports **Gossip + seed** mode. To use head node from Python you need to build a `SystemConfig` with head options (if exposed on the Python `SystemConfig` in your version) and pass it to `ActorSystem.create(config, loop)`. Check the API for `SystemConfig` in the Python bindings for availability. + +### Head node parameters + +The head backend uses a small set of timers (configurable in Rust via `HeadNodeConfig`): + +- **Sync interval**: how often workers pull membership/registry from the head (default 5s). +- **Heartbeat interval**: how often workers send a heartbeat to the head (default 10s). +- **Heartbeat timeout**: after how long the head considers a worker dead (default 30s). + +Tuning these affects how quickly failed workers are removed from the view. + +### When to use + +- You want **simple operations**: one fixed address (the head) to open in firewalls and to monitor. +- You accept a **single point of failure** for coordination (head down ⇒ no new discovery until head is back). +- You prefer **strong consistency** of membership/registry from the head’s perspective (workers eventually see the same view after each sync). + +### Comparison with Gossip + +| Aspect | Gossip + seed | Head node | +|--------|----------------|-----------| +| Discovery | Decentralized; seeds only for join, then gossip | Centralized; workers talk only to head | +| Failure of “special” node | No single point of failure; any node can be seed | Head down ⇒ no new joins/updates until head recovers | +| Consistency | Eventually consistent (propagation delay) | Head is source of truth; workers eventually consistent with head | +| Config complexity | Need at least one reachable seed address | Need head address for every worker | + +--- + +## Mode 3: Init in Ray (Pulsing on top of Ray) + +### How it works + +- You already have a **Ray cluster** and run your code with `ray.init(...)` (or equivalent). +- You use **`pulsing.ray.init_in_ray()`** so that **each process** (driver and each worker that uses Pulsing) starts a Pulsing actor system and **joins a single Pulsing cluster**. +- Seed discovery is done via **Ray’s internal KV store**: + - The first process that calls `init_in_ray()` starts Pulsing with no seeds, gets its own bind address, and **writes that address** into Ray KV under a fixed key (e.g. `pulsing:seed_addr`). + - Any later process that calls `init_in_ray()` **reads** that key, gets the seed address, and starts Pulsing **with that seed**. So all Pulsing nodes join the same gossip cluster, with the first node’s address as the initial seed. +- Under the hood, Pulsing still uses **Gossip + seed**: the “seed” is simply supplied by Ray KV instead of by your config. So you get one Pulsing cluster per Ray cluster (or per KV namespace, if you use it that way), with no extra etcd/NATS. + +This is “**init in Ray**”: Pulsing is **networked** using its own gossip protocol, but **deployed and discovered** using Ray’s runtime. + +### Configuration and usage + +**Requirements** + +- Ray must be installed and **initialized** before calling `init_in_ray()`. +- Every process that uses Pulsing (driver and workers) must call `init_in_ray()` (or the async variant) in that process. + +**Basic usage** + +```python +import ray +from pulsing.ray import init_in_ray + +# Option A: init_in_ray as worker_process_setup_hook (recommended) +# Then every worker process will run init_in_ray at startup. +ray.init(runtime_env={"worker_process_setup_hook": init_in_ray}) + +# Driver process must also initialize Pulsing +init_in_ray() + +# Now use Pulsing as usual +import pulsing as pul +@pul.remote +class MyActor: + def run(self): return "ok" + +actor = await MyActor.spawn(name="my_actor") # can be on any node +``` + +**Async variant (e.g. for async Ray actors)** + +```python +from pulsing.ray import async_init_in_ray + +# Inside an async Ray actor or async context +await async_init_in_ray() +``` + +**Cleanup (optional)** + +If you want to clear the seed key from Ray KV when tearing down (e.g. in tests): + +```python +from pulsing.ray import cleanup +cleanup() +``` + +### How the seed is chosen + +- **First writer wins**: the first process that calls `init_in_ray()` and successfully writes the KV key becomes the “seed” node (its address is stored). +- If the key already exists, the process reads it and uses that address as `seeds=[...]`, so it joins the existing cluster. +- In the rare case of a race (two processes start with no seed, both write), the implementation may shut down one Pulsing instance and re-join using the winning seed. See `pulsing.ray` source for the exact logic. + +So you do **not** configure seeds manually; Ray KV provides the first seed, and from then on the cluster runs in **Gossip + seed** mode with that node as the initial contact. + +### When to use + +- You already run **Ray** (for other workloads or for scheduling) and want Pulsing actors to run on the same nodes and form one cluster. +- You want **one-line** cluster formation per process (`init_in_ray()`) without managing seed lists or head addresses yourself. +- You are okay depending on **Ray’s runtime** (and its KV) for bootstrap; after that, Pulsing uses only its own HTTP/2 + gossip. + +### Limitations + +- **Ray is required**: `init_in_ray()` depends on Ray and its internal KV. Do not use this mode if you are not using Ray. +- **Process model**: each process that uses Pulsing must call `init_in_ray()` (or `async_init_in_ray()`). The hook ensures workers get it; the driver must call it explicitly. +- **Single cluster per Ray cluster**: the KV key is global to the Ray cluster, so all callers of `init_in_ray()` in that cluster join the same Pulsing cluster. + +--- + +## Comparison and choice + +| Criterion | Gossip + seed | Head node | Init in Ray | +|-----------|----------------|-----------|-------------| +| **External deps** | None | None | Ray (and its KV) | +| **Single point of failure** | No | Yes (head) | No (after bootstrap, same as gossip) | +| **Config** | Bind addr + optional seeds | Bind addr + head addr or head role | None (Ray KV provides seed) | +| **Best environment** | K8s, VMs, bare metal | Single coordinator acceptable | Existing Ray cluster | +| **Consistency** | Eventually consistent | Head-authoritative, then eventual | Same as Gossip + seed | +| **Python `init()`** | Yes (`addr`, `seeds`) | Via `SystemConfig` if exposed | `init_in_ray()` | + +**Suggested choice:** + +- **Already on Ray** → use **init in Ray** for minimal config and automatic seed discovery. +- **Need no SPOF and no Ray** → use **Gossip + seed** (and, in K8s, a Service as seed). +- **Want one fixed coordinator and simple ops** → use **Head node**. + +--- + +## Best practices + +1. **Gossip + seed** + - In Kubernetes, use a **Service** (or multiple seeds) as seed; set `seed_probe_count` (e.g. 3) and `seed_rejoin_interval` (e.g. 15s) so new nodes and partition recovery work well. + - Ensure the **same port** is open for all nodes (actor + gossip); avoid extra firewall rules. + +2. **Head node** + - Run the head on a **stable host/port** and optionally put it behind a load balancer for HA (replace head process but keep the same address). + - Tune **heartbeat timeout** so workers are not marked dead too early under load. + +3. **Init in Ray** + - Call **`init_in_ray()` in the driver** and use **`worker_process_setup_hook`** so every worker process joins the same Pulsing cluster. + - For tests, call **`cleanup()`** when tearing down the Ray cluster if you want a clean KV state. + +4. **Security** + - For any mode, you can enable **TLS** (e.g. passphrase-derived certs) so that actor and cluster traffic are encrypted and authenticated. See [Security](../guide/security.md). + +--- + +## See also + +- [Node Discovery](node-discovery.md) – Gossip protocol and seed probing in detail. +- [Architecture](architecture.md) – System components and message flow. +- [Migrate from Ray](../quickstart/migrate_from_ray.md) – API mapping from Ray to Pulsing. diff --git a/docs/src/design/cluster-networking.zh.md b/docs/src/design/cluster-networking.zh.md new file mode 100644 index 000000000..d8f2be565 --- /dev/null +++ b/docs/src/design/cluster-networking.zh.md @@ -0,0 +1,279 @@ +# 集群组网 + +本文说明如何组建和运维 Pulsing 集群。Pulsing 支持三种不同的分布式组网方式,各有适用场景与取舍。 + +## 概述 + +Pulsing 的**集群**由若干节点组成,节点之间共享: + +- **成员关系**:哪些节点在集群中、是否存活 +- **Actor 注册表**:哪些命名 Actor 存在、分别运行在哪些节点上 + +所有集群通信(成员、注册表与 Actor 消息)共用每个节点上的**单一 HTTP/2 端口**,且不依赖 etcd、NATS、Redis 等外部服务。 + +三种**组网方式**如下: + +| 方式 | 说明 | 适用场景 | +|------|------|----------| +| **1. Gossip + Seed 节点** | 节点通过 Gossip 协议互相发现;你只需提供若干 seed 地址即可加入集群。 | Kubernetes、裸机、云 VM;弹性扩缩;无单点故障。 | +| **2. Head 节点** | 指定一个节点为 Head,其余为 Worker;Worker 向 Head 注册并拉取成员/注册表。 | 部署简单、可接受单一协调节点的环境。 | +| **3. 借助 Ray 组网(init in Ray)** | 在 Ray 集群内运行 Pulsing,用 Ray 的 internal KV 发现首个 seed,再走 Gossip。 | 已有 Ray 用户;与 Ray 任务同机部署。 | + +下文分别说明三种方式,最后对比并给出选型建议。 + +--- + +## 方式一:Gossip + Seed 节点组网 + +### 原理 + +- 每个节点配置**本机监听地址**,非首节点还需配置一个或多个 **seed** 地址。 +- 带 seed 的节点**加入**时向 seed 发送 Join 请求(若 seed 是负载均衡入口,会多次探测以发现多个对端),收到 **Welcome** 后得到当前成员列表。 +- 加入后节点运行 **Gossip 循环**:周期性与部分对端交换成员、故障信息和 Actor 注册表(详见 [节点发现](node-discovery.zh.md))。 +- **SWIM** 故障检测在同一传输上运行,疑似/死亡节点会从视图中剔除。 +- 可选:节点周期性地**重新探测** seed 地址(如每 15s),便于网络分区恢复,以及在 seed 为负载均衡端点(如 K8s Service)时发现新节点。 + +因此:**Seed 仅用于首次加入**;之后由 Gossip 维持集群,没有常驻“主节点”,任意节点都可作为新节点的 seed。 + +### 配置 + +**Rust** + +```rust +use pulsing_actor::prelude::*; +use std::net::SocketAddr; + +// 首节点(不配 seeds,仅绑定地址) +let config = SystemConfig::with_addr("0.0.0.0:8000".parse()?); +let system = ActorSystem::new(config).await?; + +// 后续节点:通过 seeds 加入 +let config = SystemConfig::with_addr("0.0.0.0:8001".parse()?) + .with_seeds(vec!["192.168.1.10:8000".parse()?]); +let system = ActorSystem::new(config).await?; +``` + +**Python** + +```python +import pulsing as pul + +# 首节点 +await pul.init(addr="0.0.0.0:8000") + +# 后续节点:通过 seeds 加入 +await pul.init(addr="0.0.0.0:8001", seeds=["192.168.1.10:8000"]) +``` + +若有多个 seed(如高可用或 K8s Service),传入列表即可;节点会探测直到获得成员列表。 + +### 与 Kubernetes 配合 + +当 seed 是 **Kubernetes Service**(ClusterIP 或 headless)时,新 Pod 将 Service 名作为 seed。平台负载均衡可能把每次探测打到不同 Pod,新节点几次探测即可发现多个成员。推荐参数见 [节点发现](node-discovery.zh.md) 中的 `seed_probe_count` 与 `seed_rejoin_interval`。 + +```yaml +# 示例:Pod 使用 Service 作为 seed +# seed_nodes: ["actor-cluster.default.svc.cluster.local:8080"] +``` + +### 何时选用 + +- 希望**发现逻辑无单点故障**。 +- 运行在 **Kubernetes、裸机或云 VM**,能提供至少一个稳定地址(或 Service)作为 seed。 +- 能接受成员与 Actor 注册表的**最终一致性**(通常几百毫秒内收敛)。 + +### 小结 + +- **Seed**:仅用于首次加入,之后由 Gossip 维持集群。 +- **单端口**:Actor RPC 与 Gossip 共用同一 HTTP/2 服务。 +- **无外部存储**:不依赖 etcd/NATS/Redis。 + +--- + +## 方式二:Head 节点组网 + +### 原理 + +- 指定一个节点为 **Head**,其余为 **Worker**。 +- **Head** 在内存中维护权威的成员列表与 Actor 注册表,不跑 Gossip,只接受 Worker 的注册与心跳。 +- **Worker** 启动时连接 Head 地址完成注册,并运行**心跳**与**同步**循环(按间隔从 Head 拉取成员/注册表)。 +- Worker 上的 Actor 注册/注销会上报给 Head;Head 更新状态后,Worker 在下次同步时得到一致视图。 + +因此:Head 是**中心协调者**。Head 宕机期间,Worker 无法完成新发现或解析 Actor,直到 Head 恢复(或你改为指向新 Head)。 + +### 配置 + +**Rust** + +```rust +use pulsing_actor::prelude::*; +use std::net::SocketAddr; + +// Head 节点 +let config = SystemConfig::with_addr("0.0.0.0:8000".parse()?) + .with_head_node(); +let system = ActorSystem::new(config).await?; + +// Worker 节点 +let head_addr: SocketAddr = "192.168.1.10:8000".parse()?; +let config = SystemConfig::with_addr("0.0.0.0:8001".parse()?) + .with_head_addr(head_addr); +let system = ActorSystem::new(config).await?; +``` + +**Python** + +Head 模式在 Rust 侧通过 `SystemConfig` 的 `with_head_node()` / `with_head_addr()` 支持。当前 Python 高层 API `init(addr=..., seeds=...)` 仅支持 **Gossip + seed**。若要在 Python 中使用 Head 模式,需通过 Python 绑定的 `SystemConfig`(若已暴露 head 相关接口)构建配置并传给 `ActorSystem.create(config, loop)`,具体以当前版本 API 为准。 + +### Head 相关参数 + +Head 后端使用少量定时参数(Rust 中通过 `HeadNodeConfig` 配置): + +- **同步间隔**:Worker 从 Head 拉取成员/注册表的周期(默认 5s)。 +- **心跳间隔**:Worker 向 Head 发送心跳的周期(默认 10s)。 +- **心跳超时**:超过多久未收到心跳则视为 Worker 死亡(默认 30s)。 + +调整这些参数可影响故障 Worker 从视图中剔除的速度。 + +### 何时选用 + +- 希望**运维简单**:只需固定一个 Head 地址做防火墙与监控。 +- 可接受**协调层面的单点**(Head 宕机期间无法新加入或更新视图,直到恢复)。 +- 希望成员/注册表以 Head 为**强一致**来源,Worker 每次同步后与其一致。 + +### 与 Gossip 对比 + +| 维度 | Gossip + seed | Head 节点 | +|------|----------------|-----------| +| 发现方式 | 去中心化;seed 仅用于加入,之后 Gossip | 中心化;Worker 只与 Head 通信 | +| “特殊”节点故障 | 无单点;任意节点都可作 seed | Head 宕机则无法新加入/更新,直到恢复 | +| 一致性 | 最终一致(有传播延迟) | Head 为唯一真相源;Worker 与 Head 最终一致 | +| 配置复杂度 | 至少一个可达 seed 地址 | 每个 Worker 需配置 Head 地址 | + +--- + +## 方式三:借助 Ray 组网(init in Ray) + +### 原理 + +- 你已有 **Ray 集群**,并通过 `ray.init(...)` 等方式拉起任务。 +- 使用 **`pulsing.ray.init_in_ray()`**,让**每个进程**(driver 与需要使用 Pulsing 的 worker)都启动一套 Pulsing 并**加入同一个 Pulsing 集群**。 +- 首个 seed 的发现依赖 **Ray 的 internal KV**: + - 第一个调用 `init_in_ray()` 的进程以“无 seed”方式启动 Pulsing,得到本机地址后,将该地址**写入** Ray KV 的固定 key(如 `pulsing:seed_addr`)。 + - 之后任意进程调用 `init_in_ray()` 时**读取**该 key,得到 seed 地址,并以该 seed 启动 Pulsing,从而加入已有集群。 +- 底层仍是 **Gossip + seed**:只是 seed 由 Ray KV 提供,而不是你在配置里写死。因此每个 Ray 集群(或每个 KV 命名空间)对应一个 Pulsing 集群,且无需额外 etcd/NATS。 + +这就是“**init in Ray**”:Pulsing 的**组网**仍用自己的 Gossip,但**部署与发现**借助 Ray 运行时完成。 + +### 配置与用法 + +**前置条件** + +- 需安装 Ray,且必须先 **`ray.init()`**,再调用 `init_in_ray()`。 +- 每个要使用 Pulsing 的进程(driver 与 worker)都必须在该进程中调用 `init_in_ray()`(或异步版本)。 + +**基本用法** + +```python +import ray +from pulsing.ray import init_in_ray + +# 方式 A:将 init_in_ray 设为 worker_process_setup_hook(推荐) +# 这样每个 worker 进程启动时都会执行 init_in_ray。 +ray.init(runtime_env={"worker_process_setup_hook": init_in_ray}) + +# driver 进程也需要初始化 Pulsing +init_in_ray() + +# 之后按常规使用 Pulsing +import pulsing as pul +@pul.remote +class MyActor: + def run(self): return "ok" + +actor = await MyActor.spawn(name="my_actor") # 可落在任意节点 +``` + +**异步版本(如在 async Ray actor 中)** + +```python +from pulsing.ray import async_init_in_ray + +# 在 async Ray actor 或 async 上下文中 +await async_init_in_ray() +``` + +**清理(可选)** + +若在 teardown(如测试)时希望清除 Ray KV 中的 seed key: + +```python +from pulsing.ray import cleanup +cleanup() +``` + +### Seed 如何确定 + +- **先写先得**:第一个成功写入 KV key 的 `init_in_ray()` 调用者成为“seed 节点”,其地址被写入。 +- 若 key 已存在,进程读取该地址并作为 `seeds=[...]` 启动,从而加入已有集群。 +- 极少数并发下可能发生竞争(两个进程都以为没有 seed 并写入),实现上会对其中一个实例做 shutdown 并用胜出者的地址重新 join,详见 `pulsing.ray` 源码。 + +因此**无需手动配置 seed**:Ray KV 提供首个 seed,之后集群按 **Gossip + seed** 运行,该节点为初始联络点。 + +### 何时选用 + +- 已在用 **Ray**(做调度或其他任务),希望 Pulsing Actor 跑在同一批节点并组成一个集群。 +- 希望**每个进程一行代码**完成组网(`init_in_ray()`),而不自己维护 seed 列表或 Head 地址。 +- 可以接受在**启动阶段**依赖 Ray 运行时(及其 KV);启动后仅使用 Pulsing 自己的 HTTP/2 + Gossip。 + +### 限制 + +- **依赖 Ray**:`init_in_ray()` 依赖 Ray 及其 internal KV,未使用 Ray 时不要选此方式。 +- **进程模型**:每个使用 Pulsing 的进程都必须调用 `init_in_ray()`(或 `async_init_in_ray()`)。通过 hook 可保证 worker 调用;driver 需显式调用。 +- **一个 Ray 集群对应一个 Pulsing 集群**:KV key 在 Ray 集群内全局唯一,因此该集群内所有 `init_in_ray()` 调用者都会加入同一个 Pulsing 集群。 + +--- + +## 三种方式对比与选型 + +| 维度 | Gossip + seed | Head 节点 | Init in Ray | +|------|----------------|-----------|-------------| +| **外部依赖** | 无 | 无 | Ray(及其 KV) | +| **单点故障** | 无 | 有(Head) | 无(启动后与 Gossip 一致) | +| **配置** | 绑定地址 + 可选 seeds | 绑定地址 + Head 地址或 Head 角色 | 无(由 Ray KV 提供 seed) | +| **适用环境** | K8s、VM、裸机 | 可接受单一协调节点 | 已有 Ray 集群 | +| **一致性** | 最终一致 | Head 权威,再最终一致 | 与 Gossip + seed 相同 | +| **Python init()** | 支持(`addr`、`seeds`) | 需通过 SystemConfig(若暴露) | 使用 `init_in_ray()` | + +**选型建议:** + +- **已有 Ray** → 用 **init in Ray**,配置最少、自动发现 seed。 +- **不要单点且不用 Ray** → 用 **Gossip + seed**(K8s 下用 Service 作 seed)。 +- **希望一个固定协调节点、运维简单** → 用 **Head 节点**。 + +--- + +## 最佳实践 + +1. **Gossip + seed** + - 在 Kubernetes 中用 **Service**(或多个 seed)作 seed;合理设置 `seed_probe_count`(如 3)和 `seed_rejoin_interval`(如 15s),便于新节点加入与分区恢复。 + - 保证各节点**同一端口**开放(Actor + Gossip),避免多余防火墙规则。 + +2. **Head 节点** + - Head 部署在**稳定主机/端口**,可按需前面挂负载均衡做 HA(进程可换,地址不变)。 + - 根据负载调整**心跳超时**,避免 Worker 在压力下被误判为死亡。 + +3. **Init in Ray** + - **Driver 中调用 `init_in_ray()`**,并设置 **`worker_process_setup_hook`**,确保每个 worker 进程都加入同一 Pulsing 集群。 + - 测试场景下若希望 KV 干净,可在 Ray 集群 teardown 时调用 **`cleanup()`**。 + +4. **安全** + - 任意方式下均可开启 **TLS**(如基于 passphrase 的证书),对 Actor 与集群流量加密和认证,见 [安全](../guide/security.zh.md)。 + +--- + +## 相关文档 + +- [节点发现](node-discovery.zh.md) — Gossip 协议与 seed 探测细节。 +- [架构](architecture.zh.md) — 系统组件与消息流。 +- [从 Ray 迁移](../quickstart/migrate_from_ray.zh.md) — Ray 到 Pulsing 的 API 映射。 From 0cc888acc787a13600e1e9bd760af0cd1cbc517d Mon Sep 17 00:00:00 2001 From: Reiase Date: Mon, 16 Feb 2026 19:34:15 +0800 Subject: [PATCH 06/15] Update repository links and enhance documentation for Pulsing - Changed repository URLs from "reiase" to "DeepLink-org" across multiple files, ensuring consistency in project references. - Updated the project description to emphasize Pulsing as a backbone for distributed AI systems, highlighting its features such as streaming support and zero dependencies. - Enhanced the README and documentation to provide clearer insights into Pulsing's capabilities and use cases, improving accessibility for users. - Added an overview document to summarize Pulsing's functionality and target audience, facilitating better understanding for new users. --- .github/ISSUE_TEMPLATE/config.yml | 2 +- CHANGELOG.md | 4 +- CONTRIBUTING.md | 6 +- Cargo.toml | 2 +- README.md | 14 +- README.zh.md | 14 +- ROADMAP.md | 2 +- SECURITY.md | 2 +- crates/pulsing-actor/Cargo.toml | 2 +- crates/pulsing-py/src/actor.rs | 15 + docs/Makefile | 7 +- docs/mkdocs.yml | 66 +++-- docs/overrides/home.html | 22 +- docs/pyproject.toml | 2 +- docs/src/api/overview.md | 2 +- docs/src/api/overview.zh.md | 2 +- docs/src/design/cluster-networking.md | 285 ++++--------------- docs/src/design/cluster-networking.zh.md | 281 +++--------------- docs/src/faq.md | 45 +-- docs/src/faq.zh.md | 45 +-- docs/src/guide/index.md | 21 +- docs/src/guide/index.zh.md | 19 +- docs/src/guide/remote_actors.md | 16 +- docs/src/guide/remote_actors.zh.md | 16 +- docs/src/index.md | 43 +-- docs/src/index.zh.md | 41 +-- docs/src/overview.md | 48 ++++ docs/src/overview.zh.md | 48 ++++ docs/src/quickstart/cluster_networking.md | 206 ++++++++++++++ docs/src/quickstart/cluster_networking.zh.md | 206 ++++++++++++++ docs/src/quickstart/index.md | 118 +++++++- docs/src/quickstart/index.zh.md | 116 +++++++- docs/src/quickstart/llm_inference.md | 10 + docs/src/quickstart/llm_inference.zh.md | 10 + docs/src/quickstart/migrate_from_ray.md | 69 ++++- docs/src/quickstart/migrate_from_ray.zh.md | 69 ++++- docs/src/quickstart/patterns.md | 48 ++++ docs/src/quickstart/patterns.zh.md | 48 ++++ llms.binding.md | 4 +- pyproject.toml | 12 +- python/pulsing/actor/__init__.py | 26 +- python/pulsing/agent/runtime.py | 10 +- 42 files changed, 1332 insertions(+), 692 deletions(-) create mode 100644 docs/src/overview.md create mode 100644 docs/src/overview.zh.md create mode 100644 docs/src/quickstart/cluster_networking.md create mode 100644 docs/src/quickstart/cluster_networking.zh.md create mode 100644 docs/src/quickstart/patterns.md create mode 100644 docs/src/quickstart/patterns.zh.md diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index ccbab97fc..f2ff724b8 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,5 +1,5 @@ blank_issues_enabled: false contact_links: - name: Pulsing GitHub Discussions - url: https://github.com/reiase/Pulsing/discussions + url: https://github.com/DeepLink-org/Pulsing/discussions about: Ask questions and discuss ideas in GitHub Discussions. diff --git a/CHANGELOG.md b/CHANGELOG.md index f65ed4939..c9cd2218e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,5 +38,5 @@ - Python 绑定 - 基础文档和示例 -[Unreleased]: https://github.com/reiase/pulsing/compare/v0.1.0...HEAD -[0.1.0]: https://github.com/reiase/pulsing/releases/tag/v0.1.0 +[Unreleased]: https://github.com/DeepLink-org/Pulsing/compare/v0.1.0...HEAD +[0.1.0]: https://github.com/DeepLink-org/Pulsing/releases/tag/v0.1.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fd90f1444..976e182bd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -14,7 +14,7 @@ ```bash # 克隆仓库 -git clone https://github.com/reiase/pulsing.git +git clone https://github.com/DeepLink-org/Pulsing.git cd pulsing # 安装 Python 依赖 @@ -38,7 +38,7 @@ pytest tests/ ```bash git clone https://github.com/YOUR_USERNAME/pulsing.git cd pulsing -git remote add upstream https://github.com/reiase/pulsing.git +git remote add upstream https://github.com/DeepLink-org/Pulsing.git ``` ### 3. 创建分支 @@ -136,4 +136,4 @@ pytest tests/actor_system/ ## 问题? -如果你有任何问题,请通过 [GitHub Issues](https://github.com/reiase/pulsing/issues) 联系我们。 +如果你有任何问题,请通过 [GitHub Issues](https://github.com/DeepLink-org/Pulsing/issues) 联系我们。 diff --git a/Cargo.toml b/Cargo.toml index 1e0e62cb5..d7d7bda28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ edition = "2021" description = "Pulsing - Distributed Actor Framework" authors = ["Reiase "] license = "Apache-2.0" -repository = "https://github.com/reiase/pulsing" +repository = "https://github.com/DeepLink-org/Pulsing" keywords = ["actor", "distributed", "async", "inference"] [workspace.dependencies] diff --git a/README.md b/README.md index c6675a3ee..be34e43a7 100644 --- a/README.md +++ b/README.md @@ -7,17 +7,19 @@ **[中文文档](README.zh.md)** -**Pulsing is a distributed actor framework that provides a communication backbone for building distributed systems, with specialized support for AI applications.** +**Backbone for distributed AI systems.** -🚀 **Zero Dependencies** — Pure Rust + Tokio, no NATS/etcd/Redis +**Actor runtime. Streaming-first. Zero dependencies. Built-in discovery.** + +Pulsing is a distributed actor runtime built in Rust, designed for Python. Connect AI agents and services across machines — no Redis, no etcd, no YAML. Just `pip install pulsing`. -🌐 **Auto Discovery** — Built-in Gossip protocol for cluster management +🚀 **Zero Dependencies** — Pure Rust + Tokio, no NATS/etcd/Redis -🔀 **Location Transparent** — Same API for local and remote Actors +⚡ **Streaming-first** — Native support for streaming responses, built for LLM token generation -⚡ **Streaming Ready** — Native support for LLM streaming responses +🌐 **Built-in Discovery** — SWIM/Gossip protocol for automatic cluster management -🤖 **Agent Friendly** — Integrates with AutoGen, LangGraph out of the box +🔀 **Same API Everywhere** — Same `await actor.method()` for local and remote Actors ## 🚀 Get Started in 5 Minutes diff --git a/README.zh.md b/README.zh.md index 6611b76ea..e476e3795 100644 --- a/README.zh.md +++ b/README.zh.md @@ -7,17 +7,19 @@ **[English](README.md)** -**Pulsing 是一个分布式 actor 框架,为构建分布式系统提供通信骨干,并为 AI 应用提供专门支持。** +**分布式 AI 系统的通信骨干。** -🚀 **零外部依赖** — 纯 Rust + Tokio,无需 NATS/etcd/Redis +**Actor 运行时。流式优先。零依赖。内置发现。** + +Pulsing 是一个用 Rust 构建、为 Python 设计的分布式 Actor 运行时。跨机器连接 AI Agent 和服务——不需要 Redis,不需要 etcd,不需要 YAML。只需 `pip install pulsing`。 -🌐 **自动发现** — 内置 Gossip 协议管理集群 +🚀 **零外部依赖** — 纯 Rust + Tokio,无需 NATS/etcd/Redis -🔀 **位置透明** — 本地和远程 Actor 使用相同 API +⚡ **流式优先** — 原生流式响应支持,为 LLM token 生成而设计 -⚡ **流式支持** — 原生支持 LLM 流式响应 +🌐 **内置发现** — SWIM/Gossip 协议实现自动集群管理 -🤖 **Agent 友好** — 开箱即用集成 AutoGen、LangGraph +🔀 **统一 API** — 本地和远程 Actor 使用相同的 `await actor.method()` ## 🚀 5分钟快速体验 diff --git a/ROADMAP.md b/ROADMAP.md index 5d047c6bd..b989ed11e 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -108,7 +108,7 @@ ## 贡献 -欢迎社区贡献!如果你对某个功能感兴趣,请在 [GitHub Issues](https://github.com/reiase/pulsing/issues) 中讨论。 +欢迎社区贡献!如果你对某个功能感兴趣,请在 [GitHub Issues](https://github.com/DeepLink-org/Pulsing/issues) 中讨论。 ## 参考 diff --git a/SECURITY.md b/SECURITY.md index bb5969cba..b4030e4d6 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,7 +13,7 @@ ### 报告方式 1. **GitHub Private Vulnerability Reporting**(推荐) - - 访问 [Security Advisories](https://github.com/reiase/pulsing/security/advisories) + - 访问 [Security Advisories](https://github.com/DeepLink-org/Pulsing/security/advisories) - 点击 "Report a vulnerability" 2. **邮件** diff --git a/crates/pulsing-actor/Cargo.toml b/crates/pulsing-actor/Cargo.toml index f0fc8630f..5b67ed392 100644 --- a/crates/pulsing-actor/Cargo.toml +++ b/crates/pulsing-actor/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" description = "Lightweight distributed actor framework for Pulsing" authors.workspace = true license.workspace = true -repository = "https://github.com/reiase/pulsing" +repository = "https://github.com/DeepLink-org/Pulsing" keywords = ["actor", "distributed", "cluster", "gossip"] [features] diff --git a/crates/pulsing-py/src/actor.rs b/crates/pulsing-py/src/actor.rs index 13c3dc130..9f603ed2e 100644 --- a/crates/pulsing-py/src/actor.rs +++ b/crates/pulsing-py/src/actor.rs @@ -774,6 +774,21 @@ impl PySystemConfig { }) } + /// Run this node as the head node (workers will register with it). + fn with_head_node(&self) -> Self { + Self { + inner: self.inner.clone().with_head_node(), + } + } + + /// Connect to a head node at the given address (makes this node a worker). + fn with_head_addr(&self, addr: String) -> PyResult { + let socket_addr: SocketAddr = addr.parse().map_err(to_py_value_err)?; + Ok(Self { + inner: self.inner.clone().with_head_addr(socket_addr), + }) + } + /// Enable TLS with passphrase-derived certificates /// /// All nodes using the same passphrase will be able to communicate securely. diff --git a/docs/Makefile b/docs/Makefile index ca6bc6bba..80fd06a78 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -2,11 +2,14 @@ help: @echo "Available commands:" - @echo " make serve - Serve documentation locally (auto-installs deps)" + @echo " make serve - Serve documentation locally (auto-installs deps, livereload on)" @echo " make build - Build documentation (auto-installs deps)" @echo " make clean - Clean build artifacts" @echo " make check-links - Check for broken links" @echo " make sync - Sync dependencies with pyproject.toml" + @echo "" + @echo " If livereload does not refresh on file change, try:" + @echo " WATCHDOG_USE_POLLING=1 make serve" sync: uv sync --all-extras @@ -14,7 +17,7 @@ sync: serve: @echo "Installing Pulsing in development mode..." cd .. && uv run maturin develop - uv run mkdocs serve + uv run mkdocs serve --livereload -w src -w mkdocs.yml -w overrides build: @echo "Installing Pulsing in development mode..." diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index e062f5eb9..5362b7ff1 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -1,5 +1,5 @@ site_name: Pulsing -site_description: Pulsing is a distributed actor framework that provides a communication backbone for building distributed systems, with specialized support for AI applications. +site_description: "Pulsing: Backbone for distributed AI systems. Actor runtime. Streaming-first. Zero dependencies. Built-in discovery." # Docs may be hosted elsewhere, but repo is the canonical entry point. site_url: https://github.com/DeepLink-org/pulsing docs_dir: src @@ -83,12 +83,23 @@ plugins: site_name: "Pulsing 文档" nav_translations: Home: 首页 - Getting Started: 开始使用 Overview: 概述 + Quick Start: 快速开始 + First Actor: 第一个 Actor + Actor Patterns: Actor 模式 + Cluster Setup: 集群组网 + Get Started: 入门 + Getting Started: 开始使用 + Core Concepts: 核心概念 + Architecture Overview: 架构概览 + Cluster & Network: 集群与网络 + Implementation: 实现细节 + User Guides: 用户指南 + Architecture & Design: 架构与设计 Complete Reference: 完整参考 LLM Inference: LLM 推理 Distributed Agents: 分布式 Agent - Migrate from Ray: 从 Ray 迁移 + Ray + Pulsing: Ray + Pulsing User Guide: 用户指南 Guide: 指南 Actors: Actor 指南 @@ -105,11 +116,12 @@ plugins: Ping-Pong: Ping-Pong Distributed Counter: 分布式计数器 API Reference: API 参考 - Overview: API 概述 + API Overview: API 概述 Python: Python API Rust: Rust API FAQ: 常见问题 Design: 设计文档 + Actor Basics: Actor 基础 Architecture: 架构概览 Actor System: Actor 系统 Behavior API: Behavior API @@ -137,18 +149,21 @@ plugins: nav: - Home: index.md - - Getting Started: - - Overview: quickstart/index.md + - Overview: overview.md + - Quick Start: + - First Actor: quickstart/index.md + - Actor Patterns: quickstart/patterns.md + - Cluster Setup: quickstart/cluster_networking.md - LLM Inference: quickstart/llm_inference.md - Distributed Agents: quickstart/agent.md - - Migrate from Ray: quickstart/migrate_from_ray.md - - User Guide: - - Guide: guide/index.md - - Actors: guide/actors.md + - Ray + Pulsing: quickstart/migrate_from_ray.md + - Core Concepts: + - Actor Basics: guide/actors.md - Communication Patterns: guide/communication_patterns.md - Remote Actors: guide/remote_actors.md - - Operations: guide/operations.md - Reliability: guide/reliability.md + - User Guides: + - Operations: guide/operations.md - Security: guide/security.md - Distributed Queue: guide/queue.md - Semantics: guide/semantics.md @@ -163,28 +178,31 @@ nav: - Ping-Pong: examples/ping_pong.md - Distributed Counter: examples/distributed_counter.md - API Reference: - - Overview: api/overview.md + - API Overview: api/overview.md - Complete Reference: api_reference.md - Python: api/python.md - Rust: api/rust.md - FAQ: faq.md - - Design: - - Architecture: design/architecture.md - - Actor System: design/actor-system.md - - Behavior API: design/behavior.md - - Node Discovery: design/node-discovery.md - - Cluster Networking: design/cluster-networking.md - - Actor Addressing: design/actor-addressing.md - - HTTP2 Transport: design/http2-transport.md - - Load Sync: design/load_sync.md - - AS Actor Decorator: design/as-actor-decorator.md - - Communication Evolution: design/cluster-communication-evolution.md + - Architecture & Design: + - Architecture Overview: design/architecture.md + - Actor System: + - Actor System: design/actor-system.md + - Behavior API: design/behavior.md + - Cluster & Network: + - Cluster Networking: design/cluster-networking.md + - Node Discovery: design/node-discovery.md + - HTTP2 Transport: design/http2-transport.md + - Actor Addressing: design/actor-addressing.md + - Load Sync: design/load_sync.md + - Implementation: + - AS Actor Decorator: design/as-actor-decorator.md + - Communication Evolution: design/cluster-communication-evolution.md extra: generator: false social: - icon: fontawesome/brands/github - link: https://github.com/reiase/pulsing + link: https://github.com/DeepLink-org/Pulsing extra_css: - assets/stylesheets/home.css diff --git a/docs/overrides/home.html b/docs/overrides/home.html index 65beacd5c..20d320aac 100644 --- a/docs/overrides/home.html +++ b/docs/overrides/home.html @@ -485,9 +485,11 @@

Pulsing

{% if config.theme.language == "zh" %} -

为可扩展 AI 系统设计的轻量级分布式 Actor 框架

+

分布式 AI 系统的通信骨干

+

Actor 运行时。流式优先。零依赖。内置发现。

{% else %} -

Lightweight Distributed Actor Framework for Scalable AI Systems

+

Backbone for Distributed AI Systems

+

Actor runtime. Streaming-first. Zero dependencies. Built-in discovery.

{% endif %}
@@ -700,11 +702,11 @@

LLM Inference Services

🔀
{% if config.theme.language == "zh" %} -

分布式计算

-

替代 Ray 用于轻量级分布式工作负载。非常适合 ML 流水线、数据处理和微服务。

+

增强 Ray 通信

+

为 Ray Actor 增加流式、发现和跨集群通信能力。Ray 负责调度,Pulsing 负责通信。

{% else %} -

Distributed Computing

-

Replace Ray for lightweight distributed workloads. Perfect for ML pipelines, data processing, and microservices.

+

Enhance Ray Communication

+

Add streaming, discovery, and cross-cluster calls to Ray actors. Ray handles scheduling, Pulsing handles communication.

{% endif %}
@@ -904,19 +906,19 @@

Join the Community

Pulsing is open source and community-driven. Get involved!

{% endif %} @@ -104,7 +108,7 @@ async def main(): asyncio.run(main()) ``` -[:octicons-arrow-right-24: Getting Started](quickstart/index.md){ .md-button } +[:octicons-arrow-right-24: Quick Start](quickstart/index.md){ .md-button } --- @@ -112,10 +116,11 @@ asyncio.run(main()) | Goal | Link | |------|------| -| Understand the Actor model | [Guide: Actors](guide/actors.md) | -| Build a cluster | [Guide: Remote Actors](guide/remote_actors.md) | -| Operate your system | [Guide: CLI Operations](guide/operations.md) | -| Deep dive into design | [Design Documents](design/architecture.md) | +| What is Pulsing / who is it for? | [Overview](overview.md) | +| Understand the Actor model | [Actor Basics](guide/actors.md) | +| Build a cluster | [Remote Actors](guide/remote_actors.md) | +| Operate your system | [CLI Operations](guide/operations.md) | +| Architecture and design | [Architecture & Design](design/architecture.md) | | API details | [API Overview](api/overview.md) | | Full API contract | [Complete Reference](api_reference.md) | diff --git a/docs/src/index.zh.md b/docs/src/index.zh.md index 305a5864a..db78f0dd8 100644 --- a/docs/src/index.zh.md +++ b/docs/src/index.zh.md @@ -1,7 +1,7 @@ --- template: home.html -title: Pulsing - 轻量级分布式 Actor 框架 -description: Pulsing 是一个分布式 actor 框架,为构建分布式系统提供通信骨干,并为 AI 应用提供专门支持。 +title: Pulsing - 分布式 AI 系统的通信骨干 +description: "Pulsing:分布式 AI 系统的通信骨干。Actor 运行时。流式优先。零依赖。内置发现。" hide: toc --- @@ -9,35 +9,39 @@ hide: toc # Pulsing -一个**分布式 actor 框架**,为构建分布式系统提供通信骨干,并为 AI 应用提供专门支持。 +**分布式 AI 系统的通信骨干。** + +Actor 运行时。流式优先。零依赖。内置发现。 + +用 Rust 构建、为 Python 设计的分布式 Actor 运行时。跨机器连接 AI Agent 和服务——不需要 Redis,不需要 etcd,不需要 YAML。 ## 为什么选择 Pulsing?
-- :material-package-variant-closed:{ .lg .middle } **零外部依赖** +- :material-package-variant-closed:{ .lg .middle } **零依赖** --- - 纯 Rust + Tokio 实现。无需 etcd、NATS、Redis 或 Consul。 + 纯 Rust + Tokio 实现。无需 etcd、NATS、Redis 或 Consul。只需 `pip install pulsing`。 -- :material-radar:{ .lg .middle } **内置集群发现** +- :material-lightning-bolt:{ .lg .middle } **流式优先** --- - SWIM/Gossip 协议实现自动节点发现和故障检测。 + 原生流式支持,为 LLM token 生成和实时通信而设计。 -- :material-lightning-bolt:{ .lg .middle } **高性能** +- :material-radar:{ .lg .middle } **内置发现** --- - 异步运行时 + HTTP/2 传输 + 原生流式支持。 + SWIM/Gossip 协议实现自动节点发现和故障检测。无需配置。 -- :material-language-python:{ .lg .middle } **Python 优先** +- :material-language-python:{ .lg .middle } **Rust 构建,Python 设计** --- - 通过 PyO3 提供完整 Python API。`@remote` 装饰器将任意类变成 Actor。 + 通过 PyO3 提供完整异步 Python API。`@remote` 装饰器将任意类变成分布式 Actor。
@@ -63,13 +67,13 @@ hide: toc [:octicons-arrow-right-24: 分布式 Agent](quickstart/agent.zh.md) -- :material-swap-horizontal:{ .lg .middle } **替代 Ray** +- :material-swap-horizontal:{ .lg .middle } **与 Ray 协同** --- - 将 Ray Actor 迁移到 Pulsing 原生异步 API。 + 用 Pulsing 作为 Ray Actor 的通信层。流式、发现、跨集群调用——开箱即用。 - [:octicons-arrow-right-24: 从 Ray 迁移](quickstart/migrate_from_ray.zh.md) + [:octicons-arrow-right-24: Ray + Pulsing](quickstart/migrate_from_ray.zh.md)
@@ -112,10 +116,11 @@ asyncio.run(main()) | 目标 | 链接 | |------|------| -| 理解 Actor 模型 | [指南:Actor](guide/actors.zh.md) | -| 构建集群 | [指南:远程 Actor](guide/remote_actors.zh.md) | -| 运维系统 | [指南:CLI 操作](guide/operations.zh.md) | -| 深入设计 | [设计文档](design/architecture.md) | +| Pulsing 是什么 / 适合谁? | [概述](overview.zh.md) | +| 理解 Actor 模型 | [Actor 基础](guide/actors.zh.md) | +| 构建集群 | [远程 Actor](guide/remote_actors.zh.md) | +| 运维系统 | [CLI 运维](guide/operations.zh.md) | +| 架构与设计 | [架构与设计](design/architecture.zh.md) | | API 详情 | [API 概述](api/overview.zh.md) | | 完整 API 契约 | [完整参考](api_reference.zh.md) | diff --git a/docs/src/overview.md b/docs/src/overview.md new file mode 100644 index 000000000..8ba0896aa --- /dev/null +++ b/docs/src/overview.md @@ -0,0 +1,48 @@ +# Pulsing Overview + +## What is Pulsing? + +**Pulsing: Backbone for distributed AI systems.** + +Pulsing is a distributed actor runtime built in Rust, designed for Python. Actor runtime. Streaming-first. Zero dependencies. Built-in discovery. + +In one sentence: turn any Python class into a distributed Actor with `@remote` — no etcd, NATS, or Redis required. Same API for local and remote, with native streaming support. + +--- + +## What Can You Do with Pulsing? + +| Use case | What you get | +|----------|----------------| +| **LLM inference services** | Scalable backends with streaming, OpenAI-compatible API, and optional vLLM/Transformers workers. | +| **Distributed agents** | Multi-agent systems with native integration for AutoGen and LangGraph; same code runs locally or across machines. | +| **Enhance Ray communication** | Add streaming, actor discovery, and cross-cluster calls to Ray actors via `pul.mount()`. Use Ray for scheduling, Pulsing for communication. | +| **Custom distributed apps** | Build services and workers that discover each other via built-in gossip or a head node, over a single HTTP/2 port. | + +--- + +## Who Is It For? + +| Role | Benefit | +|------|---------| +| **AI / ML application developers** | One-line scaling: add `addr` and `seeds` (or use init-in-Ray) to run agents and inference across nodes without learning a new paradigm. | +| **Distributed systems engineers** | Zero external coordination stores; built-in SWIM/gossip and optional head-node topology; single-port networking. | +| **Ray users** | Use Pulsing as a communication layer alongside Ray: `pul.mount()` bridges Ray actors onto the Pulsing network for streaming, discovery, and cross-cluster calls. | + +You don't need to be a distributed systems expert to get value — the API is designed to stay simple from single process to multi-node. + +--- + +## Design Principles + +- **Zero external dependencies** — Pure Rust core + Tokio; no etcd, NATS, or Redis. Cluster discovery uses built-in gossip or an optional head node. +- **Location transparency** — Same API for local and remote actors: `await actor.method()` whether the actor is on this process or another machine. +- **Python first** — `@pul.remote` turns a class into an Actor; `spawn()` and `resolve()` for creation and discovery; native async/await and streaming. +- **Single port** — Actor RPC and cluster protocol share one HTTP/2 port per node, simplifying deployment and firewalls. + +--- + +## Next Steps + +- **[Quick Start](quickstart/index.md)** — Run your first Actor in minutes, then go stateful and distributed. +- **[Ray + Pulsing](quickstart/migrate_from_ray.md)** — Use Pulsing as Ray's communication layer, or use the standalone API. diff --git a/docs/src/overview.zh.md b/docs/src/overview.zh.md new file mode 100644 index 000000000..0bf8eb32b --- /dev/null +++ b/docs/src/overview.zh.md @@ -0,0 +1,48 @@ +# Pulsing 概述 + +## 什么是 Pulsing? + +**Pulsing:分布式 AI 系统的通信骨干。** + +Pulsing 是一个用 Rust 构建、为 Python 设计的分布式 Actor 运行时。流式优先。零依赖。内置发现。 + +一句话:用 `@remote` 把任意 Python 类变成分布式 Actor,无需 etcd、NATS 或 Redis。本地和远程使用同一套 API,原生支持流式通信。 + +--- + +## 你能用 Pulsing 做什么? + +| 场景 | 你能得到什么 | +|------|----------------| +| **LLM 推理服务** | 可扩展的推理后端、流式输出、OpenAI 兼容 API,以及可选的 vLLM/Transformers Worker。 | +| **分布式 Agent** | 多智能体系统,原生集成 AutoGen 与 LangGraph;同一套代码可在本机或跨机运行。 | +| **增强 Ray 通信** | 通过 `pul.mount()` 为 Ray Actor 增加流式、发现和跨集群调用能力。Ray 负责调度,Pulsing 负责通信。 | +| **自定义分布式应用** | 通过内置 Gossip 或 Head 节点组网,单端口 HTTP/2,构建服务与 Worker。 | + +--- + +## 适合谁用? + +| 角色 | 收益 | +|------|------| +| **AI / ML 应用开发者** | 一行级扩展:加上 `addr` 和 `seeds`(或用 init-in-Ray),即可在多节点跑 Agent 与推理,无需学习新范式。 | +| **分布式系统工程师** | 零外部协调存储;内置 SWIM/Gossip 与可选 Head 拓扑;单端口组网。 | +| **Ray 用户** | 用 Pulsing 作为 Ray 的通信层:`pul.mount()` 将 Ray Actor 接入 Pulsing 网络,获得流式、发现和跨集群调用能力。 | + +你不需要成为分布式系统专家也能用好 —— 从单进程到多节点,API 保持简洁。 + +--- + +## 设计理念 + +- **零外部依赖** — 核心纯 Rust + Tokio;不依赖 etcd、NATS、Redis。集群发现采用内置 Gossip 或可选 Head 节点。 +- **位置透明** — 本地与远程 Actor 同一套 API:`await actor.method()` 无论 Actor 在本进程还是远程。 +- **Python 优先** — `@pul.remote` 将类变成 Actor;`spawn()` / `resolve()` 用于创建与发现;原生 async/await 与流式。 +- **单端口** — 每节点一个 HTTP/2 端口同时承载 Actor RPC 与集群协议,便于部署与防火墙配置。 + +--- + +## 下一步 + +- **[快速开始](quickstart/index.zh.md)** — 几分钟内跑起第一个 Actor,再进阶到有状态与分布式。 +- **[Ray + Pulsing](quickstart/migrate_from_ray.zh.md)** — 用 Pulsing 作为 Ray 的通信层,或使用 Pulsing 独立 API。 diff --git a/docs/src/quickstart/cluster_networking.md b/docs/src/quickstart/cluster_networking.md new file mode 100644 index 000000000..faa7c548f --- /dev/null +++ b/docs/src/quickstart/cluster_networking.md @@ -0,0 +1,206 @@ +# Cluster Networking (How to Use) + +This page describes **how to form and use** a Pulsing cluster. For protocol and implementation details, see [Cluster Networking (Design)](../design/cluster-networking.md). + +--- + +## Three modes + +| Mode | What you configure | Best for | +|------|--------------------|----------| +| **Gossip + seed** | Bind address + optional seed addresses to join | Kubernetes, VMs, bare metal; no single point of failure | +| **Head node** | One node as head, others with head address | Simple ops; one fixed coordinator address | +| **Init in Ray** | `init_in_ray()` in each process; no seeds | Already using Ray; automatic seed discovery | + +All modes use a **single HTTP/2 port** per node. No etcd, NATS, or Redis. + +--- + +## Mode 1: Gossip + seed + +### Configuration + +**Python** + +```python +import pulsing as pul + +# First node +await pul.init(addr="0.0.0.0:8000") + +# Later nodes — join via seeds +await pul.init(addr="0.0.0.0:8001", seeds=["192.168.1.10:8000"]) +``` + +**Rust** + +```rust +use pulsing_actor::prelude::*; +use std::net::SocketAddr; + +// First node +let config = SystemConfig::with_addr("0.0.0.0:8000".parse()?); +let system = ActorSystem::new(config).await?; + +// Later nodes +let config = SystemConfig::with_addr("0.0.0.0:8001".parse()?) + .with_seeds(vec!["192.168.1.10:8000".parse()?]); +let system = ActorSystem::new(config).await?; +``` + +With multiple seeds (e.g. a Kubernetes Service), pass a list; the node probes until it gets a member list. + +### Kubernetes + +Use the Service name as the seed so new pods can join: + +```python +await pul.init(addr="0.0.0.0:8080", seeds=["actor-cluster.default.svc.cluster.local:8080"]) +``` + +### When to use + +- No single point of failure for discovery +- You run on K8s, VMs, or bare metal and can expose at least one address (or Service) as seed +- Eventual consistency of membership is acceptable (typically hundreds of ms) + +--- + +## Mode 2: Head node + +### Configuration + +**Rust** + +```rust +use pulsing_actor::prelude::*; +use std::net::SocketAddr; + +// Head node +let config = SystemConfig::with_addr("0.0.0.0:8000".parse()?) + .with_head_node(); +let system = ActorSystem::new(config).await?; + +// Worker nodes +let head_addr: SocketAddr = "192.168.1.10:8000".parse()?; +let config = SystemConfig::with_addr("0.0.0.0:8001".parse()?) + .with_head_addr(head_addr); +let system = ActorSystem::new(config).await?; +``` + +**Python** + +```python +import pulsing as pul + +# Head node +await pul.init(addr="0.0.0.0:8000", is_head_node=True) + +# Worker nodes +await pul.init(addr="0.0.0.0:8001", head_addr="192.168.1.10:8000") +``` + +You can also use `SystemConfig.with_head_node()` / `.with_head_addr(addr)` and pass the config to `ActorSystem.create(config, loop)` for advanced use. + +### Head parameters (Rust) + +- **Sync interval**: how often workers pull from head (default 5s) +- **Heartbeat interval**: worker → head (default 10s) +- **Heartbeat timeout**: head marks worker dead after (default 30s) + +### When to use + +- One fixed address (the head) for firewalls and monitoring +- You accept a single point of failure for coordination until head recovers +- You want the head as the single source of truth for membership/registry + +--- + +## Mode 3: Init in Ray + +### Requirements + +- Ray installed and `ray.init()` called before `init_in_ray()` +- Every process that uses Pulsing (driver and workers) must call `init_in_ray()` in that process + +### Usage + +```python +import ray +from pulsing.ray import init_in_ray + +# Recommended: hook so every worker runs init_in_ray at startup +ray.init(runtime_env={"worker_process_setup_hook": init_in_ray}) + +# Driver must also init +init_in_ray() + +# Use Pulsing as usual +import pulsing as pul +@pul.remote +class MyActor: + def run(self): return "ok" + +actor = await MyActor.spawn(name="my_actor") +``` + +**Async** (e.g. async Ray actors): + +```python +from pulsing.ray import async_init_in_ray +await async_init_in_ray() +``` + +**Cleanup** (e.g. tests): + +```python +from pulsing.ray import cleanup +cleanup() +``` + +### When to use + +- You already run Ray and want Pulsing on the same nodes as one cluster +- You want one-line cluster formation per process without managing seeds or head address +- You are okay depending on Ray’s KV only for bootstrap; after that Pulsing uses its own gossip + +### Limitations + +- Requires Ray and its internal KV +- Every process must call `init_in_ray()` (driver explicitly; workers via hook) +- One Pulsing cluster per Ray cluster (one KV key) + +--- + +## Comparison and choice + +| Criterion | Gossip + seed | Head node | Init in Ray | +|-----------|----------------|-----------|-------------| +| External deps | None | None | Ray | +| Single point of failure | No | Yes (head) | No | +| Config | addr + optional seeds | addr + head addr or head role | None (Ray KV) | +| Best environment | K8s, VMs, bare metal | One coordinator OK | Existing Ray cluster | +| Python `init()` | `addr`, `seeds` | Via SystemConfig if exposed | `init_in_ray()` | + +**Suggested choice:** + +- **Already on Ray** → **Init in Ray** +- **No SPOF, no Ray** → **Gossip + seed** (use a K8s Service as seed when on K8s) +- **One fixed coordinator, simple ops** → **Head node** + +--- + +## Best practices + +1. **Gossip + seed**: In K8s use a Service as seed; keep one port open for all nodes (actor + gossip). +2. **Head node**: Run head on a stable host/port; tune heartbeat timeout under load. +3. **Init in Ray**: Call `init_in_ray()` in the driver and set `worker_process_setup_hook`; use `cleanup()` in tests if needed. +4. **Security**: For any mode, enable TLS (e.g. passphrase) for cluster traffic — see [Security](../guide/security.md). + +--- + +## See also + +- [Cluster Networking (Design)](../design/cluster-networking.md) — how the protocols and backends work +- [Remote Actors](../guide/remote_actors.md) — resolve, named actors, multi-node +- [Ray + Pulsing](migrate_from_ray.md) — use Pulsing as Ray's communication layer diff --git a/docs/src/quickstart/cluster_networking.zh.md b/docs/src/quickstart/cluster_networking.zh.md new file mode 100644 index 000000000..c121b6fe6 --- /dev/null +++ b/docs/src/quickstart/cluster_networking.zh.md @@ -0,0 +1,206 @@ +# 集群组网(如何使用) + +本页说明**如何组建和使用** Pulsing 集群。协议与实现细节见 [集群组网(设计)](../design/cluster-networking.zh.md)。 + +--- + +## 三种方式 + +| 方式 | 你需要配置什么 | 适用场景 | +|------|----------------|----------| +| **Gossip + seed** | 绑定地址 + 可选 seed 地址以加入 | Kubernetes、VM、裸机;无单点故障 | +| **Head 节点** | 一个节点作 Head,其余填 Head 地址 | 运维简单;一个固定协调地址 | +| **Init in Ray** | 每个进程调用 `init_in_ray()`,无需 seeds | 已在用 Ray;自动发现 seed | + +所有方式每节点**单一 HTTP/2 端口**,不依赖 etcd、NATS、Redis。 + +--- + +## 方式一:Gossip + seed + +### 配置 + +**Python** + +```python +import pulsing as pul + +# 首节点 +await pul.init(addr="0.0.0.0:8000") + +# 后续节点:通过 seeds 加入 +await pul.init(addr="0.0.0.0:8001", seeds=["192.168.1.10:8000"]) +``` + +**Rust** + +```rust +use pulsing_actor::prelude::*; +use std::net::SocketAddr; + +// 首节点 +let config = SystemConfig::with_addr("0.0.0.0:8000".parse()?); +let system = ActorSystem::new(config).await?; + +// 后续节点 +let config = SystemConfig::with_addr("0.0.0.0:8001".parse()?) + .with_seeds(vec!["192.168.1.10:8000".parse()?]); +let system = ActorSystem::new(config).await?; +``` + +多个 seed(如 Kubernetes Service)时传入列表即可,节点会探测直到获得成员列表。 + +### Kubernetes + +用 Service 名作为 seed,新 Pod 即可加入: + +```python +await pul.init(addr="0.0.0.0:8080", seeds=["actor-cluster.default.svc.cluster.local:8080"]) +``` + +### 何时选用 + +- 发现逻辑无单点故障 +- 运行在 K8s、VM 或裸机,能提供至少一个地址(或 Service)作 seed +- 能接受成员关系的最终一致性(通常几百毫秒内收敛) + +--- + +## 方式二:Head 节点 + +### 配置 + +**Rust** + +```rust +use pulsing_actor::prelude::*; +use std::net::SocketAddr; + +// Head 节点 +let config = SystemConfig::with_addr("0.0.0.0:8000".parse()?) + .with_head_node(); +let system = ActorSystem::new(config).await?; + +// Worker 节点 +let head_addr: SocketAddr = "192.168.1.10:8000".parse()?; +let config = SystemConfig::with_addr("0.0.0.0:8001".parse()?) + .with_head_addr(head_addr); +let system = ActorSystem::new(config).await?; +``` + +**Python** + +```python +import pulsing as pul + +# Head 节点 +await pul.init(addr="0.0.0.0:8000", is_head_node=True) + +# Worker 节点 +await pul.init(addr="0.0.0.0:8001", head_addr="192.168.1.10:8000") +``` + +也可使用 `SystemConfig.with_head_node()` / `.with_head_addr(addr)` 后传给 `ActorSystem.create(config, loop)` 做高级用法。 + +### Head 参数(Rust) + +- **同步间隔**:Worker 从 Head 拉取的周期(默认 5s) +- **心跳间隔**:Worker 向 Head 发送心跳的周期(默认 10s) +- **心跳超时**:Head 将 Worker 判为死亡的时间(默认 30s) + +### 何时选用 + +- 希望一个固定地址(Head)做防火墙与监控 +- 可接受协调单点(Head 宕机期间无法新加入直到恢复) +- 希望以 Head 为成员/注册表的唯一真相源 + +--- + +## 方式三:Init in Ray + +### 前置条件 + +- 已安装 Ray,且先执行 `ray.init()` 再调用 `init_in_ray()` +- 每个使用 Pulsing 的进程(driver 与 worker)都必须在该进程中调用 `init_in_ray()` + +### 用法 + +```python +import ray +from pulsing.ray import init_in_ray + +# 推荐:用 hook 让每个 worker 启动时执行 init_in_ray +ray.init(runtime_env={"worker_process_setup_hook": init_in_ray}) + +# driver 也必须初始化 +init_in_ray() + +# 按常规使用 Pulsing +import pulsing as pul +@pul.remote +class MyActor: + def run(self): return "ok" + +actor = await MyActor.spawn(name="my_actor") +``` + +**异步**(如 async Ray actor): + +```python +from pulsing.ray import async_init_in_ray +await async_init_in_ray() +``` + +**清理**(如测试): + +```python +from pulsing.ray import cleanup +cleanup() +``` + +### 何时选用 + +- 已在用 Ray,希望 Pulsing 在同一批节点上组成一个集群 +- 希望每个进程一行代码完成组网,无需自己维护 seed 或 Head 地址 +- 能接受仅在启动阶段依赖 Ray 的 KV;之后仅用 Pulsing 自己的 gossip + +### 限制 + +- 依赖 Ray 及其 internal KV +- 每个进程都必须调用 `init_in_ray()`(driver 显式;worker 通过 hook) +- 一个 Ray 集群对应一个 Pulsing 集群(一个 KV key) + +--- + +## 对比与选型 + +| 维度 | Gossip + seed | Head 节点 | Init in Ray | +|------|----------------|-----------|-------------| +| 外部依赖 | 无 | 无 | Ray | +| 单点故障 | 无 | 有(Head) | 无 | +| 配置 | addr + 可选 seeds | addr + Head 地址或 Head 角色 | 无(Ray KV) | +| 适用环境 | K8s、VM、裸机 | 可接受单一协调节点 | 已有 Ray 集群 | +| Python init() | `addr`、`seeds` | 通过 SystemConfig(若暴露) | `init_in_ray()` | + +**选型建议:** + +- **已有 Ray** → **Init in Ray** +- **不要单点且不用 Ray** → **Gossip + seed**(K8s 下用 Service 作 seed) +- **一个固定协调节点、运维简单** → **Head 节点** + +--- + +## 最佳实践 + +1. **Gossip + seed**:K8s 下用 Service 作 seed;各节点开放同一端口(Actor + Gossip)。 +2. **Head 节点**:Head 部署在稳定主机/端口;根据负载调整心跳超时。 +3. **Init in Ray**:Driver 中调用 `init_in_ray()` 并设置 `worker_process_setup_hook`;测试中如需可调用 `cleanup()`。 +4. **安全**:任意方式均可为集群流量开启 TLS(如 passphrase),见 [安全](../guide/security.zh.md)。 + +--- + +## 相关文档 + +- [集群组网(设计)](../design/cluster-networking.zh.md) — 协议与后端如何实现 +- [远程 Actor](../guide/remote_actors.zh.md) — resolve、命名 Actor、多节点 +- [Ray + Pulsing](migrate_from_ray.zh.md) — 用 Pulsing 作为 Ray 的通信层 diff --git a/docs/src/quickstart/index.md b/docs/src/quickstart/index.md index fb8c02316..4eff8930c 100644 --- a/docs/src/quickstart/index.md +++ b/docs/src/quickstart/index.md @@ -1,6 +1,8 @@ -# Getting Started +# Quick Start -Get Pulsing running in **5 minutes**. +Get from zero to a **distributed Actor** in about **10 minutes** with three steps: your first Actor, a stateful Actor, then the same code across two nodes. + +--- ## Installation @@ -10,7 +12,35 @@ pip install pulsing --- -## Your First Actor +## 1. Your First Actor (~2 minutes) + +Define a class, add `@pul.remote`, then spawn and call it. + +```python +import asyncio +import pulsing as pul + +@pul.remote +class Greeter: + def greet(self, name: str) -> str: + return f"Hello, {name}!" + +async def main(): + await pul.init() + greeter = await Greeter.spawn() + print(await greeter.greet("World")) # Hello, World! + await pul.shutdown() + +asyncio.run(main()) +``` + +The `@pul.remote` decorator turns the class into a distributed Actor. `spawn()` creates an instance; method calls use normal `await`. + +--- + +## 2. Stateful Actor (~3 minutes) + +Actors hold state. Here, a counter keeps a value and exposes `inc` and `get`. ```python import asyncio @@ -18,24 +48,81 @@ import pulsing as pul @pul.remote class Counter: - def __init__(self, value=0): + def __init__(self, value: int = 0): self.value = value - def inc(self): - self.value += 1 + def inc(self, n: int = 1) -> int: + self.value += n + return self.value + + def get(self) -> int: return self.value async def main(): await pul.init() counter = await Counter.spawn(value=0) - print(await counter.inc()) # 1 - print(await counter.inc()) # 2 + print(await counter.inc()) # 1 + print(await counter.inc(2)) # 3 + print(await counter.get()) # 3 + await pul.shutdown() + +asyncio.run(main()) +``` + +Same idea: one Actor instance, private state, messages via method calls. No shared memory, no locks. + +--- + +## 3. Distributed: Same Code, Two Nodes (~5 minutes) + +Run the same Actor type on two processes. Only the **initialization** changes: bind an address on the first node, join with `seeds` on the second. + +**Node 1 (seed):** + +```python +import asyncio +import pulsing as pul + +@pul.remote +class Counter: + def __init__(self, value: int = 0): + self.value = value + def inc(self, n: int = 1) -> int: + self.value += n + return self.value + +async def main(): + await pul.init(addr="0.0.0.0:8000") + await Counter.spawn(value=0, name="counter") + await asyncio.Event().wait() # keep running + +asyncio.run(main()) +``` + +**Node 2 (join cluster, then resolve and call):** + +```python +import asyncio +import pulsing as pul + +@pul.remote +class Counter: + def __init__(self, value: int = 0): + self.value = value + def inc(self, n: int = 1) -> int: + self.value += n + return self.value + +async def main(): + await pul.init(addr="0.0.0.0:8001", seeds=["127.0.0.1:8000"]) + counter = await Counter.resolve("counter") + print(await counter.inc(10)) # 10 — same API, remote actor await pul.shutdown() asyncio.run(main()) ``` -The `@pul.remote` decorator turns any Python class into a distributed Actor. +**What changed:** `init(addr=..., seeds=...)` and `Counter.resolve("counter")` instead of `spawn()`. The rest of your code stays the same — **location transparency**. --- @@ -59,11 +146,11 @@ The `@pul.remote` decorator turns any Python class into a distributed Actor. [:octicons-arrow-right-24: ~10 minutes](agent.md) -- :material-swap-horizontal:{ .lg .middle } **Migrate from Ray** +- :material-swap-horizontal:{ .lg .middle } **Use with Ray** --- - Replace Ray with one import change. Zero external dependencies. + Bridge Ray actors onto the Pulsing network with `pul.mount()`. Add streaming and discovery to your Ray cluster. [:octicons-arrow-right-24: ~5 minutes](migrate_from_ray.md) @@ -75,6 +162,9 @@ The `@pul.remote` decorator turns any Python class into a distributed Actor. | Goal | Link | |------|------| -| Understand the Actor model | [Guide: Actors](../guide/actors.md) | -| Build a cluster | [Guide: Remote Actors](../guide/remote_actors.md) | -| Operate your system | [Guide: Operations](../guide/operations.md) | +| Named actors and ask vs tell | [Actor Patterns](patterns.md) | +| Form a cluster (Gossip / Head / Ray) | [Cluster Setup](cluster_networking.md) | +| Actor basics and patterns | [Actor Guide](../guide/actors.md) | +| When to use ask / tell / streaming | [Communication Patterns](../guide/communication_patterns.md) | +| Cluster setup and resolve | [Remote Actors](../guide/remote_actors.md) | +| Operate and inspect | [Operations](../guide/operations.md) | diff --git a/docs/src/quickstart/index.zh.md b/docs/src/quickstart/index.zh.md index 503e970bd..56123092e 100644 --- a/docs/src/quickstart/index.zh.md +++ b/docs/src/quickstart/index.zh.md @@ -1,6 +1,8 @@ # 快速开始 -**5 分钟**让 Pulsing 跑起来。 +用三个步骤、约 **10 分钟** 从零到**分布式 Actor**:第一个 Actor、有状态 Actor,再到同一套代码跑在两个节点上。 + +--- ## 安装 @@ -10,7 +12,35 @@ pip install pulsing --- -## 第一个 Actor +## 1. 第一个 Actor(约 2 分钟) + +定义一个类,加上 `@pul.remote`,然后 spawn 并调用。 + +```python +import asyncio +import pulsing as pul + +@pul.remote +class Greeter: + def greet(self, name: str) -> str: + return f"Hello, {name}!" + +async def main(): + await pul.init() + greeter = await Greeter.spawn() + print(await greeter.greet("World")) # Hello, World! + await pul.shutdown() + +asyncio.run(main()) +``` + +`@pul.remote` 把类变成分布式 Actor;`spawn()` 创建实例,方法调用就是普通的 `await`。 + +--- + +## 2. 有状态 Actor(约 3 分钟) + +Actor 自带状态。下面这个计数器维护一个值,并暴露 `inc` 和 `get`。 ```python import asyncio @@ -18,24 +48,81 @@ import pulsing as pul @pul.remote class Counter: - def __init__(self, value=0): + def __init__(self, value: int = 0): self.value = value - def inc(self): - self.value += 1 + def inc(self, n: int = 1) -> int: + self.value += n + return self.value + + def get(self) -> int: return self.value async def main(): await pul.init() counter = await Counter.spawn(value=0) - print(await counter.inc()) # 1 - print(await counter.inc()) # 2 + print(await counter.inc()) # 1 + print(await counter.inc(2)) # 3 + print(await counter.get()) # 3 + await pul.shutdown() + +asyncio.run(main()) +``` + +同样的思路:一个 Actor 实例、私有状态、通过方法调用发消息。无共享内存、无锁。 + +--- + +## 3. 分布式:同一套代码,两个节点(约 5 分钟) + +在两个进程里跑同一种 Actor。**只有初始化不同**:第一个节点绑定地址,第二个节点用 `seeds` 加入集群。 + +**节点 1(seed):** + +```python +import asyncio +import pulsing as pul + +@pul.remote +class Counter: + def __init__(self, value: int = 0): + self.value = value + def inc(self, n: int = 1) -> int: + self.value += n + return self.value + +async def main(): + await pul.init(addr="0.0.0.0:8000") + await Counter.spawn(value=0, name="counter") + await asyncio.Event().wait() # 保持运行 + +asyncio.run(main()) +``` + +**节点 2(加入集群后 resolve 并调用):** + +```python +import asyncio +import pulsing as pul + +@pul.remote +class Counter: + def __init__(self, value: int = 0): + self.value = value + def inc(self, n: int = 1) -> int: + self.value += n + return self.value + +async def main(): + await pul.init(addr="0.0.0.0:8001", seeds=["127.0.0.1:8000"]) + counter = await Counter.resolve("counter") + print(await counter.inc(10)) # 10 — 同一套 API,远程 Actor await pul.shutdown() asyncio.run(main()) ``` -`@pul.remote` 装饰器将任意 Python 类变成分布式 Actor。 +**变化只有:** `init(addr=..., seeds=...)` 和用 `Counter.resolve("counter")` 代替 `spawn()`。其余代码不变 —— **位置透明**。 --- @@ -59,11 +146,11 @@ asyncio.run(main()) [:octicons-arrow-right-24: ~10 分钟](agent.zh.md) -- :material-swap-horizontal:{ .lg .middle } **从 Ray 迁移** +- :material-swap-horizontal:{ .lg .middle } **与 Ray 配合使用** --- - 一行导入替换 Ray。零外部依赖。 + 通过 `pul.mount()` 将 Ray Actor 接入 Pulsing 网络。为 Ray 集群增加流式和发现能力。 [:octicons-arrow-right-24: ~5 分钟](migrate_from_ray.zh.md) @@ -75,6 +162,9 @@ asyncio.run(main()) | 目标 | 链接 | |------|------| -| 理解 Actor 模型 | [指南:Actor](../guide/actors.zh.md) | -| 构建集群 | [指南:远程 Actor](../guide/remote_actors.zh.md) | -| 运维系统 | [指南:运维操作](../guide/operations.zh.md) | +| 命名 Actor 与 ask/tell | [Actor 模式](patterns.zh.md) | +| 组建集群(Gossip / Head / Ray) | [集群组网](cluster_networking.zh.md) | +| Actor 基础与模式 | [Actor 指南](../guide/actors.zh.md) | +| 何时用 ask / tell / streaming | [通信范式](../guide/communication_patterns.zh.md) | +| 集群搭建与 resolve | [远程 Actor](../guide/remote_actors.zh.md) | +| 运维与巡检 | [运维操作](../guide/operations.zh.md) | diff --git a/docs/src/quickstart/llm_inference.md b/docs/src/quickstart/llm_inference.md index 99a49f6d3..48115d8c3 100644 --- a/docs/src/quickstart/llm_inference.md +++ b/docs/src/quickstart/llm_inference.md @@ -2,6 +2,16 @@ Build a **scalable LLM inference backend** with Pulsing in 10 minutes. +**Before / After:** + +| | Before (single process or ad‑hoc script) | After (Pulsing) | +|---|------------------------------------------|-----------------| +| **API** | Your own HTTP or in-process only | OpenAI-compatible HTTP API (`/v1/chat/completions`) | +| **Scaling** | One process, one model | Router + N workers; add nodes and workers as needed | +| **Streaming** | Hand-rolled if any | Native streaming from Router to client | + +You get a **Router** (HTTP API + load balancing) and **Workers** (model backends). Same Actor model; add more workers or nodes without changing client code. + **What you'll build:** - A Router that exposes an **OpenAI-compatible HTTP API** diff --git a/docs/src/quickstart/llm_inference.zh.md b/docs/src/quickstart/llm_inference.zh.md index d42dd2982..42f9a715d 100644 --- a/docs/src/quickstart/llm_inference.zh.md +++ b/docs/src/quickstart/llm_inference.zh.md @@ -2,6 +2,16 @@ 10 分钟内用 Pulsing 构建一个**可扩展的 LLM 推理后端**。 +**前后对比:** + +| | 之前(单进程或临时脚本) | 之后(Pulsing) | +|---|--------------------------|-----------------| +| **API** | 自建 HTTP 或仅进程内 | OpenAI 兼容 HTTP API(`/v1/chat/completions`) | +| **扩展** | 单进程、单模型 | Router + N 个 Worker;按需增加节点与 Worker | +| **流式** | 若有则手写 | Router 到客户端的原生流式 | + +你会得到一个 **Router**(HTTP API + 负载均衡)和若干 **Worker**(模型后端)。同一套 Actor 模型;增加 Worker 或节点无需改客户端代码。 + **你将构建:** - 一个暴露 **OpenAI 兼容 HTTP API** 的 Router diff --git a/docs/src/quickstart/migrate_from_ray.md b/docs/src/quickstart/migrate_from_ray.md index 71f0e100c..0a13092ed 100644 --- a/docs/src/quickstart/migrate_from_ray.md +++ b/docs/src/quickstart/migrate_from_ray.md @@ -1,22 +1,51 @@ -# Tutorial: Migrate from Ray +# Tutorial: Ray + Pulsing -Migrate Ray actor code to Pulsing's native async API. +Use Pulsing as the communication backbone for your Ray actors — add streaming, actor discovery, and cross-cluster calls without replacing Ray. --- -## Why This Migration Changed +## Two Ways to Use Pulsing with Ray -This project no longer recommends the Ray-compatible layer (`pulsing.compat.ray`). -Use Pulsing's primary API directly: +1. **Bridge mode** — Keep your Ray actors, add Pulsing communication via `pul.mount()` +2. **Standalone mode** — Use Pulsing's native API directly (for new projects or full migration) -- `import pulsing as pul` -- `@pul.remote` -- `await pul.init()` / `await pul.shutdown()` -- `await Class.spawn()` / `await Class.resolve()` +--- + +## Bridge Mode: Add Pulsing to Ray Actors + +The simplest path — keep Ray for scheduling, add Pulsing for communication: + +```python +import ray +import pulsing as pul + +@ray.remote +class Worker: + def __init__(self, name): + pul.mount(self, name=name) # One line: join the Pulsing network + + async def call_peer(self, peer_name, msg): + proxy = (await pul.resolve(peer_name, timeout=30)).as_any() + return await proxy.greet(msg) # Cross-process Pulsing call + + async def greet(self, msg): + return f"hello: {msg}" + +ray.init() +workers = [Worker.remote(f"w{i}") for i in range(3)] +ray.get(workers[0].call_peer.remote("w1", "hi")) # => "hello: hi" +pul.cleanup_ray() +``` + +**What you get:** Ray handles process scheduling and resource management. Pulsing adds streaming, named actor discovery, and direct actor-to-actor communication — without going through Ray's object store. --- -## API Mapping (Ray -> Pulsing) +## Standalone Mode: Pulsing Native API + +For new projects or when you want Pulsing's full feature set: + +### API Mapping (Ray -> Pulsing) | Ray | Pulsing | |---|---| @@ -27,11 +56,9 @@ Use Pulsing's primary API directly: | `ray.get(actor.method.remote(args...))` | `await actor.method(args...)` | | `ray.get_actor(name)` | `await Actor.resolve(name)` or `await pul.resolve(name)` | ---- - -## Minimal Example +### Minimal Example -### Before (Ray) +**Ray:** ```python import ray @@ -51,7 +78,7 @@ print(ray.get(counter.inc.remote())) ray.shutdown() ``` -### After (Pulsing) +**Pulsing:** ```python import pulsing as pul @@ -71,6 +98,18 @@ async def main(): await pul.shutdown() ``` +**Key differences:** + +| Aspect | Ray | Pulsing | +|--------|-----|---------| +| Create actor | `Counter.remote()` | `await Counter.spawn()` — native async | +| Call method | `ray.get(counter.inc.remote())` | `await counter.inc()` — direct await | +| Get by name | `ray.get_actor("counter")` | `await Counter.resolve("counter")` — typed proxy | +| Streaming | Not built-in | Native `async for chunk in actor.stream()` | +| Discovery | Needs GCS | Built-in gossip, zero external deps | + +Same mental model (remote class, spawn, method calls). Pulsing adds native async, streaming, and self-contained clustering. + --- ## Distributed Mode Mapping diff --git a/docs/src/quickstart/migrate_from_ray.zh.md b/docs/src/quickstart/migrate_from_ray.zh.md index 442946642..5eea3bfa8 100644 --- a/docs/src/quickstart/migrate_from_ray.zh.md +++ b/docs/src/quickstart/migrate_from_ray.zh.md @@ -1,22 +1,51 @@ -# 教程:从 Ray 迁移 +# 教程:Ray + Pulsing -将 Ray Actor 代码迁移到 Pulsing 原生异步 API。 +用 Pulsing 作为 Ray Actor 的通信骨干——增加流式、Actor 发现和跨集群调用能力,无需替换 Ray。 --- -## 为什么这篇迁移说明改了 +## 两种使用方式 -当前项目不再推荐 Ray 兼容层(`pulsing.compat.ray`)。 -请直接使用 Pulsing 主 API: +1. **桥接模式** — 保留 Ray Actor,通过 `pul.mount()` 接入 Pulsing 通信 +2. **独立模式** — 直接使用 Pulsing 原生 API(适合新项目或完全迁移) -- `import pulsing as pul` -- `@pul.remote` -- `await pul.init()` / `await pul.shutdown()` -- `await Class.spawn()` / `await Class.resolve()` +--- + +## 桥接模式:为 Ray Actor 增加 Pulsing 通信 + +最简单的路径——Ray 负责调度,Pulsing 负责通信: + +```python +import ray +import pulsing as pul + +@ray.remote +class Worker: + def __init__(self, name): + pul.mount(self, name=name) # 一行代码:接入 Pulsing 网络 + + async def call_peer(self, peer_name, msg): + proxy = (await pul.resolve(peer_name, timeout=30)).as_any() + return await proxy.greet(msg) # 跨进程 Pulsing 调用 + + async def greet(self, msg): + return f"hello: {msg}" + +ray.init() +workers = [Worker.remote(f"w{i}") for i in range(3)] +ray.get(workers[0].call_peer.remote("w1", "hi")) # => "hello: hi" +pul.cleanup_ray() +``` + +**你获得的能力:** Ray 处理进程调度和资源管理。Pulsing 增加流式、命名 Actor 发现和直接的 Actor 间通信——不经过 Ray 的对象存储。 --- -## API 对照表(Ray -> Pulsing) +## 独立模式:Pulsing 原生 API + +适合新项目或需要 Pulsing 完整特性的场景: + +### API 对照表(Ray -> Pulsing) | Ray | Pulsing | |---|---| @@ -27,11 +56,9 @@ | `ray.get(actor.method.remote(args...))` | `await actor.method(args...)` | | `ray.get_actor(name)` | `await Actor.resolve(name)` 或 `await pul.resolve(name)` | ---- - -## 最小迁移示例 +### 最小示例 -### 之前(Ray) +**Ray:** ```python import ray @@ -51,7 +78,7 @@ print(ray.get(counter.inc.remote())) ray.shutdown() ``` -### 之后(Pulsing) +**Pulsing:** ```python import pulsing as pul @@ -71,6 +98,18 @@ async def main(): await pul.shutdown() ``` +**关键差异:** + +| 方面 | Ray | Pulsing | +|------|-----|---------| +| 创建 Actor | `Counter.remote()` | `await Counter.spawn()` — 原生 async | +| 调用方法 | `ray.get(counter.inc.remote())` | `await counter.inc()` — 直接 await | +| 按名获取 | `ray.get_actor("counter")` | `await Counter.resolve("counter")` — 带类型代理 | +| 流式 | 非内置 | 原生 `async for chunk in actor.stream()` | +| 发现 | 需要 GCS | 内置 gossip,零外部依赖 | + +心智模型一致(远程类、spawn、方法调用)。Pulsing 增加了原生 async、流式和自包含集群能力。 + --- ## 分布式模式对照 diff --git a/docs/src/quickstart/patterns.md b/docs/src/quickstart/patterns.md new file mode 100644 index 000000000..115218399 --- /dev/null +++ b/docs/src/quickstart/patterns.md @@ -0,0 +1,48 @@ +# Actor Patterns + +Common patterns right after your first Actor: named actors, resolve, and when to use ask vs tell. + +--- + +## Named actors and resolve + +Give an actor a **name** so other code can find it with **resolve** (same process or across the cluster): + +```python +import pulsing as pul + +@pul.remote +class Worker: + def process(self, data: str) -> str: + return f"processed: {data}" + +async def main(): + await pul.init() + # Spawn with a name — discoverable via resolve + await Worker.spawn(name="worker") + # Later (or on another node): get a proxy by name + worker = await Worker.resolve("worker") + result = await worker.process("hello") + await pul.shutdown() +``` + +Anonymous actors (no `name=`) are only reachable via the `ActorRef` returned by `spawn()`. + +--- + +## Ask vs tell + +| Pattern | Method | Use when | +|--------|--------|----------| +| **Request–response** | `await ref.ask(msg)` or `await proxy.method()` | You need a return value. | +| **Fire-and-forget** | `await ref.tell(msg)` | You don't need a reply; best-effort delivery. | + +For typed proxies, method calls are like **ask** (they return the result). Use **tell** when you have an `ActorRef` and want to send without waiting. + +--- + +## Next steps + +- [Cluster Setup](cluster_networking.md) — form a cluster (Gossip / Head / Ray) +- [Actor Basics](../guide/actors.md) — deeper model and API +- [Communication Patterns](../guide/communication_patterns.md) — streaming, timeouts, and more diff --git a/docs/src/quickstart/patterns.zh.md b/docs/src/quickstart/patterns.zh.md new file mode 100644 index 000000000..189685161 --- /dev/null +++ b/docs/src/quickstart/patterns.zh.md @@ -0,0 +1,48 @@ +# Actor 模式 + +在写完第一个 Actor 之后常用的几种写法:命名 Actor、resolve,以及何时用 ask / tell。 + +--- + +## 命名 Actor 与 resolve + +给 Actor 起一个 **name**,其他代码(本进程或集群内)可以用 **resolve** 按名查找: + +```python +import pulsing as pul + +@pul.remote +class Worker: + def process(self, data: str) -> str: + return f"processed: {data}" + +async def main(): + await pul.init() + # 带名字 spawn,可通过 resolve 发现 + await Worker.spawn(name="worker") + # 之后(或另一节点):按名拿到 proxy + worker = await Worker.resolve("worker") + result = await worker.process("hello") + await pul.shutdown() +``` + +匿名 Actor(不传 `name=`)只能通过 `spawn()` 返回的 `ActorRef` 访问。 + +--- + +## Ask 与 tell + +| 模式 | 方法 | 适用场景 | +|------|------|----------| +| **请求–响应** | `await ref.ask(msg)` 或 `await proxy.method()` | 需要返回值。 | +| **发送即忘** | `await ref.tell(msg)` | 不需要回复;尽力而为投递。 | + +有类型 proxy 时,方法调用相当于 **ask**(会返回结果)。只有在手头是 `ActorRef` 且不想等待回复时再用 **tell**。 + +--- + +## 下一步 + +- [集群组网](cluster_networking.zh.md) — 组建集群(Gossip / Head / Ray) +- [Actor 基础](../guide/actors.zh.md) — 模型与 API 深入 +- [通信范式](../guide/communication_patterns.zh.md) — 流式、超时等 diff --git a/llms.binding.md b/llms.binding.md index 529a4f17e..d29e55942 100644 --- a/llms.binding.md +++ b/llms.binding.md @@ -2,7 +2,9 @@ ## Overview -`Pulsing` is a distributed actor framework that provides a communication backbone for building distributed systems, with specialized support for AI applications. +**Pulsing: Backbone for distributed AI systems.** + +Pulsing is a distributed actor runtime built in Rust, designed for Python. Actor runtime. Streaming-first. Zero dependencies. Built-in discovery. Connect AI agents and services across machines — no Redis, no etcd, no YAML. ## Quick Start diff --git a/pyproject.toml b/pyproject.toml index 45c0a712f..6bbc0cb41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "pulsing" version = "0.1.0" -description = "Pulsing - Distributed Actor Framework" +description = "Pulsing: Backbone for distributed AI systems. Actor runtime with streaming, zero dependencies, and built-in discovery." readme = "README.md" authors = [ { name = "Reiase", email = "reiase@gmail.com" } @@ -31,11 +31,11 @@ classifiers = [ keywords = ["actor", "distributed", "async", "llm", "inference"] [project.urls] -Homepage = "https://github.com/reiase/pulsing" -Repository = "https://github.com/reiase/pulsing" -Documentation = "https://github.com/reiase/pulsing#readme" -Issues = "https://github.com/reiase/pulsing/issues" -Changelog = "https://github.com/reiase/pulsing/blob/main/CHANGELOG.md" +Homepage = "https://github.com/DeepLink-org/Pulsing" +Repository = "https://github.com/DeepLink-org/Pulsing" +Documentation = "https://github.com/DeepLink-org/Pulsing#readme" +Issues = "https://github.com/DeepLink-org/Pulsing/issues" +Changelog = "https://github.com/DeepLink-org/Pulsing/blob/main/CHANGELOG.md" [project.optional-dependencies] storage = [ diff --git a/python/pulsing/actor/__init__.py b/python/pulsing/actor/__init__.py index 5bc76c682..73f6e1c97 100644 --- a/python/pulsing/actor/__init__.py +++ b/python/pulsing/actor/__init__.py @@ -50,13 +50,17 @@ async def init( *, seeds: list[str] = None, passphrase: str = None, + head_addr: str = None, + is_head_node: bool = False, ) -> ActorSystem: """Initialize Pulsing actor system Args: addr: Bind address (e.g., "0.0.0.0:8000"). None for standalone mode. - seeds: Seed nodes to join cluster - passphrase: Enable TLS with this passphrase + seeds: Seed nodes to join cluster (Gossip mode). + passphrase: Enable TLS with this passphrase. + head_addr: Address of head node (worker mode). Mutually exclusive with is_head_node. + is_head_node: If True, this node runs as head. Mutually exclusive with head_addr. Returns: ActorSystem instance @@ -65,17 +69,23 @@ async def init( # Standalone mode await init() - # Cluster mode with TLS - await init(addr="0.0.0.0:8000", passphrase="my-secret") - - # Join existing cluster + # Cluster mode (Gossip + seed) await init(addr="0.0.0.0:8001", seeds=["192.168.1.1:8000"]) + + # Head node + await init(addr="0.0.0.0:8000", is_head_node=True) + + # Worker node + await init(addr="0.0.0.0:8001", head_addr="192.168.1.1:8000") """ global _global_system if _global_system is not None: return _global_system + if is_head_node and head_addr: + raise ValueError("Cannot set both is_head_node and head_addr") + # Build config if addr: config = SystemConfig.with_addr(addr) @@ -84,6 +94,10 @@ async def init( if seeds: config = config.with_seeds(seeds) + if is_head_node: + config = config.with_head_node() + elif head_addr: + config = config.with_head_addr(head_addr) if passphrase: config = config.with_passphrase(passphrase) diff --git a/python/pulsing/agent/runtime.py b/python/pulsing/agent/runtime.py index 9a84123b6..914d23fb6 100644 --- a/python/pulsing/agent/runtime.py +++ b/python/pulsing/agent/runtime.py @@ -13,9 +13,17 @@ async def runtime( addr: str | None = None, seeds: list[str] | None = None, passphrase: str | None = None, + head_addr: str | None = None, + is_head_node: bool = False, ): """Actor system runtime context manager.""" - await init(addr=addr, seeds=seeds, passphrase=passphrase) + await init( + addr=addr, + seeds=seeds, + passphrase=passphrase, + head_addr=head_addr, + is_head_node=is_head_node, + ) try: yield get_system() finally: From 5024646b69f173fb2359f69aeca4269f37ba81c2 Mon Sep 17 00:00:00 2001 From: Reiase Date: Mon, 16 Feb 2026 22:33:52 +0800 Subject: [PATCH 07/15] Refactor imports and enhance module structure across benchmarks and core - Updated import statements in various benchmark scripts to utilize the new `pulsing.streaming` and `pulsing.core` modules, ensuring consistency and clarity in the codebase. - Removed deprecated references to `pulsing.actor` and `pulsing.queue`, streamlining the API usage in benchmarks. - Introduced a new `pulsing.integrations` module to better organize third-party integrations and improve overall project structure. - Enhanced documentation and comments to reflect the changes in module organization, aiding user understanding and accessibility. --- benchmarks/baseline_throughput.py | 9 +- benchmarks/concurrency_sweep.py | 9 +- benchmarks/large_scale_stress_test.py | 2 +- .../large_scale_stress_test_pulsing_single.py | 2 +- benchmarks/queue_benchmark.py | 4 +- benchmarks/stress_multiprocessing.py | 9 +- crates/pulsing-py/src/actor.rs | 4 +- examples/agent/autogen/distributed.py | 10 +- examples/agent/autogen/simple.py | 2 +- examples/agent/langgraph/distributed.py | 4 +- examples/agent/langgraph/parallel_ideas.py | 2 +- examples/agent/langgraph/simple.py | 2 +- python/pulsing/__init__.py | 19 +- python/pulsing/admin.py | 4 +- python/pulsing/agent/__init__.py | 4 +- python/pulsing/agent/base.py | 2 +- python/pulsing/agent/runtime.py | 2 +- python/pulsing/cli/__main__.py | 24 +- python/pulsing/cli/actor_loader.py | 8 +- python/pulsing/cli/actors.py | 4 +- python/pulsing/compat/__init__.py | 24 -- python/pulsing/{actor => core}/__init__.py | 16 +- python/pulsing/{actor => core}/helpers.py | 0 python/pulsing/core/messaging.py | 5 + python/pulsing/{actor => core}/remote.py | 10 +- python/pulsing/integrations/__init__.py | 5 + .../{ => integrations}/autogen/__init__.py | 2 +- .../autogen/agent_wrapper.py | 2 +- .../{ => integrations}/autogen/runtime.py | 4 +- .../{ => integrations}/langgraph/__init__.py | 2 +- .../{ => integrations}/langgraph/executor.py | 4 +- .../{ => integrations}/langgraph/wrapper.py | 4 +- python/pulsing/{ => integrations}/ray.py | 10 +- .../ray.py => integrations/ray_compat.py} | 20 +- python/pulsing/queue/README.md | 352 ------------------ .../pulsing/{actors => serving}/__init__.py | 0 .../{actors => serving}/load_stream.py | 2 +- python/pulsing/{actors => serving}/router.py | 6 +- .../pulsing/{actors => serving}/scheduler.py | 0 .../{actors => serving}/vllm/__init__.py | 0 .../{actors => serving}/vllm/handlers.py | 0 .../{actors => serving}/vllm/sampling.py | 0 .../pulsing/{actors => serving}/vllm/utils.py | 0 .../{actors => serving}/vllm/worker.py | 2 +- .../{actors => serving}/vllm_worker.py | 2 +- python/pulsing/{actors => serving}/worker.py | 2 +- .../pulsing/{queue => streaming}/__init__.py | 123 +++--- .../pulsing/{queue => streaming}/backend.py | 2 +- python/pulsing/{topic => streaming}/broker.py | 4 +- .../pulsing/{queue => streaming}/manager.py | 8 +- .../{topic/topic.py => streaming/pubsub.py} | 8 +- python/pulsing/{queue => streaming}/queue.py | 6 +- .../pulsing/{queue => streaming}/storage.py | 2 +- .../{queue => streaming}/sync_queue.py | 0 python/pulsing/topic/README.md | 107 ------ python/pulsing/topic/__init__.py | 102 ----- .../python/apis/actor/test_actor_behavior.py | 6 +- .../actor_system/test_actor_system_api.py | 2 +- .../apis/ray_compat/test_ray_compat_api.py | 2 +- .../python/apis/ray_like/test_ray_like_api.py | 2 +- tests/python/conftest.py | 2 +- tests/python/test_actor_list.py | 2 +- tests/python/test_agent_runtime_lifecycle.py | 2 +- tests/python/test_chaos.py | 2 +- tests/python/test_cli_actor.py | 2 +- tests/python/test_queue.py | 8 +- tests/python/test_queue_backends.py | 2 +- tests/python/test_queue_topic_chaos.py | 9 +- tests/python/test_ray_compat_running_loop.py | 2 +- tests/python/test_ray_init.py | 58 +-- tests/python/test_receive_error_behavior.py | 2 +- tests/python/test_remote_decorator.py | 16 +- tests/python/test_resolve_as_any.py | 4 +- tests/python/test_sealed_message.py | 2 +- tests/python/test_system_actor.py | 2 +- tests/python/test_topic.py | 34 +- 76 files changed, 276 insertions(+), 847 deletions(-) delete mode 100644 python/pulsing/compat/__init__.py rename python/pulsing/{actor => core}/__init__.py (93%) rename python/pulsing/{actor => core}/helpers.py (100%) create mode 100644 python/pulsing/core/messaging.py rename python/pulsing/{actor => core}/remote.py (99%) create mode 100644 python/pulsing/integrations/__init__.py rename python/pulsing/{ => integrations}/autogen/__init__.py (91%) rename python/pulsing/{ => integrations}/autogen/agent_wrapper.py (99%) rename python/pulsing/{ => integrations}/autogen/runtime.py (99%) rename python/pulsing/{ => integrations}/langgraph/__init__.py (92%) rename python/pulsing/{ => integrations}/langgraph/executor.py (98%) rename python/pulsing/{ => integrations}/langgraph/wrapper.py (97%) rename python/pulsing/{ => integrations}/ray.py (93%) rename python/pulsing/{compat/ray.py => integrations/ray_compat.py} (94%) delete mode 100644 python/pulsing/queue/README.md rename python/pulsing/{actors => serving}/__init__.py (100%) rename python/pulsing/{actors => serving}/load_stream.py (99%) rename python/pulsing/{actors => serving}/router.py (98%) rename python/pulsing/{actors => serving}/scheduler.py (100%) rename python/pulsing/{actors => serving}/vllm/__init__.py (100%) rename python/pulsing/{actors => serving}/vllm/handlers.py (100%) rename python/pulsing/{actors => serving}/vllm/sampling.py (100%) rename python/pulsing/{actors => serving}/vllm/utils.py (100%) rename python/pulsing/{actors => serving}/vllm/worker.py (99%) rename python/pulsing/{actors => serving}/vllm_worker.py (99%) rename python/pulsing/{actors => serving}/worker.py (99%) rename python/pulsing/{queue => streaming}/__init__.py (50%) rename python/pulsing/{queue => streaming}/backend.py (99%) rename python/pulsing/{topic => streaming}/broker.py (99%) rename python/pulsing/{queue => streaming}/manager.py (98%) rename python/pulsing/{topic/topic.py => streaming/pubsub.py} (98%) rename python/pulsing/{queue => streaming}/queue.py (99%) rename python/pulsing/{queue => streaming}/storage.py (98%) rename python/pulsing/{queue => streaming}/sync_queue.py (100%) delete mode 100644 python/pulsing/topic/README.md delete mode 100644 python/pulsing/topic/__init__.py diff --git a/benchmarks/baseline_throughput.py b/benchmarks/baseline_throughput.py index 28c4bfca3..ca36e7c34 100644 --- a/benchmarks/baseline_throughput.py +++ b/benchmarks/baseline_throughput.py @@ -21,8 +21,13 @@ import time import pulsing as pul -from pulsing.queue import read_queue, write_queue -from pulsing.topic import PublishMode, read_topic, write_topic +from pulsing.streaming import ( + read_queue, + write_queue, + PublishMode, + read_topic, + write_topic, +) def _percentile(sorted_data: list[float], p: float) -> float: diff --git a/benchmarks/concurrency_sweep.py b/benchmarks/concurrency_sweep.py index 67b0533d6..14342b386 100644 --- a/benchmarks/concurrency_sweep.py +++ b/benchmarks/concurrency_sweep.py @@ -20,8 +20,13 @@ import time import pulsing as pul -from pulsing.queue import read_queue, write_queue -from pulsing.topic import PublishMode, read_topic, write_topic +from pulsing.streaming import ( + read_queue, + write_queue, + PublishMode, + read_topic, + write_topic, +) # ============================================================================= diff --git a/benchmarks/large_scale_stress_test.py b/benchmarks/large_scale_stress_test.py index 46efb358c..64c3e8b59 100755 --- a/benchmarks/large_scale_stress_test.py +++ b/benchmarks/large_scale_stress_test.py @@ -18,7 +18,7 @@ from dataclasses import dataclass, field import pulsing as pul -from pulsing.actor import Actor, StreamMessage, SystemConfig +from pulsing.core import Actor, StreamMessage, SystemConfig # ============================================================================ diff --git a/benchmarks/large_scale_stress_test_pulsing_single.py b/benchmarks/large_scale_stress_test_pulsing_single.py index 1b9f11e28..0f93a71bf 100644 --- a/benchmarks/large_scale_stress_test_pulsing_single.py +++ b/benchmarks/large_scale_stress_test_pulsing_single.py @@ -17,7 +17,7 @@ from dataclasses import dataclass, field import pulsing as pul -from pulsing.actor import Actor, StreamMessage, SystemConfig +from pulsing.core import Actor, StreamMessage, SystemConfig # ============================================================================ diff --git a/benchmarks/queue_benchmark.py b/benchmarks/queue_benchmark.py index a09463265..f0c6a01ad 100644 --- a/benchmarks/queue_benchmark.py +++ b/benchmarks/queue_benchmark.py @@ -30,8 +30,8 @@ from dataclasses import dataclass, field import pulsing as pul -from pulsing.actor import SystemConfig -from pulsing.queue import read_queue, write_queue +from pulsing.core import SystemConfig +from pulsing.streaming import read_queue, write_queue # ============================================================================ diff --git a/benchmarks/stress_multiprocessing.py b/benchmarks/stress_multiprocessing.py index 07e045e36..ead4ea849 100644 --- a/benchmarks/stress_multiprocessing.py +++ b/benchmarks/stress_multiprocessing.py @@ -21,8 +21,13 @@ from multiprocessing import Queue import pulsing as pul -from pulsing.queue import read_queue, write_queue -from pulsing.topic import PublishMode, read_topic, write_topic +from pulsing.streaming import ( + read_queue, + write_queue, + PublishMode, + read_topic, + write_topic, +) # ============================================================================= diff --git a/crates/pulsing-py/src/actor.rs b/crates/pulsing-py/src/actor.rs index 9f603ed2e..d556b0be3 100644 --- a/crates/pulsing-py/src/actor.rs +++ b/crates/pulsing-py/src/actor.rs @@ -715,7 +715,7 @@ impl PyActorRef { /// Return an untyped proxy that forwards any method call to the remote actor. fn as_any(&self, py: Python<'_>) -> PyResult { - let remote = py.import("pulsing.actor.remote")?; + let remote = py.import("pulsing.core.remote")?; let proxy_cls = remote.getattr("ActorProxy")?; let proxy = proxy_cls.call1((self.clone(), py.None(), py.None()))?; Ok(proxy.unbind()) @@ -723,7 +723,7 @@ impl PyActorRef { /// Return a typed proxy based on the given class definition. fn as_type(&self, py: Python<'_>, cls: PyObject) -> PyResult { - let remote = py.import("pulsing.actor.remote")?; + let remote = py.import("pulsing.core.remote")?; let extract_fn = remote.getattr("_extract_methods")?; let result = extract_fn.call1((&cls,))?; let methods = result.get_item(0)?; diff --git a/examples/agent/autogen/distributed.py b/examples/agent/autogen/distributed.py index 2b70c59e1..1d6bf2257 100644 --- a/examples/agent/autogen/distributed.py +++ b/examples/agent/autogen/distributed.py @@ -115,7 +115,7 @@ async def run_with_rank( rank: int, world_size: int, master_addr: str, pulsing_base_port: int ): """Run corresponding role based on rank""" - from pulsing.autogen import PulsingRuntime + from pulsing.integrations.autogen import PulsingRuntime my_addr, seeds = get_pulsing_config(rank, master_addr, pulsing_base_port) role_name, agent_class = ROLE_MAP.get(rank, (f"worker_{rank}", None)) @@ -171,7 +171,7 @@ async def run_manager_logic(runtime): async def run_standalone(): """Standalone mode""" - from pulsing.autogen import PulsingRuntime + from pulsing.integrations.autogen import PulsingRuntime print("Running in standalone mode") runtime = PulsingRuntime() @@ -190,7 +190,7 @@ async def run_standalone(): async def run_writer(): - from pulsing.autogen import PulsingRuntime + from pulsing.integrations.autogen import PulsingRuntime runtime = PulsingRuntime(addr="0.0.0.0:8001", seeds=[]) await runtime.start() @@ -200,7 +200,7 @@ async def run_writer(): async def run_editor(): - from pulsing.autogen import PulsingRuntime + from pulsing.integrations.autogen import PulsingRuntime runtime = PulsingRuntime(addr="0.0.0.0:8002", seeds=["127.0.0.1:8001"]) await runtime.start() @@ -210,7 +210,7 @@ async def run_editor(): async def run_manager(): - from pulsing.autogen import PulsingRuntime + from pulsing.integrations.autogen import PulsingRuntime runtime = PulsingRuntime(addr="0.0.0.0:8003", seeds=["127.0.0.1:8001"]) await runtime.start() diff --git a/examples/agent/autogen/simple.py b/examples/agent/autogen/simple.py index 27b8798c9..7a21711f0 100644 --- a/examples/agent/autogen/simple.py +++ b/examples/agent/autogen/simple.py @@ -18,7 +18,7 @@ SingleThreadedAgentRuntime, message_handler, ) -from pulsing.autogen import PulsingRuntime +from pulsing.integrations.autogen import PulsingRuntime # Define message types diff --git a/examples/agent/langgraph/distributed.py b/examples/agent/langgraph/distributed.py index d413f97b8..7eaa071da 100644 --- a/examples/agent/langgraph/distributed.py +++ b/examples/agent/langgraph/distributed.py @@ -66,7 +66,7 @@ def build_graph(): async def run_distributed(): """Distributed main program""" - from pulsing.langgraph import with_pulsing + from pulsing.integrations.langgraph import with_pulsing print("=" * 50) print("LangGraph + Pulsing Distributed Mode") @@ -100,7 +100,7 @@ async def run_distributed(): async def run_worker(node_name: str, port: int, seed_port: int | None = None): """Start Worker""" - from pulsing.langgraph import start_worker + from pulsing.integrations.langgraph import start_worker nodes = {"llm": llm_node, "tool": tool_node} if node_name not in nodes: diff --git a/examples/agent/langgraph/parallel_ideas.py b/examples/agent/langgraph/parallel_ideas.py index 13a57e00f..7463c3538 100644 --- a/examples/agent/langgraph/parallel_ideas.py +++ b/examples/agent/langgraph/parallel_ideas.py @@ -760,7 +760,7 @@ async def main(): os.environ["LLM_MODEL"] = args.model try: - from pulsing.langgraph import with_pulsing + from pulsing.integrations.langgraph import with_pulsing except ImportError: with_pulsing = None diff --git a/examples/agent/langgraph/simple.py b/examples/agent/langgraph/simple.py index b3fd9cad6..239b0dbca 100644 --- a/examples/agent/langgraph/simple.py +++ b/examples/agent/langgraph/simple.py @@ -58,7 +58,7 @@ def build_graph(): async def main(): - from pulsing.langgraph import with_pulsing + from pulsing.integrations.langgraph import with_pulsing print("=" * 50) print("LangGraph + Pulsing Standalone Mode") diff --git a/python/pulsing/__init__.py b/python/pulsing/__init__.py index d97781074..a6f814b97 100644 --- a/python/pulsing/__init__.py +++ b/python/pulsing/__init__.py @@ -22,8 +22,8 @@ def incr(self): self.value += 1; return self.value __version__ = "0.1.0" -# Import from pulsing.actor -from pulsing.actor import ( +# Import from pulsing.core +from pulsing.core import ( # Global system functions init, shutdown, @@ -60,14 +60,14 @@ def init_inside_ray(): await pul.init_inside_ray() """ - from pulsing.ray import async_init_in_ray + from pulsing.integrations.ray import async_init_in_ray return async_init_in_ray() def cleanup_ray(): """清理 Pulsing 在 Ray KV store 中的状态""" - from pulsing.ray import cleanup + from pulsing.integrations.ray import cleanup return cleanup() @@ -93,8 +93,7 @@ class ActorSystem: def __init__(self, inner: _ActorSystem): self._inner = inner - from pulsing.queue import QueueAPI - from pulsing.topic import TopicAPI + from pulsing.streaming import QueueAPI, TopicAPI self.queue = QueueAPI(inner) self.topic = TopicAPI(inner) @@ -257,13 +256,13 @@ class _GlobalQueueAPI: async def write(self, topic, **kwargs): """Open queue for writing. See QueueAPI.write() for args.""" - from pulsing.queue import QueueAPI + from pulsing.streaming import QueueAPI return await QueueAPI(get_system()).write(topic, **kwargs) async def read(self, topic, **kwargs): """Open queue for reading. See QueueAPI.read() for args.""" - from pulsing.queue import QueueAPI + from pulsing.streaming import QueueAPI return await QueueAPI(get_system()).read(topic, **kwargs) @@ -273,13 +272,13 @@ class _GlobalTopicAPI: async def write(self, topic, **kwargs): """Open topic for writing. See TopicAPI.write() for args.""" - from pulsing.topic import TopicAPI + from pulsing.streaming import TopicAPI return await TopicAPI(get_system()).write(topic, **kwargs) async def read(self, topic, **kwargs): """Open topic for reading. See TopicAPI.read() for args.""" - from pulsing.topic import TopicAPI + from pulsing.streaming import TopicAPI return await TopicAPI(get_system()).read(topic, **kwargs) diff --git a/python/pulsing/admin.py b/python/pulsing/admin.py index 76f0da00f..d17a7faad 100644 --- a/python/pulsing/admin.py +++ b/python/pulsing/admin.py @@ -1,14 +1,14 @@ """ Administrative / diagnostic APIs for Pulsing. -These APIs are intentionally kept out of `pulsing.actor`'s top-level `__all__` +These APIs are intentionally kept out of `pulsing.core`'s top-level `__all__` to minimize the main public surface. Import explicitly: from pulsing.admin import list_actors, get_metrics, get_node_info from pulsing.admin import health_check, ping """ -from pulsing.actor.remote import ( +from pulsing.core.remote import ( get_metrics, get_node_info, health_check, diff --git a/python/pulsing/agent/__init__.py b/python/pulsing/agent/__init__.py index be4fdb1ad..25a8f1c84 100644 --- a/python/pulsing/agent/__init__.py +++ b/python/pulsing/agent/__init__.py @@ -1,7 +1,7 @@ """ Pulsing Agent Toolbox -Lightweight multi-agent development tools, fully compatible with pulsing.actor. +Lightweight multi-agent development tools, fully compatible with pulsing.core. Core APIs: - runtime(): Actor system lifecycle management @@ -10,7 +10,7 @@ - parse_json(): JSON parsing Example: - from pulsing.actor import remote, resolve + from pulsing.core import remote, resolve from pulsing.agent import agent, runtime, llm, get_agent_meta # @remote: Basic Actor diff --git a/python/pulsing/agent/base.py b/python/pulsing/agent/base.py index 1f540234f..3787c8aa9 100644 --- a/python/pulsing/agent/base.py +++ b/python/pulsing/agent/base.py @@ -18,7 +18,7 @@ from dataclasses import dataclass, field from typing import Any, Callable, TypeVar -from pulsing.actor import remote +from pulsing.core import remote T = TypeVar("T") diff --git a/python/pulsing/agent/runtime.py b/python/pulsing/agent/runtime.py index 914d23fb6..3dd6f8524 100644 --- a/python/pulsing/agent/runtime.py +++ b/python/pulsing/agent/runtime.py @@ -4,7 +4,7 @@ from contextlib import asynccontextmanager -from pulsing.actor import get_system, init, shutdown +from pulsing.core import get_system, init, shutdown @asynccontextmanager diff --git a/python/pulsing/cli/__main__.py b/python/pulsing/cli/__main__.py index 6529ffe59..f66ff4878 100644 --- a/python/pulsing/cli/__main__.py +++ b/python/pulsing/cli/__main__.py @@ -5,7 +5,7 @@ @hp.param("actor") def actor( - actor_type: str, # Positional argument: full class path (e.g., 'pulsing.actors.worker.TransformersWorker') + actor_type: str, # Positional argument: full class path (e.g., 'pulsing.serving.worker.TransformersWorker') addr: str | None = None, seeds: str | None = None, name: str = "worker", # Actor name (default: "worker") @@ -18,9 +18,9 @@ def actor( Actor type must be a full class path: - Format: 'module.path.ClassName' - - Example: 'pulsing.actors.Router' - - Example: 'pulsing.actors.TransformersWorker' - - Example: 'pulsing.actors.VllmWorker' + - Example: 'pulsing.serving.Router' + - Example: 'pulsing.serving.TransformersWorker' + - Example: 'pulsing.serving.VllmWorker' - Example: 'my_module.my_actor.MyCustomActor' Pass constructor parameters directly as command-line arguments. @@ -29,7 +29,7 @@ def actor( Note: To list actors, use 'pulsing inspect actors' instead. Args: - actor_type: Full class path (positional argument), e.g., 'pulsing.actors.worker.TransformersWorker' + actor_type: Full class path (positional argument), e.g., 'pulsing.serving.worker.TransformersWorker' addr: Actor System bind address (e.g., '0.0.0.0:8000') seeds: Comma-separated list of seed nodes (e.g., '192.168.1.1:8000,192.168.1.2:8000') name: Actor name. Default: 'worker'. Use different names to run multiple workers in the same cluster. @@ -38,17 +38,17 @@ def actor( Examples: # Start a Transformers worker - pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --device cpu --name my-worker + pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --device cpu --name my-worker # Start a vLLM worker - pulsing actor pulsing.actors.VllmWorker --model Qwen/Qwen2 --role aggregated --max_new_tokens 512 --name vllm-worker + pulsing actor pulsing.serving.VllmWorker --model Qwen/Qwen2 --role aggregated --max_new_tokens 512 --name vllm-worker # Start a Router with OpenAI-compatible API - pulsing actor pulsing.actors.Router --http_host 0.0.0.0 --http_port 8080 --model_name my-llm --worker_name worker + pulsing actor pulsing.serving.Router --http_host 0.0.0.0 --http_port 8080 --model_name my-llm --worker_name worker # Start multiple workers with different names - pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --name worker-1 --seeds 127.0.0.1:8000 - pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --name worker-2 --seeds 127.0.0.1:8000 + pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --name worker-1 --seeds 127.0.0.1:8000 + pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --name worker-2 --seeds 127.0.0.1:8000 """ from .actors import start_generic_actor @@ -63,9 +63,9 @@ def actor( # Check if actor_type is a valid class path (must contain dots) if "." not in actor_type: raise ValueError( - f"Error: Actor type must be a full class path (e.g., 'pulsing.actors.worker.TransformersWorker').\n" + f"Error: Actor type must be a full class path (e.g., 'pulsing.serving.worker.TransformersWorker').\n" f"Received: '{actor_type}'\n" - f"Example: pulsing actor pulsing.actors.worker.TransformersWorker --model_name gpt2" + f"Example: pulsing actor pulsing.serving.worker.TransformersWorker --model_name gpt2" ) # Parse seeds diff --git a/python/pulsing/cli/actor_loader.py b/python/pulsing/cli/actor_loader.py index bbdff162a..001073951 100644 --- a/python/pulsing/cli/actor_loader.py +++ b/python/pulsing/cli/actor_loader.py @@ -4,14 +4,14 @@ import json from typing import Any -from pulsing.actor import Actor +from pulsing.core import Actor def load_actor_class(class_path: str) -> type[Actor]: """Load Actor class from module path Args: - class_path: Full class path, e.g., 'pulsing.actors.worker.TransformersWorker' + class_path: Full class path, e.g., 'pulsing.serving.worker.TransformersWorker' Returns: Actor class @@ -23,7 +23,7 @@ def load_actor_class(class_path: str) -> type[Actor]: if "." not in class_path: raise ValueError( f"Invalid class path '{class_path}'. Expected format: 'module.path.ClassName'\n" - f"Example: pulsing.actors.worker.TransformersWorker" + f"Example: pulsing.serving.worker.TransformersWorker" ) # Split module path and class name @@ -57,7 +57,7 @@ def load_actor_class(class_path: str) -> type[Actor]: if not isinstance(actor_class, type) or not issubclass(actor_class, Actor): raise ValueError( f"'{class_name}' is not an Actor subclass.\n" - f"Expected a class that inherits from pulsing.actor.Actor" + f"Expected a class that inherits from pulsing.core.Actor" ) return actor_class diff --git a/python/pulsing/cli/actors.py b/python/pulsing/cli/actors.py index be39117e2..1278ddc98 100644 --- a/python/pulsing/cli/actors.py +++ b/python/pulsing/cli/actors.py @@ -15,14 +15,14 @@ def start_generic_actor( """Start a generic Actor class by full module path Args: - actor_type: Full class path, e.g., 'pulsing.actors.worker.TransformersWorker' + actor_type: Full class path, e.g., 'pulsing.serving.worker.TransformersWorker' addr: Actor System bind address seeds: List of seed node addresses name: Actor name extra_kwargs: Additional CLI arguments to pass to Actor constructor """ import inspect - from pulsing.actor.helpers import spawn_and_run + from pulsing.core.helpers import spawn_and_run print(f"Loading Actor class: {actor_type}") diff --git a/python/pulsing/compat/__init__.py b/python/pulsing/compat/__init__.py deleted file mode 100644 index 84da33c38..000000000 --- a/python/pulsing/compat/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -""" -Pulsing Compatibility Layer - -Provides Ray-compatible API for easy migration. - -Usage: - from pulsing.compat import ray - - ray.init() - - @ray.remote - class Counter: - def __init__(self, init=0): self.value = init - def incr(self): self.value += 1; return self.value - - counter = Counter.remote(init=10) - result = ray.get(counter.incr.remote()) - - ray.shutdown() -""" - -from . import ray - -__all__ = ["ray"] diff --git a/python/pulsing/actor/__init__.py b/python/pulsing/core/__init__.py similarity index 93% rename from python/pulsing/actor/__init__.py rename to python/pulsing/core/__init__.py index 73f6e1c97..cbf465e5f 100644 --- a/python/pulsing/actor/__init__.py +++ b/python/pulsing/core/__init__.py @@ -1,8 +1,8 @@ """ -Pulsing Actor System - Python bindings for distributed actor framework +Pulsing Core - Python bindings for distributed actor framework Simple API: - from pulsing.actor import init, shutdown, remote + from pulsing.core import init, shutdown, remote await init() @@ -17,7 +17,7 @@ def incr(self): self.value += 1; return self.value await shutdown() Advanced API: - from pulsing.actor import ActorSystem, Actor, Message, SystemConfig + from pulsing.core import ActorSystem, Actor, Message, SystemConfig """ import asyncio @@ -28,14 +28,13 @@ def incr(self): self.value += 1; return self.value ActorId, ActorRef, ActorSystem, - Message, NodeId, SealedPyMessage, - StreamMessage, StreamReader, StreamWriter, SystemConfig, ) +from .messaging import Message, StreamMessage # ============================================================================= @@ -228,11 +227,7 @@ async def tell_with_timeout( PulsingActorError, ) -# NOTE: `__all__` is the *public, stable surface* for `from pulsing.actor import *`. -# We intentionally keep it minimal. Advanced/diagnostic APIs may still be -# importable by name, but are not part of the stable top-level contract. __all__ = [ - # Minimal async API (recommended) "init", "shutdown", "remote", @@ -242,7 +237,6 @@ async def tell_with_timeout( "get_system", "get_system_actor", "is_initialized", - # Minimal core types commonly used in docs/examples "Actor", "Message", "StreamMessage", @@ -253,10 +247,8 @@ async def tell_with_timeout( "ActorProxy", "as_any", "SystemActorProxy", - # Service (for actor_system function) "PythonActorService", "PYTHON_ACTOR_SERVICE_NAME", - # Exceptions "PulsingError", "PulsingRuntimeError", "PulsingActorError", diff --git a/python/pulsing/actor/helpers.py b/python/pulsing/core/helpers.py similarity index 100% rename from python/pulsing/actor/helpers.py rename to python/pulsing/core/helpers.py diff --git a/python/pulsing/core/messaging.py b/python/pulsing/core/messaging.py new file mode 100644 index 000000000..f77935c38 --- /dev/null +++ b/python/pulsing/core/messaging.py @@ -0,0 +1,5 @@ +"""Message types - re-export from Rust _core.""" + +from pulsing._core import Message, StreamMessage + +__all__ = ["Message", "StreamMessage"] diff --git a/python/pulsing/actor/remote.py b/python/pulsing/core/remote.py similarity index 99% rename from python/pulsing/actor/remote.py rename to python/pulsing/core/remote.py index 121989f1c..fd7e9ca67 100644 --- a/python/pulsing/actor/remote.py +++ b/python/pulsing/core/remote.py @@ -1024,7 +1024,7 @@ async def spawn( **kwargs: Keyword arguments for the class constructor Example: - from pulsing.actor import init, remote + from pulsing.core import init, remote await init() @@ -1560,7 +1560,7 @@ async def resolve( ActorRef: Actor reference with .as_any() / .as_type() for proxy generation. Example: - from pulsing.actor import init, remote, resolve + from pulsing.core import init, remote, resolve await init() @@ -1679,7 +1679,7 @@ def _auto_init_pulsing(): import ray if ray.is_initialized(): - from pulsing.ray import init_in_ray + from pulsing.integrations.ray import init_in_ray init_in_ray() return @@ -1696,9 +1696,9 @@ def _run_sync_on_pulsing_loop(coro): import asyncio import concurrent.futures - # 尝试使用 pulsing.ray 的后台 loop(Ray 环境) + # 尝试使用 pulsing.integrations.ray 的后台 loop(Ray 环境) try: - from pulsing.ray import _loop + from pulsing.integrations.ray import _loop if _loop is not None: fut = asyncio.run_coroutine_threadsafe(coro, _loop) diff --git a/python/pulsing/integrations/__init__.py b/python/pulsing/integrations/__init__.py new file mode 100644 index 000000000..3560c582b --- /dev/null +++ b/python/pulsing/integrations/__init__.py @@ -0,0 +1,5 @@ +"""Third-party framework integrations.""" + +from . import ray_compat + +__all__ = ["ray_compat"] diff --git a/python/pulsing/autogen/__init__.py b/python/pulsing/integrations/autogen/__init__.py similarity index 91% rename from python/pulsing/autogen/__init__.py rename to python/pulsing/integrations/autogen/__init__.py index dfcbbbca6..93b39e3a8 100644 --- a/python/pulsing/autogen/__init__.py +++ b/python/pulsing/integrations/autogen/__init__.py @@ -2,7 +2,7 @@ Pulsing AutoGen Runtime - Unified Standalone/Distributed Runtime Usage: - from pulsing.autogen import PulsingRuntime + from pulsing.integrations.autogen import PulsingRuntime # Standalone mode runtime = PulsingRuntime() diff --git a/python/pulsing/autogen/agent_wrapper.py b/python/pulsing/integrations/autogen/agent_wrapper.py similarity index 99% rename from python/pulsing/autogen/agent_wrapper.py rename to python/pulsing/integrations/autogen/agent_wrapper.py index 5f200208e..440ea3ef6 100644 --- a/python/pulsing/autogen/agent_wrapper.py +++ b/python/pulsing/integrations/autogen/agent_wrapper.py @@ -6,7 +6,7 @@ import logging from typing import TYPE_CHECKING, Any -from pulsing.actor import Actor, ActorId +from pulsing.core import Actor, ActorId if TYPE_CHECKING: from .runtime import PulsingRuntime diff --git a/python/pulsing/autogen/runtime.py b/python/pulsing/integrations/autogen/runtime.py similarity index 99% rename from python/pulsing/autogen/runtime.py rename to python/pulsing/integrations/autogen/runtime.py index a7625dad7..e02782739 100644 --- a/python/pulsing/autogen/runtime.py +++ b/python/pulsing/integrations/autogen/runtime.py @@ -30,14 +30,14 @@ cast, ) -from pulsing.actor import ( +from pulsing.core import ( Actor, ActorRef, ActorSystem, Message, SystemConfig, ) -from pulsing.actor.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService +from pulsing.core.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService logger = logging.getLogger("pulsing.autogen") T = TypeVar("T") diff --git a/python/pulsing/langgraph/__init__.py b/python/pulsing/integrations/langgraph/__init__.py similarity index 92% rename from python/pulsing/langgraph/__init__.py rename to python/pulsing/integrations/langgraph/__init__.py index d6c74b063..5e37eb904 100644 --- a/python/pulsing/langgraph/__init__.py +++ b/python/pulsing/integrations/langgraph/__init__.py @@ -3,7 +3,7 @@ Usage: from langgraph.graph import StateGraph - from pulsing.langgraph import with_pulsing + from pulsing.integrations.langgraph import with_pulsing # Original LangGraph code graph = StateGraph(MyState) diff --git a/python/pulsing/langgraph/executor.py b/python/pulsing/integrations/langgraph/executor.py similarity index 98% rename from python/pulsing/langgraph/executor.py rename to python/pulsing/integrations/langgraph/executor.py index a91df119c..36b26689e 100644 --- a/python/pulsing/langgraph/executor.py +++ b/python/pulsing/integrations/langgraph/executor.py @@ -15,8 +15,8 @@ from concurrent.futures import ThreadPoolExecutor from typing import Any, Callable, Dict -from pulsing.actor import Actor, ActorId, ActorRef, ActorSystem, SystemConfig -from pulsing.actor.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService +from pulsing.core import Actor, ActorId, ActorRef, ActorSystem, SystemConfig +from pulsing.core.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService logger = logging.getLogger("pulsing.langgraph") diff --git a/python/pulsing/langgraph/wrapper.py b/python/pulsing/integrations/langgraph/wrapper.py similarity index 97% rename from python/pulsing/langgraph/wrapper.py rename to python/pulsing/integrations/langgraph/wrapper.py index aac08b991..faac82954 100644 --- a/python/pulsing/langgraph/wrapper.py +++ b/python/pulsing/integrations/langgraph/wrapper.py @@ -8,8 +8,8 @@ import logging from typing import Any, AsyncIterator, Dict, Optional, Union -from pulsing.actor import ActorSystem, SystemConfig -from pulsing.actor.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService +from pulsing.core import ActorSystem, SystemConfig +from pulsing.core.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService from .executor import NodeExecutorPool logger = logging.getLogger("pulsing.langgraph") diff --git a/python/pulsing/ray.py b/python/pulsing/integrations/ray.py similarity index 93% rename from python/pulsing/ray.py rename to python/pulsing/integrations/ray.py index 9187910d5..9ded1d0b9 100644 --- a/python/pulsing/ray.py +++ b/python/pulsing/integrations/ray.py @@ -6,7 +6,7 @@ 推荐用法: import ray - from pulsing.ray import init_in_ray + from pulsing.integrations.ray import init_in_ray ray.init(runtime_env={"worker_process_setup_hook": init_in_ray}) init_in_ray() # driver 进程也需要初始化 @@ -21,7 +21,7 @@ ) except ImportError: raise ImportError( - "pulsing.ray requires Ray. Install with: pip install 'ray[default]'" + "pulsing.integrations.ray requires Ray. Install with: pip install 'ray[default]'" ) import asyncio @@ -72,13 +72,13 @@ def _run_sync(coro): async def _do_init(addr, seeds=None): - from pulsing.actor import init + from pulsing.core import init return await init(addr=addr, seeds=seeds) async def _do_shutdown(): - from pulsing.actor import shutdown + from pulsing.core import shutdown await shutdown() @@ -160,4 +160,4 @@ def cleanup(): _internal_kv_del(_SEED_KEY) -__all__ = ["init_in_ray", "async_init_in_ray", "cleanup"] +__all__ = ["init_in_ray", "async_init_in_ray", "cleanup", "_get_seed", "_loop"] diff --git a/python/pulsing/compat/ray.py b/python/pulsing/integrations/ray_compat.py similarity index 94% rename from python/pulsing/compat/ray.py rename to python/pulsing/integrations/ray_compat.py index 5f9329915..b8737bd5e 100644 --- a/python/pulsing/compat/ray.py +++ b/python/pulsing/integrations/ray_compat.py @@ -2,7 +2,7 @@ Ray-compatible API for Pulsing This module provides a Ray-like synchronous API for easy migration. -For new projects, we recommend using the native async API in pulsing.actor. +For new projects, we recommend using the native async API in pulsing.core. Migration from Ray: # Before (Ray) @@ -19,7 +19,7 @@ def incr(self): self.value += 1; return self.value ray.shutdown() # After (Pulsing compat) - from pulsing.compat import ray # Only change this line! + from pulsing.integrations.ray_compat import ray # Only change this line! ray.init() @@ -33,7 +33,7 @@ def incr(self): self.value += 1; return self.value ray.shutdown() Note: This is a synchronous wrapper around async Pulsing. -For better performance in async environments, use pulsing.actor directly. +For better performance in async environments, use pulsing.core directly. """ import asyncio @@ -186,7 +186,7 @@ def __init__(self, cls: type): def _ensure_wrapped(self): if self._pulsing_class is None: - from pulsing.actor import remote + from pulsing.core import remote self._pulsing_class = remote(self._cls) @@ -226,15 +226,15 @@ def init( ignore_reinit_error: If True, ignore if already initialized Example: - from pulsing.compat import ray + from pulsing.integrations.ray_compat import ray ray.init() """ global _system, _loop _ensure_not_initialized(ignore_reinit_error) - from pulsing.actor import ActorSystem, SystemConfig - from pulsing.actor.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService + from pulsing.core import ActorSystem, SystemConfig + from pulsing.core.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService # If we're already inside a running event loop (e.g., Jupyter/pytest-asyncio), # we must not call run_until_complete() on it. Use a dedicated background loop. @@ -354,6 +354,11 @@ def wait( return ready, remaining +import sys + +# Self-reference so that "from pulsing.integrations.ray_compat import ray" works +ray = sys.modules[__name__] + __all__ = [ "init", "shutdown", @@ -363,4 +368,5 @@ def wait( "put", "wait", "ObjectRef", + "ray", ] diff --git a/python/pulsing/queue/README.md b/python/pulsing/queue/README.md deleted file mode 100644 index a0d6f9f6d..000000000 --- a/python/pulsing/queue/README.md +++ /dev/null @@ -1,352 +0,0 @@ -# 分布式内存队列 - -基于 Pulsing Actor 架构实现的分布式内存队列系统。 - -**支持可插拔存储后端**,可根据需求选择不同的实现。 - -## 架构概览 - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ 应用层 │ -│ │ -│ Queue / QueueWriter / QueueReader │ -│ │ │ -│ │ get_bucket_ref(topic, bucket_id) │ -│ ▼ │ -└─────────────────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────────────┐ -│ StorageManager (每节点一个) │ -│ │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ GetBucket(topic, bucket_id) │ │ -│ │ │ │ │ -│ │ ├─ owner = hash(topic:bucket_id) % nodes ← 一致性哈希 │ │ -│ │ │ │ │ -│ │ ├─ owner == self? │ │ -│ │ │ ├─ Yes → 创建/返回 BucketStorage │ │ -│ │ │ │ → BucketReady(actor_id, node_id) │ │ -│ │ │ │ │ │ -│ │ │ └─ No → Redirect(owner_node_id) │ │ -│ │ │ 客户端重定向到正确节点 │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ - │ - ┌───────────────────────────┼───────────────────────────┐ - ▼ ▼ ▼ -┌──────────────┐ ┌──────────────┐ ┌──────────────┐ -│BucketStorage │ │BucketStorage │ │BucketStorage │ -│ bucket_0 │ │ bucket_1 │ │ bucket_2 │ -│ │ │ │ │ │ -│ - buffer[] │ │ - buffer[] │ │ - buffer[] │ -│ - backend │ │ - backend │ │ - backend │ -│ - Condition │ │ - Condition │ │ - Condition │ -└──────────────┘ └──────────────┘ └──────────────┘ - Node A Node B Node A -``` - -## 核心组件 - -### 1. StorageManager(存储管理器) - -**每个节点一个实例**,负责管理两类资源: - -| 资源类型 | 请求消息 | Actor 类型 | 用途 | -|---------|---------|-----------|------| -| Queue Bucket | `GetBucket` | `BucketStorage` | 生产者-消费者队列 | -| Topic Broker | `GetTopic` | `TopicBroker` | 发布-订阅 | - -核心职责: -- 使用**一致性哈希**判断资源的 owner 节点 -- Owner 节点:创建并返回对应 Actor -- 非 Owner 节点:返回 `Redirect`,指向正确节点 - -### 2. BucketStorage(桶存储) - -每个 bucket 一个实例,负责: -- 数据缓冲(内存) -- 数据持久化(由后端实现) -- 消费者阻塞/唤醒(asyncio.Condition) - -### 3. Queue / QueueWriter / QueueReader - -高级 API,对用户隐藏底层复杂性。 - -## 设计特点 - -| 特性 | 说明 | -|------|------| -| **集群唯一性** | StorageManager + 一致性哈希,确保每个 bucket 在集群中只有一个 Actor | -| **智能路由** | 错误请求自动重定向到正确节点 | -| **独立锁/条件变量** | 每个 bucket 独立,无跨 bucket 竞争 | -| **流式传输** | 消费者通过 StreamMessage 接收,内存友好 | -| **实时通知** | 新数据通过 condition + 流推送,无轮询 | - -## 请求流程 - -### Bucket 获取流程 - -``` -Queue.put(record) - │ - ├─ bucket_id = hash(record[bucket_column]) % num_buckets - │ - ▼ -get_bucket_ref(system, topic, bucket_id) - │ - ├─ 本地 StorageManager.GetBucket(...) - │ │ - │ ├─ owner == self? - │ │ ├─ Yes → BucketReady(actor_id) → 返回 ActorRef - │ │ └─ No → Redirect(owner_node_id) - │ │ │ - │ │ ▼ - │ │ owner 节点的 StorageManager.GetBucket(...) - │ │ │ - │ │ └─ BucketReady → 返回 ActorRef - │ - └─ bucket_ref.ask(Put, {record}) -``` - -### 数据传输流程 - -``` -生产者 BucketStorage 消费者 (wait=True) - │ │ │ - │── Put ───────────────────▶│ │ - │ │ buffer.append() │ - │ │ condition.notify_all() ──────▶│ 唤醒 - │◀── PutResponse ───────────│ │ - │ │ │ - │ │◀── GetStream ─────────────────│ - │ │ │ - │ │── StreamMessage chunk ───────▶│ 流式发送 -``` - -## 数据可见性模型 - -``` -┌─────────────────────────────────────────────────────┐ -│ 总数据视图 │ -├─────────────────────────┬───────────────────────────┤ -│ 持久化(若后端支持) │ 内存缓冲 │ -│ [0, persisted_count) │ [persisted_count, total) │ -└─────────────────────────┴───────────────────────────┘ - ↑ - 两部分同时可见 -``` - -- 写入后数据**立即**对消费者可见(在内存缓冲中) -- 达到 `batch_size` 后由后端决定是否持久化 -- 调用 `flush()` 可强制持久化 - -## 快速开始 - -```python -import asyncio -import pulsing as pul - -async def main(): - system = await pul.actor_system() - - # 生产者 - writer = await system.queue.write( - "my_queue", - bucket_column="user_id", - num_buckets=4, - ) - - # 写入数据(立即对消费者可见) - await writer.put({"user_id": "u1", "message": "Hello"}) - - # 消费者 - reader = await system.queue.read("my_queue") - - # 读取数据(内存 + 持久化同时可见) - records = await reader.get(limit=100) - - # 阻塞等待新数据 - records = await reader.get(limit=100, wait=True, timeout=10.0) - - await system.shutdown() - -asyncio.run(main()) -``` - -### 同步 API - -通过 `.sync()` 获取同步包装器,可与异步混用: - -```python -# 生产者用同步,消费者用异步 -sync_writer = writer.sync() -sync_writer.put({"user_id": "u1", "message": "Hello"}) -sync_writer.flush() - -records = await reader.get(limit=100) # 异步读取 - -# 或者反过来 -await writer.put({"user_id": "u2", "message": "World"}) - -sync_reader = reader.sync() -records = sync_reader.get(limit=100) # 同步读取 -``` - -## API - -### `system.queue.write(topic, ...)` - -打开队列用于写入。 - -```python -writer = await system.queue.write( - "my_queue", - bucket_column="user_id", # 分桶列 - num_buckets=4, # 桶数量 - batch_size=100, # 批处理大小 -) - -await writer.put({"user_id": "u1", "msg": "hello"}) -await writer.put([record1, record2, ...]) # 批量写入 -await writer.flush() # 强制持久化 -``` - -### `system.queue.read(topic, ...)` - -打开队列用于读取。支持三种模式: - -```python -# 1. 读取所有 bucket -reader = await system.queue.read("my_queue") - -# 2. 读取指定 bucket -reader = await system.queue.read("my_queue", bucket_id=0) -reader = await system.queue.read("my_queue", bucket_ids=[0, 2]) - -# 3. 分布式消费:通过 rank/world_size 自动分配 bucket -reader0 = await system.queue.read("q", rank=0, world_size=2, num_buckets=4) # bucket 0, 2 -reader1 = await system.queue.read("q", rank=1, world_size=2, num_buckets=4) # bucket 1, 3 - -# 读取数据 -records = await reader.get(limit=100) -records = await reader.get(limit=100, wait=True, timeout=10.0) # 阻塞等待 -``` - -## 分布式消费 - -通过 `rank` 和 `world_size` 实现多消费者并行消费: - -``` -num_buckets=4, world_size=2: - -Consumer (rank=0) Consumer (rank=1) - │ │ - ├─▶ bucket_0 ├─▶ bucket_1 - └─▶ bucket_2 └─▶ bucket_3 -``` - -## 可插拔存储后端 - -队列仅内置 `memory` 后端;持久化等能力通过**插件**以 `register_backend()` 接入,不在 Pulsing 内直接依赖具体实现。 - -### 内置后端 - -| 后端 | 说明 | 适用场景 | -|------|------|----------| -| `memory` | 纯内存,无持久化(默认) | 测试、临时数据 | - -### 插件后端 - -持久化或其它后端由第三方包提供,通过 `register_backend()` 注册后使用: - -```python -# 默认内存后端 -writer = await system.queue.write("my_queue") - -# 使用插件提供的后端(示例) -from my_plugin import MyBackend -from pulsing.queue import register_backend - -register_backend("my_backend", MyBackend) -writer = await system.queue.write("my_queue", backend="my_backend") -``` - -### 自定义后端 - -实现 `StorageBackend` 协议即可: - -```python -class MyBackend: - def __init__(self, bucket_id: int, storage_path: str, **kwargs): - ... - - async def put(self, record: dict) -> None: ... - async def put_batch(self, records: list[dict]) -> None: ... - async def get(self, limit: int, offset: int) -> list[dict]: ... - async def get_stream(self, limit, offset, wait, timeout) -> AsyncIterator: ... - async def flush(self) -> None: ... - async def stats(self) -> dict: ... - def total_count(self) -> int: ... -``` - ---- - -## 设计点评 - -### ✅ 优点 - -1. **集群唯一性保证** - - StorageManager 使用一致性哈希确定 bucket owner - - 非 owner 节点返回 Redirect,避免创建重复 Actor - - 解决了分布式环境下的竞态条件问题 - -2. **架构清晰** - - 三层架构:应用层 (Queue) → 管理层 (StorageManager) → 存储层 (BucketStorage) - - 职责分离,每层只关注自己的逻辑 - -3. **智能路由** - - 客户端无需知道 bucket 在哪个节点 - - 自动重定向到正确节点 - -4. **高并发支持** - - 每个 bucket 独立的锁和条件变量 - - 无跨 bucket 竞争 - -5. **数据实时可见** - - 写入后立即可读(内存缓冲) - - 无需等待持久化 - -6. **流式传输** - - 大数据量传输内存友好 - - 支持阻塞等待新数据 - -### ⚠️ 潜在改进点 - -1. **节点变化处理** - - 当前一致性哈希在节点加入/退出时可能导致 bucket 重分布 - - 可以考虑虚拟节点或一致性哈希环来减少影响 - -2. **元数据持久化** - - Queue 配置(bucket_column, num_buckets)目前不持久化 - - 消费者需要知道这些参数 - - 可以考虑将元数据存储在集群中 - -3. **故障恢复** - - 节点故障时,其 bucket 数据可能丢失(内存部分) - - 可以考虑副本或 WAL 机制 - -4. **性能优化** - - `get_bucket_ref` 每次都查询 StorageManager - - 可以增加客户端缓存,减少 RPC 调用 - -### 📊 适用场景 - -- ✅ 分布式数据管道 -- ✅ 生产者-消费者模式 -- ✅ 分布式训练数据分发 -- ✅ 实时数据流处理 -- ⚠️ 不适合需要强一致性的场景 -- ⚠️ 不适合需要事务的场景 diff --git a/python/pulsing/actors/__init__.py b/python/pulsing/serving/__init__.py similarity index 100% rename from python/pulsing/actors/__init__.py rename to python/pulsing/serving/__init__.py diff --git a/python/pulsing/actors/load_stream.py b/python/pulsing/serving/load_stream.py similarity index 99% rename from python/pulsing/actors/load_stream.py rename to python/pulsing/serving/load_stream.py index 293a357c1..3f6ae554e 100644 --- a/python/pulsing/actors/load_stream.py +++ b/python/pulsing/serving/load_stream.py @@ -25,7 +25,7 @@ from collections.abc import Callable from dataclasses import dataclass -from pulsing.actor import ActorRef, Message +from pulsing.core import ActorRef, Message @dataclass diff --git a/python/pulsing/actors/router.py b/python/pulsing/serving/router.py similarity index 98% rename from python/pulsing/actors/router.py rename to python/pulsing/serving/router.py index 991192750..d17dfcc1d 100644 --- a/python/pulsing/actors/router.py +++ b/python/pulsing/serving/router.py @@ -8,7 +8,7 @@ from aiohttp import web -from pulsing.actor import Actor, ActorId, ActorSystem, Message, get_system +from pulsing.core import Actor, ActorId, ActorSystem, Message, get_system @dataclass @@ -431,7 +431,7 @@ class Router(Actor): """Router Actor - OpenAI-compatible HTTP API router as an Actor This actor wraps the start_router/stop_router functions to provide - a CLI-compatible entry point via `pulsing actor pulsing.actors.Router`. + a CLI-compatible entry point via `pulsing actor pulsing.serving.Router`. Args: http_host: HTTP listen address (default: "0.0.0.0") @@ -447,7 +447,7 @@ class Router(Actor): Example: # Start via CLI - pulsing actor pulsing.actors.Router \\ + pulsing actor pulsing.serving.Router \\ --http_host 0.0.0.0 \\ --http_port 8080 \\ --model_name my-llm \\ diff --git a/python/pulsing/actors/scheduler.py b/python/pulsing/serving/scheduler.py similarity index 100% rename from python/pulsing/actors/scheduler.py rename to python/pulsing/serving/scheduler.py diff --git a/python/pulsing/actors/vllm/__init__.py b/python/pulsing/serving/vllm/__init__.py similarity index 100% rename from python/pulsing/actors/vllm/__init__.py rename to python/pulsing/serving/vllm/__init__.py diff --git a/python/pulsing/actors/vllm/handlers.py b/python/pulsing/serving/vllm/handlers.py similarity index 100% rename from python/pulsing/actors/vllm/handlers.py rename to python/pulsing/serving/vllm/handlers.py diff --git a/python/pulsing/actors/vllm/sampling.py b/python/pulsing/serving/vllm/sampling.py similarity index 100% rename from python/pulsing/actors/vllm/sampling.py rename to python/pulsing/serving/vllm/sampling.py diff --git a/python/pulsing/actors/vllm/utils.py b/python/pulsing/serving/vllm/utils.py similarity index 100% rename from python/pulsing/actors/vllm/utils.py rename to python/pulsing/serving/vllm/utils.py diff --git a/python/pulsing/actors/vllm/worker.py b/python/pulsing/serving/vllm/worker.py similarity index 99% rename from python/pulsing/actors/vllm/worker.py rename to python/pulsing/serving/vllm/worker.py index e59f47f38..16a70fa77 100644 --- a/python/pulsing/actors/vllm/worker.py +++ b/python/pulsing/serving/vllm/worker.py @@ -17,7 +17,7 @@ import uuid from typing import Any -from pulsing.actor import Actor, ActorId, Message, StreamMessage +from pulsing.core import Actor, ActorId, Message, StreamMessage from .handlers import BaseWorkerHandler, DecodeWorkerHandler, PrefillWorkerHandler from .utils import _is_macos, _setup_macos_metal_env diff --git a/python/pulsing/actors/vllm_worker.py b/python/pulsing/serving/vllm_worker.py similarity index 99% rename from python/pulsing/actors/vllm_worker.py rename to python/pulsing/serving/vllm_worker.py index 574e5055e..ae0c66476 100644 --- a/python/pulsing/actors/vllm_worker.py +++ b/python/pulsing/serving/vllm_worker.py @@ -17,7 +17,7 @@ import uuid from typing import Any -from pulsing.actor import Actor, ActorId, Message, StreamMessage +from pulsing.core import Actor, ActorId, Message, StreamMessage from .vllm_handlers import BaseWorkerHandler, DecodeWorkerHandler, PrefillWorkerHandler from .vllm_utils import _is_macos, _setup_macos_metal_env diff --git a/python/pulsing/actors/worker.py b/python/pulsing/serving/worker.py similarity index 99% rename from python/pulsing/actors/worker.py rename to python/pulsing/serving/worker.py index 7d1dd9769..992c2e1af 100644 --- a/python/pulsing/actors/worker.py +++ b/python/pulsing/serving/worker.py @@ -5,7 +5,7 @@ import uuid from dataclasses import dataclass -from pulsing.actor import Actor, ActorId, Message, StreamMessage +from pulsing.core import Actor, ActorId, Message, StreamMessage @dataclass diff --git a/python/pulsing/queue/__init__.py b/python/pulsing/streaming/__init__.py similarity index 50% rename from python/pulsing/queue/__init__.py rename to python/pulsing/streaming/__init__.py index 0a4184b87..f236fc79c 100644 --- a/python/pulsing/queue/__init__.py +++ b/python/pulsing/streaming/__init__.py @@ -1,25 +1,12 @@ -"""Distributed In-Memory Queue - Based on Pulsing Actor Architecture +"""Streaming - Queue (point-to-point) and Pub/Sub (topic) APIs -Architecture Features: -- Each node has a StorageManager Actor that manages all buckets on this node -- StorageManager uses consistent hashing to determine the owner node for each bucket -- Ensures only one Actor per bucket across the entire cluster -- Supports pluggable storage backends +Queue: + writer = await system.streaming.write("my_queue") # or system.queue + reader = await system.streaming.read("my_queue") -Storage Backends: -- "memory": Pure in-memory backend (built-in default) -- Custom backends: register_backend() or pass class to write_queue() - -Example: - system = await pul.actor_system() - - # Write to queue - writer = await system.queue.write("my_queue") - await writer.put({"id": "1", "data": "hello"}) - - # Read from queue - reader = await system.queue.read("my_queue") - records = await reader.get(limit=10) +Topic: + writer = await system.topic.write("events") + reader = await system.topic.read("events") """ from typing import TYPE_CHECKING, Any @@ -37,6 +24,15 @@ get_storage_manager, get_topic_broker, ) +from .pubsub import ( + PublishMode, + PublishResult, + TopicReader, + TopicWriter, + read_topic, + subscribe_to_topic, + write_topic, +) from .queue import Queue, QueueReader, QueueWriter, read_queue, write_queue from .storage import BucketStorage from .sync_queue import SyncQueue, SyncQueueReader, SyncQueueWriter @@ -46,19 +42,7 @@ class QueueAPI: - """Queue API entry point via system.queue - - Example: - system = await pul.actor_system() - - # Write - writer = await system.queue.write("my_queue") - await writer.put({"id": "1", "data": "hello"}) - - # Read - reader = await system.queue.read("my_queue") - records = await reader.get(limit=10) - """ + """Queue API entry point via system.queue""" def __init__(self, system: "ActorSystem"): self._system = system @@ -74,20 +58,7 @@ async def write( backend: str | type = "memory", backend_options: dict[str, Any] | None = None, ) -> QueueWriter: - """Open queue for writing - - Args: - topic: Queue topic name - bucket_column: Column used for bucketing (default: "id") - num_buckets: Number of buckets (default: 4) - batch_size: Batch size for writes (default: 100) - storage_path: Storage path (default: ./queue_storage/{topic}) - backend: Storage backend ("memory" or custom) - backend_options: Additional backend options - - Returns: - QueueWriter for put/flush operations - """ + """Open queue for writing""" return await write_queue( self._system, topic, @@ -112,22 +83,7 @@ async def read( backend: str | type = "memory", backend_options: dict[str, Any] | None = None, ) -> QueueReader: - """Open queue for reading - - Args: - topic: Queue topic name - bucket_id: Single bucket to read from - bucket_ids: List of buckets to read from - rank: Consumer rank for distributed consumption - world_size: Total consumers for distributed consumption - num_buckets: Number of buckets (default: 4) - storage_path: Storage path - backend: Storage backend (must match writer) - backend_options: Additional backend options - - Returns: - QueueReader for get operations - """ + """Open queue for reading""" return await read_queue( self._system, topic, @@ -142,29 +98,60 @@ async def read( ) +class TopicAPI: + """Topic API entry point via system.topic""" + + def __init__(self, system: "ActorSystem"): + self._system = system + + async def write( + self, + topic: str, + *, + writer_id: str | None = None, + ) -> TopicWriter: + """Open topic for writing""" + return await write_topic(self._system, topic, writer_id=writer_id) + + async def read( + self, + topic: str, + *, + reader_id: str | None = None, + auto_start: bool = False, + ) -> TopicReader: + """Open topic for reading""" + return await read_topic( + self._system, topic, reader_id=reader_id, auto_start=auto_start + ) + + __all__ = [ - # High-level API "QueueAPI", - # Async API + "TopicAPI", "Queue", "QueueWriter", "QueueReader", "write_queue", "read_queue", - # Sync wrapper (obtained via .sync()) "SyncQueue", "SyncQueueWriter", "SyncQueueReader", - # Low-level components "StorageManager", "BucketStorage", "get_storage_manager", "get_bucket_ref", "get_topic_broker", - # Backend related "StorageBackend", "MemoryBackend", "register_backend", "get_backend_class", "list_backends", + "write_topic", + "read_topic", + "subscribe_to_topic", + "TopicWriter", + "TopicReader", + "PublishMode", + "PublishResult", ] diff --git a/python/pulsing/queue/backend.py b/python/pulsing/streaming/backend.py similarity index 99% rename from python/pulsing/queue/backend.py rename to python/pulsing/streaming/backend.py index 74d88c5f5..eec4d9eb0 100644 --- a/python/pulsing/queue/backend.py +++ b/python/pulsing/streaming/backend.py @@ -10,7 +10,7 @@ # Custom backend (e.g. from a plugin package) from some_plugin import MyBackend - from pulsing.queue import register_backend + from pulsing.streaming import register_backend register_backend("my_backend", MyBackend) writer = await write_queue(system, "topic", backend="my_backend") diff --git a/python/pulsing/topic/broker.py b/python/pulsing/streaming/broker.py similarity index 99% rename from python/pulsing/topic/broker.py rename to python/pulsing/streaming/broker.py index 4210e7f32..82ae9c4fa 100644 --- a/python/pulsing/topic/broker.py +++ b/python/pulsing/streaming/broker.py @@ -9,9 +9,9 @@ from typing import TYPE_CHECKING, Any if TYPE_CHECKING: - from pulsing.actor import ActorRef, ActorSystem + from pulsing.core import ActorRef, ActorSystem -from pulsing.actor import ActorId, remote +from pulsing.core import ActorId, remote logger = logging.getLogger(__name__) diff --git a/python/pulsing/queue/manager.py b/python/pulsing/streaming/manager.py similarity index 98% rename from python/pulsing/queue/manager.py rename to python/pulsing/streaming/manager.py index bf1fc5894..3f48ac1dc 100644 --- a/python/pulsing/queue/manager.py +++ b/python/pulsing/streaming/manager.py @@ -5,12 +5,12 @@ import logging from typing import TYPE_CHECKING, Any -from pulsing.actor import ActorId, ActorRef, ActorSystem, remote +from pulsing.core import ActorId, ActorRef, ActorSystem, remote from .storage import BucketStorage if TYPE_CHECKING: - from pulsing.actor.remote import ActorProxy + from pulsing.core.remote import ActorProxy logger = logging.getLogger(__name__) @@ -190,7 +190,7 @@ async def _get_or_create_topic_broker(self, topic_name: str) -> ActorRef: self._topics[topic_name] = await self.system.resolve_named(actor_name) logger.debug(f"Resolved existing topic broker: {actor_name}") except Exception: - from pulsing.topic.broker import TopicBroker + from pulsing.streaming.broker import TopicBroker proxy = await TopicBroker.local( self.system, topic_name, self.system, name=actor_name, public=True @@ -504,7 +504,7 @@ async def get_topic_broker( topic: Topic name max_redirects: Maximum redirect count """ - from pulsing.topic.broker import TopicBroker + from pulsing.streaming.broker import TopicBroker manager = await get_storage_manager(system) diff --git a/python/pulsing/topic/topic.py b/python/pulsing/streaming/pubsub.py similarity index 98% rename from python/pulsing/topic/topic.py rename to python/pulsing/streaming/pubsub.py index 03774c79c..ba54d7cf3 100644 --- a/python/pulsing/topic/topic.py +++ b/python/pulsing/streaming/pubsub.py @@ -10,10 +10,10 @@ from typing import TYPE_CHECKING, Any, Callable, Coroutine if TYPE_CHECKING: - from pulsing.actor import ActorRef - from pulsing.actor.remote import ActorProxy + from pulsing.core import ActorRef + from pulsing.core.remote import ActorProxy -from pulsing.actor import Actor, ActorId, ActorSystem, Message +from pulsing.core import Actor, ActorId, ActorSystem, Message logger = logging.getLogger(__name__) @@ -47,7 +47,7 @@ class PublishResult: async def _get_broker(system: ActorSystem, topic: str) -> "ActorProxy": """Get topic broker proxy (reuses queue/manager infrastructure)""" - from pulsing.queue.manager import get_topic_broker + from pulsing.streaming.manager import get_topic_broker # get_topic_broker already returns ActorProxy (via TopicBroker.resolve) return await get_topic_broker(system, topic) diff --git a/python/pulsing/queue/queue.py b/python/pulsing/streaming/queue.py similarity index 99% rename from python/pulsing/queue/queue.py rename to python/pulsing/streaming/queue.py index d1756431b..7f1a7a283 100644 --- a/python/pulsing/queue/queue.py +++ b/python/pulsing/streaming/queue.py @@ -8,8 +8,8 @@ import logging from typing import TYPE_CHECKING, Any -from pulsing.actor import ActorSystem -from pulsing.actor.remote import ActorProxy +from pulsing.core import ActorSystem +from pulsing.core.remote import ActorProxy from .manager import get_bucket_ref, get_storage_manager @@ -334,7 +334,7 @@ async def write_queue( # Custom backend from a plugin from my_plugin import MyBackend - from pulsing.queue import register_backend + from .backend import register_backend register_backend("my_backend", MyBackend) writer = await write_queue(system, "my_queue", backend="my_backend") """ diff --git a/python/pulsing/queue/storage.py b/python/pulsing/streaming/storage.py similarity index 98% rename from python/pulsing/queue/storage.py rename to python/pulsing/streaming/storage.py index d3e70f2c9..b0181fe98 100644 --- a/python/pulsing/queue/storage.py +++ b/python/pulsing/streaming/storage.py @@ -4,7 +4,7 @@ import logging from typing import Any, AsyncIterator -from pulsing.actor import ActorId, StreamMessage, remote +from pulsing.core import ActorId, StreamMessage, remote from .backend import StorageBackend, get_backend_class diff --git a/python/pulsing/queue/sync_queue.py b/python/pulsing/streaming/sync_queue.py similarity index 100% rename from python/pulsing/queue/sync_queue.py rename to python/pulsing/streaming/sync_queue.py diff --git a/python/pulsing/topic/README.md b/python/pulsing/topic/README.md deleted file mode 100644 index 99568a631..000000000 --- a/python/pulsing/topic/README.md +++ /dev/null @@ -1,107 +0,0 @@ -# Pulsing Topic - 轻量级 Pub/Sub 模块 - -## 概述 - -Topic 模块提供轻量级的 Pub/Sub(发布/订阅)功能,**复用 `queue/manager` 的 StorageManager 进行一致性哈希和集群路由**,确保每个 topic 在集群中只有一个 broker。 - -## 架构 - -``` -┌─────────────────────────────────────────────────────────┐ -│ StorageManager │ -│ (queue/manager.py - 每节点一个实例) │ -│ │ -│ ┌─────────────────┐ ┌─────────────────────────────┐ │ -│ │ GetBucket 处理 │ │ GetTopic 处理 │ │ -│ │ (队列 bucket) │ │ (topic broker) │ │ -│ └─────────────────┘ └─────────────────────────────┘ │ -│ │ -│ 一致性哈希 → 确定 owner 节点 → 创建/返回 ActorRef │ -└─────────────────────────────────────────────────────────┘ - │ - ┌────────────────┴────────────────┐ - │ │ - ▼ ▼ - ┌───────────────┐ ┌───────────────┐ - │ BucketStorage │ │ TopicBroker │ - │ (queue 存储) │ │ (pub/sub) │ - └───────────────┘ └───────────────┘ -``` - -## 使用方式 - -### 发布消息 - -```python -from pulsing.topic import write_topic - -writer = await write_topic(system, "events") -await writer.publish({"type": "user_login", "user_id": 123}) -``` - -### 订阅消息 - -```python -from pulsing.topic import read_topic - -reader = await read_topic(system, "events") - -@reader.on_message -async def handle(msg): - print(f"Received: {msg}") - -await reader.start() - -# 停止订阅 -await reader.stop() -``` - -### 发布模式 - -```python -from pulsing.topic import write_topic, PublishMode - -writer = await write_topic(system, "events") - -# 1. Fire-and-forget(默认)- 发送后立即返回 -result = await writer.publish(data) - -# 2. Wait all acks - 等待所有订阅者响应 -result = await writer.publish(data, mode=PublishMode.WAIT_ALL_ACKS) - -# 3. Wait any ack - 等待任一订阅者响应 -result = await writer.publish(data, mode=PublishMode.WAIT_ANY_ACK) - -# 4. Best effort - 尝试发送,记录失败 -result = await writer.publish(data, mode=PublishMode.BEST_EFFORT) -``` - -## 与 Queue 的关系 - -| 特性 | Queue | Topic | -|------|-------|-------| -| 消息模式 | 点对点(生产者-消费者) | 广播(发布-订阅) | -| 消息存储 | 持久化(可配置后端) | 无持久化(内存) | -| 消费语义 | 每条消息只被消费一次 | 每条消息被所有订阅者消费 | -| 管理方式 | StorageManager | StorageManager(复用) | - -## 公开 API - -模块只导出以下必要的 API: - -```python -from pulsing.topic import ( - write_topic, # 获取写入句柄 - read_topic, # 获取读取句柄 - TopicWriter, # 写入句柄类型 - TopicReader, # 读取句柄类型 - PublishMode, # 发布模式枚举 - PublishResult, # 发布结果 -) -``` - -## 内部实现 - -- `TopicBroker`: Broker Actor,管理订阅者和消息分发 -- `_SubscriberActor`: 订阅者 Actor,接收消息并调用用户回调 -- `StorageManager.GetTopic`: 处理 topic broker 的创建和路由 diff --git a/python/pulsing/topic/__init__.py b/python/pulsing/topic/__init__.py deleted file mode 100644 index 30fdba9a1..000000000 --- a/python/pulsing/topic/__init__.py +++ /dev/null @@ -1,102 +0,0 @@ -"""Topic - Lightweight Pub/Sub Module - -Reuses queue/manager's StorageManager for consistent hashing and redirection, -ensuring only one broker per topic in the cluster. - -Usage: - import pulsing as pul - - await pul.init() - - writer = await pul.topic.write("events") - await writer.publish({"type": "user_login"}) - - reader = await pul.topic.read("events") - - @reader.on_message - async def handle(msg): - print(f"Received: {msg}") - - await reader.start() -""" - -from typing import TYPE_CHECKING - -from pulsing.topic.topic import ( - PublishMode, - PublishResult, - TopicReader, - TopicWriter, - read_topic, - subscribe_to_topic, - write_topic, -) - -if TYPE_CHECKING: - from pulsing._core import ActorSystem - - -class TopicAPI: - """Topic API entry point via system.topic or pul.topic - - Example: - writer = await pul.topic.write("events") - await writer.publish({"type": "user_login"}) - - reader = await pul.topic.read("events") - """ - - def __init__(self, system: "ActorSystem"): - self._system = system - - async def write( - self, - topic: str, - *, - writer_id: str | None = None, - ) -> TopicWriter: - """Open topic for writing - - Args: - topic: Topic name - writer_id: Writer ID (optional) - - Returns: - TopicWriter for publish operations - """ - return await write_topic(self._system, topic, writer_id=writer_id) - - async def read( - self, - topic: str, - *, - reader_id: str | None = None, - auto_start: bool = False, - ) -> TopicReader: - """Open topic for reading - - Args: - topic: Topic name - reader_id: Reader ID (optional) - auto_start: Whether to automatically start receiving - - Returns: - TopicReader for subscribing to messages - """ - return await read_topic( - self._system, topic, reader_id=reader_id, auto_start=auto_start - ) - - -__all__ = [ - # High-level API - "TopicAPI", - # Async API - "write_topic", - "read_topic", - "subscribe_to_topic", - "TopicWriter", - "TopicReader", - "PublishMode", - "PublishResult", -] diff --git a/tests/python/apis/actor/test_actor_behavior.py b/tests/python/apis/actor/test_actor_behavior.py index 52b2aed05..5adeb95cb 100644 --- a/tests/python/apis/actor/test_actor_behavior.py +++ b/tests/python/apis/actor/test_actor_behavior.py @@ -14,7 +14,7 @@ import pytest import pulsing as pul -from pulsing.actor import Actor, ActorId +from pulsing.core import Actor, ActorId # ============================================================================ @@ -459,7 +459,7 @@ def ping(self): @pytest.mark.asyncio async def test_remote_metadata_delegation(): """_WrappedActor delegates metadata() to user instance.""" - from pulsing.actor.remote import _WrappedActor + from pulsing.core.remote import _WrappedActor # Create raw instance and wrap it instance = object.__new__(_MetadataService._cls) @@ -472,7 +472,7 @@ async def test_remote_metadata_delegation(): @pytest.mark.asyncio async def test_remote_metadata_delegation_no_metadata(): """_WrappedActor returns empty dict when user instance has no metadata().""" - from pulsing.actor.remote import _WrappedActor + from pulsing.core.remote import _WrappedActor class _NoMeta: def ping(self): diff --git a/tests/python/apis/actor_system/test_actor_system_api.py b/tests/python/apis/actor_system/test_actor_system_api.py index bd0408555..bfcda1ef4 100644 --- a/tests/python/apis/actor_system/test_actor_system_api.py +++ b/tests/python/apis/actor_system/test_actor_system_api.py @@ -16,7 +16,7 @@ import pytest import pulsing as pul -from pulsing.actor import Actor, ActorId +from pulsing.core import Actor, ActorId # ============================================================================ diff --git a/tests/python/apis/ray_compat/test_ray_compat_api.py b/tests/python/apis/ray_compat/test_ray_compat_api.py index 78986d0d0..b467bd849 100644 --- a/tests/python/apis/ray_compat/test_ray_compat_api.py +++ b/tests/python/apis/ray_compat/test_ray_compat_api.py @@ -15,7 +15,7 @@ import pytest import time -from pulsing.compat import ray +from pulsing.integrations.ray_compat import ray # ============================================================================ diff --git a/tests/python/apis/ray_like/test_ray_like_api.py b/tests/python/apis/ray_like/test_ray_like_api.py index d3716fd25..998c9ea31 100644 --- a/tests/python/apis/ray_like/test_ray_like_api.py +++ b/tests/python/apis/ray_like/test_ray_like_api.py @@ -12,7 +12,7 @@ import pytest import pulsing as pul -from pulsing.actor import Actor +from pulsing.core import Actor # ============================================================================ diff --git a/tests/python/conftest.py b/tests/python/conftest.py index 03aba5d9b..b9030aa13 100644 --- a/tests/python/conftest.py +++ b/tests/python/conftest.py @@ -22,7 +22,7 @@ async def cleanup_global_system(): # Clean up after test try: - from pulsing.actor import _global_system, shutdown + from pulsing.core import _global_system, shutdown if _global_system is not None: await shutdown() diff --git a/tests/python/test_actor_list.py b/tests/python/test_actor_list.py index 8c117fad3..b8ebbca63 100644 --- a/tests/python/test_actor_list.py +++ b/tests/python/test_actor_list.py @@ -3,7 +3,7 @@ import asyncio import pytest import json -from pulsing.actor import init, remote, get_system, list_actors +from pulsing.core import init, remote, get_system, list_actors from pulsing.cli.inspect import _print_actors_table import io import sys diff --git a/tests/python/test_agent_runtime_lifecycle.py b/tests/python/test_agent_runtime_lifecycle.py index 0201f5309..22dfd6939 100644 --- a/tests/python/test_agent_runtime_lifecycle.py +++ b/tests/python/test_agent_runtime_lifecycle.py @@ -13,7 +13,7 @@ import pytest -from pulsing.actor import get_system, remote +from pulsing.core import get_system, remote from pulsing.agent import ( agent, cleanup, diff --git a/tests/python/test_chaos.py b/tests/python/test_chaos.py index 9a2ac89ba..7af7cf615 100644 --- a/tests/python/test_chaos.py +++ b/tests/python/test_chaos.py @@ -1,7 +1,7 @@ import asyncio import random import pytest -from pulsing.actor import ( +from pulsing.core import ( Actor, ActorId, Message, diff --git a/tests/python/test_cli_actor.py b/tests/python/test_cli_actor.py index 0bca36913..570407c7b 100644 --- a/tests/python/test_cli_actor.py +++ b/tests/python/test_cli_actor.py @@ -33,4 +33,4 @@ def test_actor_invalid_class_path_message(self): with pytest.raises(ValueError) as exc_info: actor_cli(actor_type="router") assert "full class path" in str(exc_info.value) - assert "pulsing.actors.worker.TransformersWorker" in str(exc_info.value) + assert "pulsing.serving.worker.TransformersWorker" in str(exc_info.value) diff --git a/tests/python/test_queue.py b/tests/python/test_queue.py index 318fac679..ae9a0a624 100644 --- a/tests/python/test_queue.py +++ b/tests/python/test_queue.py @@ -24,7 +24,7 @@ import pytest import pulsing as pul -from pulsing.queue import ( +from pulsing.streaming import ( BucketStorage, Queue, QueueReader, @@ -1054,7 +1054,7 @@ def test_sync_queue_standalone(): # Setup in background loop async def setup(): import pulsing as pul - from pulsing.queue import write_queue, read_queue + from pulsing.streaming import write_queue, read_queue system = await pul.actor_system() writer = await write_queue( @@ -1121,7 +1121,7 @@ def test_sync_writer_reader_standalone(): async def setup(): import pulsing as pul - from pulsing.queue import write_queue, read_queue + from pulsing.streaming import write_queue, read_queue system = await pul.actor_system() writer = await write_queue( @@ -1188,7 +1188,7 @@ def test_sync_reader_offset_standalone(): async def setup(): import pulsing as pul - from pulsing.queue import write_queue, read_queue + from pulsing.streaming import write_queue, read_queue system = await pul.actor_system() writer = await write_queue( diff --git a/tests/python/test_queue_backends.py b/tests/python/test_queue_backends.py index 67d20b69c..38d7742e9 100644 --- a/tests/python/test_queue_backends.py +++ b/tests/python/test_queue_backends.py @@ -19,7 +19,7 @@ import pytest import pulsing as pul -from pulsing.queue import ( +from pulsing.streaming import ( BucketStorage, MemoryBackend, Queue, diff --git a/tests/python/test_queue_topic_chaos.py b/tests/python/test_queue_topic_chaos.py index a47e5dbba..845c5c6f7 100644 --- a/tests/python/test_queue_topic_chaos.py +++ b/tests/python/test_queue_topic_chaos.py @@ -20,8 +20,13 @@ import pytest import pulsing as pul -from pulsing.queue import read_queue, write_queue -from pulsing.topic import PublishMode, read_topic, write_topic +from pulsing.streaming import ( + read_queue, + write_queue, + PublishMode, + read_topic, + write_topic, +) # ============================================================================= diff --git a/tests/python/test_ray_compat_running_loop.py b/tests/python/test_ray_compat_running_loop.py index f5b6b4d22..2af79e5c9 100644 --- a/tests/python/test_ray_compat_running_loop.py +++ b/tests/python/test_ray_compat_running_loop.py @@ -7,7 +7,7 @@ def test_ray_compat_init_inside_running_loop(): This covers environments like Jupyter or pytest-asyncio where an event loop is already running on the main thread. """ - from pulsing.compat import ray + from pulsing.integrations.ray_compat import ray async def main(): ray.init() diff --git a/tests/python/test_ray_init.py b/tests/python/test_ray_init.py index 092a73e0b..44e588375 100644 --- a/tests/python/test_ray_init.py +++ b/tests/python/test_ray_init.py @@ -18,36 +18,36 @@ def _reset_pulsing_state(): """Reset all Pulsing module state (system, background loop, KV).""" - import pulsing.actor as pa - import pulsing.ray as pr + import pulsing.core as pc + import pulsing.integrations.ray as pray # Shutdown Pulsing system via background loop - if pa._global_system is not None and pr._loop is not None: + if pc._global_system is not None and pray._loop is not None: try: - pr._run_sync(pr._do_shutdown()) + pray._run_sync(pray._do_shutdown()) except Exception: pass # Force clear global system (safety net) - pa._global_system = None + pc._global_system = None # Stop background event loop - if pr._loop is not None: + if pray._loop is not None: try: - pr._loop.call_soon_threadsafe(pr._loop.stop) + pray._loop.call_soon_threadsafe(pray._loop.stop) except Exception: pass - if pr._thread is not None: + if pray._thread is not None: try: - pr._thread.join(timeout=5) + pray._thread.join(timeout=5) except Exception: pass - pr._loop = None - pr._thread = None + pray._loop = None + pray._thread = None # Clean KV store try: - pr.cleanup() + pray.cleanup() except Exception: pass @@ -72,7 +72,7 @@ def ray_env(): def test_init_returns_system(ray_env): """init_in_ray() returns a Pulsing ActorSystem.""" - from pulsing.ray import init_in_ray + from pulsing.integrations.ray import init_in_ray system = init_in_ray() assert system is not None @@ -81,7 +81,7 @@ def test_init_returns_system(ray_env): def test_init_stores_seed_in_kv(ray_env): """First caller's address is stored as seed in Ray KV.""" - from pulsing.ray import _get_seed, init_in_ray + from pulsing.integrations.ray import _get_seed, init_in_ray system = init_in_ray() seed_addr = _get_seed() @@ -91,8 +91,8 @@ def test_init_stores_seed_in_kv(ray_env): def test_init_sets_global_system(ray_env): """init_in_ray() sets pulsing.actor global system.""" - from pulsing.actor import is_initialized - from pulsing.ray import init_in_ray + from pulsing.core import is_initialized + from pulsing.integrations.ray import init_in_ray assert not is_initialized() init_in_ray() @@ -106,7 +106,7 @@ def test_init_sets_global_system(ray_env): def test_init_raises_without_ray(): """init_in_ray() raises when Ray is not initialized.""" - from pulsing.ray import init_in_ray + from pulsing.integrations.ray import init_in_ray with pytest.raises(RuntimeError, match="Ray 未初始化"): init_in_ray() @@ -114,7 +114,7 @@ def test_init_raises_without_ray(): async def test_async_init_raises_without_ray(): """async_init_in_ray() raises when Ray is not initialized.""" - from pulsing.ray import async_init_in_ray + from pulsing.integrations.ray import async_init_in_ray with pytest.raises(RuntimeError, match="Ray 未初始化"): await async_init_in_ray() @@ -127,7 +127,7 @@ async def test_async_init_raises_without_ray(): def test_cleanup_clears_kv(ray_env): """cleanup() removes seed from KV store.""" - from pulsing.ray import _get_seed, cleanup, init_in_ray + from pulsing.integrations.ray import _get_seed, cleanup, init_in_ray init_in_ray() assert _get_seed() is not None @@ -147,7 +147,7 @@ def test_init_in_ray_actor(ray_env): @ray.remote class Worker: def setup(self): - from pulsing.ray import init_in_ray + from pulsing.integrations.ray import init_in_ray system = init_in_ray() return str(system.addr) @@ -168,7 +168,7 @@ def test_multi_actor_same_seed(ray_env): """All workers in separate processes discover the same seed.""" import os - from pulsing.ray import _get_seed, init_in_ray + from pulsing.integrations.ray import _get_seed, init_in_ray driver_pid = os.getpid() @@ -181,13 +181,13 @@ class Worker: def setup(self): import os - from pulsing.ray import init_in_ray + from pulsing.integrations.ray import init_in_ray init_in_ray() return os.getpid() def get_seed(self): - from pulsing.ray import _get_seed + from pulsing.integrations.ray import _get_seed return _get_seed() @@ -219,13 +219,13 @@ class Worker: def setup(self): import os - from pulsing.ray import init_in_ray + from pulsing.integrations.ray import init_in_ray system = init_in_ray() return os.getpid(), str(system.addr) def get_seed(self): - from pulsing.ray import _get_seed + from pulsing.integrations.ray import _get_seed return _get_seed() @@ -264,13 +264,13 @@ def test_actor_becomes_seed_without_driver(ray_env): @ray.remote class Worker: def setup(self): - from pulsing.ray import init_in_ray + from pulsing.integrations.ray import init_in_ray system = init_in_ray() return str(system.addr) def get_seed(self): - from pulsing.ray import _get_seed + from pulsing.integrations.ray import _get_seed return _get_seed() @@ -294,7 +294,7 @@ def get_seed(self): async def test_async_init_returns_system(ray_env): """async_init_in_ray() returns a system.""" - from pulsing.ray import async_init_in_ray + from pulsing.integrations.ray import async_init_in_ray system = await async_init_in_ray() assert system is not None @@ -303,7 +303,7 @@ async def test_async_init_returns_system(ray_env): async def test_async_init_stores_seed(ray_env): """async_init_in_ray() stores seed in KV.""" - from pulsing.ray import _get_seed, async_init_in_ray + from pulsing.integrations.ray import _get_seed, async_init_in_ray system = await async_init_in_ray() assert _get_seed() == str(system.addr) diff --git a/tests/python/test_receive_error_behavior.py b/tests/python/test_receive_error_behavior.py index 7415cb5c1..62b0a902d 100644 --- a/tests/python/test_receive_error_behavior.py +++ b/tests/python/test_receive_error_behavior.py @@ -9,7 +9,7 @@ import pytest import pulsing as pul -from pulsing.actor import Actor +from pulsing.core import Actor # ============================================================================ diff --git a/tests/python/test_remote_decorator.py b/tests/python/test_remote_decorator.py index 58100a8ea..aca7d26ab 100644 --- a/tests/python/test_remote_decorator.py +++ b/tests/python/test_remote_decorator.py @@ -25,7 +25,7 @@ @pytest.mark.asyncio async def test_proxy_method_validation(): """Test that proxy validates method names when methods list is provided.""" - from pulsing.actor import init, shutdown, remote, ActorProxy, get_system + from pulsing.core import init, shutdown, remote, ActorProxy, get_system @remote class Service: @@ -66,7 +66,7 @@ def valid_method(self): @pytest.mark.asyncio async def test_sync_method_error_handling(): """Test error handling in sync methods.""" - from pulsing.actor import init, shutdown, remote + from pulsing.core import init, shutdown, remote @remote class ErrorService: @@ -90,7 +90,7 @@ def will_fail(self): @pytest.mark.asyncio async def test_async_method_error_handling(): """Test error handling in async methods.""" - from pulsing.actor import init, shutdown, remote + from pulsing.core import init, shutdown, remote @remote class AsyncErrorService: @@ -120,7 +120,7 @@ async def will_fail(self): @pytest.mark.asyncio async def test_actor_proxy_from_ref_dynamic_mode(): """Test ActorProxy.from_ref in dynamic mode (no method list).""" - from pulsing.actor import init, shutdown, remote, ActorProxy, get_system + from pulsing.core import init, shutdown, remote, ActorProxy, get_system @remote class DynamicService: @@ -152,7 +152,7 @@ def method_b(self): @pytest.mark.asyncio async def test_actor_proxy_from_ref_with_async_methods(): """Test ActorProxy.from_ref with explicit async_methods set.""" - from pulsing.actor import init, shutdown, remote, ActorProxy, get_system + from pulsing.core import init, shutdown, remote, ActorProxy, get_system @remote class HybridService: @@ -197,7 +197,7 @@ async def async_method(self): @pytest.mark.asyncio async def test_remote_delayed_call(): """Test self.delayed(sec).method(...) schedules a tell after delay.""" - from pulsing.actor import init, shutdown, remote + from pulsing.core import init, shutdown, remote @remote class DelayedCallService: @@ -237,7 +237,7 @@ def get_received(self): @pytest.mark.asyncio async def test_remote_delayed_call_cancel(): """Test that the task returned by delayed().method() can be cancelled.""" - from pulsing.actor import init, shutdown, remote + from pulsing.core import init, shutdown, remote @remote class DelayedCancelService: @@ -278,7 +278,7 @@ def get_received(self): @pytest.mark.asyncio async def test_async_method_does_not_block_actor(): """Test that async methods don't block the actor from receiving new messages.""" - from pulsing.actor import init, shutdown, remote + from pulsing.core import init, shutdown, remote @remote class NonBlockingService: diff --git a/tests/python/test_resolve_as_any.py b/tests/python/test_resolve_as_any.py index a7dfb891d..2a408887a 100644 --- a/tests/python/test_resolve_as_any.py +++ b/tests/python/test_resolve_as_any.py @@ -15,7 +15,7 @@ import pytest import pulsing as pul -from pulsing.actor import Actor, ActorRef, as_any, remote +from pulsing.core import Actor, ActorRef, as_any, remote # ============================================================================ @@ -166,7 +166,7 @@ async def test_as_any_function_with_ref_from_resolve(initialized_pul): @pytest.mark.asyncio async def test_as_any_function_with_raw_ref(initialized_pul): """as_any(ref) works when ref is raw ActorRef from system.resolve().""" - from pulsing.actor import get_system + from pulsing.core import get_system await _ServiceWithMethods.spawn(name="as_any_raw_svc", public=True) diff --git a/tests/python/test_sealed_message.py b/tests/python/test_sealed_message.py index f8fbbcdb9..d5781df34 100644 --- a/tests/python/test_sealed_message.py +++ b/tests/python/test_sealed_message.py @@ -13,7 +13,7 @@ from dataclasses import dataclass import pytest -from pulsing.actor import ( +from pulsing.core import ( Actor, Message, SealedPyMessage, diff --git a/tests/python/test_system_actor.py b/tests/python/test_system_actor.py index 527bdaf05..92a1e2860 100644 --- a/tests/python/test_system_actor.py +++ b/tests/python/test_system_actor.py @@ -9,7 +9,7 @@ import asyncio import pytest import pulsing as pul -from pulsing.actor import ( +from pulsing.core import ( get_python_actor_service, get_system_actor, remote, diff --git a/tests/python/test_topic.py b/tests/python/test_topic.py index c21ce051b..08f64e195 100644 --- a/tests/python/test_topic.py +++ b/tests/python/test_topic.py @@ -16,7 +16,7 @@ import pytest import pulsing as pul -from pulsing.topic import ( +from pulsing.streaming import ( PublishMode, PublishResult, TopicReader, @@ -728,7 +728,7 @@ async def test_double_start_stop(actor_system): @pytest.mark.asyncio async def test_topic_broker_via_storage_manager(actor_system): """Test that topic broker is created via StorageManager.""" - from pulsing.queue.manager import get_storage_manager + from pulsing.streaming.manager import get_storage_manager # Ensure StorageManager exists manager = await get_storage_manager(actor_system) @@ -748,7 +748,7 @@ async def test_topic_broker_via_storage_manager(actor_system): @pytest.mark.asyncio async def test_list_topics(actor_system): """Test listing topics via StorageManager.""" - from pulsing.queue.manager import get_storage_manager + from pulsing.streaming.manager import get_storage_manager # Create some topics await write_topic(actor_system, "list_topic_1") @@ -868,7 +868,7 @@ async def handle_slow(msg): @pytest.mark.asyncio async def test_publish_timeout_error(actor_system): """Test that publish raises TimeoutError when timeout expires.""" - from pulsing.actor import Actor, ActorId + from pulsing.core import Actor, ActorId # Create an intentionally slow subscriber class SlowSubscriber(Actor): @@ -891,7 +891,7 @@ async def receive(self, msg): await actor_system.spawn(slow_actor, name=actor_name, public=True) # Register with broker using helper function - from pulsing.topic import subscribe_to_topic + from pulsing.streaming import subscribe_to_topic await subscribe_to_topic( actor_system, "timeout_error_topic", "slow_sub", actor_name @@ -909,7 +909,7 @@ async def receive(self, msg): @pytest.mark.asyncio async def test_ask_with_timeout_success(actor_system): """Test ask_with_timeout helper function (success case).""" - from pulsing.actor import Actor, ActorId, ask_with_timeout + from pulsing.core import Actor, ActorId, ask_with_timeout class EchoActor(Actor): def on_start(self, actor_id: ActorId) -> None: @@ -932,7 +932,7 @@ async def receive(self, msg): @pytest.mark.asyncio async def test_ask_with_timeout_error(actor_system): """Test ask_with_timeout raises TimeoutError when timeout expires.""" - from pulsing.actor import Actor, ActorId, ask_with_timeout + from pulsing.core import Actor, ActorId, ask_with_timeout class SlowActor(Actor): def on_start(self, actor_id: ActorId) -> None: @@ -956,7 +956,7 @@ async def receive(self, msg): @pytest.mark.asyncio async def test_tell_with_timeout_success(actor_system): """Test tell_with_timeout helper function (success case).""" - from pulsing.actor import Actor, ActorId, tell_with_timeout + from pulsing.core import Actor, ActorId, tell_with_timeout received = [] @@ -985,7 +985,7 @@ async def receive(self, msg): @pytest.mark.asyncio async def test_default_publish_timeout(): """Test that DEFAULT_PUBLISH_TIMEOUT is reasonable.""" - from pulsing.topic.topic import DEFAULT_PUBLISH_TIMEOUT + from pulsing.streaming.pubsub import DEFAULT_PUBLISH_TIMEOUT # Default timeout should be a reasonable value (30 seconds) assert DEFAULT_PUBLISH_TIMEOUT == 30.0 @@ -994,7 +994,7 @@ async def test_default_publish_timeout(): @pytest.mark.asyncio async def test_default_ask_timeout(): """Test that DEFAULT_ASK_TIMEOUT is reasonable.""" - from pulsing.actor import DEFAULT_ASK_TIMEOUT + from pulsing.core import DEFAULT_ASK_TIMEOUT # Default timeout should be a reasonable value (30 seconds) assert DEFAULT_ASK_TIMEOUT == 30.0 @@ -1011,8 +1011,8 @@ async def test_subscriber_failure_threshold_eviction(actor_system): Verify P0-3 fix: Subscribers are automatically evicted after 3 consecutive failures. """ - from pulsing.actor import Actor, ActorId - from pulsing.topic.broker import MAX_CONSECUTIVE_FAILURES + from pulsing.core import Actor, ActorId + from pulsing.streaming.broker import MAX_CONSECUTIVE_FAILURES # Verify configuration constants assert MAX_CONSECUTIVE_FAILURES == 3 @@ -1035,7 +1035,7 @@ async def receive(self, msg): await actor_system.spawn(failing_actor, name=actor_name, public=True) # Register failing subscriber with broker using helper function - from pulsing.topic import subscribe_to_topic + from pulsing.streaming import subscribe_to_topic await subscribe_to_topic( actor_system, "eviction_test_topic", "failing_sub", actor_name @@ -1069,7 +1069,7 @@ async def test_subscriber_ttl_config(): Verify P0-3 fix: TTL re-resolve configuration. """ - from pulsing.topic.broker import REF_TTL_SECONDS, MAX_CONSECUTIVE_FAILURES + from pulsing.streaming.broker import REF_TTL_SECONDS, MAX_CONSECUTIVE_FAILURES # Verify configuration is reasonable assert REF_TTL_SECONDS == 60.0, "TTL should be 60 seconds" @@ -1120,7 +1120,7 @@ async def test_default_mailbox_capacity_config(): Verify P1-1 fix: SystemConfig's default mailbox capacity. """ # Python side uses through Rust bindings, verify default value exists - from pulsing.actor import SystemConfig + from pulsing.core import SystemConfig config = SystemConfig.standalone() # Verify config can be created normally @@ -1138,7 +1138,7 @@ async def test_resolve_named_returns_actor(actor_system): Verify P1-2 fix: resolve_named basic functionality. """ - from pulsing.actor import Actor, ActorId + from pulsing.core import Actor, ActorId class TestActor(Actor): def on_start(self, actor_id: ActorId) -> None: @@ -1170,7 +1170,7 @@ async def test_resolve_named_multiple_calls(actor_system): Verify P1-2 fix: Multiple resolves should return valid ActorRefs. Note: RoundRobin cannot be verified in single-node environment, but basic functionality can be verified. """ - from pulsing.actor import Actor, ActorId + from pulsing.core import Actor, ActorId class CounterActor(Actor): def __init__(self): From 02221aaa2983d91236c778a77a4c5923292026f2 Mon Sep 17 00:00:00 2001 From: Reiase Date: Mon, 16 Feb 2026 22:39:15 +0800 Subject: [PATCH 08/15] Refactor module imports and update documentation for clarity - Updated import statements across various documentation files to reflect the new module structure, replacing `pulsing.actor` and `pulsing.queue` with `pulsing.core` and `pulsing.streaming`. - Enhanced examples and guides to demonstrate the updated import paths, ensuring consistency and clarity for users. - Improved documentation for actor and queue functionalities, emphasizing the new organization under `pulsing.integrations` and `pulsing.serving`. - Streamlined references to actor management and queue operations, aiding user understanding and accessibility. --- docs/design/name-only-resolve.md | 2 +- docs/overrides/home.html | 4 +-- docs/src/agent/autogen.md | 2 +- docs/src/agent/autogen.zh.md | 2 +- docs/src/agent/index.md | 4 +-- docs/src/agent/index.zh.md | 4 +-- docs/src/agent/langgraph.md | 4 +-- docs/src/agent/langgraph.zh.md | 4 +-- docs/src/api/python.md | 4 +-- docs/src/api/python.zh.md | 4 +-- docs/src/design/as-actor-decorator.md | 2 +- docs/src/design/as-actor-decorator.zh.md | 2 +- docs/src/design/cluster-networking.md | 2 +- docs/src/design/cluster-networking.zh.md | 2 +- docs/src/design/load_sync.md | 2 +- docs/src/design/load_sync.zh.md | 2 +- docs/src/examples/index.md | 4 +-- docs/src/examples/index.zh.md | 4 +-- docs/src/examples/llm_inference.md | 6 ++-- docs/src/examples/llm_inference.zh.md | 6 ++-- docs/src/guide/operations.md | 32 ++++++++++---------- docs/src/guide/operations.zh.md | 28 ++++++++--------- docs/src/guide/queue.md | 4 +-- docs/src/guide/queue.zh.md | 4 +-- docs/src/guide/reliability.md | 2 +- docs/src/guide/reliability.zh.md | 2 +- docs/src/guide/remote_actors.md | 2 +- docs/src/guide/remote_actors.zh.md | 2 +- docs/src/guide/semantics.md | 4 +-- docs/src/guide/semantics.zh.md | 4 +-- docs/src/guide/style.md | 10 +++--- docs/src/guide/style.zh.md | 10 +++--- docs/src/quickstart/agent.md | 6 ++-- docs/src/quickstart/agent.zh.md | 6 ++-- docs/src/quickstart/cluster_networking.md | 6 ++-- docs/src/quickstart/cluster_networking.zh.md | 6 ++-- docs/src/quickstart/llm_inference.md | 10 +++--- docs/src/quickstart/llm_inference.zh.md | 10 +++--- 38 files changed, 107 insertions(+), 107 deletions(-) diff --git a/docs/design/name-only-resolve.md b/docs/design/name-only-resolve.md index 450f03dc2..dec2bba30 100644 --- a/docs/design/name-only-resolve.md +++ b/docs/design/name-only-resolve.md @@ -75,7 +75,7 @@ ## 建议 - **短期**:采用 **方案 A(`get_actor(name)` + 动态 Proxy)**,并在 Proxy 内采用 **A1**(`async_methods is None` 时全部按 async),这样: - - 只在一个地方(如 `pulsing.actor`)增加 `get_actor(name)`(及可选 `node_id`)。 + - 只在一个地方(如 `pulsing.core`)增加 `get_actor(name)`(及可选 `node_id`)。 - 对 `ActorProxy` 做最小改动:在 `__getattr__` 里当 `self._async_methods is None` 时令 `is_async=True`。 - **命名**:`get_actor(name)` 与现有 `resolve(name)`(返回 ref)区分清晰;若希望更短,可再提供 `pul.actor(name)` 作为别名。 - **文档**:说明「无类型、无补全;流式优先用类型化 resolve」即可。 diff --git a/docs/overrides/home.html b/docs/overrides/home.html index 20d320aac..2293663e1 100644 --- a/docs/overrides/home.html +++ b/docs/overrides/home.html @@ -876,12 +876,12 @@

LLM Inference Ready

# Start OpenAI-compatible Router
-pulsing actor pulsing.actors.Router \
+pulsing actor pulsing.serving.Router \
     --addr 0.0.0.0:8000 \
     --http_port 8080 --model_name my-llm
 
 # Start vLLM Worker
-pulsing actor pulsing.actors.VllmWorker \
+pulsing actor pulsing.serving.VllmWorker \
     --model Qwen/Qwen2.5-0.5B \
     --addr 0.0.0.0:8001 --seeds 127.0.0.1:8000
 
diff --git a/docs/src/agent/autogen.md b/docs/src/agent/autogen.md
index 1113c86dc..334d78718 100644
--- a/docs/src/agent/autogen.md
+++ b/docs/src/agent/autogen.md
@@ -12,7 +12,7 @@ Pulsing implements `PulsingRuntime`, a distributed runtime for Microsoft AutoGen
 ## Quick Start
 
 ```python
-from pulsing.autogen import PulsingRuntime
+from pulsing.integrations.autogen import PulsingRuntime
 from autogen_core import AgentId, RoutedAgent, message_handler
 
 class MyAgent(RoutedAgent):
diff --git a/docs/src/agent/autogen.zh.md b/docs/src/agent/autogen.zh.md
index 599987940..235111f51 100644
--- a/docs/src/agent/autogen.zh.md
+++ b/docs/src/agent/autogen.zh.md
@@ -12,7 +12,7 @@ Pulsing 实现了 `PulsingRuntime`,为 Microsoft AutoGen 提供分布式运行
 ## 快速开始
 
 ```python
-from pulsing.autogen import PulsingRuntime
+from pulsing.integrations.autogen import PulsingRuntime
 from autogen_core import AgentId, RoutedAgent, message_handler
 
 class MyAgent(RoutedAgent):
diff --git a/docs/src/agent/index.md b/docs/src/agent/index.md
index d4d335ebb..cf92b295c 100644
--- a/docs/src/agent/index.md
+++ b/docs/src/agent/index.md
@@ -84,7 +84,7 @@ finally:
 ### AutoGen
 
 ```python
-from pulsing.autogen import PulsingRuntime
+from pulsing.integrations.autogen import PulsingRuntime
 
 # Replace SingleThreadedAgentRuntime
 runtime = PulsingRuntime(addr="0.0.0.0:8000")
@@ -95,7 +95,7 @@ await runtime.register_factory("agent", lambda: MyAgent())
 ### LangGraph
 
 ```python
-from pulsing.langgraph import with_pulsing
+from pulsing.integrations.langgraph import with_pulsing
 
 app = graph.compile()
 distributed_app = with_pulsing(
diff --git a/docs/src/agent/index.zh.md b/docs/src/agent/index.zh.md
index 6739ad82f..68a0d711d 100644
--- a/docs/src/agent/index.zh.md
+++ b/docs/src/agent/index.zh.md
@@ -84,7 +84,7 @@ finally:
 ### AutoGen
 
 ```python
-from pulsing.autogen import PulsingRuntime
+from pulsing.integrations.autogen import PulsingRuntime
 
 # 替代 SingleThreadedAgentRuntime
 runtime = PulsingRuntime(addr="0.0.0.0:8000")
@@ -95,7 +95,7 @@ await runtime.register_factory("agent", lambda: MyAgent())
 ### LangGraph
 
 ```python
-from pulsing.langgraph import with_pulsing
+from pulsing.integrations.langgraph import with_pulsing
 
 app = graph.compile()
 distributed_app = with_pulsing(
diff --git a/docs/src/agent/langgraph.md b/docs/src/agent/langgraph.md
index 06b798966..0bfce3b69 100644
--- a/docs/src/agent/langgraph.md
+++ b/docs/src/agent/langgraph.md
@@ -12,7 +12,7 @@ Pulsing provides `with_pulsing()`, a one-line wrapper for distributed LangGraph
 ## Quick Start
 
 ```python
-from pulsing.langgraph import with_pulsing
+from pulsing.integrations.langgraph import with_pulsing
 from langgraph.graph import StateGraph
 
 # Build graph as usual
@@ -53,7 +53,7 @@ node_mapping={
 ## Starting Workers
 
 ```python
-from pulsing.langgraph import start_worker
+from pulsing.integrations.langgraph import start_worker
 
 # GPU server
 await start_worker("llm", llm_node, addr="0.0.0.0:8001")
diff --git a/docs/src/agent/langgraph.zh.md b/docs/src/agent/langgraph.zh.md
index 7b4db9ccd..d5c273cd7 100644
--- a/docs/src/agent/langgraph.zh.md
+++ b/docs/src/agent/langgraph.zh.md
@@ -12,7 +12,7 @@ Pulsing 提供 `with_pulsing()`,一行代码实现分布式 LangGraph。
 ## 快速开始
 
 ```python
-from pulsing.langgraph import with_pulsing
+from pulsing.integrations.langgraph import with_pulsing
 from langgraph.graph import StateGraph
 
 # 像往常一样构建图
@@ -53,7 +53,7 @@ node_mapping={
 ## 启动 Worker
 
 ```python
-from pulsing.langgraph import start_worker
+from pulsing.integrations.langgraph import start_worker
 
 # GPU 服务器
 await start_worker("llm", llm_node, addr="0.0.0.0:8001")
diff --git a/docs/src/api/python.md b/docs/src/api/python.md
index 190c123ba..74197c15b 100644
--- a/docs/src/api/python.md
+++ b/docs/src/api/python.md
@@ -24,7 +24,7 @@ pip install -e .
 
 ## Actor Module
 
-::: pulsing.actor
+::: pulsing.core
 
 ## Agent Module
 
@@ -32,4 +32,4 @@ pip install -e .
 
 ## Queue Module
 
-::: pulsing.queue
+::: pulsing.streaming
diff --git a/docs/src/api/python.zh.md b/docs/src/api/python.zh.md
index 1a46bcac3..e88208be9 100644
--- a/docs/src/api/python.zh.md
+++ b/docs/src/api/python.zh.md
@@ -24,7 +24,7 @@ pip install -e .
 
 ## Actor 模块
 
-::: pulsing.actor
+::: pulsing.core
 
 ## Agent 模块
 
@@ -32,4 +32,4 @@ pip install -e .
 
 ## 队列模块
 
-::: pulsing.queue
+::: pulsing.streaming
diff --git a/docs/src/design/as-actor-decorator.md b/docs/src/design/as-actor-decorator.md
index d355610d2..5a32a46a0 100644
--- a/docs/src/design/as-actor-decorator.md
+++ b/docs/src/design/as-actor-decorator.md
@@ -165,7 +165,7 @@ html = await worker.fetch_data("https://example.com")
 counter = await Counter.spawn(name="global_counter", init_value=0)
 
 # 其他地方可以通过名称解析
-from pulsing.actor import get_system
+from pulsing.core import get_system
 ref = await get_system().resolve("global_counter")
 ```
 
diff --git a/docs/src/design/as-actor-decorator.zh.md b/docs/src/design/as-actor-decorator.zh.md
index 1e1a67ef5..99cdb7ae6 100644
--- a/docs/src/design/as-actor-decorator.zh.md
+++ b/docs/src/design/as-actor-decorator.zh.md
@@ -165,7 +165,7 @@ html = await worker.fetch_data("https://example.com")
 counter = await Counter.spawn(name="global_counter", init_value=0)
 
 # 其他地方可以通过名称解析
-from pulsing.actor import get_system
+from pulsing.core import get_system
 ref = await get_system().resolve("global_counter")
 ```
 
diff --git a/docs/src/design/cluster-networking.md b/docs/src/design/cluster-networking.md
index 527a7af6e..8ae2775ab 100644
--- a/docs/src/design/cluster-networking.md
+++ b/docs/src/design/cluster-networking.md
@@ -73,7 +73,7 @@ In Rust, `ActorSystem::new(config)` builds a `NamingBackend`: if `config.head_ad
 - **Seed discovery** uses Ray’s **internal KV store**:
   - The first process to call `init_in_ray()` starts Pulsing with **no seeds**, gets its bind address, and **writes** that address into Ray KV under a fixed key (e.g. `pulsing:seed_addr`). It is the initial “seed” node.
   - Any later process reads that key, gets the seed address, and starts Pulsing **with that seed**. So all processes join the same Pulsing cluster; under the hood it is still **Gossip + seed**, with the first writer’s address as the seed.
-- If two processes race to write the key, the implementation may shut down one Pulsing instance and re-join using the winner’s address (see `pulsing.ray`).
+- If two processes race to write the key, the implementation may shut down one Pulsing instance and re-join using the winner’s address (see `pulsing.integrations.ray`).
 
 So: **Ray KV only provides the first seed**. After that, the cluster behaves like a normal Gossip cluster. There is no separate “Ray backend”; it is Gossip with a different bootstrap source.
 
diff --git a/docs/src/design/cluster-networking.zh.md b/docs/src/design/cluster-networking.zh.md
index fd987ac73..f3fdef1a5 100644
--- a/docs/src/design/cluster-networking.zh.md
+++ b/docs/src/design/cluster-networking.zh.md
@@ -73,7 +73,7 @@ Gossip 的节奏与行为由 `GossipConfig` 控制:`gossip_interval`、`fanout
 - **Seed 发现**使用 Ray 的 **internal KV**:
   - 第一个调用 `init_in_ray()` 的进程以**无 seed** 方式启动 Pulsing,得到本机地址后将该地址**写入** Ray KV 的固定 key(如 `pulsing:seed_addr`),成为初始 “seed” 节点。
   - 之后任意进程读取该 key,得到 seed 地址,并以该 seed 启动 Pulsing,从而加入同一集群。底层仍是 **Gossip + seed**,首个写入者的地址即 seed。
-- 若两进程竞争写 key,实现上会对其中一个实例做 shutdown 并用胜出者地址重新 join(见 `pulsing.ray`)。
+- 若两进程竞争写 key,实现上会对其中一个实例做 shutdown 并用胜出者地址重新 join(见 `pulsing.integrations.ray`)。
 
 因此:**Ray KV 仅提供首个 seed**;之后集群行为与普通 Gossip 集群一致,没有单独的 “Ray 后端”,只是 Gossip 的另一种启动来源。
 
diff --git a/docs/src/design/load_sync.md b/docs/src/design/load_sync.md
index 653684c1b..f0bbb2f00 100644
--- a/docs/src/design/load_sync.md
+++ b/docs/src/design/load_sync.md
@@ -58,7 +58,7 @@ worker_ref = await scheduler.select_worker()  # 选择负载最低的
 ### 启动 Router (默认使用 StreamLoadScheduler)
 
 ```python
-from pulsing.actors import start_router
+from pulsing.serving import start_router
 
 runner = await start_router(
     system,
diff --git a/docs/src/design/load_sync.zh.md b/docs/src/design/load_sync.zh.md
index 653684c1b..f0bbb2f00 100644
--- a/docs/src/design/load_sync.zh.md
+++ b/docs/src/design/load_sync.zh.md
@@ -58,7 +58,7 @@ worker_ref = await scheduler.select_worker()  # 选择负载最低的
 ### 启动 Router (默认使用 StreamLoadScheduler)
 
 ```python
-from pulsing.actors import start_router
+from pulsing.serving import start_router
 
 runner = await start_router(
     system,
diff --git a/docs/src/examples/index.md b/docs/src/examples/index.md
index 33639fb7e..bd9318903 100644
--- a/docs/src/examples/index.md
+++ b/docs/src/examples/index.md
@@ -236,7 +236,7 @@ Pulsing integrates with popular agent frameworks. See [Agent Integration](../age
 Use `PulsingRuntime` as a drop-in replacement for AutoGen's runtime:
 
 ```python
-from pulsing.autogen import PulsingRuntime
+from pulsing.integrations.autogen import PulsingRuntime
 
 runtime = PulsingRuntime(addr="0.0.0.0:8000")
 await runtime.start()
@@ -254,7 +254,7 @@ cd examples/agent/autogen && ./run_distributed.sh
 Use `with_pulsing()` to enable distributed execution:
 
 ```python
-from pulsing.langgraph import with_pulsing
+from pulsing.integrations.langgraph import with_pulsing
 
 app = graph.compile()
 distributed_app = with_pulsing(
diff --git a/docs/src/examples/index.zh.md b/docs/src/examples/index.zh.md
index db877e949..4917f83b9 100644
--- a/docs/src/examples/index.zh.md
+++ b/docs/src/examples/index.zh.md
@@ -199,7 +199,7 @@ Pulsing 集成主流 Agent 框架。详见 [Agent 框架支持](../agent/index.z
 使用 `PulsingRuntime` 替代 AutoGen 默认运行时:
 
 ```python
-from pulsing.autogen import PulsingRuntime
+from pulsing.integrations.autogen import PulsingRuntime
 
 runtime = PulsingRuntime(addr="0.0.0.0:8000")
 await runtime.start()
@@ -217,7 +217,7 @@ cd examples/agent/autogen && ./run_distributed.sh
 使用 `with_pulsing()` 实现分布式执行:
 
 ```python
-from pulsing.langgraph import with_pulsing
+from pulsing.integrations.langgraph import with_pulsing
 
 app = graph.compile()
 distributed_app = with_pulsing(
diff --git a/docs/src/examples/llm_inference.md b/docs/src/examples/llm_inference.md
index 7481f98d7..bf03eb7f9 100644
--- a/docs/src/examples/llm_inference.md
+++ b/docs/src/examples/llm_inference.md
@@ -19,7 +19,7 @@ This guide shows how to run a **router + worker** LLM service with Pulsing, and
 The router needs an **actor system address** so workers can join the same cluster:
 
 ```bash
-pulsing actor pulsing.actors.Router \
+pulsing actor pulsing.serving.Router \
   --addr 0.0.0.0:8000 \
   --http_host 0.0.0.0 \
   --http_port 8080 \
@@ -34,7 +34,7 @@ You can run **one or more** workers. Each worker should join the router node via
 ### Option A: Transformers worker (Terminal B)
 
 ```bash
-pulsing actor pulsing.actors.worker.TransformersWorker \
+pulsing actor pulsing.serving.worker.TransformersWorker \
   --model_name gpt2 \
   --device cpu \
   --addr 0.0.0.0:8001 \
@@ -45,7 +45,7 @@ pulsing actor pulsing.actors.worker.TransformersWorker \
 ### Option B: vLLM worker (Terminal C)
 
 ```bash
-pulsing actor pulsing.actors.vllm.VllmWorker \
+pulsing actor pulsing.serving.vllm.VllmWorker \
   --model Qwen/Qwen2.5-0.5B \
   --addr 0.0.0.0:8002 \
   --seeds 127.0.0.1:8000 \
diff --git a/docs/src/examples/llm_inference.zh.md b/docs/src/examples/llm_inference.zh.md
index da2f4724a..58732d297 100644
--- a/docs/src/examples/llm_inference.zh.md
+++ b/docs/src/examples/llm_inference.zh.md
@@ -19,7 +19,7 @@
 Router 需要指定 **actor system 地址**,以便其它进程启动的 workers 加入同一集群:
 
 ```bash
-pulsing actor pulsing.actors.Router \
+pulsing actor pulsing.serving.Router \
   --addr 0.0.0.0:8000 \
   --http_host 0.0.0.0 \
   --http_port 8080 \
@@ -34,7 +34,7 @@ pulsing actor pulsing.actors.Router \
 ### 方案 A:Transformers Worker(终端 B)
 
 ```bash
-pulsing actor pulsing.actors.worker.TransformersWorker \
+pulsing actor pulsing.serving.worker.TransformersWorker \
   --model_name gpt2 \
   --device cpu \
   --addr 0.0.0.0:8001 \
@@ -45,7 +45,7 @@ pulsing actor pulsing.actors.worker.TransformersWorker \
 ### 方案 B:vLLM Worker(终端 C)
 
 ```bash
-pulsing actor pulsing.actors.vllm.VllmWorker \
+pulsing actor pulsing.serving.vllm.VllmWorker \
   --model Qwen/Qwen2.5-0.5B \
   --addr 0.0.0.0:8002 \
   --seeds 127.0.0.1:8000 \
diff --git a/docs/src/guide/operations.md b/docs/src/guide/operations.md
index 7fc18e19f..baf6dfd10 100644
--- a/docs/src/guide/operations.md
+++ b/docs/src/guide/operations.md
@@ -12,9 +12,9 @@ The `pulsing actor` command starts actors by providing their full class path. Th
 
 Actor type must be a full class path:
 - Format: `module.path.ClassName`
-- Example: `pulsing.actors.Router`
-- Example: `pulsing.actors.TransformersWorker`
-- Example: `pulsing.actors.VllmWorker`
+- Example: `pulsing.serving.Router`
+- Example: `pulsing.serving.TransformersWorker`
+- Example: `pulsing.serving.VllmWorker`
 - Example: `my_module.my_actor.MyCustomActor`
 
 ### Examples
@@ -22,7 +22,7 @@ Actor type must be a full class path:
 #### Router (OpenAI-compatible HTTP API)
 
 ```bash
-pulsing actor pulsing.actors.Router \
+pulsing actor pulsing.serving.Router \
   --addr 0.0.0.0:8000 \
   --http_host 0.0.0.0 \
   --http_port 8080 \
@@ -34,7 +34,7 @@ pulsing actor pulsing.actors.Router \
 #### Transformers Worker
 
 ```bash
-pulsing actor pulsing.actors.worker.TransformersWorker \
+pulsing actor pulsing.serving.worker.TransformersWorker \
   --model_name gpt2 \
   --device cpu \
   --addr 0.0.0.0:8001 \
@@ -45,7 +45,7 @@ pulsing actor pulsing.actors.worker.TransformersWorker \
 #### vLLM Worker
 
 ```bash
-pulsing actor pulsing.actors.vllm.VllmWorker \
+pulsing actor pulsing.serving.vllm.VllmWorker \
   --model Qwen/Qwen2 \
   --addr 0.0.0.0:8002 \
   --seeds 127.0.0.1:8000 \
@@ -58,18 +58,18 @@ pulsing actor pulsing.actors.vllm.VllmWorker \
 
 ```bash
 # Start multiple workers with different names
-pulsing actor pulsing.actors.worker.TransformersWorker \
+pulsing actor pulsing.serving.worker.TransformersWorker \
   --model_name gpt2 \
   --name worker-1 \
   --seeds 127.0.0.1:8000
 
-pulsing actor pulsing.actors.worker.TransformersWorker \
+pulsing actor pulsing.serving.worker.TransformersWorker \
   --model_name gpt2 \
   --name worker-2 \
   --seeds 127.0.0.1:8000
 
 # Router targeting specific worker name
-pulsing actor pulsing.actors.Router \
+pulsing actor pulsing.serving.Router \
   --worker_name worker-1 \
   --seeds 127.0.0.1:8000
 ```
@@ -85,7 +85,7 @@ pulsing actor pulsing.actors.Router \
 
 ```bash
 # Pass parameters directly as command-line arguments
-pulsing actor pulsing.actors.worker.TransformersWorker \
+pulsing actor pulsing.serving.worker.TransformersWorker \
   --model_name gpt2 \
   --device cpu \
   --preload true \
@@ -93,7 +93,7 @@ pulsing actor pulsing.actors.worker.TransformersWorker \
   --seeds 127.0.0.1:8000
 
 # Start vLLM worker with all parameters
-pulsing actor pulsing.actors.vllm.VllmWorker \
+pulsing actor pulsing.serving.vllm.VllmWorker \
   --model Qwen/Qwen2 \
   --role aggregated \
   --max_new_tokens 512 \
@@ -109,7 +109,7 @@ Options:
 
 The Actor class must:
 - Be importable from the specified module path
-- Inherit from `pulsing.actor.Actor`
+- Inherit from `pulsing.core.Actor`
 - Have a constructor with named parameters (the CLI automatically matches arguments to constructor parameters)
 
 **How it works:**
@@ -227,10 +227,10 @@ pulsing bench gpt2 --url http://localhost:8080
 
 | Task | Command |
 |------|---------|
-| Start router | `pulsing actor pulsing.actors.Router --addr 0.0.0.0:8000 --http_port 8080` |
-| Start worker | `pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --seeds ...` |
-| Start multiple workers | `pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --name worker-1 --seeds ...` |
-| Router with custom worker | `pulsing actor pulsing.actors.Router --worker_name worker-1 --seeds ...` |
+| Start router | `pulsing actor pulsing.serving.Router --addr 0.0.0.0:8000 --http_port 8080` |
+| Start worker | `pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --seeds ...` |
+| Start multiple workers | `pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --name worker-1 --seeds ...` |
+| Router with custom worker | `pulsing actor pulsing.serving.Router --worker_name worker-1 --seeds ...` |
 | List actors | `pulsing inspect actors --endpoint 127.0.0.1:8000` |
 | Inspect cluster | `pulsing inspect cluster --seeds 127.0.0.1:8000` |
 | Inspect actors | `pulsing inspect actors --seeds 127.0.0.1:8000 --top 10` |
diff --git a/docs/src/guide/operations.zh.md b/docs/src/guide/operations.zh.md
index 0e19dc52d..bb148c9fb 100644
--- a/docs/src/guide/operations.zh.md
+++ b/docs/src/guide/operations.zh.md
@@ -12,9 +12,9 @@ Pulsing 内置 CLI 工具,用于启动 actors、检查系统和基准测试分
 
 Actor 类型必须是完整的类路径:
 - 格式: `module.path.ClassName`
-- 示例: `pulsing.actors.Router`
-- 示例: `pulsing.actors.TransformersWorker`
-- 示例: `pulsing.actors.VllmWorker`
+- 示例: `pulsing.serving.Router`
+- 示例: `pulsing.serving.TransformersWorker`
+- 示例: `pulsing.serving.VllmWorker`
 - 示例: `my_module.my_actor.MyCustomActor`
 
 ### 示例
@@ -22,7 +22,7 @@ Actor 类型必须是完整的类路径:
 #### Router(OpenAI 兼容 HTTP API)
 
 ```bash
-pulsing actor pulsing.actors.Router \
+pulsing actor pulsing.serving.Router \
   --addr 0.0.0.0:8000 \
   --http_host 0.0.0.0 \
   --http_port 8080 \
@@ -34,7 +34,7 @@ pulsing actor pulsing.actors.Router \
 #### Transformers Worker
 
 ```bash
-pulsing actor pulsing.actors.worker.TransformersWorker \
+pulsing actor pulsing.serving.worker.TransformersWorker \
   --model_name gpt2 \
   --device cpu \
   --addr 0.0.0.0:8001 \
@@ -45,7 +45,7 @@ pulsing actor pulsing.actors.worker.TransformersWorker \
 #### vLLM Worker
 
 ```bash
-pulsing actor pulsing.actors.vllm.VllmWorker \
+pulsing actor pulsing.serving.vllm.VllmWorker \
   --model Qwen/Qwen2 \
   --addr 0.0.0.0:8002 \
   --seeds 127.0.0.1:8000 \
@@ -58,18 +58,18 @@ pulsing actor pulsing.actors.vllm.VllmWorker \
 
 ```bash
 # 启动多个不同名称的 worker
-pulsing actor pulsing.actors.worker.TransformersWorker \
+pulsing actor pulsing.serving.worker.TransformersWorker \
   --model_name gpt2 \
   --name worker-1 \
   --seeds 127.0.0.1:8000
 
-pulsing actor pulsing.actors.worker.TransformersWorker \
+pulsing actor pulsing.serving.worker.TransformersWorker \
   --model_name gpt2 \
   --name worker-2 \
   --seeds 127.0.0.1:8000
 
 # Router 路由到特定 worker 名称
-pulsing actor pulsing.actors.Router \
+pulsing actor pulsing.serving.Router \
   --worker_name worker-1 \
   --seeds 127.0.0.1:8000
 ```
@@ -87,7 +87,7 @@ CLI 会检查 Actor 类的构造函数签名,并自动从命令行参数中提
 
 Actor 类必须:
 - 可以从指定的模块路径导入
-- 继承自 `pulsing.actor.Actor`
+- 继承自 `pulsing.core.Actor`
 - 具有带命名参数的构造函数(CLI 会自动将参数匹配到构造函数参数)
 
 ---
@@ -209,10 +209,10 @@ pulsing bench gpt2 --url http://localhost:8080
 
 | 任务 | 命令 |
 |------|------|
-| 启动 router | `pulsing actor pulsing.actors.Router --addr 0.0.0.0:8000 --http_port 8080` |
-| 启动 worker | `pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --seeds ...` |
-| 启动多个 worker | `pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --name worker-1 --seeds ...` |
-| Router 指定 worker | `pulsing actor pulsing.actors.Router --worker_name worker-1 --seeds ...` |
+| 启动 router | `pulsing actor pulsing.serving.Router --addr 0.0.0.0:8000 --http_port 8080` |
+| 启动 worker | `pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --seeds ...` |
+| 启动多个 worker | `pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --name worker-1 --seeds ...` |
+| Router 指定 worker | `pulsing actor pulsing.serving.Router --worker_name worker-1 --seeds ...` |
 | 列出 actors | `pulsing inspect actors --endpoint 127.0.0.1:8000` |
 | 检查集群 | `pulsing inspect cluster --seeds 127.0.0.1:8000` |
 | 检查 actors | `pulsing inspect actors --seeds 127.0.0.1:8000 --top 10` |
diff --git a/docs/src/guide/queue.md b/docs/src/guide/queue.md
index 70dccf6e8..9a02cb08a 100644
--- a/docs/src/guide/queue.md
+++ b/docs/src/guide/queue.md
@@ -179,7 +179,7 @@ For persistent storage, use backends from [Persisting](https://github.com/DeepLi
 
 ```python
 import pulsing as pul
-from pulsing.queue import register_backend
+from pulsing.streaming import register_backend
 import persisting as pst
 
 # Register backends from Persisting
@@ -209,7 +209,7 @@ writer = await pul.queue.write(
 Implement the `StorageBackend` protocol and register:
 
 ```python
-from pulsing.queue import register_backend
+from pulsing.streaming import register_backend
 
 class MyBackend:
     async def put(self, record): ...
diff --git a/docs/src/guide/queue.zh.md b/docs/src/guide/queue.zh.md
index 823d5acfa..b4150ee90 100644
--- a/docs/src/guide/queue.zh.md
+++ b/docs/src/guide/queue.zh.md
@@ -179,7 +179,7 @@ writer = await pul.queue.write(
 
 ```python
 import pulsing as pul
-from pulsing.queue import register_backend
+from pulsing.streaming import register_backend
 import persisting as pst
 
 # 从 Persisting 注册后端
@@ -209,7 +209,7 @@ writer = await pul.queue.write(
 实现 `StorageBackend` 协议并注册:
 
 ```python
-from pulsing.queue import register_backend
+from pulsing.streaming import register_backend
 
 class MyBackend:
     async def put(self, record): ...
diff --git a/docs/src/guide/reliability.md b/docs/src/guide/reliability.md
index bfcde9630..c30808094 100644
--- a/docs/src/guide/reliability.md
+++ b/docs/src/guide/reliability.md
@@ -13,7 +13,7 @@ This page collects **practical reliability rules** for building production syste
 Prefer explicit timeouts on `ask`:
 
 ```python
-from pulsing.actor import ask_with_timeout
+from pulsing.core import ask_with_timeout
 
 result = await ask_with_timeout(ref, {"op": "compute"}, timeout=10.0)
 ```
diff --git a/docs/src/guide/reliability.zh.md b/docs/src/guide/reliability.zh.md
index f6b1d5c36..5104314f4 100644
--- a/docs/src/guide/reliability.zh.md
+++ b/docs/src/guide/reliability.zh.md
@@ -13,7 +13,7 @@
 对 `ask` 建议显式加超时:
 
 ```python
-from pulsing.actor import ask_with_timeout
+from pulsing.core import ask_with_timeout
 
 result = await ask_with_timeout(ref, {"op": "compute"}, timeout=10.0)
 ```
diff --git a/docs/src/guide/remote_actors.md b/docs/src/guide/remote_actors.md
index 645bd56d2..400c6973f 100644
--- a/docs/src/guide/remote_actors.md
+++ b/docs/src/guide/remote_actors.md
@@ -172,7 +172,7 @@ except PulsingRuntimeError as e:
 Use timeouts for remote calls to avoid indefinite waits:
 
 ```python
-from pulsing.actor import ask_with_timeout
+from pulsing.core import ask_with_timeout
 
 try:
     response = await ask_with_timeout(remote_ref, msg, timeout=10.0)
diff --git a/docs/src/guide/remote_actors.zh.md b/docs/src/guide/remote_actors.zh.md
index 86b753ae5..3fbba4584 100644
--- a/docs/src/guide/remote_actors.zh.md
+++ b/docs/src/guide/remote_actors.zh.md
@@ -172,7 +172,7 @@ except PulsingRuntimeError as e:
 为远程调用使用超时,避免无限等待:
 
 ```python
-from pulsing.actor import ask_with_timeout
+from pulsing.core import ask_with_timeout
 
 try:
     response = await ask_with_timeout(remote_ref, msg, timeout=10.0)
diff --git a/docs/src/guide/semantics.md b/docs/src/guide/semantics.md
index 8ad889f37..2be48b8b4 100644
--- a/docs/src/guide/semantics.md
+++ b/docs/src/guide/semantics.md
@@ -5,7 +5,7 @@ This page defines what Pulsing **guarantees** (and does **not** guarantee) for:
 - Actor execution
 - Remote messaging (`ask` / `tell`)
 - Streaming responses (`StreamMessage`)
-- Distributed memory queue (`pulsing.queue`)
+- Distributed memory queue (`pulsing.streaming`)
 
 The goal is to make it safe to build production systems without assuming stronger semantics than Pulsing actually provides.
 
@@ -120,7 +120,7 @@ Recommendation:
 
 - Make each chunk independently meaningful (include `seq` / offsets / ids) so consumers can resume or deduplicate if needed.
 
-## Queue semantics (`pulsing.queue`)
+## Queue semantics (`pulsing.streaming`)
 
 The distributed queue is **sharded** into buckets:
 
diff --git a/docs/src/guide/semantics.zh.md b/docs/src/guide/semantics.zh.md
index 4cca9cb3f..378893920 100644
--- a/docs/src/guide/semantics.zh.md
+++ b/docs/src/guide/semantics.zh.md
@@ -5,7 +5,7 @@
 - Actor 执行语义
 - 远程消息(`ask` / `tell`)
 - 流式响应(`StreamMessage`)
-- 分布式内存队列(`pulsing.queue`)
+- 分布式内存队列(`pulsing.streaming`)
 
 ## TL;DR(快速结论)
 
@@ -114,7 +114,7 @@ async for chunk in response.stream_reader():
 - 流式 chunk 是 best-effort,可能出现"部分输出后中断"。
 - 建议每个 chunk 带上 `seq` / offset / id,让消费端可恢复/去重。
 
-## 队列语义(`pulsing.queue`)
+## 队列语义(`pulsing.streaming`)
 
 队列按 bucket 分片:
 
diff --git a/docs/src/guide/style.md b/docs/src/guide/style.md
index 3095c937f..046652375 100644
--- a/docs/src/guide/style.md
+++ b/docs/src/guide/style.md
@@ -17,9 +17,9 @@ This page defines terminology and style conventions for Pulsing documentation an
 
 | Component | CLI Actor Class Path | Description |
 |-----------|---------------------|-------------|
-| Router | `pulsing.actors.Router` | OpenAI-compatible HTTP router |
-| TransformersWorker | `pulsing.actors.TransformersWorker` | Transformers inference worker |
-| VllmWorker | `pulsing.actors.VllmWorker` | vLLM inference worker |
+| Router | `pulsing.serving.Router` | OpenAI-compatible HTTP router |
+| TransformersWorker | `pulsing.serving.TransformersWorker` | Transformers inference worker |
+| VllmWorker | `pulsing.serving.VllmWorker` | vLLM inference worker |
 
 **Note**: When documentation mentions "Router", it typically refers to the HTTP routing component for LLM inference services. Example code requiring task dispatch logic should use names like `Dispatcher` to avoid confusion.
 
@@ -31,8 +31,8 @@ This page defines terminology and style conventions for Pulsing documentation an
 pulsing actor  [options]
 
 # Examples
-pulsing actor pulsing.actors.Router --http_port 8080 --model_name my-llm
-pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --device cpu
+pulsing actor pulsing.serving.Router --http_port 8080 --model_name my-llm
+pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --device cpu
 ```
 
 ### Inspect Commands (Observer Mode)
diff --git a/docs/src/guide/style.zh.md b/docs/src/guide/style.zh.md
index b8ec752cf..694b87e29 100644
--- a/docs/src/guide/style.zh.md
+++ b/docs/src/guide/style.zh.md
@@ -17,9 +17,9 @@
 
 | 组件 | CLI actor 类路径 | 说明 |
 |------|------------------|------|
-| Router | `pulsing.actors.Router` | OpenAI 兼容 HTTP 路由 |
-| TransformersWorker | `pulsing.actors.TransformersWorker` | Transformers 推理 Worker |
-| VllmWorker | `pulsing.actors.VllmWorker` | vLLM 推理 Worker |
+| Router | `pulsing.serving.Router` | OpenAI 兼容 HTTP 路由 |
+| TransformersWorker | `pulsing.serving.TransformersWorker` | Transformers 推理 Worker |
+| VllmWorker | `pulsing.serving.VllmWorker` | vLLM 推理 Worker |
 
 **注意**:文档中提到"Router"时,通常指 LLM 推理服务的 HTTP 路由组件。示例代码中若需要任务分发逻辑,应使用 `Dispatcher` 等名称以避免混淆。
 
@@ -31,8 +31,8 @@
 pulsing actor <完整类路径> [选项]
 
 # 示例
-pulsing actor pulsing.actors.Router --http_port 8080 --model_name my-llm
-pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --device cpu
+pulsing actor pulsing.serving.Router --http_port 8080 --model_name my-llm
+pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --device cpu
 ```
 
 ### 检查命令(观察者模式)
diff --git a/docs/src/quickstart/agent.md b/docs/src/quickstart/agent.md
index 6481b7a46..ce7b65a6f 100644
--- a/docs/src/quickstart/agent.md
+++ b/docs/src/quickstart/agent.md
@@ -61,7 +61,7 @@ agent = AssistantAgent("assistant", model_client=model)
 Replace `SingleThreadedAgentRuntime` with `PulsingRuntime`:
 
 ```python
-from pulsing.autogen import PulsingRuntime
+from pulsing.integrations.autogen import PulsingRuntime
 
 # Single process (default)
 runtime = PulsingRuntime()
@@ -90,7 +90,7 @@ import asyncio
 from autogen_agentchat.agents import AssistantAgent
 from autogen_core import AgentId
 from autogen_ext.models.openai import OpenAIChatCompletionClient
-from pulsing.autogen import PulsingRuntime
+from pulsing.integrations.autogen import PulsingRuntime
 
 async def main():
     model = OpenAIChatCompletionClient(model="gpt-4o-mini")
@@ -136,7 +136,7 @@ app = graph.compile()
 ### Step 2: Wrap with Pulsing
 
 ```python
-from pulsing.langgraph import with_pulsing
+from pulsing.integrations.langgraph import with_pulsing
 
 distributed_app = with_pulsing(
     app,
diff --git a/docs/src/quickstart/agent.zh.md b/docs/src/quickstart/agent.zh.md
index a542f0680..45a6cbd42 100644
--- a/docs/src/quickstart/agent.zh.md
+++ b/docs/src/quickstart/agent.zh.md
@@ -61,7 +61,7 @@ agent = AssistantAgent("assistant", model_client=model)
 用 `PulsingRuntime` 替换 `SingleThreadedAgentRuntime`:
 
 ```python
-from pulsing.autogen import PulsingRuntime
+from pulsing.integrations.autogen import PulsingRuntime
 
 # 单进程(默认)
 runtime = PulsingRuntime()
@@ -90,7 +90,7 @@ import asyncio
 from autogen_agentchat.agents import AssistantAgent
 from autogen_core import AgentId
 from autogen_ext.models.openai import OpenAIChatCompletionClient
-from pulsing.autogen import PulsingRuntime
+from pulsing.integrations.autogen import PulsingRuntime
 
 async def main():
     model = OpenAIChatCompletionClient(model="gpt-4o-mini")
@@ -136,7 +136,7 @@ app = graph.compile()
 ### 步骤 2:用 Pulsing 包装
 
 ```python
-from pulsing.langgraph import with_pulsing
+from pulsing.integrations.langgraph import with_pulsing
 
 distributed_app = with_pulsing(
     app,
diff --git a/docs/src/quickstart/cluster_networking.md b/docs/src/quickstart/cluster_networking.md
index faa7c548f..256b755e9 100644
--- a/docs/src/quickstart/cluster_networking.md
+++ b/docs/src/quickstart/cluster_networking.md
@@ -127,7 +127,7 @@ You can also use `SystemConfig.with_head_node()` / `.with_head_addr(addr)` and p
 
 ```python
 import ray
-from pulsing.ray import init_in_ray
+from pulsing.integrations.ray import init_in_ray
 
 # Recommended: hook so every worker runs init_in_ray at startup
 ray.init(runtime_env={"worker_process_setup_hook": init_in_ray})
@@ -147,14 +147,14 @@ actor = await MyActor.spawn(name="my_actor")
 **Async** (e.g. async Ray actors):
 
 ```python
-from pulsing.ray import async_init_in_ray
+from pulsing.integrations.ray import async_init_in_ray
 await async_init_in_ray()
 ```
 
 **Cleanup** (e.g. tests):
 
 ```python
-from pulsing.ray import cleanup
+from pulsing.integrations.ray import cleanup
 cleanup()
 ```
 
diff --git a/docs/src/quickstart/cluster_networking.zh.md b/docs/src/quickstart/cluster_networking.zh.md
index c121b6fe6..d3cfe5174 100644
--- a/docs/src/quickstart/cluster_networking.zh.md
+++ b/docs/src/quickstart/cluster_networking.zh.md
@@ -127,7 +127,7 @@ await pul.init(addr="0.0.0.0:8001", head_addr="192.168.1.10:8000")
 
 ```python
 import ray
-from pulsing.ray import init_in_ray
+from pulsing.integrations.ray import init_in_ray
 
 # 推荐:用 hook 让每个 worker 启动时执行 init_in_ray
 ray.init(runtime_env={"worker_process_setup_hook": init_in_ray})
@@ -147,14 +147,14 @@ actor = await MyActor.spawn(name="my_actor")
 **异步**(如 async Ray actor):
 
 ```python
-from pulsing.ray import async_init_in_ray
+from pulsing.integrations.ray import async_init_in_ray
 await async_init_in_ray()
 ```
 
 **清理**(如测试):
 
 ```python
-from pulsing.ray import cleanup
+from pulsing.integrations.ray import cleanup
 cleanup()
 ```
 
diff --git a/docs/src/quickstart/llm_inference.md b/docs/src/quickstart/llm_inference.md
index 48115d8c3..2b2536ba9 100644
--- a/docs/src/quickstart/llm_inference.md
+++ b/docs/src/quickstart/llm_inference.md
@@ -53,7 +53,7 @@ Choose a backend:
 Open **Terminal A**:
 
 ```bash
-pulsing actor pulsing.actors.Router \
+pulsing actor pulsing.serving.Router \
   --addr 0.0.0.0:8000 \
   --http_port 8080 \
   --model_name my-llm
@@ -74,7 +74,7 @@ Open **Terminal B**:
 === "Transformers (CPU)"
 
     ```bash
-    pulsing actor pulsing.actors.TransformersWorker \
+    pulsing actor pulsing.serving.TransformersWorker \
       --model_name gpt2 \
       --device cpu \
       --addr 0.0.0.0:8001 \
@@ -84,7 +84,7 @@ Open **Terminal B**:
 === "vLLM (GPU)"
 
     ```bash
-    pulsing actor pulsing.actors.VllmWorker \
+    pulsing actor pulsing.serving.VllmWorker \
       --model Qwen/Qwen2.5-0.5B \
       --addr 0.0.0.0:8002 \
       --seeds 127.0.0.1:8000
@@ -145,10 +145,10 @@ Add more workers to handle more load:
 
 ```bash
 # Terminal C
-pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --addr 0.0.0.0:8003 --seeds 127.0.0.1:8000
+pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --addr 0.0.0.0:8003 --seeds 127.0.0.1:8000
 
 # Terminal D
-pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --addr 0.0.0.0:8004 --seeds 127.0.0.1:8000
+pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --addr 0.0.0.0:8004 --seeds 127.0.0.1:8000
 ```
 
 The Router automatically load-balances across all workers.
diff --git a/docs/src/quickstart/llm_inference.zh.md b/docs/src/quickstart/llm_inference.zh.md
index 42f9a715d..f38a56efb 100644
--- a/docs/src/quickstart/llm_inference.zh.md
+++ b/docs/src/quickstart/llm_inference.zh.md
@@ -53,7 +53,7 @@ pip install pulsing
 打开**终端 A**:
 
 ```bash
-pulsing actor pulsing.actors.Router \
+pulsing actor pulsing.serving.Router \
   --addr 0.0.0.0:8000 \
   --http_port 8080 \
   --model_name my-llm
@@ -74,7 +74,7 @@ pulsing actor pulsing.actors.Router \
 === "Transformers (CPU)"
 
     ```bash
-    pulsing actor pulsing.actors.TransformersWorker \
+    pulsing actor pulsing.serving.TransformersWorker \
       --model_name gpt2 \
       --device cpu \
       --addr 0.0.0.0:8001 \
@@ -84,7 +84,7 @@ pulsing actor pulsing.actors.Router \
 === "vLLM (GPU)"
 
     ```bash
-    pulsing actor pulsing.actors.VllmWorker \
+    pulsing actor pulsing.serving.VllmWorker \
       --model Qwen/Qwen2.5-0.5B \
       --addr 0.0.0.0:8002 \
       --seeds 127.0.0.1:8000
@@ -145,10 +145,10 @@ curl -N http://localhost:8080/v1/chat/completions \
 
 ```bash
 # 终端 C
-pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --addr 0.0.0.0:8003 --seeds 127.0.0.1:8000
+pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --addr 0.0.0.0:8003 --seeds 127.0.0.1:8000
 
 # 终端 D
-pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --addr 0.0.0.0:8004 --seeds 127.0.0.1:8000
+pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --addr 0.0.0.0:8004 --seeds 127.0.0.1:8000
 ```
 
 Router 会自动在所有 Worker 间负载均衡。

From a6be6b8d9f3ce43805e7695a3fc93f197a06292e Mon Sep 17 00:00:00 2001
From: Reiase 
Date: Thu, 19 Feb 2026 19:41:12 +0800
Subject: [PATCH 09/15] Add optional zerocopy descriptor protocol and enhance
 streaming capabilities

- Introduced an optional zerocopy descriptor protocol to bypass pickle serialization for eligible Python objects, improving performance for large payloads.
- Implemented automatic stream transfer for large payloads exceeding a specified threshold, allowing for efficient data handling in chunks.
- Updated the `ZeroCopyDescriptor` class to manage metadata and buffer information, facilitating seamless integration with existing APIs.
- Enhanced the `StorageBackend` interface with new tensor-native methods for better data management.
- Added comprehensive tests to validate the new zerocopy functionality and ensure data integrity during transmission.
- Updated documentation to reflect the new features and usage guidelines for the zerocopy protocol, improving clarity for users.
---
 Cargo.lock                             |   1 +
 crates/pulsing-py/Cargo.toml           |   1 +
 crates/pulsing-py/src/actor.rs         | 606 ++++++++++++++++++++-----
 llms.binding.md                        |  46 ++
 python/pulsing/core/__init__.py        |   2 +
 python/pulsing/streaming/backend.py    |  72 +++
 python/pulsing/streaming/storage.py    | 121 +++++
 tests/python/test_queue_backends.py    |  18 +
 tests/python/test_sealed_message.py    | 161 +++++++
 tests/python/test_zerocopy_protocol.py | 117 +++++
 10 files changed, 1042 insertions(+), 103 deletions(-)
 create mode 100644 tests/python/test_zerocopy_protocol.py

diff --git a/Cargo.lock b/Cargo.lock
index 7ab3c760e..97362d543 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1905,6 +1905,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "async-trait",
+ "bincode",
  "crossbeam-channel",
  "futures",
  "pulsing-actor",
diff --git a/crates/pulsing-py/Cargo.toml b/crates/pulsing-py/Cargo.toml
index ebcf875f5..4bf0e8f48 100644
--- a/crates/pulsing-py/Cargo.toml
+++ b/crates/pulsing-py/Cargo.toml
@@ -21,6 +21,7 @@ async-trait = { workspace = true }
 futures = { workspace = true }
 serde = { workspace = true }
 serde_json = { workspace = true }
+bincode = { workspace = true }
 thiserror = { workspace = true }
 tokio = { workspace = true }
 tracing = { workspace = true }
diff --git a/crates/pulsing-py/src/actor.rs b/crates/pulsing-py/src/actor.rs
index d556b0be3..94f90fc1e 100644
--- a/crates/pulsing-py/src/actor.rs
+++ b/crates/pulsing-py/src/actor.rs
@@ -7,6 +7,8 @@ use pulsing_actor::supervision::{BackoffStrategy, RestartPolicy, SupervisionSpec
 use pyo3::exceptions::{PyRuntimeError, PyStopAsyncIteration, PyValueError};
 use pyo3::prelude::*;
 use pyo3::types::PyBytes;
+use serde::{Deserialize, Serialize};
+use std::cmp::min;
 use std::net::SocketAddr;
 use std::sync::Arc;
 use std::sync::Mutex as StdMutex;
@@ -19,6 +21,43 @@ use crate::python_executor::python_executor;
 
 /// Special message type identifier for pickle-encoded Python objects
 const SEALED_PY_MSG_TYPE: &str = "__sealed_py_message__";
+/// Special message type identifier for zerocopy descriptor payloads (small, single message)
+const SEALED_ZEROCOPY_MSG_TYPE: &str = "__sealed_zerocopy_message__";
+/// Stream frame: descriptor header (metadata only, no bulk data)
+const ZC_DESCRIPTOR_MSG_TYPE: &str = "__zc_descriptor__";
+/// Stream frame: raw data chunk
+const ZC_CHUNK_MSG_TYPE: &str = "__zc_chunk__";
+
+/// Zerocopy metadata header — the single wire format for both single-message and stream paths.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct ZeroCopyDescriptorHeader {
+    version: u32,
+    buffer_count: usize,
+    buffer_lengths: Vec,
+    dtype: Option,
+    shape: Option>,
+    strides: Option>,
+    transport: Option,
+    checksum: Option,
+}
+
+fn zerocopy_chunk_bytes() -> usize {
+    const DEFAULT: usize = 1024 * 1024;
+    const MIN: usize = 4 * 1024;
+    std::env::var("PULSING_ZEROCOPY_CHUNK_BYTES")
+        .ok()
+        .and_then(|v| v.parse::().ok())
+        .map(|v| v.max(MIN))
+        .unwrap_or(DEFAULT)
+}
+
+fn zerocopy_stream_threshold() -> usize {
+    const DEFAULT: usize = 64 * 1024;
+    std::env::var("PULSING_ZEROCOPY_STREAM_THRESHOLD")
+        .ok()
+        .and_then(|v| v.parse::().ok())
+        .unwrap_or(DEFAULT)
+}
 
 /// Convert PulsingError to Python exception (used for actor system APIs that return Result<_, PulsingError>).
 fn to_pyerr(err: pulsing_actor::error::PulsingError) -> PyErr {
@@ -404,6 +443,167 @@ impl PySealedMessage {
     }
 }
 
+/// Descriptor object for optional zerocopy payload transport.
+#[pyclass(name = "ZeroCopyDescriptor")]
+#[derive(Clone)]
+pub struct PyZeroCopyDescriptor {
+    #[pyo3(get)]
+    version: u32,
+    #[pyo3(get)]
+    buffers: Vec,
+    #[pyo3(get)]
+    dtype: Option,
+    #[pyo3(get)]
+    shape: Option>,
+    #[pyo3(get)]
+    strides: Option>,
+    #[pyo3(get)]
+    transport: Option,
+    #[pyo3(get)]
+    checksum: Option,
+}
+
+/// Validate that a Python object exposes a contiguous buffer.
+/// If it's not directly extractable as &[u8] (e.g. memoryview), convert via bytes().
+fn ensure_contiguous_buffer(py: Python<'_>, item: &Bound<'_, pyo3::PyAny>) -> PyResult {
+    if item.extract::<&[u8]>().is_ok() {
+        return Ok(item.clone().unbind());
+    }
+    // Try converting via bytes() for memoryview and other buffer-protocol objects
+    let builtins = py.import("builtins")?;
+    let bytes_obj = builtins.getattr("bytes")?.call1((item,)).map_err(|_| {
+        PyValueError::new_err(
+            "ZeroCopyDescriptor.buffers items must expose a contiguous Python buffer (bytes/bytearray/memoryview/tensor)",
+        )
+    })?;
+    // Verify the result is extractable
+    bytes_obj.extract::<&[u8]>().map_err(|_| {
+        PyValueError::new_err(
+            "ZeroCopyDescriptor.buffers items must expose a contiguous Python buffer (bytes/bytearray/memoryview/tensor)",
+        )
+    })?;
+    Ok(bytes_obj.unbind())
+}
+
+impl PyZeroCopyDescriptor {
+    /// Total byte size of all buffers.
+    fn total_buffer_bytes(&self, py: Python<'_>) -> usize {
+        self.buffers
+            .iter()
+            .filter_map(|buf_obj| buf_obj.bind(py).extract::<&[u8]>().ok().map(|s| s.len()))
+            .sum()
+    }
+
+    /// Build a descriptor header (metadata only, no data).
+    fn to_header(&self, py: Python<'_>) -> ZeroCopyDescriptorHeader {
+        ZeroCopyDescriptorHeader {
+            version: self.version,
+            buffer_count: self.buffers.len(),
+            buffer_lengths: self
+                .buffers
+                .iter()
+                .filter_map(|b| b.bind(py).extract::<&[u8]>().ok().map(|s| s.len()))
+                .collect(),
+            dtype: self.dtype.clone(),
+            shape: self.shape.clone(),
+            strides: self.strides.clone(),
+            transport: self.transport.clone(),
+            checksum: self.checksum.clone(),
+        }
+    }
+
+    /// Serialize for single-message path: [4-byte header_len LE] ++ header_bytes ++ raw_data.
+    fn serialize_single(&self, py: Python<'_>) -> PyResult> {
+        let header = self.to_header(py);
+        let header_bytes = bincode::serialize(&header).map_err(to_py_value_err)?;
+        let header_len = header_bytes.len() as u32;
+        let total_data: usize = header.buffer_lengths.iter().sum();
+        let mut out = Vec::with_capacity(4 + header_bytes.len() + total_data);
+        out.extend_from_slice(&header_len.to_le_bytes());
+        out.extend_from_slice(&header_bytes);
+        for buf_obj in &self.buffers {
+            let bound = buf_obj.bind(py);
+            let data = bound.extract::<&[u8]>()?;
+            out.extend_from_slice(data);
+        }
+        Ok(out)
+    }
+
+    /// Reconstruct from header + raw buffer data (shared by single and stream paths).
+    fn from_wire(
+        py: Python<'_>,
+        header: ZeroCopyDescriptorHeader,
+        raw_buffers: Vec>,
+    ) -> Self {
+        Self {
+            version: header.version,
+            buffers: raw_buffers
+                .into_iter()
+                .map(|b| PyBytes::new(py, &b).into_any().unbind())
+                .collect(),
+            dtype: header.dtype,
+            shape: header.shape,
+            strides: header.strides,
+            transport: header.transport,
+            checksum: header.checksum,
+        }
+    }
+}
+
+#[pymethods]
+impl PyZeroCopyDescriptor {
+    #[new]
+    #[pyo3(signature = (
+        buffers,
+        *,
+        dtype=None,
+        shape=None,
+        strides=None,
+        transport=None,
+        checksum=None,
+        version=1
+    ))]
+    #[allow(clippy::too_many_arguments)]
+    fn new(
+        py: Python<'_>,
+        buffers: Vec,
+        dtype: Option,
+        shape: Option>,
+        strides: Option>,
+        transport: Option,
+        checksum: Option,
+        version: u32,
+    ) -> PyResult {
+        if buffers.is_empty() {
+            return Err(PyValueError::new_err(
+                "ZeroCopyDescriptor requires at least one buffer",
+            ));
+        }
+        let normalized: Vec = buffers
+            .into_iter()
+            .map(|item| ensure_contiguous_buffer(py, item.bind(py)))
+            .collect::>>()?;
+        Ok(Self {
+            version,
+            buffers: normalized,
+            dtype,
+            shape,
+            strides,
+            transport,
+            checksum,
+        })
+    }
+
+    fn __repr__(&self) -> String {
+        format!(
+            "ZeroCopyDescriptor(version={}, buffers={}, transport={:?})",
+            self.version,
+            self.buffers.len(),
+            self.transport
+        )
+    }
+}
+
 /// Helper function to pickle a Python object in Rust
 fn pickle_object(py: Python<'_>, obj: &PyObject) -> PyResult> {
     let pickle = py.import("pickle")?;
@@ -412,6 +612,272 @@ fn pickle_object(py: Python<'_>, obj: &PyObject) -> PyResult> {
     Ok(bytes.as_bytes().to_vec())
 }
 
+/// Try to extract a `PyZeroCopyDescriptor` from a Python object via `__zerocopy__(ctx)`.
+///
+/// Returns `Ok(None)` if the object does not implement the protocol.
+fn try_zerocopy_descriptor<'py>(
+    py: Python<'py>,
+    obj: &PyObject,
+) -> PyResult>> {
+    let bound = obj.bind(py);
+    let zc_method = match bound.getattr("__zerocopy__") {
+        Ok(m) => m,
+        Err(_) => return Ok(None),
+    };
+    if !zc_method.is_callable() {
+        return Ok(None);
+    }
+    let descriptor = zc_method.call1((py.None(),))?;
+    if !descriptor.is_instance_of::() {
+        return Err(PyValueError::new_err(
+            "__zerocopy__ must return ZeroCopyDescriptor",
+        ));
+    }
+    Ok(Some(descriptor.extract()?))
+}
+
+/// Parse single-message zerocopy payload: [4-byte header_len LE] ++ header ++ raw_data.
+fn parse_zerocopy_single(py: Python<'_>, data: &[u8]) -> PyResult {
+    if data.len() < 4 {
+        return Err(PyValueError::new_err("Zerocopy payload too short"));
+    }
+    let header_len = u32::from_le_bytes(data[..4].try_into().unwrap()) as usize;
+    if data.len() < 4 + header_len {
+        return Err(PyValueError::new_err("Zerocopy payload truncated"));
+    }
+    let header: ZeroCopyDescriptorHeader =
+        bincode::deserialize(&data[4..4 + header_len]).map_err(to_py_value_err)?;
+    let mut offset = 4 + header_len;
+    let raw_buffers: Vec> = header
+        .buffer_lengths
+        .iter()
+        .map(|&len| {
+            let buf = data[offset..offset + len].to_vec();
+            offset += len;
+            buf
+        })
+        .collect();
+    let desc = PyZeroCopyDescriptor::from_wire(py, header, raw_buffers);
+    let obj = Py::new(py, desc)?;
+    Ok(obj.into_pyobject(py)?.into_any().unbind())
+}
+
+fn zerocopy_mode() -> String {
+    std::env::var("PULSING_ZEROCOPY")
+        .unwrap_or_else(|_| "auto".to_string())
+        .to_ascii_lowercase()
+}
+
+/// Build a `Message::Stream` for a large zerocopy payload: descriptor header + data chunks.
+fn encode_zerocopy_stream(py: Python<'_>, zc: &PyZeroCopyDescriptor) -> PyResult {
+    let chunk_len = zerocopy_chunk_bytes();
+    let header = zc.to_header(py);
+    let header_bytes = bincode::serialize(&header).map_err(to_py_value_err)?;
+
+    let (tx, rx) = mpsc::channel::>(32);
+
+    // Collect buffer data now (we hold the GIL) to avoid crossing thread boundary with PyObject
+    let buffer_data: Vec> = zc
+        .buffers
+        .iter()
+        .map(|buf_obj| {
+            let bound = buf_obj.bind(py);
+            let data = bound.extract::<&[u8]>()?;
+            Ok(data.to_vec())
+        })
+        .collect::>>()?;
+
+    std::thread::spawn(move || {
+        if tx
+            .blocking_send(Ok(Message::single(ZC_DESCRIPTOR_MSG_TYPE, header_bytes)))
+            .is_err()
+        {
+            return;
+        }
+        for buf in &buffer_data {
+            let mut offset = 0;
+            while offset < buf.len() {
+                let end = min(offset + chunk_len, buf.len());
+                let chunk = buf[offset..end].to_vec();
+                if tx
+                    .blocking_send(Ok(Message::single(ZC_CHUNK_MSG_TYPE, chunk)))
+                    .is_err()
+                {
+                    return;
+                }
+                offset = end;
+            }
+        }
+    });
+
+    Ok(Message::from_channel(ZC_DESCRIPTOR_MSG_TYPE, rx))
+}
+
+/// Reassemble a zerocopy stream (descriptor header already parsed).
+/// Reads remaining data chunks from the stream and fills pre-allocated buffers.
+async fn reassemble_zerocopy_stream(
+    header: ZeroCopyDescriptorHeader,
+    stream: &mut std::pin::Pin<
+        Box> + Send>,
+    >,
+) -> pulsing_actor::error::Result<(ZeroCopyDescriptorHeader, Vec>)> {
+    let mut raw_buffers: Vec> = header
+        .buffer_lengths
+        .iter()
+        .map(|&len| Vec::with_capacity(len))
+        .collect();
+    let total_expected: usize = header.buffer_lengths.iter().sum();
+
+    let mut buf_idx = 0;
+    let mut received = 0usize;
+
+    while received < total_expected {
+        let frame = stream.next().await.ok_or_else(|| {
+            pulsing_actor::error::PulsingError::from(pulsing_actor::error::RuntimeError::Other(
+                "Zerocopy stream ended before all data received".into(),
+            ))
+        })??;
+
+        match frame {
+            Message::Single {
+                ref msg_type,
+                ref data,
+            } if msg_type == ZC_CHUNK_MSG_TYPE => {
+                let remaining_in_buf = header.buffer_lengths[buf_idx] - raw_buffers[buf_idx].len();
+                if data.len() <= remaining_in_buf {
+                    raw_buffers[buf_idx].extend_from_slice(data);
+                } else {
+                    // Chunk spans buffer boundary: split across buffers
+                    let first_part = &data[..remaining_in_buf];
+                    raw_buffers[buf_idx].extend_from_slice(first_part);
+                    let mut rest = &data[remaining_in_buf..];
+                    buf_idx += 1;
+                    while !rest.is_empty() && buf_idx < raw_buffers.len() {
+                        let can_take = min(
+                            rest.len(),
+                            header.buffer_lengths[buf_idx] - raw_buffers[buf_idx].len(),
+                        );
+                        raw_buffers[buf_idx].extend_from_slice(&rest[..can_take]);
+                        rest = &rest[can_take..];
+                        if raw_buffers[buf_idx].len() == header.buffer_lengths[buf_idx] {
+                            buf_idx += 1;
+                        }
+                    }
+                }
+                received += data.len();
+                if buf_idx < raw_buffers.len()
+                    && raw_buffers[buf_idx].len() == header.buffer_lengths[buf_idx]
+                {
+                    buf_idx += 1;
+                }
+            }
+            _ => {
+                return Err(pulsing_actor::error::PulsingError::from(
+                    pulsing_actor::error::RuntimeError::Other(format!(
+                        "Unexpected frame in zerocopy stream: {:?}",
+                        frame.msg_type()
+                    )),
+                ));
+            }
+        }
+    }
+
+    Ok((header, raw_buffers))
+}
+
+/// Encode a Python object into a `Message`.
+///
+/// Small zerocopy payloads → `Message::Single`; large ones → `Message::Stream`
+/// (descriptor-first + chunked data). Non-zerocopy objects → pickle.
+fn encode_python_payload(py: Python<'_>, obj: &PyObject) -> PyResult {
+    match zerocopy_mode().as_str() {
+        "off" => Ok(Message::single(SEALED_PY_MSG_TYPE, pickle_object(py, obj)?)),
+        "force" => {
+            let zc = try_zerocopy_descriptor(py, obj)?.ok_or_else(|| {
+                PyValueError::new_err(
+                    "PULSING_ZEROCOPY=force but object does not provide __zerocopy__",
+                )
+            })?;
+            encode_zerocopy_message(py, &zc)
+        }
+        _ => match try_zerocopy_descriptor(py, obj)? {
+            Some(zc) => encode_zerocopy_message(py, &zc),
+            None => Ok(Message::single(SEALED_PY_MSG_TYPE, pickle_object(py, obj)?)),
+        },
+    }
+}
+
+/// Decide between single-message or stream encoding based on total buffer size.
+fn encode_zerocopy_message(
+    py: Python<'_>,
+    zc: &PyRef<'_, PyZeroCopyDescriptor>,
+) -> PyResult {
+    let total = zc.total_buffer_bytes(py);
+    if total >= zerocopy_stream_threshold() {
+        encode_zerocopy_stream(py, zc)
+    } else {
+        let bytes = zc.serialize_single(py)?;
+        Ok(Message::single(SEALED_ZEROCOPY_MSG_TYPE, bytes))
+    }
+}
+
+/// Unified decoder: converts any `Message` (pickle / zerocopy-single / zerocopy-stream / other)
+/// into a Python object.
+async fn decode_message_to_pyobject(msg: Message) -> PyResult {
+    match msg {
+        Message::Single {
+            ref msg_type,
+            ref data,
+        } if msg_type == SEALED_PY_MSG_TYPE => Python::with_gil(|py| unpickle_object(py, data)),
+        Message::Single {
+            ref msg_type,
+            ref data,
+        } if msg_type == SEALED_ZEROCOPY_MSG_TYPE => {
+            Python::with_gil(|py| parse_zerocopy_single(py, data))
+        }
+        Message::Stream {
+            ref default_msg_type,
+            ..
+        } if default_msg_type == ZC_DESCRIPTOR_MSG_TYPE => {
+            let Message::Stream { mut stream, .. } = msg else {
+                unreachable!()
+            };
+            let first = stream
+                .next()
+                .await
+                .ok_or_else(|| PyRuntimeError::new_err("Empty zerocopy stream"))?
+                .map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
+            let header_data = match first {
+                Message::Single {
+                    ref msg_type,
+                    ref data,
+                } if msg_type == ZC_DESCRIPTOR_MSG_TYPE => data.clone(),
+                _ => {
+                    return Err(PyRuntimeError::new_err(
+                        "First frame of zerocopy stream must be descriptor",
+                    ));
+                }
+            };
+            let header: ZeroCopyDescriptorHeader =
+                bincode::deserialize(&header_data).map_err(to_py_value_err)?;
+            let (header, raw_buffers) = reassemble_zerocopy_stream(header, &mut stream)
+                .await
+                .map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
+            Python::with_gil(|py| {
+                let desc = PyZeroCopyDescriptor::from_wire(py, header, raw_buffers);
+                let obj = Py::new(py, desc)?;
+                Ok(obj.into_pyobject(py)?.into_any().unbind())
+            })
+        }
+        _ => Python::with_gil(|py| {
+            Ok(PyMessage::from_rust_message(msg)
+                .into_pyobject(py)?
+                .into_any()
+                .unbind())
+        }),
+    }
+}
+
 /// Helper function to unpickle bytes back to a Python object
 fn unpickle_object(py: Python<'_>, data: &[u8]) -> PyResult {
     let pickle = py.import("pickle")?;
@@ -446,22 +912,19 @@ impl PyStreamReader {
             let mut guard = stream.lock().await;
             if let Some(ref mut s) = *guard {
                 match s.next().await {
-                    Some(Ok(msg)) => Python::with_gil(|py| {
-                        // Auto unpickle if it's a sealed Python message
-                        match &msg {
-                            Message::Single { msg_type, data }
-                                if msg_type == SEALED_PY_MSG_TYPE =>
-                            {
-                                unpickle_object(py, data)
-                            }
-                            _ => {
-                                // Return as PyMessage for JSON/other types
-                                Ok(PyMessage::from_rust_message(msg)
-                                    .into_pyobject(py)?
-                                    .into_any()
-                                    .unbind())
-                            }
+                    Some(Ok(msg)) => Python::with_gil(|py| match &msg {
+                        Message::Single { msg_type, data } if msg_type == SEALED_PY_MSG_TYPE => {
+                            unpickle_object(py, data)
+                        }
+                        Message::Single { msg_type, data }
+                            if msg_type == SEALED_ZEROCOPY_MSG_TYPE =>
+                        {
+                            parse_zerocopy_single(py, data)
                         }
+                        _ => Ok(PyMessage::from_rust_message(msg)
+                            .into_pyobject(py)?
+                            .into_any()
+                            .unbind()),
                     }),
                     Some(Err(e)) => Err(PyRuntimeError::new_err(e.to_string())),
                     None => {
@@ -510,13 +973,12 @@ impl PyStreamWriter {
     /// This is the recommended method for Python-to-Python streaming.
     /// Objects are automatically pickled and will be unpickled on the reader side.
     fn write<'py>(&self, py: Python<'py>, obj: PyObject) -> PyResult> {
-        let pickled = pickle_object(py, &obj)?;
+        let msg = encode_python_payload(py, &obj)?;
         let sender = self.sender.clone();
 
         pyo3_async_runtimes::tokio::future_into_py(py, async move {
             let guard = sender.lock().await;
             if let Some(ref tx) = *guard {
-                let msg = Message::single(SEALED_PY_MSG_TYPE, pickled);
                 tx.send(Ok(msg))
                     .await
                     .map_err(|_| PyRuntimeError::new_err("Stream closed"))?;
@@ -614,8 +1076,8 @@ enum PyActorResponse {
         String,
         mpsc::Receiver>,
     ),
-    /// Pickled Python object for Python-to-Python communication
-    Sealed(Vec),
+    /// Pre-encoded Message (pickle single, zerocopy single, or zerocopy stream)
+    Encoded(Message),
     /// Generator (async or sync) to be iterated
     Generator(PyObject, PyObject, bool), // (generator, event_loop, is_async)
 }
@@ -658,33 +1120,12 @@ impl PyActorRef {
             let py_msg: PyMessage = msg_bound.extract()?;
             py_msg.to_message()
         } else {
-            // Pickle any other Python object
-            let pickled = pickle_object(py, &msg)?;
-            Message::single(SEALED_PY_MSG_TYPE, pickled)
+            encode_python_payload(py, &msg)?
         };
 
         pyo3_async_runtimes::tokio::future_into_py(py, async move {
             let response = actor_ref.send(actor_msg).await.map_err(to_pyerr)?;
-
-            // Check if response is a sealed message
-            Python::with_gil(|py| {
-                match response {
-                    Message::Single {
-                        ref msg_type,
-                        ref data,
-                    } if msg_type == SEALED_PY_MSG_TYPE => {
-                        // Unpickle and return the original Python object
-                        unpickle_object(py, data)
-                    }
-                    _ => {
-                        // Return as PyMessage for non-sealed responses
-                        Ok(PyMessage::from_rust_message(response)
-                            .into_pyobject(py)?
-                            .into_any()
-                            .unbind())
-                    }
-                }
-            })
+            decode_message_to_pyobject(response).await
         })
     }
 
@@ -702,9 +1143,7 @@ impl PyActorRef {
             let py_msg: PyMessage = msg_bound.extract()?;
             py_msg.to_message()
         } else {
-            // Pickle any other Python object
-            let pickled = pickle_object(py, &msg)?;
-            Message::single(SEALED_PY_MSG_TYPE, pickled)
+            encode_python_payload(py, &msg)?
         };
 
         pyo3_async_runtimes::tokio::future_into_py(py, async move {
@@ -989,36 +1428,20 @@ impl Actor for PythonActorWrapper {
         let (handler, event_loop) =
             Python::with_gil(|py| (self.handler.clone_ref(py), self.event_loop.clone_ref(py)));
 
-        // Check if this is a sealed Python message
-        let is_sealed_msg = msg.msg_type() == SEALED_PY_MSG_TYPE;
-        let py_msg = PyMessage::from_rust_message(msg);
+        // Decode-first: convert any message format to a Python object
+        let call_arg = decode_message_to_pyobject(msg).await.map_err(|e| {
+            pulsing_actor::error::PulsingError::from(pulsing_actor::error::RuntimeError::Other(
+                e.to_string(),
+            ))
+        })?;
 
         let response: Result = python_executor()
             .execute(move || {
                 Python::with_gil(|py| -> PyResult {
                     let receive_method = handler.getattr(py, "receive")?;
-
-                    // If sealed message, unpickle and pass the original Python object
-                    let call_arg: PyObject = if is_sealed_msg {
-                        let payload = py_msg.payload.as_ref().ok_or_else(|| {
-                            pyo3::exceptions::PyValueError::new_err("Expected payload for sealed message")
-                        })?;
-                        unpickle_object(py, payload)?
-                    } else {
-                        py_msg.into_pyobject(py)?.into_any().unbind()
-                    };
-
-                    let result = receive_method.call1(py, (call_arg,));
-
-                    // Handle Python exceptions and convert to ActorError
-                    let result = match result {
+                    let result = match receive_method.call1(py, (&call_arg,)) {
                         Ok(value) => value,
-                        Err(py_err) => {
-                            // Convert Python exception to ActorError
-                            // We need to return this as an error in the Python execution context
-                            // The error will be caught and converted at the Rust level
-                            return Err(py_err);
-                        }
+                        Err(py_err) => return Err(py_err),
                     };
 
                     let asyncio = py.import("asyncio")?;
@@ -1040,32 +1463,25 @@ impl Actor for PythonActorWrapper {
                         return Ok(PyActorResponse::Single(PyMessage::empty()));
                     }
 
-                    // Check for generator (sync or async) - fast path using type name
                     let type_name = py_result_bound
                         .get_type()
                         .qualname()
                         .map(|s| s.to_string())
                         .unwrap_or_default();
-                    let is_gen = type_name == "generator";
-                    let is_async_gen = type_name == "async_generator";
-
-                    if is_async_gen || is_gen {
+                    if type_name == "async_generator" || type_name == "generator" {
                         return Ok(PyActorResponse::Generator(
                             py_result.clone_ref(py),
                             event_loop.clone_ref(py),
-                            is_async_gen,
+                            type_name == "async_generator",
                         ));
                     }
 
-                    // Handle StreamMessage
                     if py_result_bound.is_instance_of::() {
                         let stream_msg_cell = py_result_bound.downcast::()?;
-
                         let borrowed = stream_msg_cell.borrow();
                         let default_msg_type = borrowed.default_msg_type.clone();
                         let receiver_arc = borrowed.receiver.clone();
                         drop(borrowed);
-
                         let receiver = {
                             let mut guard = receiver_arc.lock().map_err(|e| {
                                 pyo3::exceptions::PyRuntimeError::new_err(format!(
@@ -1075,7 +1491,6 @@ impl Actor for PythonActorWrapper {
                             })?;
                             guard.take()
                         };
-
                         if let Some(rx) = receiver {
                             return Ok(PyActorResponse::StreamChannel(default_msg_type, rx));
                         } else {
@@ -1085,21 +1500,18 @@ impl Actor for PythonActorWrapper {
                         }
                     }
 
-                    // Handle PyMessage (for Rust actor communication)
                     if py_result_bound.is_instance_of::() {
                         let msg: PyMessage = py_result_bound.extract()?;
                         if msg.is_stream() {
                             return Err(pyo3::exceptions::PyValueError::new_err(
                                 "PyMessage with stream cannot be returned from receive(), use StreamMessage instead"
                             ));
-                        } else {
-                            return Ok(PyActorResponse::Single(msg));
                         }
+                        return Ok(PyActorResponse::Single(msg));
                     }
 
-                    // For any other Python object, pickle it and return as SealedPyMessage
-                    let pickled = pickle_object(py, &py_result)?;
-                    Ok(PyActorResponse::Sealed(pickled))
+                    let msg = encode_python_payload(py, &py_result)?;
+                    Ok(PyActorResponse::Encoded(msg))
                 })
             })
             .await
@@ -1111,7 +1523,6 @@ impl Actor for PythonActorWrapper {
                 )
             })?;
 
-        // Convert Python exceptions to ActorError
         let response = match response {
             Ok(resp) => resp,
             Err(py_err) => {
@@ -1132,21 +1543,16 @@ impl Actor for PythonActorWrapper {
             PyActorResponse::StreamChannel(default_msg_type, rx) => {
                 Ok(Message::from_channel(&default_msg_type, rx))
             }
-            PyActorResponse::Sealed(data) => Ok(Message::single(SEALED_PY_MSG_TYPE, data)),
+            PyActorResponse::Encoded(msg) => Ok(msg),
             PyActorResponse::Generator(generator, event_loop, is_async) => {
-                // Create channel for streaming generator values
                 let (tx, rx) = mpsc::channel::>(32);
-
-                // Spawn background task to iterate generator
                 tokio::spawn(async move {
                     let result = python_executor()
                         .execute(move || {
                             Python::with_gil(|py| -> PyResult<()> {
                                 let gen = generator.bind(py);
                                 let asyncio = py.import("asyncio")?;
-
                                 if is_async {
-                                    // Async generator: iterate using anext()
                                     let run_coroutine_threadsafe =
                                         asyncio.getattr("run_coroutine_threadsafe")?;
                                     loop {
@@ -1155,15 +1561,13 @@ impl Actor for PythonActorWrapper {
                                             .call1((&anext_coro, &event_loop))?;
                                         match future.call_method0("result") {
                                             Ok(item) => {
-                                                let pickled = pickle_object(py, &item.unbind())?;
-                                                let msg =
-                                                    Message::single(SEALED_PY_MSG_TYPE, pickled);
+                                                let item_obj = item.unbind();
+                                                let msg = encode_python_payload(py, &item_obj)?;
                                                 if tx.blocking_send(Ok(msg)).is_err() {
                                                     break;
                                                 }
                                             }
                                             Err(e) => {
-                                                // Check if StopAsyncIteration
                                                 if e.is_instance_of::(py) {
                                                     break;
                                                 }
@@ -1179,19 +1583,16 @@ impl Actor for PythonActorWrapper {
                                         }
                                     }
                                 } else {
-                                    // Sync generator: iterate using next()
                                     loop {
                                         match gen.call_method0("__next__") {
                                             Ok(item) => {
-                                                let pickled = pickle_object(py, &item.unbind())?;
-                                                let msg =
-                                                    Message::single(SEALED_PY_MSG_TYPE, pickled);
+                                                let item_obj = item.unbind();
+                                                let msg = encode_python_payload(py, &item_obj)?;
                                                 if tx.blocking_send(Ok(msg)).is_err() {
                                                     break;
                                                 }
                                             }
                                             Err(e) => {
-                                                // Check if StopIteration
                                                 if e.is_instance_of::(py) {
                                                     break;
                                                 }
@@ -1211,12 +1612,10 @@ impl Actor for PythonActorWrapper {
                             })
                         })
                         .await;
-
                     if let Err(e) = result {
                         tracing::error!("Generator iteration error: {:?}", e);
                     }
                 });
-
                 Ok(Message::from_channel(SEALED_PY_MSG_TYPE, rx))
             }
         }
@@ -1767,5 +2166,6 @@ pub fn add_to_module(m: &Bound<'_, pyo3::types::PyModule>) -> PyResult<()> {
     m.add_class::()?;
     // Sealed message support (for Python-to-Python communication)
     m.add_class::()?;
+    m.add_class::()?;
     Ok(())
 }
diff --git a/llms.binding.md b/llms.binding.md
index d29e55942..a2c22ac19 100644
--- a/llms.binding.md
+++ b/llms.binding.md
@@ -271,6 +271,51 @@ response = await actorref.ask({"action": "get"})
 await actorref.tell({"action": "log", "data": "hello"})
 ```
 
+#### Optional Zerocopy Descriptor Protocol
+
+Pulsing supports an optional zerocopy fast path to bypass pickle serialization for eligible
+Python objects. If the object does not provide the protocol, Pulsing falls back to existing
+pickle-based transport automatically.
+
+```python
+from pulsing.core import ZeroCopyDescriptor
+
+class MyTensorLike:
+    def __zerocopy__(self, ctx):
+        return ZeroCopyDescriptor(
+            buffers=[memoryview(self.buffer)],
+            dtype="float32",
+            shape=[1024],
+            strides=[4],
+            transport="inline",   # e.g. inline/shm
+            checksum=None,        # optional
+            version=1,
+        )
+```
+
+Rules:
+
+- `__zerocopy__(ctx)` is optional; missing protocol means fallback to pickle.
+- Descriptor is the single source of truth (no separate `__metadata__`).
+- Zerocopy is an optimization path for reduced serialization and buffer copies.
+- `buffers` should provide contiguous Python buffer views (e.g. `memoryview`, tensor buffer, `bytearray`) to avoid extra Python-side copy.
+- Payload validation failure or unsupported descriptor always falls back to pickle unless explicitly forced by runtime config.
+
+**Automatic stream transfer for large payloads:**
+
+When the total buffer size exceeds a threshold (default 64 KB), Pulsing automatically uses a descriptor-first stream transfer instead of packing everything into a single message:
+
+1. A lightweight descriptor header (dtype, shape, strides, buffer lengths) is sent as the first stream frame.
+2. Buffer data follows as a sequence of raw chunk frames, each up to `PULSING_ZEROCOPY_CHUNK_BYTES` (default 1 MB).
+3. The receiver pre-allocates buffers based on the descriptor and fills them incrementally as chunks arrive.
+
+Small payloads below the threshold are still sent as a single message with descriptor + data packed together. This is transparent to the user — `actor.receive()` always gets a `ZeroCopyDescriptor` regardless of the transfer mode.
+
+Environment variables:
+- `PULSING_ZEROCOPY`: `auto` (default) / `off` / `force`
+- `PULSING_ZEROCOPY_STREAM_THRESHOLD`: minimum buffer size in bytes to trigger stream transfer (default 65536)
+- `PULSING_ZEROCOPY_CHUNK_BYTES`: chunk size in bytes for stream transfer (default 1048576, minimum 4096)
+
 #### Actor Lifecycle
 
 ```python
@@ -497,6 +542,7 @@ system.shutdown().await?;
 ### Key Conventions
 
 - **Message encoding**: `Message::pack(&T)` uses bincode + `type_name::()`; for cross-version protocols use `Message::single("TypeV1", bytes)`.
+- **Optional zerocopy**: when payload objects implement `__zerocopy__(ctx)`, Pulsing may bypass pickle and send descriptor + buffers directly; otherwise it uses normal pickle/bytes paths.
 - **Naming and resolution**:
   - `spawn_named(name, actor)`: Creates a discoverable actor, name is the resolution path
   - `resolve(name)`: One-shot resolve (may become stale after migration)
diff --git a/python/pulsing/core/__init__.py b/python/pulsing/core/__init__.py
index cbf465e5f..da67eee19 100644
--- a/python/pulsing/core/__init__.py
+++ b/python/pulsing/core/__init__.py
@@ -30,6 +30,7 @@ def incr(self): self.value += 1; return self.value
     ActorSystem,
     NodeId,
     SealedPyMessage,
+    ZeroCopyDescriptor,
     StreamReader,
     StreamWriter,
     SystemConfig,
@@ -249,6 +250,7 @@ async def tell_with_timeout(
     "SystemActorProxy",
     "PythonActorService",
     "PYTHON_ACTOR_SERVICE_NAME",
+    "ZeroCopyDescriptor",
     "PulsingError",
     "PulsingRuntimeError",
     "PulsingActorError",
diff --git a/python/pulsing/streaming/backend.py b/python/pulsing/streaming/backend.py
index eec4d9eb0..d1e7383a4 100644
--- a/python/pulsing/streaming/backend.py
+++ b/python/pulsing/streaming/backend.py
@@ -76,6 +76,25 @@ def total_count(self) -> int:
         """Total record count"""
         ...
 
+    async def put_tensor(self, data: Any, **kwargs: Any) -> Any:
+        """Optional tensor-native put API."""
+        raise NotImplementedError
+
+    async def get_data(self, batch_meta: Any, fields: list[str] | None = None) -> Any:
+        """Optional tensor-native batch data API."""
+        raise NotImplementedError
+
+    async def get_meta(
+        self,
+        fields: list[str],
+        batch_size: int,
+        task_name: str = "default",
+        sampler: Any = None,
+        **sampling_kwargs: Any,
+    ) -> Any:
+        """Optional tensor-native metadata API."""
+        raise NotImplementedError
+
 
 class MemoryBackend:
     """Pure In-Memory Backend - Built-in Default Implementation
@@ -163,6 +182,59 @@ async def stats(self) -> dict[str, Any]:
     def total_count(self) -> int:
         return len(self.buffer)
 
+    async def put_tensor(self, data: Any, **kwargs: Any) -> Any:
+        if isinstance(data, list):
+            await self.put_batch(data)
+            return {"size": len(data)}
+        if isinstance(data, dict):
+            await self.put(data)
+            return {"size": 1}
+        raise TypeError("MemoryBackend.put_tensor expects dict or list[dict]")
+
+    async def get_data(self, batch_meta: Any, fields: list[str] | None = None) -> Any:
+        if isinstance(batch_meta, dict):
+            indexes = batch_meta.get("global_indexes", [])
+        else:
+            indexes = getattr(batch_meta, "global_indexes", [])
+        rows = [self.buffer[i] for i in indexes if 0 <= i < len(self.buffer)]
+        if not fields:
+            return rows
+        return [{k: v for k, v in row.items() if k in fields} for row in rows]
+
+    async def get_meta(
+        self,
+        fields: list[str],
+        batch_size: int,
+        task_name: str = "default",
+        sampler: Any = None,
+        **sampling_kwargs: Any,
+    ) -> Any:
+        total = len(self.buffer)
+        ready = list(range(total))
+        if sampler is not None:
+            sampled, _ = sampler.sample(ready, batch_size, **sampling_kwargs)
+        else:
+            sampled = ready[:batch_size]
+        return {
+            "samples": [
+                {
+                    "partition_id": sampling_kwargs.get("partition_id", "default"),
+                    "global_index": idx,
+                    "fields": {
+                        field: {
+                            "name": field,
+                            "dtype": None,
+                            "shape": None,
+                            "production_status": "ready",
+                        }
+                        for field in fields
+                    },
+                }
+                for idx in sampled
+            ],
+            "global_indexes": sampled,
+        }
+
 
 # ============================================================
 # Backend Registry
diff --git a/python/pulsing/streaming/storage.py b/python/pulsing/streaming/storage.py
index b0181fe98..35fc33fd3 100644
--- a/python/pulsing/streaming/storage.py
+++ b/python/pulsing/streaming/storage.py
@@ -43,6 +43,9 @@ def __init__(
 
         # Backend instance (initialized in on_start)
         self._backend: StorageBackend | None = None
+        self._production_status: dict[int, dict[str, str]] = {}
+        self._consumption_status: dict[str, set[int]] = {}
+        self._key_to_index: dict[str, int] = {}
 
     def on_start(self, actor_id: ActorId) -> None:
         # Create backend instance
@@ -74,7 +77,10 @@ async def put(self, record: dict) -> dict:
         """
         if not record:
             raise ValueError("Missing 'record'")
+        before = self._backend.total_count()
         await self._backend.put(record)
+        fields = [k for k in record.keys() if not str(k).startswith("_")]
+        self._production_status[before] = {field: "ready" for field in fields}
         return {"status": "ok"}
 
     async def put_batch(self, records: list[dict]) -> dict:
@@ -88,9 +94,28 @@ async def put_batch(self, records: list[dict]) -> dict:
         """
         if not records:
             raise ValueError("Missing 'records'")
+        start = self._backend.total_count()
         await self._backend.put_batch(records)
+        for i, record in enumerate(records):
+            fields = [k for k in record.keys() if not str(k).startswith("_")]
+            self._production_status[start + i] = {field: "ready" for field in fields}
         return {"status": "ok", "count": len(records)}
 
+    async def put_tensor(
+        self, data: Any, partition_id: str = "default", **kwargs: Any
+    ) -> dict:
+        if hasattr(self._backend, "put_tensor"):
+            meta = await self._backend.put_tensor(
+                data, partition_id=partition_id, **kwargs
+            )
+            if hasattr(meta, "global_indexes") and hasattr(meta, "field_names"):
+                for idx in meta.global_indexes:
+                    self._production_status[idx] = {
+                        field: "ready" for field in meta.field_names
+                    }
+            return {"status": "ok"}
+        raise NotImplementedError("Backend does not support put_tensor")
+
     async def get(self, limit: int = 100, offset: int = 0) -> list[dict]:
         """Get records.
 
@@ -140,3 +165,99 @@ async def stats(self) -> dict:
             Statistics dict from backend
         """
         return await self._backend.stats()
+
+    async def get_meta(
+        self,
+        fields: list[str],
+        batch_size: int,
+        task_name: str,
+        sampler: Any = None,
+        **sampling_kwargs: Any,
+    ) -> dict:
+        if hasattr(self._backend, "get_meta"):
+            meta = await self._backend.get_meta(
+                fields=fields,
+                batch_size=batch_size,
+                task_name=task_name,
+                sampler=sampler,
+                **sampling_kwargs,
+            )
+            if hasattr(meta, "to_dict"):
+                return meta.to_dict()
+            return meta
+
+        consumed = self._consumption_status.setdefault(task_name, set())
+        ready = []
+        for idx in sorted(self._production_status):
+            if idx in consumed:
+                continue
+            status = self._production_status[idx]
+            if all(status.get(field) == "ready" for field in fields):
+                ready.append(idx)
+
+        if sampler is not None:
+            sampled, marked = sampler.sample(ready, batch_size, **sampling_kwargs)
+        else:
+            sampled = ready[:batch_size]
+            marked = sampled
+        consumed.update(marked)
+        return {
+            "samples": [
+                {
+                    "partition_id": sampling_kwargs.get("partition_id", "default"),
+                    "global_index": idx,
+                    "fields": {
+                        field: {
+                            "name": field,
+                            "dtype": None,
+                            "shape": None,
+                            "production_status": "ready",
+                        }
+                        for field in fields
+                    },
+                }
+                for idx in sampled
+            ],
+            "global_indexes": sampled,
+        }
+
+    async def get_data(self, batch_meta: dict, fields: list[str] | None = None) -> Any:
+        if hasattr(self._backend, "get_data"):
+            return await self._backend.get_data(batch_meta, fields=fields)
+
+        indexes = batch_meta.get("global_indexes") or [
+            sample.get("global_index", -1) for sample in batch_meta.get("samples", [])
+        ]
+        if hasattr(self._backend, "get_by_indices"):
+            rows = await self._backend.get_by_indices(indexes)
+        else:
+            rows = []
+            for idx in indexes:
+                rows.extend(await self._backend.get(limit=1, offset=idx))
+        if fields:
+            return [{k: v for k, v in row.items() if k in fields} for row in rows]
+        return rows
+
+    async def mark_consumed(self, task_name: str, global_indexes: list[int]) -> dict:
+        self._consumption_status.setdefault(task_name, set()).update(global_indexes)
+        if hasattr(self._backend, "mark_consumed"):
+            await self._backend.mark_consumed(task_name, global_indexes)
+        return {"status": "ok"}
+
+    async def reset_consumption(self, task_name: str) -> dict:
+        self._consumption_status.pop(task_name, None)
+        if hasattr(self._backend, "reset_consumption"):
+            await self._backend.reset_consumption(task_name)
+        return {"status": "ok"}
+
+    async def clear(self, global_indexes: list[int]) -> dict:
+        if hasattr(self._backend, "clear"):
+            await self._backend.clear(global_indexes)
+        return {"status": "ok"}
+
+    async def kv_register(self, key: str, global_index: int) -> dict:
+        self._key_to_index[key] = global_index
+        return {"status": "ok"}
+
+    async def kv_resolve(self, keys: list[str]) -> dict:
+        return {"indexes": [self._key_to_index.get(key, -1) for key in keys]}
diff --git a/tests/python/test_queue_backends.py b/tests/python/test_queue_backends.py
index 38d7742e9..1f87310b2 100644
--- a/tests/python/test_queue_backends.py
+++ b/tests/python/test_queue_backends.py
@@ -448,6 +448,24 @@ async def stats(self) -> dict[str, Any]:
             def total_count(self) -> int:
                 return len(self.data)
 
+            async def put_tensor(self, data: Any, **kwargs: Any) -> Any:
+                raise NotImplementedError
+
+            async def get_data(
+                self, batch_meta: Any, fields: list[str] | None = None
+            ) -> Any:
+                raise NotImplementedError
+
+            async def get_meta(
+                self,
+                fields: list[str],
+                batch_size: int,
+                task_name: str = "default",
+                sampler: Any = None,
+                **sampling_kwargs: Any,
+            ) -> Any:
+                raise NotImplementedError
+
         # Verify it satisfies protocol (duck typing)
         backend = MinimalBackend(bucket_id=0)
         assert isinstance(backend, StorageBackend)
diff --git a/tests/python/test_sealed_message.py b/tests/python/test_sealed_message.py
index d5781df34..aa1206ee0 100644
--- a/tests/python/test_sealed_message.py
+++ b/tests/python/test_sealed_message.py
@@ -10,6 +10,7 @@
 """
 
 import asyncio
+import os
 from dataclasses import dataclass
 
 import pytest
@@ -17,6 +18,7 @@
     Actor,
     Message,
     SealedPyMessage,
+    ZeroCopyDescriptor,
 )
 import pulsing as pul
 
@@ -143,6 +145,54 @@ async def receive(self, msg):
         return {"received": msg}
 
 
+class ZeroCopyPayload:
+    """Object implementing Pulsing zerocopy descriptor protocol."""
+
+    def __init__(self, raw: bytes):
+        self.raw = raw
+
+    def __zerocopy__(self, _ctx):
+        return ZeroCopyDescriptor(
+            buffers=[memoryview(self.raw)],
+            dtype="u8",
+            shape=[len(self.raw)],
+            strides=[1],
+            transport="inline",
+            checksum=None,
+            version=1,
+        )
+
+
+class ZeroCopyInspectorActor(Actor):
+    async def receive(self, msg):
+        if isinstance(msg, ZeroCopyDescriptor):
+            buffers = msg.buffers
+            return {
+                "is_descriptor": True,
+                "buffer_count": len(buffers),
+                "size": len(buffers[0]),
+                "dtype": msg.dtype,
+            }
+        return {"is_descriptor": False, "type": type(msg).__name__}
+
+
+class NonContiguousZeroCopyPayload:
+    def __init__(self, raw: bytes):
+        self.raw = raw
+
+    def __zerocopy__(self, _ctx):
+        view = memoryview(self.raw)[::2]
+        return ZeroCopyDescriptor(
+            buffers=[view],
+            dtype="u8",
+            shape=[len(view)],
+            strides=[2],
+            transport="inline",
+            checksum=None,
+            version=1,
+        )
+
+
 # ============================================================================
 # Fixtures
 # ============================================================================
@@ -412,6 +462,117 @@ async def test_tell_with_dict(actor_system):
     assert response["value"] == 8
 
 
+@pytest.mark.asyncio
+async def test_ask_with_zerocopy_descriptor(actor_system):
+    """ask() should use zerocopy when object defines __zerocopy__."""
+    os.environ["PULSING_ZEROCOPY"] = "auto"
+    actor_ref = await actor_system.spawn(ZeroCopyInspectorActor(), name="zc-inspector")
+    response = await actor_ref.ask(ZeroCopyPayload(b"abcdef"))
+    assert response["is_descriptor"] is True
+    assert response["buffer_count"] == 1
+    assert response["size"] == 6
+    assert response["dtype"] == "u8"
+
+
+@pytest.mark.asyncio
+async def test_ask_with_zerocopy_force(actor_system):
+    """force mode should reject payloads without __zerocopy__."""
+    os.environ["PULSING_ZEROCOPY"] = "force"
+    actor_ref = await actor_system.spawn(EchoAnyActor(), name="zc-force")
+    with pytest.raises(Exception):
+        await actor_ref.ask({"not": "zerocopy"})
+    os.environ["PULSING_ZEROCOPY"] = "auto"
+
+
+@pytest.mark.asyncio
+async def test_ask_with_zerocopy_large_buffer(actor_system):
+    """Large payload (>= stream threshold) goes through descriptor-first stream path."""
+    os.environ["PULSING_ZEROCOPY"] = "auto"
+    os.environ["PULSING_ZEROCOPY_STREAM_THRESHOLD"] = "65536"
+    os.environ["PULSING_ZEROCOPY_CHUNK_BYTES"] = "65536"
+    try:
+        actor_ref = await actor_system.spawn(ZeroCopyInspectorActor(), name="zc-large")
+        payload = bytearray(8 * 1024 * 1024)
+        response = await actor_ref.ask(ZeroCopyPayload(payload))
+        assert response["is_descriptor"] is True
+        assert response["buffer_count"] == 1
+        assert response["size"] == len(payload)
+    finally:
+        os.environ.pop("PULSING_ZEROCOPY_CHUNK_BYTES", None)
+        os.environ.pop("PULSING_ZEROCOPY_STREAM_THRESHOLD", None)
+
+
+@pytest.mark.asyncio
+async def test_ask_with_zerocopy_small_buffer_single_path(actor_system):
+    """Small payload (< stream threshold) stays on single-message path."""
+    os.environ["PULSING_ZEROCOPY"] = "auto"
+    os.environ["PULSING_ZEROCOPY_STREAM_THRESHOLD"] = "1048576"
+    try:
+        actor_ref = await actor_system.spawn(ZeroCopyInspectorActor(), name="zc-small")
+        payload = b"small_payload_1234"
+        response = await actor_ref.ask(ZeroCopyPayload(payload))
+        assert response["is_descriptor"] is True
+        assert response["buffer_count"] == 1
+        assert response["size"] == len(payload)
+        assert response["dtype"] == "u8"
+    finally:
+        os.environ.pop("PULSING_ZEROCOPY_STREAM_THRESHOLD", None)
+
+
+@pytest.mark.asyncio
+async def test_ask_with_zerocopy_stream_threshold_boundary(actor_system):
+    """Payload exactly at stream threshold goes through stream path."""
+    threshold = 4096
+    os.environ["PULSING_ZEROCOPY"] = "auto"
+    os.environ["PULSING_ZEROCOPY_STREAM_THRESHOLD"] = str(threshold)
+    os.environ["PULSING_ZEROCOPY_CHUNK_BYTES"] = "4096"
+    try:
+        actor_ref = await actor_system.spawn(
+            ZeroCopyInspectorActor(), name="zc-boundary"
+        )
+        payload = bytearray(threshold)
+        response = await actor_ref.ask(ZeroCopyPayload(payload))
+        assert response["is_descriptor"] is True
+        assert response["buffer_count"] == 1
+        assert response["size"] == threshold
+    finally:
+        os.environ.pop("PULSING_ZEROCOPY_STREAM_THRESHOLD", None)
+        os.environ.pop("PULSING_ZEROCOPY_CHUNK_BYTES", None)
+
+
+@pytest.mark.asyncio
+async def test_ask_with_zerocopy_stream_multi_chunk(actor_system):
+    """Large buffer is transmitted in multiple chunks and reassembled correctly."""
+    os.environ["PULSING_ZEROCOPY"] = "auto"
+    os.environ["PULSING_ZEROCOPY_STREAM_THRESHOLD"] = "4096"
+    os.environ["PULSING_ZEROCOPY_CHUNK_BYTES"] = "4096"
+    try:
+        actor_ref = await actor_system.spawn(
+            ZeroCopyInspectorActor(), name="zc-multichunk"
+        )
+        # 5 chunks worth of data
+        payload = bytearray(range(256)) * 80  # 20480 bytes
+        response = await actor_ref.ask(ZeroCopyPayload(bytes(payload)))
+        assert response["is_descriptor"] is True
+        assert response["buffer_count"] == 1
+        assert response["size"] == len(payload)
+    finally:
+        os.environ.pop("PULSING_ZEROCOPY_STREAM_THRESHOLD", None)
+        os.environ.pop("PULSING_ZEROCOPY_CHUNK_BYTES", None)
+
+
+@pytest.mark.asyncio
+async def test_zerocopy_force_rejects_non_contiguous_buffer(actor_system):
+    """Force mode rejects non-contiguous buffer views."""
+    os.environ["PULSING_ZEROCOPY"] = "force"
+    actor_ref = await actor_system.spawn(
+        ZeroCopyInspectorActor(), name="zc-noncontiguous"
+    )
+    with pytest.raises(Exception):
+        await actor_ref.ask(NonContiguousZeroCopyPayload(b"0123456789"))
+    os.environ["PULSING_ZEROCOPY"] = "auto"
+
+
 # ============================================================================
 # Backward Compatibility Tests
 # ============================================================================
diff --git a/tests/python/test_zerocopy_protocol.py b/tests/python/test_zerocopy_protocol.py
new file mode 100644
index 000000000..46334d4b7
--- /dev/null
+++ b/tests/python/test_zerocopy_protocol.py
@@ -0,0 +1,117 @@
+import pytest
+import pulsing as pul
+from pulsing.core import Actor, ZeroCopyDescriptor
+
+
+class _ZeroCopyPayload:
+    def __init__(self, payload: bytes):
+        self.payload = payload
+
+    def __zerocopy__(self, _ctx):
+        return ZeroCopyDescriptor(
+            buffers=[memoryview(self.payload)],
+            dtype="u8",
+            shape=[len(self.payload)],
+            strides=[1],
+            transport="inline",
+            checksum=None,
+            version=1,
+        )
+
+
+class _Inspector(Actor):
+    async def receive(self, msg):
+        if isinstance(msg, ZeroCopyDescriptor):
+            buffers = msg.buffers
+            return {
+                "kind": "descriptor",
+                "version": msg.version,
+                "buffer_count": len(buffers),
+                "first_size": len(buffers[0]),
+            }
+        return {"kind": "normal", "type": type(msg).__name__}
+
+
+@pytest.fixture
+async def actor_system():
+    system = await pul.actor_system()
+    yield system
+    await system.shutdown()
+
+
+@pytest.mark.asyncio
+async def test_zerocopy_auto_uses_descriptor(actor_system, monkeypatch):
+    monkeypatch.setenv("PULSING_ZEROCOPY", "auto")
+    ref = await actor_system.spawn(_Inspector(), name="zc-auto")
+    resp = await ref.ask(_ZeroCopyPayload(b"hello"))
+    assert resp["kind"] == "descriptor"
+    assert resp["version"] == 1
+    assert resp["buffer_count"] == 1
+    assert resp["first_size"] == 5
+
+
+@pytest.mark.asyncio
+async def test_zerocopy_off_falls_back_pickle(actor_system, monkeypatch):
+    monkeypatch.setenv("PULSING_ZEROCOPY", "off")
+    ref = await actor_system.spawn(_Inspector(), name="zc-off")
+    resp = await ref.ask(_ZeroCopyPayload(b"hello"))
+    assert resp["kind"] == "normal"
+    assert resp["type"] == "_ZeroCopyPayload"
+
+
+@pytest.mark.asyncio
+async def test_zerocopy_force_rejects_non_descriptor(actor_system, monkeypatch):
+    monkeypatch.setenv("PULSING_ZEROCOPY", "force")
+    ref = await actor_system.spawn(_Inspector(), name="zc-force")
+    with pytest.raises(Exception):
+        await ref.ask({"x": 1})
+
+
+@pytest.mark.asyncio
+async def test_zerocopy_small_payload_single_path(actor_system, monkeypatch):
+    """Small payload below stream threshold stays on single-message path."""
+    monkeypatch.setenv("PULSING_ZEROCOPY", "auto")
+    monkeypatch.setenv("PULSING_ZEROCOPY_STREAM_THRESHOLD", "1048576")
+    ref = await actor_system.spawn(_Inspector(), name="zc-small")
+    resp = await ref.ask(_ZeroCopyPayload(b"tiny"))
+    assert resp["kind"] == "descriptor"
+    assert resp["first_size"] == 4
+
+
+@pytest.mark.asyncio
+async def test_zerocopy_large_payload_stream_path(actor_system, monkeypatch):
+    """Large payload above stream threshold goes through descriptor-first stream."""
+    monkeypatch.setenv("PULSING_ZEROCOPY", "auto")
+    monkeypatch.setenv("PULSING_ZEROCOPY_STREAM_THRESHOLD", "4096")
+    monkeypatch.setenv("PULSING_ZEROCOPY_CHUNK_BYTES", "4096")
+    ref = await actor_system.spawn(_Inspector(), name="zc-stream")
+    big = bytes(range(256)) * 64  # 16384 bytes, 4 chunks
+    resp = await ref.ask(_ZeroCopyPayload(big))
+    assert resp["kind"] == "descriptor"
+    assert resp["first_size"] == len(big)
+    assert resp["buffer_count"] == 1
+
+
+@pytest.mark.asyncio
+async def test_zerocopy_stream_data_integrity(actor_system, monkeypatch):
+    """Data transmitted via stream path arrives intact."""
+    monkeypatch.setenv("PULSING_ZEROCOPY", "auto")
+    monkeypatch.setenv("PULSING_ZEROCOPY_STREAM_THRESHOLD", "4096")
+    monkeypatch.setenv("PULSING_ZEROCOPY_CHUNK_BYTES", "4096")
+
+    class _DataVerifier(Actor):
+        async def receive(self, msg):
+            if isinstance(msg, ZeroCopyDescriptor):
+                data = bytes(msg.buffers[0])
+                return {
+                    "size": len(data),
+                    "checksum": sum(data) % 65536,
+                }
+            return {}
+
+    ref = await actor_system.spawn(_DataVerifier(), name="zc-verify")
+    payload = bytes(range(256)) * 80  # 20480 bytes
+    expected_checksum = sum(payload) % 65536
+    resp = await ref.ask(_ZeroCopyPayload(payload))
+    assert resp["size"] == len(payload)
+    assert resp["checksum"] == expected_checksum

From 52c227e3dde09a7536f74843a450edde61cbea13 Mon Sep 17 00:00:00 2001
From: Reiase 
Date: Fri, 20 Feb 2026 16:30:44 +0800
Subject: [PATCH 10/15] Refactor comments and documentation for clarity and
 consistency across multiple files

---
 crates/pulsing-actor/src/supervision.rs       |  2 +-
 crates/pulsing-actor/src/system/config.rs     |  2 +-
 crates/pulsing-actor/src/system/runtime.rs    |  2 +-
 .../src/transport/http2/client.rs             |  6 +-
 .../tests/integration/single_node_tests.rs    |  2 +-
 .../tests/unit/actor/actor_tests.rs           |  2 +-
 .../tests/unit/system/supervision_tests.rs    | 16 +++---
 crates/pulsing-py/src/actor.rs                |  4 +-
 python/pulsing/__init__.py                    |  8 +--
 python/pulsing/cli/__main__.py                | 30 +++++-----
 python/pulsing/core/remote.py                 | 56 +++++++++----------
 python/pulsing/examples/__init__.py           | 12 ++--
 python/pulsing/examples/counting_game.py      | 54 +++++++++---------
 python/pulsing/integrations/ray.py            | 54 +++++++++---------
 14 files changed, 127 insertions(+), 123 deletions(-)

diff --git a/crates/pulsing-actor/src/supervision.rs b/crates/pulsing-actor/src/supervision.rs
index 152cfdad2..7ed713e73 100644
--- a/crates/pulsing-actor/src/supervision.rs
+++ b/crates/pulsing-actor/src/supervision.rs
@@ -13,7 +13,7 @@ pub enum RestartPolicy {
     Always,
     /// Restart the actor only if it failed (non-normal exit)
     OnFailure,
-    /// Never restart the actor (default). Panic / 不可恢复错误时停止且不恢复
+    /// Never restart the actor (default). Stop and don't recover on panic / unrecoverable errors
     #[default]
     Never,
 }
diff --git a/crates/pulsing-actor/src/system/config.rs b/crates/pulsing-actor/src/system/config.rs
index b250d99b9..1aa545c00 100644
--- a/crates/pulsing-actor/src/system/config.rs
+++ b/crates/pulsing-actor/src/system/config.rs
@@ -516,7 +516,7 @@ mod tests {
         assert!(err.to_string().contains("head_node"));
     }
 
-    // --- 配置解析 ---
+    // --- Configuration Parsing ---
 
     #[test]
     fn test_config_with_seeds() {
diff --git a/crates/pulsing-actor/src/system/runtime.rs b/crates/pulsing-actor/src/system/runtime.rs
index 143734a68..fe4d0f646 100644
--- a/crates/pulsing-actor/src/system/runtime.rs
+++ b/crates/pulsing-actor/src/system/runtime.rs
@@ -35,7 +35,7 @@ pub(crate) async fn run_actor_instance(
                                 responder.send(Ok(response));
                             }
                             Err(e) => {
-                                // 业务错误:receive 返回 Err,只把错误返回给调用者,actor 继续处理下一条消息
+                                // Business error: receive returns Err, only return error to caller, actor continues processing next message
                                 tracing::warn!(actor_id = ?ctx.id(), error = %e, "Receive returned error (returned to caller)");
                                 responder.send(Err(e));
                             }
diff --git a/crates/pulsing-actor/src/transport/http2/client.rs b/crates/pulsing-actor/src/transport/http2/client.rs
index 27ca75e0e..75b81cc02 100644
--- a/crates/pulsing-actor/src/transport/http2/client.rs
+++ b/crates/pulsing-actor/src/transport/http2/client.rs
@@ -822,7 +822,7 @@ mod tests {
         assert!(Arc::ptr_eq(&client.pool, &cloned.pool));
     }
 
-    // --- 连接管理 ---
+    // --- Connection Management ---
 
     #[test]
     fn test_client_pool_and_stats() {
@@ -860,7 +860,7 @@ mod tests {
         client.shutdown();
     }
 
-    // --- 错误恢复:对不可达地址应返回连接错误 ---
+    // --- Error Recovery: should return connection error for unreachable addresses ---
 
     #[tokio::test]
     async fn test_ask_connection_error() {
@@ -894,7 +894,7 @@ mod tests {
         );
     }
 
-    // --- 错误植入 ---
+    // --- Fault Injection ---
 
     #[tokio::test]
     async fn test_fault_injector_ask() {
diff --git a/crates/pulsing-actor/tests/integration/single_node_tests.rs b/crates/pulsing-actor/tests/integration/single_node_tests.rs
index 3c852d49d..179f6cfca 100644
--- a/crates/pulsing-actor/tests/integration/single_node_tests.rs
+++ b/crates/pulsing-actor/tests/integration/single_node_tests.rs
@@ -383,7 +383,7 @@ mod error_tests {
         assert!(result.is_err());
         assert_eq!(crash_count.load(Ordering::SeqCst), 1);
 
-        // Actor 仍存活,后续消息应正常处理
+        // Actor still alive, subsequent messages should be processed normally
         let result2: Result = actor_ref.ask(Ping { value: 42 }).await;
         assert!(
             result2.is_ok(),
diff --git a/crates/pulsing-actor/tests/unit/actor/actor_tests.rs b/crates/pulsing-actor/tests/unit/actor/actor_tests.rs
index ac084e4f3..b2cfff6bf 100644
--- a/crates/pulsing-actor/tests/unit/actor/actor_tests.rs
+++ b/crates/pulsing-actor/tests/unit/actor/actor_tests.rs
@@ -288,7 +288,7 @@ mod error_tests {
         let result: Result = actor_ref.ask(ErrorMessage).await;
         assert!(result.is_err());
 
-        // receive 返回 Err 时只把错误返回给调用者,actor 不退出
+        // When receive returns Err, only return error to caller, actor doesn't exit
         let result2: Result = actor_ref.ask(Ping { value: 1 }).await;
         assert!(
             result2.is_ok(),
diff --git a/crates/pulsing-actor/tests/unit/system/supervision_tests.rs b/crates/pulsing-actor/tests/unit/system/supervision_tests.rs
index adbb209a2..dedb15819 100644
--- a/crates/pulsing-actor/tests/unit/system/supervision_tests.rs
+++ b/crates/pulsing-actor/tests/unit/system/supervision_tests.rs
@@ -60,11 +60,11 @@ async fn test_restart_on_failure() {
     let resp = actor_ref.send(Message::single("ping", b"1")).await;
     assert!(resp.is_ok());
 
-    // 2nd message - receive 返回 Err,错误返回给调用者,actor 不退出、不重启
+    // 2nd message - receive returns Err, error returned to caller, actor doesn't exit or restart
     let resp = actor_ref.send(Message::single("ping", b"2")).await;
     assert!(resp.is_err());
 
-    // 3rd message - 同一实例仍存活,继续处理
+    // 3rd message - same instance still alive, continues processing
     let resp = actor_ref.send(Message::single("ping", b"3")).await;
     assert!(resp.is_ok());
 
@@ -80,7 +80,7 @@ async fn test_restart_on_failure() {
 
 #[tokio::test]
 async fn test_max_restarts_exceeded() {
-    // receive 返回 Err 不会导致 actor 退出,因此不会触发 restart;factory 只被调用一次
+    // receive returning Err doesn't cause actor to exit, so no restart is triggered; factory only called once
     let system = ActorSystem::new(SystemConfig::standalone()).await.unwrap();
     let counter = Arc::new(AtomicU32::new(0));
 
@@ -89,7 +89,7 @@ async fn test_max_restarts_exceeded() {
         counter_clone.fetch_add(1, Ordering::SeqCst);
         Ok(FailingActor {
             counter: Arc::new(AtomicU32::new(0)),
-            fail_at: 1, // 第 1 条消息返回 Err
+            fail_at: 1, // 1st message returns Err
         })
     };
 
@@ -110,13 +110,13 @@ async fn test_max_restarts_exceeded() {
         .await
         .unwrap();
 
-    // 第 1 条消息:receive 返回 Err,只回传错误,actor 不退出
+    // 1st message: receive returns Err, only return error to caller, actor doesn't exit
     let r1 = actor_ref.send(Message::single("ping", b"1")).await;
     assert!(r1.is_err());
-    assert_eq!(counter.load(Ordering::SeqCst), 1); // factory 只调用 1 次
+    assert_eq!(counter.load(Ordering::SeqCst), 1); // factory only called once
 
-    // 第 2 条消息:同一实例,count=2 != fail_at(1),返回 Ok
+    // 2nd message: same instance, count=2 != fail_at(1), returns Ok
     let r2 = actor_ref.send(Message::single("ping", b"2")).await;
     assert!(r2.is_ok());
-    assert_eq!(counter.load(Ordering::SeqCst), 1); // 无重启
+    assert_eq!(counter.load(Ordering::SeqCst), 1); // no restart
 }
diff --git a/crates/pulsing-py/src/actor.rs b/crates/pulsing-py/src/actor.rs
index 94f90fc1e..e0a8d782a 100644
--- a/crates/pulsing-py/src/actor.rs
+++ b/crates/pulsing-py/src/actor.rs
@@ -2064,7 +2064,7 @@ impl PyActorSystem {
 
             match timeout {
                 None => {
-                    // 无 timeout:找不到立刻报错(原有行为)
+                    // No timeout: error immediately if not found (original behavior)
                     let actor_ref = system
                         .resolve_named(&path, node.as_ref())
                         .await
@@ -2072,7 +2072,7 @@ impl PyActorSystem {
                     Ok(PyActorRef { inner: actor_ref })
                 }
                 Some(secs) => {
-                    // 带 timeout:重试直到名字出现或超时
+                    // With timeout: retry until name appears or timeout
                     let deadline =
                         tokio::time::Instant::now() + std::time::Duration::from_secs_f64(secs);
                     let mut last_err = None;
diff --git a/python/pulsing/__init__.py b/python/pulsing/__init__.py
index a6f814b97..36d432799 100644
--- a/python/pulsing/__init__.py
+++ b/python/pulsing/__init__.py
@@ -52,11 +52,11 @@ def incr(self): self.value += 1; return self.value
 )
 
 
-# Ray integration (lazy import — 仅在 Ray 环境下可用)
+# Ray integration (lazy import — only available in Ray environment)
 def init_inside_ray():
-    """在 Ray worker 中初始化 Pulsing 并加入集群(async 版本)。
+    """Initialize Pulsing in Ray worker and join cluster (async version).
 
-    用法::
+    Usage::
 
         await pul.init_inside_ray()
     """
@@ -66,7 +66,7 @@ def init_inside_ray():
 
 
 def cleanup_ray():
-    """清理 Pulsing 在 Ray KV store 中的状态"""
+    """Clean up Pulsing state in Ray KV store"""
     from pulsing.integrations.ray import cleanup
 
     return cleanup()
diff --git a/python/pulsing/cli/__main__.py b/python/pulsing/cli/__main__.py
index f66ff4878..3baf04f48 100644
--- a/python/pulsing/cli/__main__.py
+++ b/python/pulsing/cli/__main__.py
@@ -260,19 +260,19 @@ def bench(
 @hp.param("examples")
 def examples(name: str | None = None):
     """
-    列出或查看 Pulsing 内置示例。
+    List or view Pulsing built-in examples.
 
-    不带参数时列出所有可用示例;指定名称时显示该示例的详细说明、
-    运行方式和源码路径。
+    Lists all available examples when called without arguments;
+    shows detailed description, usage, and source path when given a name.
 
     Args:
-        name: 示例名称(可选)。留空则列出所有示例。
+        name: Example name (optional). If omitted, lists all examples.
 
     Examples:
-        # 列出所有示例
+        # List all examples
         pulsing examples
 
-        # 查看某个示例的详情
+        # View details of a specific example
         pulsing examples counting_game
     """
     from pulsing.examples import get_example_detail, list_examples
@@ -280,19 +280,19 @@ def examples(name: str | None = None):
     if name is None:
         all_examples = list_examples()
         if not all_examples:
-            print("暂无可用示例。")
+            print("No examples available.")
             return
-        print("可用示例:\n")
+        print("Available examples:\n")
         max_name_len = max(len(n) for n, _, _ in all_examples)
         for n, summary, filepath in all_examples:
             print(f"  {n:<{max_name_len}}  {summary}")
-        print("\n使用 'pulsing examples <名称>' 查看详情。")
+        print("\nUse 'pulsing examples ' for details.")
         return
 
     detail = get_example_detail(name)
     if detail is None:
-        print(f"未知示例: '{name}'")
-        print("使用 'pulsing examples' 查看所有可用示例。")
+        print(f"Unknown example: '{name}'")
+        print("Use 'pulsing examples' to see all available examples.")
         return
 
     summary, docstring, filepath = detail
@@ -302,15 +302,15 @@ def examples(name: str | None = None):
     if docstring:
         print(docstring)
         print()
-    print(f"源码路径:\n  {filepath}\n")
-    print(f"快速运行:\n  python -m pulsing.examples.{name}")
+    print(f"Source path:\n  {filepath}\n")
+    print(f"Quick run:\n  python -m pulsing.examples.{name}")
 
 
 def main():
     import sys
 
-    # 让 `pulsing examples ` 以位置参数方式工作
-    # hp 框架把有默认值的参数当 --name 选项,这里做一层转换
+    # Make `pulsing examples ` work with positional arguments
+    # hp framework treats params with default values as --name options, so we convert here
     if (
         len(sys.argv) >= 3
         and sys.argv[1] == "examples"
diff --git a/python/pulsing/core/remote.py b/python/pulsing/core/remote.py
index fd7e9ca67..af38e6e05 100644
--- a/python/pulsing/core/remote.py
+++ b/python/pulsing/core/remote.py
@@ -310,7 +310,7 @@ def _extract_methods(cls: type) -> tuple[list[str], set[str]]:
     if isinstance(cls, ActorClass):
         cls = cls._cls
 
-    # 如果是 Ray ActorClass,提取原始类
+    # If it's a Ray ActorClass, extract the original class
     try:
         from ray.actor import ActorClass as RayActorClass
 
@@ -380,9 +380,9 @@ def from_ref(
 
 
 class _MethodCaller:
-    """Method caller. 支持两种用法:
-    - await proxy.method(args)  — 方法调用
-    - await proxy.attr          — 属性读取(无参调用)
+    """Method caller. Supports two usage patterns:
+    - await proxy.method(args)  — method call
+    - await proxy.attr          — attribute access (no args)
     """
 
     def __init__(self, actor_ref: ActorRef, method_name: str, is_async: bool = False):
@@ -397,7 +397,7 @@ def __call__(self, *args, **kwargs):
             return self._sync_call(*args, **kwargs)
 
     def __await__(self):
-        """支持 await proxy.attr 直接读取属性"""
+        """Support await proxy.attr for direct attribute access"""
         return self().__await__()
 
     async def _sync_call(self, *args, **kwargs) -> Any:
@@ -683,7 +683,7 @@ async def receive(self, msg) -> Any:
                 return _wrap_response_v1(error=error_msg)
 
             if not callable(attr):
-                # 属性读取:直接返回值
+                # Attribute access: return value directly
                 if version == 2:
                     return _wrap_response_v2(result=attr)
                 return _wrap_response_v1(result=attr)
@@ -955,7 +955,7 @@ class ActorClass:
 
     @staticmethod
     def _unwrap_ray_class(cls):
-        """如果 cls 是 Ray ActorClass,提取原始用户类"""
+        """Extract original user class if cls is a Ray ActorClass"""
         try:
             from ray.actor import ActorClass as RayActorClass
         except ImportError:
@@ -975,7 +975,7 @@ def __init__(
         max_backoff: float = 30.0,
     ):
         unwrapped = self._unwrap_ray_class(cls)
-        # 保留 Ray handle,使 .remote() 可用
+        # Keep Ray handle so .remote() remains available
         self._ray_cls = cls if unwrapped is not cls else None
         cls = unwrapped
         self._cls = cls
@@ -1002,7 +1002,7 @@ def __init__(
         # Register class
         _actor_class_registry[self._class_name] = cls
 
-        # 如果原始类被 @ray.remote 装饰,用 Ray 的 .remote() 覆盖实例方法
+        # If original class was decorated with @ray.remote, override with Ray's .remote() method
         if self._ray_cls is not None:
             self.remote = self._ray_cls.remote
 
@@ -1569,7 +1569,7 @@ async def resolve(
         proxy = ref.as_any()
         await proxy.send_text(chat_id, content)
 
-        # 等待名字出现(gossip 收敛)
+        # Wait for name to appear (gossip convergence)
         ref = await resolve("peer_node", timeout=30)
 
         # Low-level ask
@@ -1605,17 +1605,17 @@ def as_any(ref: ActorRef) -> ActorProxy:
 
 
 def mount(instance: Any, *, name: str, public: bool = True) -> None:
-    """将已有 Python 对象挂载到 Pulsing 通信网络。
+    """Mount an existing Python object to the Pulsing communication network.
 
-    同步接口,可在 ``__init__`` 中调用。内部自动完成:
-      1. 初始化 Pulsing(如果当前进程还没有,自动检测 Ray 环境)
-      2. 将 instance 包装为 Pulsing actor
-      3. 注册到 Pulsing 网络,其他节点可通过 ``pul.resolve(name)`` 发现
+    Synchronous interface, can be called in ``__init__``. Automatically:
+      1. Initialize Pulsing (if not already, auto-detects Ray environment)
+      2. Wrap instance as a Pulsing actor
+      3. Register to Pulsing network, other nodes can discover via ``pul.resolve(name)``
 
     Args:
-        instance: 要挂载的对象(任意 Python 实例)
-        name: Pulsing 名称,其他节点通过此名字 resolve
-        public: 是否可被集群其他节点发现(默认 True)
+        instance: Object to mount (any Python instance)
+        name: Pulsing name, other nodes resolve via this name
+        public: Whether discoverable by other cluster nodes (default True)
 
     Example::
 
@@ -1631,7 +1631,7 @@ async def greet(self, msg):
     """
     from . import _global_system
 
-    # 自动初始化 Pulsing
+    # Auto-initialize Pulsing
     if _global_system is None:
         _auto_init_pulsing()
 
@@ -1639,7 +1639,7 @@ async def greet(self, msg):
 
     if system is None:
         raise RuntimeError(
-            "Pulsing 初始化失败。请确保已调用 pul.init() 或在 Ray 环境中运行。"
+            "Pulsing initialization failed. Please call pul.init() or run in Ray environment."
         )
 
     actor_name = name if "/" in name else f"actors/{name}"
@@ -1655,10 +1655,10 @@ async def _do_mount():
 
 
 def unmount(name: str) -> None:
-    """从 Pulsing 网络卸载一个已挂载的 actor。
+    """Unmount a previously mounted actor from the Pulsing network.
 
     Args:
-        name: 挂载时使用的名称
+        name: Name used during mounting
     """
     from . import _global_system
 
@@ -1674,7 +1674,7 @@ async def _do_unmount():
 
 
 def _auto_init_pulsing():
-    """自动检测环境并初始化 Pulsing。"""
+    """Auto-detect environment and initialize Pulsing."""
     try:
         import ray
 
@@ -1687,16 +1687,16 @@ def _auto_init_pulsing():
         pass
 
     raise RuntimeError(
-        "Pulsing 未初始化。请先调用 await pul.init() 或确保在 Ray 环境中运行。"
+        "Pulsing not initialized. Please call await pul.init() or run in Ray environment."
     )
 
 
 def _run_sync_on_pulsing_loop(coro):
-    """在 Pulsing 的后台事件循环上同步执行协程。"""
+    """Execute coroutine synchronously on Pulsing's background event loop."""
     import asyncio
     import concurrent.futures
 
-    # 尝试使用 pulsing.integrations.ray 的后台 loop(Ray 环境)
+    # Try to use pulsing.integrations.ray's background loop (Ray environment)
     try:
         from pulsing.integrations.ray import _loop
 
@@ -1706,7 +1706,7 @@ def _run_sync_on_pulsing_loop(coro):
     except ImportError:
         pass
 
-    # 非 Ray 环境:尝试在当前线程创建新 loop
+    # Non-Ray environment: try to create new loop in current thread
     try:
         loop = asyncio.get_running_loop()
     except RuntimeError:
@@ -1715,7 +1715,7 @@ def _run_sync_on_pulsing_loop(coro):
     if loop is None:
         return asyncio.run(coro)
 
-    # 已有 running loop(比如 async context),在新线程运行
+    # Already have running loop (e.g., async context), run in new thread
     with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
         return pool.submit(asyncio.run, coro).result(timeout=30)
 
diff --git a/python/pulsing/examples/__init__.py b/python/pulsing/examples/__init__.py
index 662418015..6f4cf8037 100644
--- a/python/pulsing/examples/__init__.py
+++ b/python/pulsing/examples/__init__.py
@@ -1,21 +1,21 @@
 """
-pulsing.examples — Pulsing 内置示例集
+pulsing.examples — Pulsing built-in examples collection
 
-每个子模块都是一个可独立运行的示例,同时也可被测试导入复用。
+Each submodule is a standalone runnable example, also importable for testing.
 """
 
 import importlib
 import inspect
 from pathlib import Path
 
-# 注册所有 example:模块名 → 一句话摘要
+# Register all examples: module name -> one-line summary
 _EXAMPLES = {
-    "counting_game": "Pulsing + Ray 分布式报数游戏",
+    "counting_game": "Pulsing + Ray distributed counting game",
 }
 
 
 def list_examples():
-    """返回 [(name, summary, module_path)] 列表"""
+    """Return [(name, summary, module_path)] list"""
     result = []
     examples_dir = Path(__file__).parent
     for name, summary in _EXAMPLES.items():
@@ -25,7 +25,7 @@ def list_examples():
 
 
 def get_example_detail(name):
-    """返回 (summary, docstring, filepath),找不到则返回 None"""
+    """Return (summary, docstring, filepath), or None if not found"""
     if name not in _EXAMPLES:
         return None
     mod = importlib.import_module(f"pulsing.examples.{name}")
diff --git a/python/pulsing/examples/counting_game.py b/python/pulsing/examples/counting_game.py
index b4bea4cbb..2a33ea138 100644
--- a/python/pulsing/examples/counting_game.py
+++ b/python/pulsing/examples/counting_game.py
@@ -1,9 +1,9 @@
-"""Counting Game — Pulsing 分布式报数游戏
+"""Counting Game — Pulsing distributed counting game
 
-20 个节点依次报数并广播,演示 Pulsing 的 actor 消息通信能力。
-Ray 仅用于启动多进程,报数逻辑完全由 Pulsing actor 完成。
+20 nodes count in sequence and broadcast, demonstrating Pulsing's actor messaging capability.
+Ray is only used for multiprocessing; counting logic is entirely handled by Pulsing actors.
 
-运行:
+Usage:
     python -m pulsing.examples.counting_game
     python -m pulsing.examples.counting_game --num-workers 10
 """
@@ -16,28 +16,28 @@
 import pulsing as pul
 
 
-# ── 报数 Actor ───────────────────────────────────────────
+# ── Counting Actor ───────────────────────────────────────────
 
 
 @ray.remote
 class Counter:
-    """每个节点持有名字、有序节点列表、报数日志。"""
+    """Each node holds its name, ordered peer list, and counting log."""
 
     def __init__(self, name, peers):
         self.name = name
         self.peers = sorted(peers)
         self.log = []
-        pul.mount(self, name=name)  # 一行接入 Pulsing 网络
+        pul.mount(self, name=name)  # One line to join Pulsing network
 
     async def yield_number(self):
-        """报数:广播自己的编号给所有节点"""
+        """Yield number: broadcast own number to all nodes"""
         num = self.peers.index(self.name) + 1
         for peer in self.peers:
             proxy = (await pul.resolve(peer, timeout=30)).as_type(Counter)
             await proxy.on_number(num, self.name)
 
     async def on_number(self, num, from_who):
-        """收到报数:记录,前序节点报完则接力"""
+        """Receive number: log it, relay if previous node finished"""
         self.log.append({"number": num, "from": from_who})
         idx = self.peers.index(self.name)
         if idx > 0 and from_who == self.peers[idx - 1]:
@@ -50,51 +50,55 @@ def get_log(self):
         return list(self.log)
 
 
-# ── 运行 ─────────────────────────────────────────────────
+# ── Run ─────────────────────────────────────────────────
 
 
 def run(num_workers=20):
-    """运行报数游戏(需要 Ray 已初始化)。返回各节点日志,失败抛异常。"""
+    """Run counting game (requires Ray initialized). Returns logs from all nodes, raises on failure."""
     names = [f"node_{i:02d}" for i in range(num_workers)]
     t0 = time.time()
 
-    # 1) 创建 Ray actor(__init__ 中自动 pul.mount 接入 Pulsing)
-    print(f"[counting_game] 启动 {num_workers} 个节点 ...")
+    # 1) Create Ray actors (auto pul.mount in __init__ to join Pulsing)
+    print(f"[counting_game] Starting {num_workers} nodes ...")
     actors = [Counter.remote(name, names) for name in names]
     pids = ray.get([a.get_pid.remote() for a in actors])
-    assert len(set(pids)) == num_workers, "worker 进程数不足"
-    print(f"[counting_game] {num_workers} 节点就绪 ({time.time()-t0:.1f}s)")
+    assert len(set(pids)) == num_workers, "Not enough worker processes"
+    print(f"[counting_game] {num_workers} nodes ready ({time.time()-t0:.1f}s)")
 
-    # 2) node_00 报数 → 自动接力至 node_19
-    print("[counting_game] node_00 开始报数 ...")
+    # 2) node_00 yields -> auto relays to node_19
+    print("[counting_game] node_00 starting count ...")
     ray.get(actors[0].yield_number.remote())
 
-    # 3) 等待所有节点收齐日志
+    # 3) Wait for all nodes to collect complete logs
     deadline = time.time() + 30
     while time.time() < deadline:
         logs = ray.get([a.get_log.remote() for a in actors])
         done = sum(1 for lg in logs if len(lg) == num_workers)
-        print(f"\r[counting_game] 收集日志 {done}/{num_workers}", end="", flush=True)
+        print(
+            f"\r[counting_game] Collecting logs {done}/{num_workers}",
+            end="",
+            flush=True,
+        )
         if done == num_workers:
             break
         time.sleep(0.5)
     else:
-        raise TimeoutError("报数超时")
+        raise TimeoutError("Counting timeout")
     print()
 
-    # 4) 验证:每条日志的 from 应与报数序号对应
+    # 4) Verify: each log entry's 'from' should match the number
     for entries in logs:
         for e in entries:
             assert e["from"] == f"node_{e['number']-1:02d}"
 
-    # 5) 打印结果
+    # 5) Print results
     order = " → ".join(f"{i+1}:{names[i]}" for i in range(min(5, num_workers)))
     if num_workers > 5:
         order += f" → ... → {num_workers}:{names[-1]}"
     elapsed = time.time() - t0
-    print(f"[counting_game] 报数顺序: {order}")
+    print(f"[counting_game] Counting order: {order}")
     print(
-        f"[counting_game] 通过! {num_workers}x{num_workers}={num_workers**2} 条消息, {elapsed:.1f}s"
+        f"[counting_game] Passed! {num_workers}x{num_workers}={num_workers**2} messages, {elapsed:.1f}s"
     )
     pul.cleanup_ray()
     return logs
@@ -106,7 +110,7 @@ def run(num_workers=20):
 def main():
     import argparse
 
-    p = argparse.ArgumentParser(description="Pulsing 分布式报数游戏")
+    p = argparse.ArgumentParser(description="Pulsing distributed counting game")
     p.add_argument("--num-workers", type=int, default=20)
     args = p.parse_args()
 
diff --git a/python/pulsing/integrations/ray.py b/python/pulsing/integrations/ray.py
index 9ded1d0b9..4978ea131 100644
--- a/python/pulsing/integrations/ray.py
+++ b/python/pulsing/integrations/ray.py
@@ -1,15 +1,15 @@
 """
-pulsing.ray - 在 Ray 集群中初始化 Pulsing
+pulsing.ray - Initialize Pulsing in Ray cluster
 
-每个 Ray worker 进程调用 init_in_ray() 即可启动 Pulsing 并自动加入集群。
-通过 Ray 的 internal KV store 协调 seed 节点发现。
+Each Ray worker process can call init_in_ray() to start Pulsing and auto-join the cluster.
+Uses Ray's internal KV store to coordinate seed node discovery.
 
-推荐用法:
+Recommended usage:
     import ray
     from pulsing.integrations.ray import init_in_ray
 
     ray.init(runtime_env={"worker_process_setup_hook": init_in_ray})
-    init_in_ray()  # driver 进程也需要初始化
+    init_in_ray()  # driver process also needs initialization
 """
 
 try:
@@ -29,23 +29,23 @@
 
 _SEED_KEY = "pulsing:seed_addr"
 
-# 后台事件循环(供 sync init 使用)
+# Background event loop (for sync init)
 _loop = None
 _thread = None
 
 
 def _get_node_ip():
-    """获取当前 Ray 节点 IP"""
+    """Get current Ray node IP"""
     ctx = ray.get_runtime_context()
     node_id = ctx.get_node_id()
     for node in ray.nodes():
         if node["NodeID"] == node_id and node["Alive"]:
             return node["NodeManagerAddress"]
-    raise RuntimeError("无法获取当前 Ray 节点 IP")
+    raise RuntimeError("Cannot get current Ray node IP")
 
 
 def _start_background_loop():
-    """启动后台事件循环线程"""
+    """Start background event loop thread"""
     global _loop, _thread
     if _thread is not None:
         return
@@ -66,7 +66,7 @@ def _run():
 
 
 def _run_sync(coro):
-    """在后台事件循环中同步执行协程"""
+    """Execute coroutine synchronously in background event loop"""
     fut = asyncio.run_coroutine_threadsafe(coro, _loop)
     return fut.result(timeout=30)
 
@@ -84,60 +84,60 @@ async def _do_shutdown():
 
 
 def _get_seed():
-    """从 Ray KV store 获取 seed 地址"""
+    """Get seed address from Ray KV store"""
     data = _internal_kv_get(_SEED_KEY)
     return data.decode() if data else None
 
 
 def _try_set_seed(addr):
-    """原子写入 seed 地址,返回 True 表示写入成功(我是 seed)。
+    """Atomically write seed address, returns True if write succeeded (I am seed).
 
-    _internal_kv_put(overwrite=False) 返回值语义:
-        False = key 不存在,已写入(成功)
-        True  = key 已存在,未覆盖(失败)
+    _internal_kv_put(overwrite=False) return value semantics:
+        False = key doesn't exist, written (success)
+        True  = key already exists, not overwritten (failure)
     """
     already_exists = _internal_kv_put(_SEED_KEY, addr.encode(), overwrite=False)
     return not already_exists
 
 
 def init_in_ray():
-    """在当前进程初始化 Pulsing 并加入集群。
+    """Initialize Pulsing in current process and join cluster.
 
-    可直接调用,也可作为 Ray worker_process_setup_hook:
+    Can be called directly or used as Ray worker_process_setup_hook:
 
         ray.init(runtime_env={"worker_process_setup_hook": init_in_ray})
-        init_in_ray()  # driver 也需要
+        init_in_ray()  # driver also needs this
     """
     if not ray.is_initialized():
-        raise RuntimeError("Ray 未初始化,请先调用 ray.init()")
+        raise RuntimeError("Ray not initialized, please call ray.init() first")
 
     node_ip = _get_node_ip()
     _start_background_loop()
 
-    # 已有 seed → 直接加入
+    # Seed exists -> join directly
     seed_addr = _get_seed()
     if seed_addr is not None:
         return _run_sync(_do_init(f"{node_ip}:0", seeds=[seed_addr]))
 
-    # 启动为潜在 seed
+    # Start as potential seed
     system = _run_sync(_do_init(f"{node_ip}:0"))
     my_addr = str(system.addr)
 
     if _try_set_seed(my_addr):
-        return system  # 写入成功,我是 seed
+        return system  # Write succeeded, I am seed
 
-    # 竞争失败(极罕见),重新加入实际 seed
+    # Race lost (rare), re-join with actual seed
     _run_sync(_do_shutdown())
     return _run_sync(_do_init(f"{node_ip}:0", seeds=[_get_seed()]))
 
 
 async def async_init_in_ray():
-    """在当前进程初始化 Pulsing 并加入集群(异步版本)。
+    """Initialize Pulsing in current process and join cluster (async version).
 
-    适用于 async Ray actor。
+    Suitable for async Ray actors.
     """
     if not ray.is_initialized():
-        raise RuntimeError("Ray 未初始化,请先调用 ray.init()")
+        raise RuntimeError("Ray not initialized, please call ray.init() first")
 
     node_ip = _get_node_ip()
 
@@ -156,7 +156,7 @@ async def async_init_in_ray():
 
 
 def cleanup():
-    """清理 Pulsing 在 Ray KV store 中的状态"""
+    """Clean up Pulsing state in Ray KV store"""
     _internal_kv_del(_SEED_KEY)
 
 

From 880b5cf9f62e4fe1654121790625b7687d659b21 Mon Sep 17 00:00:00 2001
From: Reiase 
Date: Fri, 20 Feb 2026 22:08:24 +0800
Subject: [PATCH 11/15] Add pytest-cov to development dependencies and enhance
 test documentation

- Included `pytest-cov` in the development dependencies for improved test coverage reporting.
- Updated comments and documentation in `test_receive_error_behavior.py` for clarity and consistency.
- Added new test files for agent runtime lifecycle, CLI commands, and streaming backends, enhancing overall test coverage and organization.
- Improved existing tests to ensure better readability and maintainability, aligning with recent refactoring efforts.
---
 pyproject.toml                                |   1 +
 tests/python/agent/__init__.py                |   0
 .../test_agent_runtime_lifecycle.py           |   0
 .../python/apis/actor/test_actor_behavior.py  |   2 +-
 tests/python/cli/__init__.py                  |   0
 tests/python/{ => cli}/test_cli_actor.py      |   0
 tests/python/{ => cli}/test_cli_inspect.py    |   0
 tests/python/core/test_helpers.py             | 345 ++++++++++
 tests/python/core/test_remote_edge_cases.py   | 608 ++++++++++++++++++
 tests/python/integrations/__init__.py         |   0
 .../test_ray_compat_running_loop.py           |   0
 .../{ => integrations}/test_ray_init.py       |   6 +-
 tests/python/streaming/__init__.py            |   0
 tests/python/{ => streaming}/test_queue.py    |   0
 .../{ => streaming}/test_queue_backends.py    |   0
 .../{ => streaming}/test_queue_topic_chaos.py |  50 +-
 tests/python/{ => streaming}/test_topic.py    |   0
 tests/python/test_receive_error_behavior.py   |  20 +-
 18 files changed, 993 insertions(+), 39 deletions(-)
 create mode 100644 tests/python/agent/__init__.py
 rename tests/python/{ => agent}/test_agent_runtime_lifecycle.py (100%)
 create mode 100644 tests/python/cli/__init__.py
 rename tests/python/{ => cli}/test_cli_actor.py (100%)
 rename tests/python/{ => cli}/test_cli_inspect.py (100%)
 create mode 100644 tests/python/core/test_helpers.py
 create mode 100644 tests/python/core/test_remote_edge_cases.py
 create mode 100644 tests/python/integrations/__init__.py
 rename tests/python/{ => integrations}/test_ray_compat_running_loop.py (100%)
 rename tests/python/{ => integrations}/test_ray_init.py (97%)
 create mode 100644 tests/python/streaming/__init__.py
 rename tests/python/{ => streaming}/test_queue.py (100%)
 rename tests/python/{ => streaming}/test_queue_backends.py (100%)
 rename tests/python/{ => streaming}/test_queue_topic_chaos.py (90%)
 rename tests/python/{ => streaming}/test_topic.py (100%)

diff --git a/pyproject.toml b/pyproject.toml
index 6bbc0cb41..c20cbf98b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,6 +45,7 @@ storage = [
 dev = [
     "pytest>=8.0",
     "pytest-asyncio>=0.23",
+    "pytest-cov>=5.0",
     "ruff>=0.8",
     "maturin>=1.0",
     "pylance",
diff --git a/tests/python/agent/__init__.py b/tests/python/agent/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/python/test_agent_runtime_lifecycle.py b/tests/python/agent/test_agent_runtime_lifecycle.py
similarity index 100%
rename from tests/python/test_agent_runtime_lifecycle.py
rename to tests/python/agent/test_agent_runtime_lifecycle.py
diff --git a/tests/python/apis/actor/test_actor_behavior.py b/tests/python/apis/actor/test_actor_behavior.py
index 5adeb95cb..b9f2ea262 100644
--- a/tests/python/apis/actor/test_actor_behavior.py
+++ b/tests/python/apis/actor/test_actor_behavior.py
@@ -1,5 +1,5 @@
 """
-Tests for Actor Behavior as defined in llms.binding.md (Actor 行为 section).
+Tests for Actor Behavior as defined in llms.binding.md (Actor Behavior section).
 
 Tests cover:
 1. Base Actor with receive method (sync/async)
diff --git a/tests/python/cli/__init__.py b/tests/python/cli/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/python/test_cli_actor.py b/tests/python/cli/test_cli_actor.py
similarity index 100%
rename from tests/python/test_cli_actor.py
rename to tests/python/cli/test_cli_actor.py
diff --git a/tests/python/test_cli_inspect.py b/tests/python/cli/test_cli_inspect.py
similarity index 100%
rename from tests/python/test_cli_inspect.py
rename to tests/python/cli/test_cli_inspect.py
diff --git a/tests/python/core/test_helpers.py b/tests/python/core/test_helpers.py
new file mode 100644
index 000000000..91d5780c0
--- /dev/null
+++ b/tests/python/core/test_helpers.py
@@ -0,0 +1,345 @@
+"""
+Tests for core/helpers.py and streaming utilities.
+
+Focus on:
+- Protocol unwrapping functions
+- Response handling
+- Stream message handling
+"""
+
+import asyncio
+
+import pytest
+
+import pulsing as pul
+from pulsing.core import remote, init, shutdown
+
+
+# ============================================================================
+# Protocol unwrap tests
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_unwrap_call_v1():
+    """Test unwrapping v1 protocol call."""
+    from pulsing.core.remote import _wrap_call_v1, _unwrap_call
+
+    msg = _wrap_call_v1("my_method", (1, 2, 3), {"key": "val"}, False)
+    method, args, kwargs, is_async = _unwrap_call(msg)
+
+    assert method == "my_method"
+    assert args == (1, 2, 3)
+    assert kwargs == {"key": "val"}
+    assert is_async is False
+
+
+@pytest.mark.asyncio
+async def test_unwrap_call_v2():
+    """Test unwrapping v2 protocol call."""
+    from pulsing.core.remote import _wrap_call_v2, _unwrap_call
+
+    msg = _wrap_call_v2("async_method", (), {"param": 42}, True)
+    method, args, kwargs, is_async = _unwrap_call(msg)
+
+    assert method == "async_method"
+    assert args == ()
+    assert kwargs == {"param": 42}
+    assert is_async is True
+
+
+@pytest.mark.asyncio
+async def test_unwrap_response_v1():
+    """Test unwrapping v1 protocol response."""
+    from pulsing.core.remote import (
+        _wrap_response_v1,
+        _unwrap_response,
+        _wrap_call_v1,
+    )
+
+    # Success response
+    resp = _wrap_response_v1(result={"data": "success"})
+    result, error = _unwrap_response(resp)
+    assert error is None
+    assert result == {"data": "success"}
+
+    # Error response
+    err = _wrap_response_v1(error="something failed")
+    result, error = _unwrap_response(err)
+    assert result is None
+    assert "something failed" in error
+
+
+@pytest.mark.asyncio
+async def test_unwrap_response_v2():
+    """Test unwrapping v2 protocol response."""
+    from pulsing.core.remote import (
+        _wrap_response_v2,
+        _unwrap_response,
+    )
+
+    # Success response
+    resp = _wrap_response_v2(result=[1, 2, 3])
+    result, error = _unwrap_response(resp)
+    assert error is None
+    assert result == [1, 2, 3]
+
+    # Error response
+    err = _wrap_response_v2(error="error message")
+    result, error = _unwrap_response(err)
+    assert result is None
+    assert "error message" in error
+
+
+# ============================================================================
+# Single value iterator
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_single_value_iterator():
+    """Test _SingleValueIterator yields one value then stops."""
+    from pulsing.core.remote import _SingleValueIterator
+
+    it = _SingleValueIterator("single_value")
+    results = []
+    async for v in it:
+        results.append(v)
+    assert results == ["single_value"]
+
+
+# ============================================================================
+# Delayed call proxy
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_delayed_call_proxy_cancel():
+    """Test that DelayedCallProxy tasks can be cancelled."""
+    from pulsing.core.remote import _DelayedCallProxy
+
+    await init()
+    try:
+
+        @remote
+        class TestActor:
+            def ping(self):
+                return "pong"
+
+        actor = await TestActor.spawn()
+        # Get the ref through the ref property
+        ref = actor.ref
+
+        proxy = _DelayedCallProxy(ref, 0.1)
+        # This returns a task that can be cancelled
+        task = proxy.ping()
+        task.cancel()
+
+        # Wait a bit
+        await asyncio.sleep(0.2)
+
+    finally:
+        await shutdown()
+
+
+# ============================================================================
+# Exception consuming
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_consume_task_exception():
+    """Test _consume_task_exception handles various exception types."""
+    from pulsing.core.remote import _consume_task_exception
+
+    async def raise_cancelled():
+        raise asyncio.CancelledError()
+
+    async def raise_runtime():
+        raise RuntimeError("stream closed")
+
+    async def raise_value():
+        raise ValueError("bad value")
+
+    # CancelledError should be silently consumed
+    task = asyncio.create_task(raise_cancelled())
+    try:
+        await task
+    except asyncio.CancelledError:
+        pass
+    _consume_task_exception(task)
+
+    # RuntimeError should be logged but not raise
+    task = asyncio.create_task(raise_runtime())
+    try:
+        await task
+    except RuntimeError:
+        pass
+    _consume_task_exception(task)
+
+    # ValueError should be logged
+    task = asyncio.create_task(raise_value())
+    try:
+        await task
+    except ValueError:
+        pass
+    _consume_task_exception(task)
+
+
+# ============================================================================
+# Protocol detection edge cases
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_detect_protocol_unknown_format():
+    """Test protocol detection with unknown format defaults to v1."""
+    from pulsing.core.remote import _detect_protocol_version
+
+    # Unknown format
+    assert _detect_protocol_version({}) == 1
+    assert _detect_protocol_version({"some": "data"}) == 1
+
+    # v1 format (has __call__)
+    assert _detect_protocol_version({"__call__": "method"}) == 1
+
+    # v2 format (has __pulsing_proto__)
+    assert _detect_protocol_version({"__pulsing_proto__": "v2"}) == 2
+    assert _detect_protocol_version({"__pulsing_proto__": 2}) == 2
+
+
+# ============================================================================
+# Error path tests
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_actor_error_in_async_generator():
+    """Test error handling in async generator."""
+
+    @remote
+    class FailingAsyncGenActor:
+        async def failing_gen(self, fail_at):
+            for i in range(10):
+                if i == fail_at:
+                    raise RuntimeError(f"Failed at {fail_at}")
+                yield i
+
+    await init()
+    try:
+        actor = await FailingAsyncGenActor.spawn()
+        results = []
+        with pytest.raises(Exception):
+            async for v in actor.failing_gen(3):
+                results.append(v)
+        # May or may not have results depending on when exception raised
+    finally:
+        await shutdown()
+
+
+@pytest.mark.asyncio
+async def test_sync_generator_error():
+    """Test error in sync generator."""
+
+    @remote
+    class FailingSyncGenActor:
+        def failing_gen(self, fail_at):
+            for i in range(10):
+                if i == fail_at:
+                    raise RuntimeError(f"Failed at {fail_at}")
+                yield i
+
+    await init()
+    try:
+        actor = await FailingSyncGenActor.spawn()
+        result = await actor.failing_gen(3)
+        items = []
+        with pytest.raises(Exception):
+            if hasattr(result, "__aiter__"):
+                async for v in result:
+                    items.append(v)
+            elif hasattr(result, "__iter__"):
+                for v in result:
+                    items.append(v)
+    finally:
+        await shutdown()
+
+
+@pytest.mark.asyncio
+async def test_actor_error_in_on_start():
+    """Test error in on_start - actor spawn should still succeed but method calls fail."""
+    # Note: When on_start raises, the actor's mailbox may close
+    # This test verifies the error path is exercised
+
+    @remote
+    class FailingOnStartActor:
+        def __init__(self):
+            self.started = False
+
+        def on_start(self, actor_id):
+            raise ValueError("on_start error")
+
+        def ping(self):
+            return "pong"
+
+    await init()
+    try:
+        # Actor spawn should succeed
+        actor = await FailingOnStartActor.spawn()
+        # The on_start error may cause the actor to stop
+        # This tests the error handling path
+    finally:
+        await shutdown()
+
+
+# ============================================================================
+# Actor lifecycle
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_actor_lifecycle():
+    """Test actor lifecycle - on_start callback."""
+
+    lifecycle_events = []
+
+    @remote
+    class LifecycleActor:
+        def __init__(self, events):
+            self.events = events
+            self.value = 0
+
+        def on_start(self, actor_id):
+            self.events.append(("on_start", str(actor_id)))
+
+        def on_stop(self):
+            self.events.append(("on_stop", None))
+
+        def metadata(self):
+            return {"type": "lifecycle"}
+
+        def increment(self):
+            self.value += 1
+            return self.value
+
+        def get_value(self):
+            return self.value
+
+    await init()
+    try:
+        actor = await LifecycleActor.spawn(lifecycle_events)
+
+        # Ensure actor has started (on_start runs before first message is processed)
+        _ = await actor.get_value()
+        assert any(e[0] == "on_start" for e in lifecycle_events)
+
+        # Use the actor
+        assert await actor.increment() == 1
+        assert await actor.increment() == 2
+
+    finally:
+        await shutdown()
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/tests/python/core/test_remote_edge_cases.py b/tests/python/core/test_remote_edge_cases.py
new file mode 100644
index 000000000..2ff190150
--- /dev/null
+++ b/tests/python/core/test_remote_edge_cases.py
@@ -0,0 +1,608 @@
+"""
+Tests for core/remote.py edge cases and uncovered paths.
+
+Focus areas:
+- _WrappedActor edge cases
+- v1/v2 protocol handling
+- Attribute access
+- Sync generator handling
+- on_start/on_stop callbacks
+- metadata method
+- Error paths
+"""
+
+import asyncio
+
+import pytest
+
+import pulsing as pul
+from pulsing.core import remote, init, shutdown, get_system
+
+
+# ============================================================================
+# Fixtures
+# ============================================================================
+
+
+@pytest.fixture
+async def system():
+    """Create a standalone ActorSystem for testing."""
+    sys = await pul.actor_system()
+    yield sys
+    await sys.shutdown()
+
+
+# ============================================================================
+# _WrappedActor: on_start/on_stop callbacks
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_wrapped_actor_on_start_callback():
+    """Test that on_start is called when actor starts."""
+
+    @remote
+    class ActorWithOnStart:
+        def __init__(self):
+            self.started = False
+            self.actor_id = None
+
+        def on_start(self, actor_id):
+            self.started = True
+            self.actor_id = actor_id
+
+        def is_started(self):
+            return self.started, str(self.actor_id) if self.actor_id else None
+
+    await init()
+    try:
+        actor = await ActorWithOnStart.spawn()
+        started, aid = await actor.is_started()
+        assert started is True
+        assert aid is not None
+    finally:
+        await shutdown()
+
+
+@pytest.mark.asyncio
+async def test_wrapped_actor_on_stop_callback():
+    """Test that on_stop is called when actor stops."""
+    results = []
+
+    @remote
+    class ActorWithOnStop:
+        def __init__(self, results_list):
+            self.results = results_list
+
+        def on_stop(self):
+            self.results.append("stopped")
+
+        def ping(self):
+            return "pong"
+
+    await init()
+    try:
+        actor_name = "on_stop_test_actor"
+        actor = await ActorWithOnStop.spawn(results, name=actor_name)
+        assert await actor.ping() == "pong"
+        # Stop the actor by name - this should trigger on_stop
+        await get_system().stop(actor_name)
+        await asyncio.sleep(0.1)
+        assert "stopped" in results
+    finally:
+        await shutdown()
+
+
+@pytest.mark.asyncio
+async def test_wrapped_actor_no_on_start_method():
+    """Test actor without on_start works normally."""
+
+    @remote
+    class NoOnStartActor:
+        def __init__(self):
+            self.value = 42
+
+        def get_value(self):
+            return self.value
+
+    await init()
+    try:
+        actor = await NoOnStartActor.spawn()
+        assert await actor.get_value() == 42
+    finally:
+        await shutdown()
+
+
+# ============================================================================
+# _WrappedActor: metadata method
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_wrapped_actor_metadata():
+    """Test that metadata method is called and returned."""
+
+    @remote
+    class ActorWithMetadata:
+        def metadata(self):
+            return {"version": "1.0", "type": "test"}
+
+        def ping(self):
+            return "pong"
+
+    await init()
+    try:
+        actor = await ActorWithMetadata.spawn(name="metadata_test")
+        # Metadata should be accessible
+        ref = actor.ref
+        # The metadata is stored during spawn
+        assert await actor.ping() == "pong"
+    finally:
+        await shutdown()
+
+
+@pytest.mark.asyncio
+async def test_wrapped_actor_no_metadata():
+    """Test actor without metadata method returns empty dict."""
+
+    @remote
+    class NoMetadataActor:
+        def ping(self):
+            return "pong"
+
+    await init()
+    try:
+        actor = await NoMetadataActor.spawn()
+        assert await actor.ping() == "pong"
+    finally:
+        await shutdown()
+
+
+# ============================================================================
+# _WrappedActor: attribute access
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_attribute_access():
+    """Test accessing public attributes through protocol."""
+
+    @remote
+    class AttributeActor:
+        def __init__(self):
+            self.counter = 0
+            self.name = "test_actor"
+
+        def increment(self):
+            self.counter += 1
+            return self.counter
+
+    await init()
+    try:
+        actor = await AttributeActor.spawn()
+        # Method call
+        assert await actor.increment() == 1
+        assert await actor.increment() == 2
+    finally:
+        await shutdown()
+
+
+# ============================================================================
+# _WrappedActor: sync generator handling
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_sync_generator_method():
+    """Test sync generator method returns sequence of values."""
+
+    @remote
+    class GeneratorActor:
+        def count_up(self, n):
+            for i in range(n):
+                yield i
+
+    await init()
+    try:
+        actor = await GeneratorActor.spawn()
+        # Sync generator methods need await then iterate
+        result = await actor.count_up(5)
+        items = []
+        if hasattr(result, "__aiter__"):
+            async for item in result:
+                items.append(item)
+        elif hasattr(result, "__iter__"):
+            for item in result:
+                items.append(item)
+        else:
+            items.append(result)
+        assert len(items) >= 1
+    finally:
+        await shutdown()
+
+
+@pytest.mark.asyncio
+async def test_sync_generator_with_exception():
+    """Test sync generator that raises exception."""
+
+    @remote
+    class FailingGeneratorActor:
+        def failing_gen(self, fail_at):
+            for i in range(10):
+                if i == fail_at:
+                    raise ValueError(f"Failed at {fail_at}")
+                yield i
+
+    await init()
+    try:
+        actor = await FailingGeneratorActor.spawn()
+        result = await actor.failing_gen(3)
+        items = []
+        with pytest.raises(Exception):
+            if hasattr(result, "__aiter__"):
+                async for item in result:
+                    items.append(item)
+            elif hasattr(result, "__iter__"):
+                for item in result:
+                    items.append(item)
+    finally:
+        await shutdown()
+
+
+# ============================================================================
+# Protocol v1/v2 handling
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_protocol_v1_call():
+    """Test v1 protocol call format."""
+    from pulsing.core.remote import (
+        _wrap_call_v1,
+        _unwrap_call,
+        _wrap_response_v1,
+        _detect_protocol_version,
+    )
+
+    # v1 call format
+    msg = _wrap_call_v1("test_method", (1, 2), {"key": "value"}, False)
+    assert msg["__call__"] == "test_method"
+    assert msg["args"] == (1, 2)
+    assert msg["kwargs"] == {"key": "value"}
+    assert msg["__async__"] is False
+
+    # v1 response
+    resp = _wrap_response_v1(result="success")
+    assert resp["__result__"] == "success"
+    assert "__error__" not in resp
+
+    # Error response
+    err_resp = _wrap_response_v1(error="failed")
+    assert err_resp["__error__"] == "failed"
+    assert "__result__" not in err_resp
+
+    # Protocol detection
+    assert _detect_protocol_version(msg) == 1
+
+
+@pytest.mark.asyncio
+async def test_protocol_v2_call():
+    """Test v2 protocol call format."""
+    from pulsing.core.remote import (
+        _wrap_call_v2,
+        _wrap_response_v2,
+        _detect_protocol_version,
+    )
+
+    # v2 call format
+    msg = _wrap_call_v2("test_method", (1, 2), {"key": "value"}, True)
+    assert msg["__pulsing_proto__"] == "v2"
+    assert msg["__pulsing__"]["call"] == "test_method"
+    assert msg["__pulsing__"]["async"] is True
+    assert msg["user_data"]["args"] == (1, 2)
+
+    # v2 response
+    resp = _wrap_response_v2(result="success")
+    assert resp["__pulsing_proto__"] == "v2"
+    assert resp["__pulsing__"]["result"] == "success"
+
+    # Error response
+    err_resp = _wrap_response_v2(error="failed")
+    assert err_resp["__pulsing__"]["error"] == "failed"
+
+    # Protocol detection
+    assert _detect_protocol_version(msg) == 2
+
+
+# ============================================================================
+# Invalid method handling
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_call_private_method():
+    """Test that calling private methods (starting with _) returns error."""
+
+    @remote
+    class PrivateMethodActor:
+        def public_method(self):
+            return "public"
+
+        def _private_method(self):
+            return "private"
+
+    await init()
+    try:
+        actor = await PrivateMethodActor.spawn()
+        # Public method should work
+        assert await actor.public_method() == "public"
+        # Private method should be blocked (AttributeError on proxy)
+        with pytest.raises(AttributeError):
+            _ = actor._private_method
+    finally:
+        await shutdown()
+
+
+@pytest.mark.asyncio
+async def test_call_nonexistent_method():
+    """Test that calling non-existent method returns error."""
+
+    @remote
+    class SimpleActor:
+        def existing_method(self):
+            return "exists"
+
+    await init()
+    try:
+        actor = await SimpleActor.spawn()
+        # Existing method works
+        assert await actor.existing_method() == "exists"
+        # Non-existent method raises AttributeError
+        with pytest.raises(AttributeError):
+            _ = actor.nonexistent_method
+    finally:
+        await shutdown()
+
+
+# ============================================================================
+# Message protocol edge cases
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_unknown_message_type():
+    """Test actor behavior with unknown message type."""
+    from pulsing._core import Message
+
+    @remote
+    class MessageHandlingActor:
+        def ping(self):
+            return "pong"
+
+    await init()
+    try:
+        actor = await MessageHandlingActor.spawn()
+        # Normal call works
+        assert await actor.ping() == "pong"
+    finally:
+        await shutdown()
+
+
+# ============================================================================
+# Async generator edge cases
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_async_generator_immediate_break():
+    """Test async generator when caller breaks immediately."""
+
+    @remote
+    class AsyncGenActor:
+        async def stream_values(self, n):
+            for i in range(n):
+                await asyncio.sleep(0.01)
+                yield i
+
+    await init()
+    try:
+        actor = await AsyncGenActor.spawn()
+        # Break on first value
+        count = 0
+        async for value in actor.stream_values(10):
+            count += 1
+            if count >= 2:
+                break
+        assert count == 2
+    finally:
+        await shutdown()
+
+
+@pytest.mark.asyncio
+async def test_async_generator_empty():
+    """Test async generator that yields nothing."""
+
+    @remote
+    class EmptyGenActor:
+        async def empty_stream(self):
+            return
+            yield  # Never reached
+
+    await init()
+    try:
+        actor = await EmptyGenActor.spawn()
+        results = []
+        async for value in actor.empty_stream():
+            results.append(value)
+        assert results == []
+    finally:
+        await shutdown()
+
+
+# ============================================================================
+# Complex scenarios
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_actor_with_both_sync_and_async_methods():
+    """Test actor mixing sync methods, async methods, and generators."""
+
+    @remote
+    class MixedActor:
+        def __init__(self):
+            self.sync_count = 0
+            self.async_count = 0
+
+        def sync_method(self, x):
+            self.sync_count += 1
+            return x * 2
+
+        async def async_method(self, x):
+            await asyncio.sleep(0.01)
+            self.async_count += 1
+            return x * 3
+
+        async def async_gen(self, n):
+            for i in range(n):
+                await asyncio.sleep(0.01)
+                yield i * 10
+
+        def get_counts(self):
+            return self.sync_count, self.async_count
+
+    await init()
+    try:
+        actor = await MixedActor.spawn()
+
+        # Sync method
+        assert await actor.sync_method(5) == 10
+
+        # Async method
+        assert await actor.async_method(5) == 15
+
+        # Async generator
+        async_gen_results = [v async for v in actor.async_gen(3)]
+        assert async_gen_results == [0, 10, 20]
+
+        # Check counts
+        sc, ac = await actor.get_counts()
+        assert sc == 1
+        assert ac == 1
+    finally:
+        await shutdown()
+
+
+@pytest.mark.asyncio
+async def test_actor_exception_preserves_state():
+    """Test that actor state is preserved after exception."""
+
+    @remote
+    class StatefulActor:
+        def __init__(self):
+            self.value = 0
+
+        def increment(self):
+            self.value += 1
+            return self.value
+
+        def fail(self):
+            raise ValueError("Intentional failure")
+
+        def get_value(self):
+            return self.value
+
+    await init()
+    try:
+        actor = await StatefulActor.spawn()
+
+        # First increment
+        assert await actor.increment() == 1
+
+        # This fails but shouldn't corrupt state
+        with pytest.raises(Exception):
+            await actor.fail()
+
+        # State should still be intact
+        assert await actor.get_value() == 1
+
+        # Can continue incrementing
+        assert await actor.increment() == 2
+    finally:
+        await shutdown()
+
+
+# ============================================================================
+# Delayed call advanced scenarios
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_delayed_call_with_args():
+    """Test delayed call with arguments."""
+
+    @remote
+    class DelayedArgsActor:
+        def __init__(self):
+            self.messages = []
+
+        def schedule_message(self, delay, msg):
+            self.delayed(delay).record(msg)
+
+        def record(self, msg):
+            self.messages.append(msg)
+
+        def get_messages(self):
+            return list(self.messages)
+
+    await init()
+    try:
+        actor = await DelayedArgsActor.spawn()
+        await actor.schedule_message(0.05, "hello")
+
+        assert await actor.get_messages() == []
+        await asyncio.sleep(0.1)
+        assert await actor.get_messages() == ["hello"]
+    finally:
+        await shutdown()
+
+
+@pytest.mark.asyncio
+async def test_multiple_delayed_calls():
+    """Test multiple delayed calls execute in order."""
+
+    @remote
+    class MultiDelayedActor:
+        def __init__(self):
+            self.events = []
+
+        def schedule_all(self):
+            self.delayed(0.02).record("second")
+            self.delayed(0.01).record("first")
+            self.events.append("immediate")
+
+        def record(self, msg):
+            self.events.append(msg)
+
+        def get_events(self):
+            return list(self.events)
+
+    await init()
+    try:
+        actor = await MultiDelayedActor.spawn()
+        await actor.schedule_all()
+
+        # Immediate should be recorded
+        assert "immediate" in await actor.get_events()
+
+        await asyncio.sleep(0.05)
+
+        events = await actor.get_events()
+        assert "first" in events
+        assert "second" in events
+    finally:
+        await shutdown()
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/tests/python/integrations/__init__.py b/tests/python/integrations/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/python/test_ray_compat_running_loop.py b/tests/python/integrations/test_ray_compat_running_loop.py
similarity index 100%
rename from tests/python/test_ray_compat_running_loop.py
rename to tests/python/integrations/test_ray_compat_running_loop.py
diff --git a/tests/python/test_ray_init.py b/tests/python/integrations/test_ray_init.py
similarity index 97%
rename from tests/python/test_ray_init.py
rename to tests/python/integrations/test_ray_init.py
index 44e588375..cbf1d5fad 100644
--- a/tests/python/test_ray_init.py
+++ b/tests/python/integrations/test_ray_init.py
@@ -108,7 +108,7 @@ def test_init_raises_without_ray():
     """init_in_ray() raises when Ray is not initialized."""
     from pulsing.integrations.ray import init_in_ray
 
-    with pytest.raises(RuntimeError, match="Ray 未初始化"):
+    with pytest.raises(RuntimeError, match="Ray not initialized"):
         init_in_ray()
 
 
@@ -116,7 +116,7 @@ async def test_async_init_raises_without_ray():
     """async_init_in_ray() raises when Ray is not initialized."""
     from pulsing.integrations.ray import async_init_in_ray
 
-    with pytest.raises(RuntimeError, match="Ray 未初始化"):
+    with pytest.raises(RuntimeError, match="Ray not initialized"):
         await async_init_in_ray()
 
 
@@ -315,7 +315,7 @@ async def test_async_init_stores_seed(ray_env):
 
 
 def test_counting_game(ray_env):
-    """20 个进程通过 Pulsing actor 玩报数游戏(复用 pulsing.examples)。"""
+    """20 processes play counting game via Pulsing actor (reuses pulsing.examples)."""
     from pulsing.examples.counting_game import run
 
     run(num_workers=NUM_WORKERS)
diff --git a/tests/python/streaming/__init__.py b/tests/python/streaming/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/python/test_queue.py b/tests/python/streaming/test_queue.py
similarity index 100%
rename from tests/python/test_queue.py
rename to tests/python/streaming/test_queue.py
diff --git a/tests/python/test_queue_backends.py b/tests/python/streaming/test_queue_backends.py
similarity index 100%
rename from tests/python/test_queue_backends.py
rename to tests/python/streaming/test_queue_backends.py
diff --git a/tests/python/test_queue_topic_chaos.py b/tests/python/streaming/test_queue_topic_chaos.py
similarity index 90%
rename from tests/python/test_queue_topic_chaos.py
rename to tests/python/streaming/test_queue_topic_chaos.py
index 845c5c6f7..8e4f75495 100644
--- a/tests/python/test_queue_topic_chaos.py
+++ b/tests/python/streaming/test_queue_topic_chaos.py
@@ -1,12 +1,12 @@
 """
-Queue & Topic 混沌测试
+Queue & Topic Chaos Testing
 
-在随机延迟、高并发、动态加入/退出、随机参数等混沌场景下验证:
-- Queue: 数据不丢、不重(按 rank/world_size 分桶)、无死锁
-- Topic: 订阅者动态变化时发布不崩溃、交付语义可区分、慢/失败订阅者被踢或超时
-- 与 StorageManager 共享资源时无阻塞、无竞态
+Validates under chaotic scenarios with random delays, high concurrency, dynamic join/leave, random parameters:
+- Queue: no data loss, no duplication (bucketed by rank/world_size), no deadlock
+- Topic: no crash during publish when subscribers dynamically change, distinguishable delivery semantics, slow/failed subscribers kicked or timeout
+- No blocking or race conditions when sharing resources with StorageManager
 
-运行: pytest tests/python/test_queue_topic_chaos.py -v -s
+Run: pytest tests/python/test_queue_topic_chaos.py -v -s
 """
 
 from __future__ import annotations
@@ -30,7 +30,7 @@
 
 
 # =============================================================================
-# Fixtures & 随机负载工具
+# Fixtures & Random Load Utilities
 # =============================================================================
 
 
@@ -49,21 +49,21 @@ def temp_storage_path():
 
 
 def _random_sleep(max_ms: int = 20):
-    """短随机延迟,模拟混沌."""
+    """Short random delay to simulate chaos."""
     return asyncio.sleep(random.uniform(0, max_ms) / 1000.0)
 
 
 def _chaos_sleep(
     min_ms: int = 0, max_ms: int = 50, occasional_long_ms: int | None = 120
 ):
-    """随机延迟:常规 min~max_ms,小概率长延迟(模拟抖动)。"""
+    """Random delay: normally min~max_ms, small chance of long delay (simulating jitter)."""
     if occasional_long_ms and random.random() < 0.08:
         return asyncio.sleep(random.uniform(max_ms, occasional_long_ms) / 1000.0)
     return asyncio.sleep(random.uniform(min_ms, max_ms) / 1000.0)
 
 
 # =============================================================================
-# Queue 混沌
+# Queue Chaos
 # =============================================================================
 
 
@@ -71,7 +71,7 @@ def _chaos_sleep(
 async def test_queue_chaos_concurrent_producer_consumer(
     actor_system, temp_storage_path
 ):
-    """混沌:多生产者 + 多消费者(rank/world_size),随机 put/get/延迟,验证不丢不重."""
+    """Chaos: multiple producers + multiple consumers (rank/world_size), random put/get/delay, verify no loss or duplication."""
     random.seed(42)
     topic = "chaos_q_concurrent"
     num_buckets = random.choice([3, 4, 5, 6])
@@ -144,7 +144,7 @@ async def consumer(rank: int):
 async def test_queue_chaos_many_buckets_parallel_handles(
     actor_system, temp_storage_path
 ):
-    """混沌:多桶、多 writer 并行写,多 reader 并行读;用单 reader 收齐后校验总数(多 reader 会瓜分数据)."""
+    """Chaos: many buckets, multiple writers in parallel, multiple readers in parallel; use single reader to collect all and verify total (multiple readers would split data)."""
     random.seed(43)
     topic = "chaos_q_many_buckets"
     num_buckets = random.randint(4, 12)
@@ -179,7 +179,7 @@ async def write_batch(wid: int):
 
     await asyncio.gather(*[write_batch(w) for w in range(num_writers)])
 
-    # 单 reader 读全量,避免多 reader 瓜分导致并集不足 expected_count
+    # Single reader reads full data, avoiding multiple readers splitting data resulting in insufficient union
     r = await read_queue(
         actor_system,
         topic=topic,
@@ -203,7 +203,7 @@ async def write_batch(wid: int):
 
 @pytest.mark.asyncio
 async def test_queue_chaos_reader_reset_and_reread(actor_system, temp_storage_path):
-    """混沌:同一 reader 多次 reset + get,与间歇写入交错,随机 limit/延迟."""
+    """Chaos: same reader multiple reset + get, interleaved with intermittent writes, random limit/delay."""
     random.seed(44)
     topic = "chaos_q_reset"
     num_buckets = random.choice([2, 3, 4])
@@ -243,13 +243,13 @@ async def test_queue_chaos_reader_reset_and_reread(actor_system, temp_storage_pa
 
 
 # =============================================================================
-# Topic 混沌
+# Topic Chaos
 # =============================================================================
 
 
 @pytest.mark.asyncio
 async def test_topic_chaos_subscribers_join_leave_during_publish(actor_system):
-    """混沌:发布过程中订阅者动态加入/退出,随机阶段数/每阶段消息数/模式/延迟."""
+    """Chaos: subscribers dynamically join/leave during publishing, random phases/messages per phase/mode/delay."""
     random.seed(45)
     topic_name = "chaos_t_join_leave"
     writer = await write_topic(actor_system, topic_name)
@@ -304,7 +304,7 @@ async def on_msg(msg):
 
 @pytest.mark.asyncio
 async def test_topic_chaos_many_publishers_many_subscribers(actor_system):
-    """混沌:多发布者 + 多订阅者,随机发布模式/条数/延迟,验证每人收到预期条数."""
+    """Chaos: multiple publishers + multiple subscribers, random publish mode/count/delay, verify each receives expected count."""
     random.seed(46)
     topic_name = "chaos_t_many"
     num_publishers = random.randint(3, 6)
@@ -353,7 +353,7 @@ async def publish_batch(pid: int):
 
 @pytest.mark.asyncio
 async def test_topic_chaos_slow_callback_best_effort(actor_system):
-    """混沌:部分订阅者 callback 很慢,随机条数/延迟/超时,best_effort 验证不崩溃."""
+    """Chaos: some subscriber callbacks are slow, random count/delay/timeout, best_effort verify no crash."""
     random.seed(47)
     topic_name = "chaos_t_slow"
     writer = await write_topic(actor_system, topic_name)
@@ -393,13 +393,13 @@ async def slow_cb(m):
 
 
 # =============================================================================
-# 混合:Queue + Topic 同时混沌
+# Mixed: Queue + Topic Chaos Simultaneously
 # =============================================================================
 
 
 @pytest.mark.asyncio
 async def test_chaos_mixed_queue_and_topic_same_loop(actor_system, temp_storage_path):
-    """混沌:同一 loop 内 queue + topic 并发,随机条数/桶数/延迟."""
+    """Chaos: queue + topic concurrent in same loop, random count/buckets/delay."""
     random.seed(48)
     q_topic = "chaos_mixed_q"
     t_topic = "chaos_mixed_t"
@@ -453,7 +453,7 @@ async def topic_chaos():
 
 @pytest.mark.asyncio
 async def test_chaos_rapid_open_close_handles(actor_system, temp_storage_path):
-    """混沌:快速反复创建/丢弃 queue writer 和 topic reader,随机次数/延迟."""
+    """Chaos: rapidly create/discard queue writer and topic reader repeatedly, random times/delay."""
     random.seed(49)
     n_writes = random.randint(6, 12)
     n_readers = random.randint(4, 10)
@@ -485,13 +485,13 @@ async def test_chaos_rapid_open_close_handles(actor_system, temp_storage_path):
     assert result.subscriber_count >= 0
 
     # -------------------------------------------------------------------------
-    # 新增:高复杂度 / 随机负载风暴
+    # Added: High Complexity / Random Load Storm
     # -------------------------------------------------------------------------
 
 
 @pytest.mark.asyncio
 async def test_queue_chaos_storm_random_params(actor_system, temp_storage_path):
-    """混沌风暴:全随机参数(桶数/消费者数/生产者数/条数/get limit/延迟),验证不丢不重."""
+    """Chaos storm: fully random parameters (buckets/consumers/producers/count/get limit/delay), verify no loss or duplication."""
     random.seed(100)
     topic = "chaos_q_storm"
     num_buckets = random.randint(2, 8)
@@ -570,7 +570,7 @@ async def consumer(rank: int):
 
 @pytest.mark.asyncio
 async def test_topic_chaos_storm_random_params(actor_system):
-    """混沌风暴:全随机 topic 参数(发布者/订阅者数量、条数、模式、延迟),验证交付."""
+    """Chaos storm: fully random topic parameters (publishers/subscribers count, messages, mode, delay), verify delivery."""
     random.seed(101)
     topic_name = "chaos_t_storm"
     num_publishers = random.randint(2, 5)
@@ -616,7 +616,7 @@ async def pub(pid: int):
 
 @pytest.mark.asyncio
 async def test_chaos_storm_multi_queue_multi_topic(actor_system, temp_storage_path):
-    """混沌风暴:多 queue + 多 topic 同时跑,各自随机负载,验证无死锁、数据一致."""
+    """Chaos storm: multiple queues + multiple topics running simultaneously, each with random load, verify no deadlock and data consistency."""
     random.seed(102)
     q_topics = ["chaos_storm_q1", "chaos_storm_q2"]
     t_topics = ["chaos_storm_t1", "chaos_storm_t2"]
diff --git a/tests/python/test_topic.py b/tests/python/streaming/test_topic.py
similarity index 100%
rename from tests/python/test_topic.py
rename to tests/python/streaming/test_topic.py
diff --git a/tests/python/test_receive_error_behavior.py b/tests/python/test_receive_error_behavior.py
index 62b0a902d..3ac21295a 100644
--- a/tests/python/test_receive_error_behavior.py
+++ b/tests/python/test_receive_error_behavior.py
@@ -1,9 +1,9 @@
 """
-Tests for receive error behavior (业务错误不杀 actor、panic 停止不恢复).
+Tests for receive error behavior (business errors don't kill actor, panic stops without recovery).
 
 Covers:
-1. receive 返回/抛出错误时:错误返回给调用者,actor 不退出,可继续处理下一条消息
-2. 多次 receive 错误:每次错误只回传调用方,actor 始终存活
+1. When receive returns/raises error: error returned to caller, actor doesn't exit, can process next message
+2. Multiple receive errors: each error only returned to caller, actor stays alive
 """
 
 import pytest
@@ -26,12 +26,12 @@ async def system():
 
 
 # ============================================================================
-# Actor: 对特定消息返回错误,其它消息正常处理
+# Actor: returns error for specific message, processes other messages normally
 # ============================================================================
 
 
 class ErrorOnBadMessageActor(Actor):
-    """收到 'bad' 时 raise,其它消息 echo."""
+    """Raises when receiving 'bad', echoes other messages."""
 
     async def receive(self, msg):
         if msg == "bad":
@@ -40,27 +40,27 @@ async def receive(self, msg):
 
 
 # ============================================================================
-# Test: receive 出错只回传调用者,actor 不退出
+# Test: receive error only returned to caller, actor doesn't exit
 # ============================================================================
 
 
 @pytest.mark.asyncio
 async def test_receive_error_returned_to_caller_actor_stays_alive(system):
-    """receive 返回/抛出错误时:调用者收到错误,actor 不退出,下一条消息正常处理。"""
+    """When receive returns/raises error: caller receives error, actor doesn't exit, next message processed normally."""
     ref = await system.spawn(ErrorOnBadMessageActor(), name="error_on_bad")
 
-    # 第一条:触发错误,应收到异常
+    # 1st message: trigger error, should receive exception
     with pytest.raises(Exception):
         await ref.ask("bad")
 
-    # 第二条:actor 仍存活,应正常返回
+    # 2nd message: actor still alive, should return normally
     result = await ref.ask("ok")
     assert result == "ok"
 
 
 @pytest.mark.asyncio
 async def test_receive_multiple_errors_then_success(system):
-    """多次 receive 出错:每次错误只回传调用方,actor 始终存活,最后一条正常。"""
+    """Multiple receive errors: each error only returned to caller, actor stays alive, final message succeeds."""
     ref = await system.spawn(ErrorOnBadMessageActor(), name="multi_error")
 
     for _ in range(3):

From 3b5721d91f8618fd8282c06929686e6b7ae0c3ff Mon Sep 17 00:00:00 2001
From: Reiase 
Date: Mon, 23 Feb 2026 17:53:18 +0800
Subject: [PATCH 12/15] code cleanup

---
 python/pulsing/core/remote.py               | 213 +++++++-------------
 tests/python/core/test_helpers.py           |  81 ++------
 tests/python/core/test_remote_edge_cases.py |  72 +++----
 3 files changed, 107 insertions(+), 259 deletions(-)

diff --git a/python/pulsing/core/remote.py b/python/pulsing/core/remote.py
index af38e6e05..de8fe3fb3 100644
--- a/python/pulsing/core/remote.py
+++ b/python/pulsing/core/remote.py
@@ -3,7 +3,6 @@
 import asyncio
 import inspect
 import logging
-import os
 import random
 import uuid
 from abc import ABC, abstractmethod
@@ -12,15 +11,6 @@
 from pulsing._core import ActorRef, ActorSystem, Message, StreamMessage
 from pulsing.exceptions import PulsingActorError, PulsingRuntimeError
 
-# Protocol version configuration
-# Default to v1 for backward compatibility
-_DEFAULT_PROTOCOL_VERSION = int(os.getenv("PULSING_PROTOCOL_VERSION", "1"))
-
-
-def _get_protocol_version() -> int:
-    """Get protocol version from environment or default to v1."""
-    return _DEFAULT_PROTOCOL_VERSION
-
 
 def _consume_task_exception(task: asyncio.Task) -> None:
     """Consume exception from background task to avoid 'Task exception was never retrieved'."""
@@ -37,60 +27,22 @@ def _consume_task_exception(task: asyncio.Task) -> None:
         logging.getLogger(__name__).exception("Stream task failed")
 
 
-def _detect_protocol_version(msg: dict) -> int:
-    """Auto-detect protocol version from message.
-
-    Returns:
-        1 for v1 protocol, 2 for v2 protocol
-    """
-    if "__pulsing_proto__" in msg:
-        version_str = msg["__pulsing_proto__"]
-        if isinstance(version_str, str) and version_str.startswith("v"):
-            return int(version_str[1:])
-        return int(version_str)
-    # v1 compatibility: check for __call__ field
-    if "__call__" in msg:
-        return 1
-    return 1  # default to v1
-
-
-def _wrap_call_v1(method: str, args: tuple, kwargs: dict, is_async: bool) -> dict:
-    """v1 protocol: legacy format (backward compatible).
-
-    Format:
-        {
-            "__call__": method_name,
-            "args": args,
-            "kwargs": kwargs,
-            "__async__": is_async
-        }
-    """
-    return {
-        "__call__": method,
-        "args": args,
-        "kwargs": kwargs,
-        "__async__": is_async,
-    }
+# Wire format version (single protocol)
+_PULSING_WIRE_VERSION = "1"
 
 
-def _wrap_call_v2(method: str, args: tuple, kwargs: dict, is_async: bool) -> dict:
-    """v2 protocol: namespace isolation.
+def _wrap_call(method: str, args: tuple, kwargs: dict, is_async: bool) -> dict:
+    """Wrap method call for wire format (namespace isolation).
 
     Format:
         {
-            "__pulsing_proto__": "v2",
-            "__pulsing__": {
-                "call": method_name,
-                "async": is_async
-            },
-            "user_data": {
-                "args": args,
-                "kwargs": kwargs
-            }
+            "__pulsing_proto__": version,
+            "__pulsing__": { "call": method_name, "async": is_async },
+            "user_data": { "args": args, "kwargs": kwargs }
         }
     """
     return {
-        "__pulsing_proto__": "v2",
+        "__pulsing_proto__": _PULSING_WIRE_VERSION,
         "__pulsing__": {
             "call": method,
             "async": is_async,
@@ -103,70 +55,53 @@ def _wrap_call_v2(method: str, args: tuple, kwargs: dict, is_async: bool) -> dic
 
 
 def _unwrap_call(msg: dict) -> tuple[str, tuple, dict, bool]:
-    """Unwrap call message, supporting both v1 and v2 protocols.
-
-    Returns:
-        (method_name, args, kwargs, is_async)
-    """
-    version = _detect_protocol_version(msg)
-
-    if version == 2:
-        pulsing = msg.get("__pulsing__", {})
-        user_data = msg.get("user_data", {})
-        return (
-            pulsing.get("call", ""),
-            tuple(user_data.get("args", ())),
-            dict(user_data.get("kwargs", {})),
-            pulsing.get("async", False),
-        )
-    else:  # v1
-        return (
-            msg.get("__call__", ""),
-            tuple(msg.get("args", ())),
-            dict(msg.get("kwargs", {})),
-            msg.get("__async__", False),
-        )
-
-
-def _wrap_response_v1(result: Any = None, error: str | None = None) -> dict:
-    """v1 protocol response format."""
-    if error:
-        return {"__error__": error}
-    return {"__result__": result}
+    """Unwrap call message. Returns (method_name, args, kwargs, is_async)."""
+    pulsing = msg.get("__pulsing__", {})
+    user_data = msg.get("user_data", {})
+    return (
+        pulsing.get("call", ""),
+        tuple(user_data.get("args", ())),
+        dict(user_data.get("kwargs", {})),
+        pulsing.get("async", False),
+    )
 
 
-def _wrap_response_v2(result: Any = None, error: str | None = None) -> dict:
-    """v2 protocol response format."""
+def _wrap_response(result: Any = None, error: str | None = None) -> dict:
+    """Wrap response for wire format."""
     if error:
         return {
-            "__pulsing_proto__": "v2",
+            "__pulsing_proto__": _PULSING_WIRE_VERSION,
             "__pulsing__": {"error": error},
             "user_data": {},
         }
     return {
-        "__pulsing_proto__": "v2",
+        "__pulsing_proto__": _PULSING_WIRE_VERSION,
         "__pulsing__": {"result": result},
         "user_data": {},
     }
 
 
 def _unwrap_response(resp: dict) -> tuple[Any, str | None]:
-    """Unwrap response, supporting both v1 and v2 protocols.
+    """Unwrap response. Returns (result, error) - one of them will be None.
 
-    Returns:
-        (result, error) - one of them will be None
+    Accepts: wire format (__pulsing__.result/error), legacy (__result__/__error__),
+    and top-level "result"/"error" (e.g. from Message payload JSON).
     """
-    version = _detect_protocol_version(resp)
-
-    if version == 2:
-        pulsing = resp.get("__pulsing__", {})
+    pulsing = resp.get("__pulsing__", {})
+    if isinstance(pulsing, dict):
         if "error" in pulsing:
             return (None, pulsing["error"])
-        return (pulsing.get("result"), None)
-    else:  # v1
-        if "__error__" in resp:
-            return (None, resp["__error__"])
-        return (resp.get("__result__"), None)
+        if "result" in pulsing:
+            return (pulsing["result"], None)
+    if "__error__" in resp:
+        return (None, resp["__error__"])
+    if "__result__" in resp:
+        return (resp["__result__"], None)
+    if "error" in resp:
+        return (None, resp["error"])
+    if "result" in resp:
+        return (resp["result"], None)
+    return (None, None)
 
 
 _PULSING_ERROR_PREFIX = "__PULSING_ERROR__:"
@@ -402,12 +337,7 @@ def __await__(self):
 
     async def _sync_call(self, *args, **kwargs) -> Any:
         """Synchronous method call."""
-        # Use configured protocol version (default v1)
-        protocol_version = _get_protocol_version()
-        if protocol_version == 2:
-            call_msg = _wrap_call_v2(self._method, args, kwargs, False)
-        else:
-            call_msg = _wrap_call_v1(self._method, args, kwargs, False)
+        call_msg = _wrap_call(self._method, args, kwargs, False)
 
         resp = await _ask_convert_errors(self._ref, call_msg)
 
@@ -427,12 +357,21 @@ async def _sync_call(self, *args, **kwargs) -> Any:
             if resp.is_stream:
                 return _SyncGeneratorStreamReader(resp)
             data = resp.to_json()
+            if not isinstance(data, dict):
+                return resp
             if resp.msg_type == "Error":
-                # Actor execution error
                 raise PulsingActorError(
                     data.get("error", "Remote call failed"),
                     actor_name=str(self._ref.actor_id.id),
                 )
+            result, error = _unwrap_response(data)
+            if error:
+                raise PulsingActorError(
+                    error,
+                    actor_name=str(self._ref.actor_id.id),
+                )
+            if result is not None:
+                return result
             return data.get("result")
         return resp
 
@@ -463,12 +402,7 @@ def __init__(
     async def _get_stream(self):
         """Get stream (lazy initialization)"""
         if self._stream_reader is None:
-            # Use configured protocol version (default v1)
-            protocol_version = _get_protocol_version()
-            if protocol_version == 2:
-                call_msg = _wrap_call_v2(self._method, self._args, self._kwargs, True)
-            else:
-                call_msg = _wrap_call_v1(self._method, self._args, self._kwargs, True)
+            call_msg = _wrap_call(self._method, self._args, self._kwargs, True)
             resp = await _ask_convert_errors(self._ref, call_msg)
 
             # Response may be PyMessage (streaming) or direct Python object
@@ -502,20 +436,32 @@ async def __anext__(self):
         reader = await self._get_stream()
         try:
             item = await reader.__anext__()
-            # Check if it's the final result
             if isinstance(item, dict):
+                # Wire format (__pulsing__.result/error) or legacy (__result__/__error__)
+                result, error = _unwrap_response(item)
+                if error is not None:
+                    raise PulsingActorError(
+                        error, actor_name=str(self._ref.actor_id.id)
+                    )
+                if (
+                    result is not None
+                    and "__yield__" not in item
+                    and "__final__" not in item
+                ):
+                    # Single-value response (non-streaming)
+                    self._final_result = result
+                    self._got_result = True
+                    raise StopAsyncIteration
                 if "__final__" in item:
                     self._final_result = item.get("__result__")
                     self._got_result = True
                     raise StopAsyncIteration
                 if "__error__" in item:
-                    # Actor execution error
                     raise PulsingActorError(
                         item["__error__"], actor_name=str(self._ref.actor_id.id)
                     )
                 if "__yield__" in item:
                     return item["__yield__"]
-                # Single-value response (non-streaming): {"__result__": value}
                 if "__result__" in item:
                     self._final_result = item.get("__result__")
                     self._got_result = True
@@ -604,7 +550,7 @@ def __getattr__(self, name: str):
             raise AttributeError(name)
 
         def caller(*args, **kwargs):
-            msg = _wrap_call_v1(name, args, kwargs, is_async=True)
+            msg = _wrap_call(name, args, kwargs, is_async=True)
             delay = max(0.0, self._delay_sec)
 
             async def _send():
@@ -662,31 +608,20 @@ def metadata(self) -> dict[str, str]:
         return {}
 
     async def receive(self, msg) -> Any:
-        # Handle dict-based call format (supporting both v1 and v2)
+        # Handle dict-based call format
         if isinstance(msg, dict):
-            # Detect protocol version
-            version = _detect_protocol_version(msg)
             method, args, kwargs, is_async_call = _unwrap_call(msg)
 
             if not method or method.startswith("_"):
-                error_msg = f"Invalid method: {method}"
-                if version == 2:
-                    return _wrap_response_v2(error=error_msg)
-                return _wrap_response_v1(error=error_msg)
+                return _wrap_response(error=f"Invalid method: {method}")
 
             _MISSING = object()
             attr = getattr(self._instance, method, _MISSING)
             if attr is _MISSING:
-                error_msg = f"Not found: {method}"
-                if version == 2:
-                    return _wrap_response_v2(error=error_msg)
-                return _wrap_response_v1(error=error_msg)
+                return _wrap_response(error=f"Not found: {method}")
 
             if not callable(attr):
-                # Attribute access: return value directly
-                if version == 2:
-                    return _wrap_response_v2(result=attr)
-                return _wrap_response_v1(result=attr)
+                return _wrap_response(result=attr)
 
             func = attr
 
@@ -716,15 +651,9 @@ async def receive(self, msg) -> Any:
                     return self._handle_generator_result(result)
                 if asyncio.iscoroutine(result):
                     result = await result
-                # Use same protocol version as request
-                if version == 2:
-                    return _wrap_response_v2(result=result)
-                return _wrap_response_v1(result=result)
+                return _wrap_response(result=result)
             except Exception as e:
-                error_msg = str(e)
-                if version == 2:
-                    return _wrap_response_v2(error=error_msg)
-                return _wrap_response_v1(error=error_msg)
+                return _wrap_response(error=str(e))
 
         # Handle legacy Message-based call format (for Rust actor compatibility)
         if isinstance(msg, Message):
diff --git a/tests/python/core/test_helpers.py b/tests/python/core/test_helpers.py
index 91d5780c0..1855c29ff 100644
--- a/tests/python/core/test_helpers.py
+++ b/tests/python/core/test_helpers.py
@@ -21,27 +21,19 @@
 
 
 @pytest.mark.asyncio
-async def test_unwrap_call_v1():
-    """Test unwrapping v1 protocol call."""
-    from pulsing.core.remote import _wrap_call_v1, _unwrap_call
+async def test_unwrap_call():
+    """Test wrap/unwrap call message."""
+    from pulsing.core.remote import _wrap_call, _unwrap_call
 
-    msg = _wrap_call_v1("my_method", (1, 2, 3), {"key": "val"}, False)
+    msg = _wrap_call("my_method", (1, 2, 3), {"key": "val"}, False)
     method, args, kwargs, is_async = _unwrap_call(msg)
-
     assert method == "my_method"
     assert args == (1, 2, 3)
     assert kwargs == {"key": "val"}
     assert is_async is False
 
-
-@pytest.mark.asyncio
-async def test_unwrap_call_v2():
-    """Test unwrapping v2 protocol call."""
-    from pulsing.core.remote import _wrap_call_v2, _unwrap_call
-
-    msg = _wrap_call_v2("async_method", (), {"param": 42}, True)
-    method, args, kwargs, is_async = _unwrap_call(msg)
-
+    msg_async = _wrap_call("async_method", (), {"param": 42}, True)
+    method, args, kwargs, is_async = _unwrap_call(msg_async)
     assert method == "async_method"
     assert args == ()
     assert kwargs == {"param": 42}
@@ -49,48 +41,21 @@ async def test_unwrap_call_v2():
 
 
 @pytest.mark.asyncio
-async def test_unwrap_response_v1():
-    """Test unwrapping v1 protocol response."""
-    from pulsing.core.remote import (
-        _wrap_response_v1,
-        _unwrap_response,
-        _wrap_call_v1,
-    )
-
-    # Success response
-    resp = _wrap_response_v1(result={"data": "success"})
+async def test_unwrap_response():
+    """Test wrap/unwrap response message."""
+    from pulsing.core.remote import _wrap_response, _unwrap_response
+
+    resp = _wrap_response(result={"data": "success"})
     result, error = _unwrap_response(resp)
     assert error is None
     assert result == {"data": "success"}
 
-    # Error response
-    err = _wrap_response_v1(error="something failed")
+    err = _wrap_response(error="something failed")
     result, error = _unwrap_response(err)
     assert result is None
     assert "something failed" in error
 
 
-@pytest.mark.asyncio
-async def test_unwrap_response_v2():
-    """Test unwrapping v2 protocol response."""
-    from pulsing.core.remote import (
-        _wrap_response_v2,
-        _unwrap_response,
-    )
-
-    # Success response
-    resp = _wrap_response_v2(result=[1, 2, 3])
-    result, error = _unwrap_response(resp)
-    assert error is None
-    assert result == [1, 2, 3]
-
-    # Error response
-    err = _wrap_response_v2(error="error message")
-    result, error = _unwrap_response(err)
-    assert result is None
-    assert "error message" in error
-
-
 # ============================================================================
 # Single value iterator
 # ============================================================================
@@ -186,28 +151,6 @@ async def raise_value():
     _consume_task_exception(task)
 
 
-# ============================================================================
-# Protocol detection edge cases
-# ============================================================================
-
-
-@pytest.mark.asyncio
-async def test_detect_protocol_unknown_format():
-    """Test protocol detection with unknown format defaults to v1."""
-    from pulsing.core.remote import _detect_protocol_version
-
-    # Unknown format
-    assert _detect_protocol_version({}) == 1
-    assert _detect_protocol_version({"some": "data"}) == 1
-
-    # v1 format (has __call__)
-    assert _detect_protocol_version({"__call__": "method"}) == 1
-
-    # v2 format (has __pulsing_proto__)
-    assert _detect_protocol_version({"__pulsing_proto__": "v2"}) == 2
-    assert _detect_protocol_version({"__pulsing_proto__": 2}) == 2
-
-
 # ============================================================================
 # Error path tests
 # ============================================================================
diff --git a/tests/python/core/test_remote_edge_cases.py b/tests/python/core/test_remote_edge_cases.py
index 2ff190150..4b4b8e772 100644
--- a/tests/python/core/test_remote_edge_cases.py
+++ b/tests/python/core/test_remote_edge_cases.py
@@ -3,7 +3,7 @@
 
 Focus areas:
 - _WrappedActor edge cases
-- v1/v2 protocol handling
+- Protocol wire format (call/response)
 - Attribute access
 - Sync generator handling
 - on_start/on_stop callbacks
@@ -250,68 +250,44 @@ def failing_gen(self, fail_at):
 
 
 # ============================================================================
-# Protocol v1/v2 handling
+# Protocol wire format
 # ============================================================================
 
 
 @pytest.mark.asyncio
-async def test_protocol_v1_call():
-    """Test v1 protocol call format."""
+async def test_protocol_call_format():
+    """Test protocol call/response format (single wire format)."""
     from pulsing.core.remote import (
-        _wrap_call_v1,
+        _wrap_call,
+        _wrap_response,
         _unwrap_call,
-        _wrap_response_v1,
-        _detect_protocol_version,
+        _unwrap_response,
     )
 
-    # v1 call format
-    msg = _wrap_call_v1("test_method", (1, 2), {"key": "value"}, False)
-    assert msg["__call__"] == "test_method"
-    assert msg["args"] == (1, 2)
-    assert msg["kwargs"] == {"key": "value"}
-    assert msg["__async__"] is False
-
-    # v1 response
-    resp = _wrap_response_v1(result="success")
-    assert resp["__result__"] == "success"
-    assert "__error__" not in resp
-
-    # Error response
-    err_resp = _wrap_response_v1(error="failed")
-    assert err_resp["__error__"] == "failed"
-    assert "__result__" not in err_resp
-
-    # Protocol detection
-    assert _detect_protocol_version(msg) == 1
-
-
-@pytest.mark.asyncio
-async def test_protocol_v2_call():
-    """Test v2 protocol call format."""
-    from pulsing.core.remote import (
-        _wrap_call_v2,
-        _wrap_response_v2,
-        _detect_protocol_version,
-    )
-
-    # v2 call format
-    msg = _wrap_call_v2("test_method", (1, 2), {"key": "value"}, True)
-    assert msg["__pulsing_proto__"] == "v2"
+    msg = _wrap_call("test_method", (1, 2), {"key": "value"}, True)
+    assert msg["__pulsing_proto__"] == "1"
     assert msg["__pulsing__"]["call"] == "test_method"
     assert msg["__pulsing__"]["async"] is True
     assert msg["user_data"]["args"] == (1, 2)
 
-    # v2 response
-    resp = _wrap_response_v2(result="success")
-    assert resp["__pulsing_proto__"] == "v2"
+    method, args, kwargs, is_async = _unwrap_call(msg)
+    assert method == "test_method"
+    assert args == (1, 2)
+    assert kwargs == {"key": "value"}
+    assert is_async is True
+
+    resp = _wrap_response(result="success")
+    assert resp["__pulsing_proto__"] == "1"
     assert resp["__pulsing__"]["result"] == "success"
+    result, error = _unwrap_response(resp)
+    assert result == "success"
+    assert error is None
 
-    # Error response
-    err_resp = _wrap_response_v2(error="failed")
+    err_resp = _wrap_response(error="failed")
     assert err_resp["__pulsing__"]["error"] == "failed"
-
-    # Protocol detection
-    assert _detect_protocol_version(msg) == 2
+    result, error = _unwrap_response(err_resp)
+    assert result is None
+    assert error == "failed"
 
 
 # ============================================================================

From 13edcb40f4f7452c5a9ac5bec694740290145ca9 Mon Sep 17 00:00:00 2001
From: Reiase 
Date: Mon, 23 Feb 2026 18:21:16 +0800
Subject: [PATCH 13/15] Update Justfile to clarify usage of `uv run` with
 `--system` flag for consistent environment execution during CI tests

---
 Justfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Justfile b/Justfile
index ec630b686..cf565f94b 100644
--- a/Justfile
+++ b/Justfile
@@ -203,8 +203,8 @@ ci-test:
     # Install wheel and dependencies using uv (preferred) or pip
     if command -v uv &> /dev/null; then
         uv pip install --system dist/*.whl pytest pytest-asyncio
-        # Use uv run pytest (uses uv-managed Python environment)
-        uv run pytest tests/python -v
+        # --system: use same env as above (where wheel was installed), avoid project venv without pulsing
+        uv run --system pytest tests/python -v
     else
         # Fallback to pip if uv not available
         pip install dist/*.whl pytest pytest-asyncio

From 4c68495f99a6e3c672d13d4dc35622d06d0e401b Mon Sep 17 00:00:00 2001
From: Reiase 
Date: Mon, 23 Feb 2026 18:28:52 +0800
Subject: [PATCH 14/15] Refactor CI test execution in Justfile to improve
 Python interpreter detection and error handling

- Updated the CI test command to check for available Python interpreters instead of relying on `uv run`, ensuring compatibility with various environments.
- Enhanced error messaging to inform users when no Python interpreter is found, improving usability during CI processes.
---
 Justfile | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/Justfile b/Justfile
index cf565f94b..51b375927 100644
--- a/Justfile
+++ b/Justfile
@@ -203,12 +203,18 @@ ci-test:
     # Install wheel and dependencies using uv (preferred) or pip
     if command -v uv &> /dev/null; then
         uv pip install --system dist/*.whl pytest pytest-asyncio
-        # --system: use same env as above (where wheel was installed), avoid project venv without pulsing
-        uv run --system pytest tests/python -v
+        # Use same interpreter as above (where wheel was installed); do not use uv run (project venv has no pulsing)
+        for py in python3.12 python3.11 python3.10 python3 python; do
+            if command -v $py &> /dev/null; then
+                $py -m pytest tests/python -v
+                exit 0
+            fi
+        done
+        echo "Error: No Python interpreter found"
+        exit 1
     else
         # Fallback to pip if uv not available
         pip install dist/*.whl pytest pytest-asyncio
-        # Try to find python executable
         for py in python3 python3.12 python3.11 python3.10 python; do
             if command -v $py &> /dev/null; then
                 $py -m pytest tests/python -v

From 85de7c79d4fb6d6414f253d7c510d4956b89f02e Mon Sep 17 00:00:00 2001
From: Reiase 
Date: Mon, 23 Feb 2026 18:32:53 +0800
Subject: [PATCH 15/15] Refactor test assertion in `test_ray_init.py` for
 clarity

- Updated the assertion in `test_concurrent_init_without_driver` to store the popped seed in a variable before checking its presence in the addresses, improving code readability and maintainability.
---
 tests/python/integrations/test_ray_init.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/python/integrations/test_ray_init.py b/tests/python/integrations/test_ray_init.py
index cbf1d5fad..5546e31bc 100644
--- a/tests/python/integrations/test_ray_init.py
+++ b/tests/python/integrations/test_ray_init.py
@@ -255,7 +255,8 @@ def get_seed(self):
     ), f"Expected 1 seed, got {len(unique_seeds)}: {unique_seeds}"
 
     # The seed must be one of the workers' addresses
-    assert unique_seeds.pop() in addrs
+    seed = unique_seeds.pop()
+    assert seed in addrs
 
 
 def test_actor_becomes_seed_without_driver(ray_env):