diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index ccbab97fc..f2ff724b8 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,5 +1,5 @@ blank_issues_enabled: false contact_links: - name: Pulsing GitHub Discussions - url: https://github.com/reiase/Pulsing/discussions + url: https://github.com/DeepLink-org/Pulsing/discussions about: Ask questions and discuss ideas in GitHub Discussions. diff --git a/CHANGELOG.md b/CHANGELOG.md index f65ed4939..c9cd2218e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,5 +38,5 @@ - Python 绑定 - 基础文档和示例 -[Unreleased]: https://github.com/reiase/pulsing/compare/v0.1.0...HEAD -[0.1.0]: https://github.com/reiase/pulsing/releases/tag/v0.1.0 +[Unreleased]: https://github.com/DeepLink-org/Pulsing/compare/v0.1.0...HEAD +[0.1.0]: https://github.com/DeepLink-org/Pulsing/releases/tag/v0.1.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fd90f1444..976e182bd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -14,7 +14,7 @@ ```bash # 克隆仓库 -git clone https://github.com/reiase/pulsing.git +git clone https://github.com/DeepLink-org/Pulsing.git cd pulsing # 安装 Python 依赖 @@ -38,7 +38,7 @@ pytest tests/ ```bash git clone https://github.com/YOUR_USERNAME/pulsing.git cd pulsing -git remote add upstream https://github.com/reiase/pulsing.git +git remote add upstream https://github.com/DeepLink-org/Pulsing.git ``` ### 3. 创建分支 @@ -136,4 +136,4 @@ pytest tests/actor_system/ ## 问题? 
-如果你有任何问题,请通过 [GitHub Issues](https://github.com/reiase/pulsing/issues) 联系我们。 +如果你有任何问题,请通过 [GitHub Issues](https://github.com/DeepLink-org/Pulsing/issues) 联系我们。 diff --git a/Cargo.lock b/Cargo.lock index 7ab3c760e..97362d543 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1905,6 +1905,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", + "bincode", "crossbeam-channel", "futures", "pulsing-actor", diff --git a/Cargo.toml b/Cargo.toml index 1e0e62cb5..d7d7bda28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ edition = "2021" description = "Pulsing - Distributed Actor Framework" authors = ["Reiase "] license = "Apache-2.0" -repository = "https://github.com/reiase/pulsing" +repository = "https://github.com/DeepLink-org/Pulsing" keywords = ["actor", "distributed", "async", "inference"] [workspace.dependencies] diff --git a/Justfile b/Justfile index ec630b686..51b375927 100644 --- a/Justfile +++ b/Justfile @@ -203,12 +203,18 @@ ci-test: # Install wheel and dependencies using uv (preferred) or pip if command -v uv &> /dev/null; then uv pip install --system dist/*.whl pytest pytest-asyncio - # Use uv run pytest (uses uv-managed Python environment) - uv run pytest tests/python -v + # Use same interpreter as above (where wheel was installed); do not use uv run (project venv has no pulsing) + for py in python3.12 python3.11 python3.10 python3 python; do + if command -v $py &> /dev/null; then + $py -m pytest tests/python -v + exit 0 + fi + done + echo "Error: No Python interpreter found" + exit 1 else # Fallback to pip if uv not available pip install dist/*.whl pytest pytest-asyncio - # Try to find python executable for py in python3 python3.12 python3.11 python3.10 python; do if command -v $py &> /dev/null; then $py -m pytest tests/python -v diff --git a/README.md b/README.md index c6675a3ee..be34e43a7 100644 --- a/README.md +++ b/README.md @@ -7,17 +7,19 @@ **[中文文档](README.zh.md)** -**Pulsing is a distributed actor framework that provides a 
communication backbone for building distributed systems, with specialized support for AI applications.** +**Backbone for distributed AI systems.** -🚀 **Zero Dependencies** — Pure Rust + Tokio, no NATS/etcd/Redis +**Actor runtime. Streaming-first. Zero dependencies. Built-in discovery.** + +Pulsing is a distributed actor runtime built in Rust, designed for Python. Connect AI agents and services across machines — no Redis, no etcd, no YAML. Just `pip install pulsing`. -🌐 **Auto Discovery** — Built-in Gossip protocol for cluster management +🚀 **Zero Dependencies** — Pure Rust + Tokio, no NATS/etcd/Redis -🔀 **Location Transparent** — Same API for local and remote Actors +⚡ **Streaming-first** — Native support for streaming responses, built for LLM token generation -⚡ **Streaming Ready** — Native support for LLM streaming responses +🌐 **Built-in Discovery** — SWIM/Gossip protocol for automatic cluster management -🤖 **Agent Friendly** — Integrates with AutoGen, LangGraph out of the box +🔀 **Same API Everywhere** — Same `await actor.method()` for local and remote Actors ## 🚀 Get Started in 5 Minutes diff --git a/README.zh.md b/README.zh.md index 6611b76ea..e476e3795 100644 --- a/README.zh.md +++ b/README.zh.md @@ -7,17 +7,19 @@ **[English](README.md)** -**Pulsing 是一个分布式 actor 框架,为构建分布式系统提供通信骨干,并为 AI 应用提供专门支持。** +**分布式 AI 系统的通信骨干。** -🚀 **零外部依赖** — 纯 Rust + Tokio,无需 NATS/etcd/Redis +**Actor 运行时。流式优先。零依赖。内置发现。** + +Pulsing 是一个用 Rust 构建、为 Python 设计的分布式 Actor 运行时。跨机器连接 AI Agent 和服务——不需要 Redis,不需要 etcd,不需要 YAML。只需 `pip install pulsing`。 -🌐 **自动发现** — 内置 Gossip 协议管理集群 +🚀 **零外部依赖** — 纯 Rust + Tokio,无需 NATS/etcd/Redis -🔀 **位置透明** — 本地和远程 Actor 使用相同 API +⚡ **流式优先** — 原生流式响应支持,为 LLM token 生成而设计 -⚡ **流式支持** — 原生支持 LLM 流式响应 +🌐 **内置发现** — SWIM/Gossip 协议实现自动集群管理 -🤖 **Agent 友好** — 开箱即用集成 AutoGen、LangGraph +🔀 **统一 API** — 本地和远程 Actor 使用相同的 `await actor.method()` ## 🚀 5分钟快速体验 diff --git a/ROADMAP.md b/ROADMAP.md index 5d047c6bd..b989ed11e 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -108,7 
+108,7 @@ ## 贡献 -欢迎社区贡献!如果你对某个功能感兴趣,请在 [GitHub Issues](https://github.com/reiase/pulsing/issues) 中讨论。 +欢迎社区贡献!如果你对某个功能感兴趣,请在 [GitHub Issues](https://github.com/DeepLink-org/Pulsing/issues) 中讨论。 ## 参考 diff --git a/SECURITY.md b/SECURITY.md index bb5969cba..b4030e4d6 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,7 +13,7 @@ ### 报告方式 1. **GitHub Private Vulnerability Reporting**(推荐) - - 访问 [Security Advisories](https://github.com/reiase/pulsing/security/advisories) + - 访问 [Security Advisories](https://github.com/DeepLink-org/Pulsing/security/advisories) - 点击 "Report a vulnerability" 2. **邮件** diff --git a/benchmarks/baseline_throughput.py b/benchmarks/baseline_throughput.py index 28c4bfca3..ca36e7c34 100644 --- a/benchmarks/baseline_throughput.py +++ b/benchmarks/baseline_throughput.py @@ -21,8 +21,13 @@ import time import pulsing as pul -from pulsing.queue import read_queue, write_queue -from pulsing.topic import PublishMode, read_topic, write_topic +from pulsing.streaming import ( + read_queue, + write_queue, + PublishMode, + read_topic, + write_topic, +) def _percentile(sorted_data: list[float], p: float) -> float: diff --git a/benchmarks/concurrency_sweep.py b/benchmarks/concurrency_sweep.py index 67b0533d6..14342b386 100644 --- a/benchmarks/concurrency_sweep.py +++ b/benchmarks/concurrency_sweep.py @@ -20,8 +20,13 @@ import time import pulsing as pul -from pulsing.queue import read_queue, write_queue -from pulsing.topic import PublishMode, read_topic, write_topic +from pulsing.streaming import ( + read_queue, + write_queue, + PublishMode, + read_topic, + write_topic, +) # ============================================================================= diff --git a/benchmarks/large_scale_stress_test.py b/benchmarks/large_scale_stress_test.py index 46efb358c..64c3e8b59 100755 --- a/benchmarks/large_scale_stress_test.py +++ b/benchmarks/large_scale_stress_test.py @@ -18,7 +18,7 @@ from dataclasses import dataclass, field import pulsing as pul -from pulsing.actor 
import Actor, StreamMessage, SystemConfig +from pulsing.core import Actor, StreamMessage, SystemConfig # ============================================================================ diff --git a/benchmarks/large_scale_stress_test_pulsing_single.py b/benchmarks/large_scale_stress_test_pulsing_single.py index 1b9f11e28..0f93a71bf 100644 --- a/benchmarks/large_scale_stress_test_pulsing_single.py +++ b/benchmarks/large_scale_stress_test_pulsing_single.py @@ -17,7 +17,7 @@ from dataclasses import dataclass, field import pulsing as pul -from pulsing.actor import Actor, StreamMessage, SystemConfig +from pulsing.core import Actor, StreamMessage, SystemConfig # ============================================================================ diff --git a/benchmarks/queue_benchmark.py b/benchmarks/queue_benchmark.py index a09463265..f0c6a01ad 100644 --- a/benchmarks/queue_benchmark.py +++ b/benchmarks/queue_benchmark.py @@ -30,8 +30,8 @@ from dataclasses import dataclass, field import pulsing as pul -from pulsing.actor import SystemConfig -from pulsing.queue import read_queue, write_queue +from pulsing.core import SystemConfig +from pulsing.streaming import read_queue, write_queue # ============================================================================ diff --git a/benchmarks/stress_multiprocessing.py b/benchmarks/stress_multiprocessing.py index 07e045e36..ead4ea849 100644 --- a/benchmarks/stress_multiprocessing.py +++ b/benchmarks/stress_multiprocessing.py @@ -21,8 +21,13 @@ from multiprocessing import Queue import pulsing as pul -from pulsing.queue import read_queue, write_queue -from pulsing.topic import PublishMode, read_topic, write_topic +from pulsing.streaming import ( + read_queue, + write_queue, + PublishMode, + read_topic, + write_topic, +) # ============================================================================= diff --git a/crates/pulsing-actor/Cargo.toml b/crates/pulsing-actor/Cargo.toml index f0fc8630f..5b67ed392 100644 --- 
a/crates/pulsing-actor/Cargo.toml +++ b/crates/pulsing-actor/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" description = "Lightweight distributed actor framework for Pulsing" authors.workspace = true license.workspace = true -repository = "https://github.com/reiase/pulsing" +repository = "https://github.com/DeepLink-org/Pulsing" keywords = ["actor", "distributed", "cluster", "gossip"] [features] diff --git a/crates/pulsing-actor/src/supervision.rs b/crates/pulsing-actor/src/supervision.rs index 152cfdad2..7ed713e73 100644 --- a/crates/pulsing-actor/src/supervision.rs +++ b/crates/pulsing-actor/src/supervision.rs @@ -13,7 +13,7 @@ pub enum RestartPolicy { Always, /// Restart the actor only if it failed (non-normal exit) OnFailure, - /// Never restart the actor (default). Panic / 不可恢复错误时停止且不恢复 + /// Never restart the actor (default). Stop and don't recover on panic / unrecoverable errors #[default] Never, } diff --git a/crates/pulsing-actor/src/system/config.rs b/crates/pulsing-actor/src/system/config.rs index b250d99b9..1aa545c00 100644 --- a/crates/pulsing-actor/src/system/config.rs +++ b/crates/pulsing-actor/src/system/config.rs @@ -516,7 +516,7 @@ mod tests { assert!(err.to_string().contains("head_node")); } - // --- 配置解析 --- + // --- Configuration Parsing --- #[test] fn test_config_with_seeds() { diff --git a/crates/pulsing-actor/src/system/runtime.rs b/crates/pulsing-actor/src/system/runtime.rs index 143734a68..fe4d0f646 100644 --- a/crates/pulsing-actor/src/system/runtime.rs +++ b/crates/pulsing-actor/src/system/runtime.rs @@ -35,7 +35,7 @@ pub(crate) async fn run_actor_instance( responder.send(Ok(response)); } Err(e) => { - // 业务错误:receive 返回 Err,只把错误返回给调用者,actor 继续处理下一条消息 + // Business error: receive returns Err, only return error to caller, actor continues processing next message tracing::warn!(actor_id = ?ctx.id(), error = %e, "Receive returned error (returned to caller)"); responder.send(Err(e)); } diff --git 
a/crates/pulsing-actor/src/transport/http2/client.rs b/crates/pulsing-actor/src/transport/http2/client.rs index 27ca75e0e..75b81cc02 100644 --- a/crates/pulsing-actor/src/transport/http2/client.rs +++ b/crates/pulsing-actor/src/transport/http2/client.rs @@ -822,7 +822,7 @@ mod tests { assert!(Arc::ptr_eq(&client.pool, &cloned.pool)); } - // --- 连接管理 --- + // --- Connection Management --- #[test] fn test_client_pool_and_stats() { @@ -860,7 +860,7 @@ mod tests { client.shutdown(); } - // --- 错误恢复:对不可达地址应返回连接错误 --- + // --- Error Recovery: should return connection error for unreachable addresses --- #[tokio::test] async fn test_ask_connection_error() { @@ -894,7 +894,7 @@ mod tests { ); } - // --- 错误植入 --- + // --- Fault Injection --- #[tokio::test] async fn test_fault_injector_ask() { diff --git a/crates/pulsing-actor/tests/integration/single_node_tests.rs b/crates/pulsing-actor/tests/integration/single_node_tests.rs index 3c852d49d..179f6cfca 100644 --- a/crates/pulsing-actor/tests/integration/single_node_tests.rs +++ b/crates/pulsing-actor/tests/integration/single_node_tests.rs @@ -383,7 +383,7 @@ mod error_tests { assert!(result.is_err()); assert_eq!(crash_count.load(Ordering::SeqCst), 1); - // Actor 仍存活,后续消息应正常处理 + // Actor still alive, subsequent messages should be processed normally let result2: Result = actor_ref.ask(Ping { value: 42 }).await; assert!( result2.is_ok(), diff --git a/crates/pulsing-actor/tests/unit/actor/actor_tests.rs b/crates/pulsing-actor/tests/unit/actor/actor_tests.rs index ac084e4f3..b2cfff6bf 100644 --- a/crates/pulsing-actor/tests/unit/actor/actor_tests.rs +++ b/crates/pulsing-actor/tests/unit/actor/actor_tests.rs @@ -288,7 +288,7 @@ mod error_tests { let result: Result = actor_ref.ask(ErrorMessage).await; assert!(result.is_err()); - // receive 返回 Err 时只把错误返回给调用者,actor 不退出 + // When receive returns Err, only return error to caller, actor doesn't exit let result2: Result = actor_ref.ask(Ping { value: 1 }).await; assert!( result2.is_ok(), 
diff --git a/crates/pulsing-actor/tests/unit/system/supervision_tests.rs b/crates/pulsing-actor/tests/unit/system/supervision_tests.rs index adbb209a2..dedb15819 100644 --- a/crates/pulsing-actor/tests/unit/system/supervision_tests.rs +++ b/crates/pulsing-actor/tests/unit/system/supervision_tests.rs @@ -60,11 +60,11 @@ async fn test_restart_on_failure() { let resp = actor_ref.send(Message::single("ping", b"1")).await; assert!(resp.is_ok()); - // 2nd message - receive 返回 Err,错误返回给调用者,actor 不退出、不重启 + // 2nd message - receive returns Err, error returned to caller, actor doesn't exit or restart let resp = actor_ref.send(Message::single("ping", b"2")).await; assert!(resp.is_err()); - // 3rd message - 同一实例仍存活,继续处理 + // 3rd message - same instance still alive, continues processing let resp = actor_ref.send(Message::single("ping", b"3")).await; assert!(resp.is_ok()); @@ -80,7 +80,7 @@ async fn test_restart_on_failure() { #[tokio::test] async fn test_max_restarts_exceeded() { - // receive 返回 Err 不会导致 actor 退出,因此不会触发 restart;factory 只被调用一次 + // receive returning Err doesn't cause actor to exit, so no restart is triggered; factory only called once let system = ActorSystem::new(SystemConfig::standalone()).await.unwrap(); let counter = Arc::new(AtomicU32::new(0)); @@ -89,7 +89,7 @@ async fn test_max_restarts_exceeded() { counter_clone.fetch_add(1, Ordering::SeqCst); Ok(FailingActor { counter: Arc::new(AtomicU32::new(0)), - fail_at: 1, // 第 1 条消息返回 Err + fail_at: 1, // 1st message returns Err }) }; @@ -110,13 +110,13 @@ async fn test_max_restarts_exceeded() { .await .unwrap(); - // 第 1 条消息:receive 返回 Err,只回传错误,actor 不退出 + // 1st message: receive returns Err, only return error to caller, actor doesn't exit let r1 = actor_ref.send(Message::single("ping", b"1")).await; assert!(r1.is_err()); - assert_eq!(counter.load(Ordering::SeqCst), 1); // factory 只调用 1 次 + assert_eq!(counter.load(Ordering::SeqCst), 1); // factory only called once - // 第 2 条消息:同一实例,count=2 != fail_at(1),返回 Ok + 
// 2nd message: same instance, count=2 != fail_at(1), returns Ok let r2 = actor_ref.send(Message::single("ping", b"2")).await; assert!(r2.is_ok()); - assert_eq!(counter.load(Ordering::SeqCst), 1); // 无重启 + assert_eq!(counter.load(Ordering::SeqCst), 1); // no restart } diff --git a/crates/pulsing-py/Cargo.toml b/crates/pulsing-py/Cargo.toml index ebcf875f5..4bf0e8f48 100644 --- a/crates/pulsing-py/Cargo.toml +++ b/crates/pulsing-py/Cargo.toml @@ -21,6 +21,7 @@ async-trait = { workspace = true } futures = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +bincode = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } diff --git a/crates/pulsing-py/src/actor.rs b/crates/pulsing-py/src/actor.rs index 7a7fc638b..e0a8d782a 100644 --- a/crates/pulsing-py/src/actor.rs +++ b/crates/pulsing-py/src/actor.rs @@ -7,6 +7,8 @@ use pulsing_actor::supervision::{BackoffStrategy, RestartPolicy, SupervisionSpec use pyo3::exceptions::{PyRuntimeError, PyStopAsyncIteration, PyValueError}; use pyo3::prelude::*; use pyo3::types::PyBytes; +use serde::{Deserialize, Serialize}; +use std::cmp::min; use std::net::SocketAddr; use std::sync::Arc; use std::sync::Mutex as StdMutex; @@ -19,6 +21,43 @@ use crate::python_executor::python_executor; /// Special message type identifier for pickle-encoded Python objects const SEALED_PY_MSG_TYPE: &str = "__sealed_py_message__"; +/// Special message type identifier for zerocopy descriptor payloads (small, single message) +const SEALED_ZEROCOPY_MSG_TYPE: &str = "__sealed_zerocopy_message__"; +/// Stream frame: descriptor header (metadata only, no bulk data) +const ZC_DESCRIPTOR_MSG_TYPE: &str = "__zc_descriptor__"; +/// Stream frame: raw data chunk +const ZC_CHUNK_MSG_TYPE: &str = "__zc_chunk__"; + +/// Zerocopy metadata header — the single wire format for both single-message and stream paths. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +struct ZeroCopyDescriptorHeader { + version: u32, + buffer_count: usize, + buffer_lengths: Vec, + dtype: Option, + shape: Option>, + strides: Option>, + transport: Option, + checksum: Option, +} + +fn zerocopy_chunk_bytes() -> usize { + const DEFAULT: usize = 1024 * 1024; + const MIN: usize = 4 * 1024; + std::env::var("PULSING_ZEROCOPY_CHUNK_BYTES") + .ok() + .and_then(|v| v.parse::().ok()) + .map(|v| v.max(MIN)) + .unwrap_or(DEFAULT) +} + +fn zerocopy_stream_threshold() -> usize { + const DEFAULT: usize = 64 * 1024; + std::env::var("PULSING_ZEROCOPY_STREAM_THRESHOLD") + .ok() + .and_then(|v| v.parse::().ok()) + .unwrap_or(DEFAULT) +} /// Convert PulsingError to Python exception (used for actor system APIs that return Result<_, PulsingError>). fn to_pyerr(err: pulsing_actor::error::PulsingError) -> PyErr { @@ -404,6 +443,167 @@ impl PySealedMessage { } } +/// Descriptor object for optional zerocopy payload transport. +#[pyclass(name = "ZeroCopyDescriptor")] +#[derive(Clone)] +pub struct PyZeroCopyDescriptor { + #[pyo3(get)] + version: u32, + #[pyo3(get)] + buffers: Vec, + #[pyo3(get)] + dtype: Option, + #[pyo3(get)] + shape: Option>, + #[pyo3(get)] + strides: Option>, + #[pyo3(get)] + transport: Option, + #[pyo3(get)] + checksum: Option, +} + +/// Validate that a Python object exposes a contiguous buffer. +/// If it's not directly extractable as &[u8] (e.g. memoryview), convert via bytes(). 
+fn ensure_contiguous_buffer(py: Python<'_>, item: &Bound<'_, pyo3::PyAny>) -> PyResult { + if item.extract::<&[u8]>().is_ok() { + return Ok(item.clone().unbind()); + } + // Try converting via bytes() for memoryview and other buffer-protocol objects + let builtins = py.import("builtins")?; + let bytes_obj = builtins.getattr("bytes")?.call1((item,)).map_err(|_| { + PyValueError::new_err( + "ZeroCopyDescriptor.buffers items must expose a contiguous Python buffer (bytes/bytearray/memoryview/tensor)", + ) + })?; + // Verify the result is extractable + bytes_obj.extract::<&[u8]>().map_err(|_| { + PyValueError::new_err( + "ZeroCopyDescriptor.buffers items must expose a contiguous Python buffer (bytes/bytearray/memoryview/tensor)", + ) + })?; + Ok(bytes_obj.unbind()) +} + +impl PyZeroCopyDescriptor { + /// Total byte size of all buffers. + fn total_buffer_bytes(&self, py: Python<'_>) -> usize { + self.buffers + .iter() + .filter_map(|buf_obj| buf_obj.bind(py).extract::<&[u8]>().ok().map(|s| s.len())) + .sum() + } + + /// Build a descriptor header (metadata only, no data). + fn to_header(&self, py: Python<'_>) -> ZeroCopyDescriptorHeader { + ZeroCopyDescriptorHeader { + version: self.version, + buffer_count: self.buffers.len(), + buffer_lengths: self + .buffers + .iter() + .filter_map(|b| b.bind(py).extract::<&[u8]>().ok().map(|s| s.len())) + .collect(), + dtype: self.dtype.clone(), + shape: self.shape.clone(), + strides: self.strides.clone(), + transport: self.transport.clone(), + checksum: self.checksum.clone(), + } + } + + /// Serialize for single-message path: [4-byte header_len LE] ++ header_bytes ++ raw_data. 
+ fn serialize_single(&self, py: Python<'_>) -> PyResult> { + let header = self.to_header(py); + let header_bytes = bincode::serialize(&header).map_err(to_py_value_err)?; + let header_len = header_bytes.len() as u32; + let total_data: usize = header.buffer_lengths.iter().sum(); + let mut out = Vec::with_capacity(4 + header_bytes.len() + total_data); + out.extend_from_slice(&header_len.to_le_bytes()); + out.extend_from_slice(&header_bytes); + for buf_obj in &self.buffers { + let bound = buf_obj.bind(py); + let data = bound.extract::<&[u8]>()?; + out.extend_from_slice(data); + } + Ok(out) + } + + /// Reconstruct from header + raw buffer data (shared by single and stream paths). + fn from_wire( + py: Python<'_>, + header: ZeroCopyDescriptorHeader, + raw_buffers: Vec>, + ) -> Self { + Self { + version: header.version, + buffers: raw_buffers + .into_iter() + .map(|b| PyBytes::new(py, &b).into_any().unbind()) + .collect(), + dtype: header.dtype, + shape: header.shape, + strides: header.strides, + transport: header.transport, + checksum: header.checksum, + } + } +} + +#[pymethods] +impl PyZeroCopyDescriptor { + #[new] + #[pyo3(signature = ( + buffers, + *, + dtype=None, + shape=None, + strides=None, + transport=None, + checksum=None, + version=1 + ))] + #[allow(clippy::too_many_arguments)] + fn new( + py: Python<'_>, + buffers: Vec, + dtype: Option, + shape: Option>, + strides: Option>, + transport: Option, + checksum: Option, + version: u32, + ) -> PyResult { + if buffers.is_empty() { + return Err(PyValueError::new_err( + "ZeroCopyDescriptor requires at least one buffer", + )); + } + let normalized: Vec = buffers + .into_iter() + .map(|item| ensure_contiguous_buffer(py, item.bind(py))) + .collect::>>()?; + Ok(Self { + version, + buffers: normalized, + dtype, + shape, + strides, + transport, + checksum, + }) + } + + fn __repr__(&self) -> String { + format!( + "ZeroCopyDescriptor(version={}, buffers={}, transport={:?})", + self.version, + self.buffers.len(), + 
self.transport + ) + } +} + /// Helper function to pickle a Python object in Rust fn pickle_object(py: Python<'_>, obj: &PyObject) -> PyResult> { let pickle = py.import("pickle")?; @@ -412,6 +612,272 @@ fn pickle_object(py: Python<'_>, obj: &PyObject) -> PyResult> { Ok(bytes.as_bytes().to_vec()) } +/// Try to extract a `PyZeroCopyDescriptor` from a Python object via `__zerocopy__(ctx)`. +/// +/// Returns `Ok(None)` if the object does not implement the protocol. +fn try_zerocopy_descriptor<'py>( + py: Python<'py>, + obj: &PyObject, +) -> PyResult>> { + let bound = obj.bind(py); + let zc_method = match bound.getattr("__zerocopy__") { + Ok(m) => m, + Err(_) => return Ok(None), + }; + if !zc_method.is_callable() { + return Ok(None); + } + let descriptor = zc_method.call1((py.None(),))?; + if !descriptor.is_instance_of::() { + return Err(PyValueError::new_err( + "__zerocopy__ must return ZeroCopyDescriptor", + )); + } + Ok(Some(descriptor.extract()?)) +} + +/// Parse single-message zerocopy payload: [4-byte header_len LE] ++ header ++ raw_data. 
+fn parse_zerocopy_single(py: Python<'_>, data: &[u8]) -> PyResult { + if data.len() < 4 { + return Err(PyValueError::new_err("Zerocopy payload too short")); + } + let header_len = u32::from_le_bytes(data[..4].try_into().unwrap()) as usize; + if data.len() < 4 + header_len { + return Err(PyValueError::new_err("Zerocopy payload truncated")); + } + let header: ZeroCopyDescriptorHeader = + bincode::deserialize(&data[4..4 + header_len]).map_err(to_py_value_err)?; + let mut offset = 4 + header_len; + let raw_buffers: Vec> = header + .buffer_lengths + .iter() + .map(|&len| { + let buf = data[offset..offset + len].to_vec(); + offset += len; + buf + }) + .collect(); + let desc = PyZeroCopyDescriptor::from_wire(py, header, raw_buffers); + let obj = Py::new(py, desc)?; + Ok(obj.into_pyobject(py)?.into_any().unbind()) +} + +fn zerocopy_mode() -> String { + std::env::var("PULSING_ZEROCOPY") + .unwrap_or_else(|_| "auto".to_string()) + .to_ascii_lowercase() +} + +/// Build a `Message::Stream` for a large zerocopy payload: descriptor header + data chunks. 
+fn encode_zerocopy_stream(py: Python<'_>, zc: &PyZeroCopyDescriptor) -> PyResult { + let chunk_len = zerocopy_chunk_bytes(); + let header = zc.to_header(py); + let header_bytes = bincode::serialize(&header).map_err(to_py_value_err)?; + + let (tx, rx) = mpsc::channel::>(32); + + // Collect buffer data now (we hold the GIL) to avoid crossing thread boundary with PyObject + let buffer_data: Vec> = zc + .buffers + .iter() + .map(|buf_obj| { + let bound = buf_obj.bind(py); + let data = bound.extract::<&[u8]>()?; + Ok(data.to_vec()) + }) + .collect::>>()?; + + std::thread::spawn(move || { + if tx + .blocking_send(Ok(Message::single(ZC_DESCRIPTOR_MSG_TYPE, header_bytes))) + .is_err() + { + return; + } + for buf in &buffer_data { + let mut offset = 0; + while offset < buf.len() { + let end = min(offset + chunk_len, buf.len()); + let chunk = buf[offset..end].to_vec(); + if tx + .blocking_send(Ok(Message::single(ZC_CHUNK_MSG_TYPE, chunk))) + .is_err() + { + return; + } + offset = end; + } + } + }); + + Ok(Message::from_channel(ZC_DESCRIPTOR_MSG_TYPE, rx)) +} + +/// Reassemble a zerocopy stream (descriptor header already parsed). +/// Reads remaining data chunks from the stream and fills pre-allocated buffers. 
+async fn reassemble_zerocopy_stream( + header: ZeroCopyDescriptorHeader, + stream: &mut std::pin::Pin< + Box> + Send>, + >, +) -> pulsing_actor::error::Result<(ZeroCopyDescriptorHeader, Vec>)> { + let mut raw_buffers: Vec> = header + .buffer_lengths + .iter() + .map(|&len| Vec::with_capacity(len)) + .collect(); + let total_expected: usize = header.buffer_lengths.iter().sum(); + + let mut buf_idx = 0; + let mut received = 0usize; + + while received < total_expected { + let frame = stream.next().await.ok_or_else(|| { + pulsing_actor::error::PulsingError::from(pulsing_actor::error::RuntimeError::Other( + "Zerocopy stream ended before all data received".into(), + )) + })??; + + match frame { + Message::Single { + ref msg_type, + ref data, + } if msg_type == ZC_CHUNK_MSG_TYPE => { + let remaining_in_buf = header.buffer_lengths[buf_idx] - raw_buffers[buf_idx].len(); + if data.len() <= remaining_in_buf { + raw_buffers[buf_idx].extend_from_slice(data); + } else { + // Chunk spans buffer boundary: split across buffers + let first_part = &data[..remaining_in_buf]; + raw_buffers[buf_idx].extend_from_slice(first_part); + let mut rest = &data[remaining_in_buf..]; + buf_idx += 1; + while !rest.is_empty() && buf_idx < raw_buffers.len() { + let can_take = min( + rest.len(), + header.buffer_lengths[buf_idx] - raw_buffers[buf_idx].len(), + ); + raw_buffers[buf_idx].extend_from_slice(&rest[..can_take]); + rest = &rest[can_take..]; + if raw_buffers[buf_idx].len() == header.buffer_lengths[buf_idx] { + buf_idx += 1; + } + } + } + received += data.len(); + if buf_idx < raw_buffers.len() + && raw_buffers[buf_idx].len() == header.buffer_lengths[buf_idx] + { + buf_idx += 1; + } + } + _ => { + return Err(pulsing_actor::error::PulsingError::from( + pulsing_actor::error::RuntimeError::Other(format!( + "Unexpected frame in zerocopy stream: {:?}", + frame.msg_type() + )), + )); + } + } + } + + Ok((header, raw_buffers)) +} + +/// Encode a Python object into a `Message`. 
+/// +/// Small zerocopy payloads → `Message::Single`; large ones → `Message::Stream` +/// (descriptor-first + chunked data). Non-zerocopy objects → pickle. +fn encode_python_payload(py: Python<'_>, obj: &PyObject) -> PyResult { + match zerocopy_mode().as_str() { + "off" => Ok(Message::single(SEALED_PY_MSG_TYPE, pickle_object(py, obj)?)), + "force" => { + let zc = try_zerocopy_descriptor(py, obj)?.ok_or_else(|| { + PyValueError::new_err( + "PULSING_ZEROCOPY=force but object does not provide __zerocopy__", + ) + })?; + encode_zerocopy_message(py, &zc) + } + _ => match try_zerocopy_descriptor(py, obj)? { + Some(zc) => encode_zerocopy_message(py, &zc), + None => Ok(Message::single(SEALED_PY_MSG_TYPE, pickle_object(py, obj)?)), + }, + } +} + +/// Decide between single-message or stream encoding based on total buffer size. +fn encode_zerocopy_message( + py: Python<'_>, + zc: &PyRef<'_, PyZeroCopyDescriptor>, +) -> PyResult { + let total = zc.total_buffer_bytes(py); + if total >= zerocopy_stream_threshold() { + encode_zerocopy_stream(py, zc) + } else { + let bytes = zc.serialize_single(py)?; + Ok(Message::single(SEALED_ZEROCOPY_MSG_TYPE, bytes)) + } +} + +/// Unified decoder: converts any `Message` (pickle / zerocopy-single / zerocopy-stream / other) +/// into a Python object. +async fn decode_message_to_pyobject(msg: Message) -> PyResult { + match msg { + Message::Single { + ref msg_type, + ref data, + } if msg_type == SEALED_PY_MSG_TYPE => Python::with_gil(|py| unpickle_object(py, data)), + Message::Single { + ref msg_type, + ref data, + } if msg_type == SEALED_ZEROCOPY_MSG_TYPE => { + Python::with_gil(|py| parse_zerocopy_single(py, data)) + } + Message::Stream { + ref default_msg_type, + .. + } if default_msg_type == ZC_DESCRIPTOR_MSG_TYPE => { + let Message::Stream { mut stream, .. } = msg else { + unreachable!() + }; + let first = stream + .next() + .await + .ok_or_else(|| PyRuntimeError::new_err("Empty zerocopy stream"))? 
+ .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + let header_data = match first { + Message::Single { + ref msg_type, + ref data, + } if msg_type == ZC_DESCRIPTOR_MSG_TYPE => data.clone(), + _ => { + return Err(PyRuntimeError::new_err( + "First frame of zerocopy stream must be descriptor", + )); + } + }; + let header: ZeroCopyDescriptorHeader = + bincode::deserialize(&header_data).map_err(to_py_value_err)?; + let (header, raw_buffers) = reassemble_zerocopy_stream(header, &mut stream) + .await + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + Python::with_gil(|py| { + let desc = PyZeroCopyDescriptor::from_wire(py, header, raw_buffers); + let obj = Py::new(py, desc)?; + Ok(obj.into_pyobject(py)?.into_any().unbind()) + }) + } + _ => Python::with_gil(|py| { + Ok(PyMessage::from_rust_message(msg) + .into_pyobject(py)? + .into_any() + .unbind()) + }), + } +} + /// Helper function to unpickle bytes back to a Python object fn unpickle_object(py: Python<'_>, data: &[u8]) -> PyResult { let pickle = py.import("pickle")?; @@ -446,22 +912,19 @@ impl PyStreamReader { let mut guard = stream.lock().await; if let Some(ref mut s) = *guard { match s.next().await { - Some(Ok(msg)) => Python::with_gil(|py| { - // Auto unpickle if it's a sealed Python message - match &msg { - Message::Single { msg_type, data } - if msg_type == SEALED_PY_MSG_TYPE => - { - unpickle_object(py, data) - } - _ => { - // Return as PyMessage for JSON/other types - Ok(PyMessage::from_rust_message(msg) - .into_pyobject(py)? - .into_any() - .unbind()) - } + Some(Ok(msg)) => Python::with_gil(|py| match &msg { + Message::Single { msg_type, data } if msg_type == SEALED_PY_MSG_TYPE => { + unpickle_object(py, data) + } + Message::Single { msg_type, data } + if msg_type == SEALED_ZEROCOPY_MSG_TYPE => + { + parse_zerocopy_single(py, data) } + _ => Ok(PyMessage::from_rust_message(msg) + .into_pyobject(py)? 
+ .into_any() + .unbind()), }), Some(Err(e)) => Err(PyRuntimeError::new_err(e.to_string())), None => { @@ -510,13 +973,12 @@ impl PyStreamWriter { /// This is the recommended method for Python-to-Python streaming. /// Objects are automatically pickled and will be unpickled on the reader side. fn write<'py>(&self, py: Python<'py>, obj: PyObject) -> PyResult> { - let pickled = pickle_object(py, &obj)?; + let msg = encode_python_payload(py, &obj)?; let sender = self.sender.clone(); pyo3_async_runtimes::tokio::future_into_py(py, async move { let guard = sender.lock().await; if let Some(ref tx) = *guard { - let msg = Message::single(SEALED_PY_MSG_TYPE, pickled); tx.send(Ok(msg)) .await .map_err(|_| PyRuntimeError::new_err("Stream closed"))?; @@ -614,8 +1076,8 @@ enum PyActorResponse { String, mpsc::Receiver>, ), - /// Pickled Python object for Python-to-Python communication - Sealed(Vec), + /// Pre-encoded Message (pickle single, zerocopy single, or zerocopy stream) + Encoded(Message), /// Generator (async or sync) to be iterated Generator(PyObject, PyObject, bool), // (generator, event_loop, is_async) } @@ -658,33 +1120,12 @@ impl PyActorRef { let py_msg: PyMessage = msg_bound.extract()?; py_msg.to_message() } else { - // Pickle any other Python object - let pickled = pickle_object(py, &msg)?; - Message::single(SEALED_PY_MSG_TYPE, pickled) + encode_python_payload(py, &msg)? }; pyo3_async_runtimes::tokio::future_into_py(py, async move { let response = actor_ref.send(actor_msg).await.map_err(to_pyerr)?; - - // Check if response is a sealed message - Python::with_gil(|py| { - match response { - Message::Single { - ref msg_type, - ref data, - } if msg_type == SEALED_PY_MSG_TYPE => { - // Unpickle and return the original Python object - unpickle_object(py, data) - } - _ => { - // Return as PyMessage for non-sealed responses - Ok(PyMessage::from_rust_message(response) - .into_pyobject(py)? 
- .into_any() - .unbind()) - } - } - }) + decode_message_to_pyobject(response).await }) } @@ -702,9 +1143,7 @@ impl PyActorRef { let py_msg: PyMessage = msg_bound.extract()?; py_msg.to_message() } else { - // Pickle any other Python object - let pickled = pickle_object(py, &msg)?; - Message::single(SEALED_PY_MSG_TYPE, pickled) + encode_python_payload(py, &msg)? }; pyo3_async_runtimes::tokio::future_into_py(py, async move { @@ -713,6 +1152,26 @@ impl PyActorRef { }) } + /// Return an untyped proxy that forwards any method call to the remote actor. + fn as_any(&self, py: Python<'_>) -> PyResult { + let remote = py.import("pulsing.core.remote")?; + let proxy_cls = remote.getattr("ActorProxy")?; + let proxy = proxy_cls.call1((self.clone(), py.None(), py.None()))?; + Ok(proxy.unbind()) + } + + /// Return a typed proxy based on the given class definition. + fn as_type(&self, py: Python<'_>, cls: PyObject) -> PyResult { + let remote = py.import("pulsing.core.remote")?; + let extract_fn = remote.getattr("_extract_methods")?; + let result = extract_fn.call1((&cls,))?; + let methods = result.get_item(0)?; + let async_methods = result.get_item(1)?; + let proxy_cls = remote.getattr("ActorProxy")?; + let proxy = proxy_cls.call1((self.clone(), methods, async_methods))?; + Ok(proxy.unbind()) + } + fn __repr__(&self) -> String { format!( "ActorRef(id={}, local={})", @@ -754,6 +1213,21 @@ impl PySystemConfig { }) } + /// Run this node as the head node (workers will register with it). + fn with_head_node(&self) -> Self { + Self { + inner: self.inner.clone().with_head_node(), + } + } + + /// Connect to a head node at the given address (makes this node a worker). 
+ fn with_head_addr(&self, addr: String) -> PyResult { + let socket_addr: SocketAddr = addr.parse().map_err(to_py_value_err)?; + Ok(Self { + inner: self.inner.clone().with_head_addr(socket_addr), + }) + } + /// Enable TLS with passphrase-derived certificates /// /// All nodes using the same passphrase will be able to communicate securely. @@ -954,36 +1428,20 @@ impl Actor for PythonActorWrapper { let (handler, event_loop) = Python::with_gil(|py| (self.handler.clone_ref(py), self.event_loop.clone_ref(py))); - // Check if this is a sealed Python message - let is_sealed_msg = msg.msg_type() == SEALED_PY_MSG_TYPE; - let py_msg = PyMessage::from_rust_message(msg); + // Decode-first: convert any message format to a Python object + let call_arg = decode_message_to_pyobject(msg).await.map_err(|e| { + pulsing_actor::error::PulsingError::from(pulsing_actor::error::RuntimeError::Other( + e.to_string(), + )) + })?; let response: Result = python_executor() .execute(move || { Python::with_gil(|py| -> PyResult { let receive_method = handler.getattr(py, "receive")?; - - // If sealed message, unpickle and pass the original Python object - let call_arg: PyObject = if is_sealed_msg { - let payload = py_msg.payload.as_ref().ok_or_else(|| { - pyo3::exceptions::PyValueError::new_err("Expected payload for sealed message") - })?; - unpickle_object(py, payload)? 
- } else { - py_msg.into_pyobject(py)?.into_any().unbind() - }; - - let result = receive_method.call1(py, (call_arg,)); - - // Handle Python exceptions and convert to ActorError - let result = match result { + let result = match receive_method.call1(py, (&call_arg,)) { Ok(value) => value, - Err(py_err) => { - // Convert Python exception to ActorError - // We need to return this as an error in the Python execution context - // The error will be caught and converted at the Rust level - return Err(py_err); - } + Err(py_err) => return Err(py_err), }; let asyncio = py.import("asyncio")?; @@ -1005,32 +1463,25 @@ impl Actor for PythonActorWrapper { return Ok(PyActorResponse::Single(PyMessage::empty())); } - // Check for generator (sync or async) - fast path using type name let type_name = py_result_bound .get_type() .qualname() .map(|s| s.to_string()) .unwrap_or_default(); - let is_gen = type_name == "generator"; - let is_async_gen = type_name == "async_generator"; - - if is_async_gen || is_gen { + if type_name == "async_generator" || type_name == "generator" { return Ok(PyActorResponse::Generator( py_result.clone_ref(py), event_loop.clone_ref(py), - is_async_gen, + type_name == "async_generator", )); } - // Handle StreamMessage if py_result_bound.is_instance_of::() { let stream_msg_cell = py_result_bound.downcast::()?; - let borrowed = stream_msg_cell.borrow(); let default_msg_type = borrowed.default_msg_type.clone(); let receiver_arc = borrowed.receiver.clone(); drop(borrowed); - let receiver = { let mut guard = receiver_arc.lock().map_err(|e| { pyo3::exceptions::PyRuntimeError::new_err(format!( @@ -1040,7 +1491,6 @@ impl Actor for PythonActorWrapper { })?; guard.take() }; - if let Some(rx) = receiver { return Ok(PyActorResponse::StreamChannel(default_msg_type, rx)); } else { @@ -1050,21 +1500,18 @@ impl Actor for PythonActorWrapper { } } - // Handle PyMessage (for Rust actor communication) if py_result_bound.is_instance_of::() { let msg: PyMessage = 
py_result_bound.extract()?; if msg.is_stream() { return Err(pyo3::exceptions::PyValueError::new_err( "PyMessage with stream cannot be returned from receive(), use StreamMessage instead" )); - } else { - return Ok(PyActorResponse::Single(msg)); } + return Ok(PyActorResponse::Single(msg)); } - // For any other Python object, pickle it and return as SealedPyMessage - let pickled = pickle_object(py, &py_result)?; - Ok(PyActorResponse::Sealed(pickled)) + let msg = encode_python_payload(py, &py_result)?; + Ok(PyActorResponse::Encoded(msg)) }) }) .await @@ -1076,7 +1523,6 @@ impl Actor for PythonActorWrapper { ) })?; - // Convert Python exceptions to ActorError let response = match response { Ok(resp) => resp, Err(py_err) => { @@ -1097,21 +1543,16 @@ impl Actor for PythonActorWrapper { PyActorResponse::StreamChannel(default_msg_type, rx) => { Ok(Message::from_channel(&default_msg_type, rx)) } - PyActorResponse::Sealed(data) => Ok(Message::single(SEALED_PY_MSG_TYPE, data)), + PyActorResponse::Encoded(msg) => Ok(msg), PyActorResponse::Generator(generator, event_loop, is_async) => { - // Create channel for streaming generator values let (tx, rx) = mpsc::channel::>(32); - - // Spawn background task to iterate generator tokio::spawn(async move { let result = python_executor() .execute(move || { Python::with_gil(|py| -> PyResult<()> { let gen = generator.bind(py); let asyncio = py.import("asyncio")?; - if is_async { - // Async generator: iterate using anext() let run_coroutine_threadsafe = asyncio.getattr("run_coroutine_threadsafe")?; loop { @@ -1120,15 +1561,13 @@ impl Actor for PythonActorWrapper { .call1((&anext_coro, &event_loop))?; match future.call_method0("result") { Ok(item) => { - let pickled = pickle_object(py, &item.unbind())?; - let msg = - Message::single(SEALED_PY_MSG_TYPE, pickled); + let item_obj = item.unbind(); + let msg = encode_python_payload(py, &item_obj)?; if tx.blocking_send(Ok(msg)).is_err() { break; } } Err(e) => { - // Check if StopAsyncIteration if 
e.is_instance_of::(py) { break; } @@ -1144,19 +1583,16 @@ impl Actor for PythonActorWrapper { } } } else { - // Sync generator: iterate using next() loop { match gen.call_method0("__next__") { Ok(item) => { - let pickled = pickle_object(py, &item.unbind())?; - let msg = - Message::single(SEALED_PY_MSG_TYPE, pickled); + let item_obj = item.unbind(); + let msg = encode_python_payload(py, &item_obj)?; if tx.blocking_send(Ok(msg)).is_err() { break; } } Err(e) => { - // Check if StopIteration if e.is_instance_of::(py) { break; } @@ -1176,12 +1612,10 @@ impl Actor for PythonActorWrapper { }) }) .await; - if let Err(e) = result { tracing::error!("Generator iteration error: {:?}", e); } }); - Ok(Message::from_channel(SEALED_PY_MSG_TYPE, rx)) } } @@ -1600,12 +2034,16 @@ impl PyActorSystem { } /// Resolve a named actor (selects one instance using load balancing) - #[pyo3(signature = (name, node_id=None))] + /// + /// When `timeout` is provided, retries resolution until the name appears + /// or the timeout expires (useful for waiting on gossip propagation). + #[pyo3(signature = (name, node_id=None, timeout=None))] fn resolve_named<'py>( &self, py: Python<'py>, name: String, node_id: Option, + timeout: Option, ) -> PyResult> { let system = self.inner.clone(); @@ -1623,23 +2061,44 @@ impl PyActorSystem { ActorPath::new(&name).map_err(to_py_value_err)? 
}; let node = node_id.map(NodeId::new); - let actor_ref = system - .resolve_named(&path, node.as_ref()) - .await - .map_err(to_pyerr)?; - Ok(PyActorRef { inner: actor_ref }) + + match timeout { + None => { + // No timeout: error immediately if not found (original behavior) + let actor_ref = system + .resolve_named(&path, node.as_ref()) + .await + .map_err(to_pyerr)?; + Ok(PyActorRef { inner: actor_ref }) + } + Some(secs) => { + // With timeout: retry until name appears or timeout + let deadline = + tokio::time::Instant::now() + std::time::Duration::from_secs_f64(secs); + let mut last_err = None; + while tokio::time::Instant::now() < deadline { + match system.resolve_named(&path, node.as_ref()).await { + Ok(actor_ref) => return Ok(PyActorRef { inner: actor_ref }), + Err(e) => last_err = Some(e), + } + tokio::time::sleep(std::time::Duration::from_millis(200)).await; + } + Err(to_pyerr(last_err.unwrap())) + } + } }) } /// Alias for resolve_named - resolve actor by name - #[pyo3(signature = (name, *, node_id=None))] + #[pyo3(signature = (name, *, node_id=None, timeout=None))] fn resolve<'py>( &self, py: Python<'py>, name: String, node_id: Option, + timeout: Option, ) -> PyResult> { - self.resolve_named(py, name, node_id) + self.resolve_named(py, name, node_id, timeout) } fn stop<'py>(&self, py: Python<'py>, actor_name: String) -> PyResult> { @@ -1707,5 +2166,6 @@ pub fn add_to_module(m: &Bound<'_, pyo3::types::PyModule>) -> PyResult<()> { m.add_class::()?; // Sealed message support (for Python-to-Python communication) m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/docs/Makefile b/docs/Makefile index ca6bc6bba..80fd06a78 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -2,11 +2,14 @@ help: @echo "Available commands:" - @echo " make serve - Serve documentation locally (auto-installs deps)" + @echo " make serve - Serve documentation locally (auto-installs deps, livereload on)" @echo " make build - Build documentation (auto-installs deps)" @echo " make clean 
- Clean build artifacts" @echo " make check-links - Check for broken links" @echo " make sync - Sync dependencies with pyproject.toml" + @echo "" + @echo " If livereload does not refresh on file change, try:" + @echo " WATCHDOG_USE_POLLING=1 make serve" sync: uv sync --all-extras @@ -14,7 +17,7 @@ sync: serve: @echo "Installing Pulsing in development mode..." cd .. && uv run maturin develop - uv run mkdocs serve + uv run mkdocs serve --livereload -w src -w mkdocs.yml -w overrides build: @echo "Installing Pulsing in development mode..." diff --git a/docs/design/name-only-resolve.md b/docs/design/name-only-resolve.md index 450f03dc2..dec2bba30 100644 --- a/docs/design/name-only-resolve.md +++ b/docs/design/name-only-resolve.md @@ -75,7 +75,7 @@ ## 建议 - **短期**:采用 **方案 A(`get_actor(name)` + 动态 Proxy)**,并在 Proxy 内采用 **A1**(`async_methods is None` 时全部按 async),这样: - - 只在一个地方(如 `pulsing.actor`)增加 `get_actor(name)`(及可选 `node_id`)。 + - 只在一个地方(如 `pulsing.core`)增加 `get_actor(name)`(及可选 `node_id`)。 - 对 `ActorProxy` 做最小改动:在 `__getattr__` 里当 `self._async_methods is None` 时令 `is_async=True`。 - **命名**:`get_actor(name)` 与现有 `resolve(name)`(返回 ref)区分清晰;若希望更短,可再提供 `pul.actor(name)` 作为别名。 - **文档**:说明「无类型、无补全;流式优先用类型化 resolve」即可。 diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index d46a09fc5..5362b7ff1 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -1,5 +1,5 @@ site_name: Pulsing -site_description: Pulsing is a distributed actor framework that provides a communication backbone for building distributed systems, with specialized support for AI applications. +site_description: "Pulsing: Backbone for distributed AI systems. Actor runtime. Streaming-first. Zero dependencies. Built-in discovery." # Docs may be hosted elsewhere, but repo is the canonical entry point. 
site_url: https://github.com/DeepLink-org/pulsing docs_dir: src @@ -83,12 +83,23 @@ plugins: site_name: "Pulsing 文档" nav_translations: Home: 首页 - Home: 首页 - Getting Started: 开始使用 Overview: 概述 + Quick Start: 快速开始 + First Actor: 第一个 Actor + Actor Patterns: Actor 模式 + Cluster Setup: 集群组网 + Get Started: 入门 + Getting Started: 开始使用 + Core Concepts: 核心概念 + Architecture Overview: 架构概览 + Cluster & Network: 集群与网络 + Implementation: 实现细节 + User Guides: 用户指南 + Architecture & Design: 架构与设计 + Complete Reference: 完整参考 LLM Inference: LLM 推理 Distributed Agents: 分布式 Agent - Migrate from Ray: 从 Ray 迁移 + Ray + Pulsing: Ray + Pulsing User Guide: 用户指南 Guide: 指南 Actors: Actor 指南 @@ -105,15 +116,17 @@ plugins: Ping-Pong: Ping-Pong Distributed Counter: 分布式计数器 API Reference: API 参考 - Overview: API 概述 + API Overview: API 概述 Python: Python API Rust: Rust API FAQ: 常见问题 Design: 设计文档 + Actor Basics: Actor 基础 Architecture: 架构概览 Actor System: Actor 系统 Behavior API: Behavior API Node Discovery: 节点发现 + Cluster Networking: 集群组网 Actor Addressing: Actor 寻址 HTTP2 Transport: HTTP2 传输 Load Sync: 负载同步 @@ -136,18 +149,21 @@ plugins: nav: - Home: index.md - - Getting Started: - - Overview: quickstart/index.md + - Overview: overview.md + - Quick Start: + - First Actor: quickstart/index.md + - Actor Patterns: quickstart/patterns.md + - Cluster Setup: quickstart/cluster_networking.md - LLM Inference: quickstart/llm_inference.md - Distributed Agents: quickstart/agent.md - - Migrate from Ray: quickstart/migrate_from_ray.md - - User Guide: - - Guide: guide/index.md - - Actors: guide/actors.md + - Ray + Pulsing: quickstart/migrate_from_ray.md + - Core Concepts: + - Actor Basics: guide/actors.md - Communication Patterns: guide/communication_patterns.md - Remote Actors: guide/remote_actors.md - - Operations: guide/operations.md - Reliability: guide/reliability.md + - User Guides: + - Operations: guide/operations.md - Security: guide/security.md - Distributed Queue: guide/queue.md - Semantics: guide/semantics.md @@ 
-162,26 +178,31 @@ nav: - Ping-Pong: examples/ping_pong.md - Distributed Counter: examples/distributed_counter.md - API Reference: - - Overview: api/overview.md + - API Overview: api/overview.md + - Complete Reference: api_reference.md - Python: api/python.md - Rust: api/rust.md - FAQ: faq.md - - Design: - - Architecture: design/architecture.md - - Actor System: design/actor-system.md - - Behavior API: design/behavior.md - - Node Discovery: design/node-discovery.md - - Actor Addressing: design/actor-addressing.md - - HTTP2 Transport: design/http2-transport.md - - Load Sync: design/load_sync.md - - AS Actor Decorator: design/as-actor-decorator.md - - Communication Evolution: design/cluster-communication-evolution.md + - Architecture & Design: + - Architecture Overview: design/architecture.md + - Actor System: + - Actor System: design/actor-system.md + - Behavior API: design/behavior.md + - Cluster & Network: + - Cluster Networking: design/cluster-networking.md + - Node Discovery: design/node-discovery.md + - HTTP2 Transport: design/http2-transport.md + - Actor Addressing: design/actor-addressing.md + - Load Sync: design/load_sync.md + - Implementation: + - AS Actor Decorator: design/as-actor-decorator.md + - Communication Evolution: design/cluster-communication-evolution.md extra: generator: false social: - icon: fontawesome/brands/github - link: https://github.com/reiase/pulsing + link: https://github.com/DeepLink-org/Pulsing extra_css: - assets/stylesheets/home.css diff --git a/docs/overrides/home.html b/docs/overrides/home.html index 65beacd5c..2293663e1 100644 --- a/docs/overrides/home.html +++ b/docs/overrides/home.html @@ -485,9 +485,11 @@

Pulsing

{% if config.theme.language == "zh" %} -

为可扩展 AI 系统设计的轻量级分布式 Actor 框架

+

分布式 AI 系统的通信骨干

+

Actor 运行时。流式优先。零依赖。内置发现。

{% else %} -

Lightweight Distributed Actor Framework for Scalable AI Systems

+

Backbone for Distributed AI Systems

+

Actor runtime. Streaming-first. Zero dependencies. Built-in discovery.

{% endif %}
@@ -700,11 +702,11 @@

LLM Inference Services

🔀
{% if config.theme.language == "zh" %} -

分布式计算

-

替代 Ray 用于轻量级分布式工作负载。非常适合 ML 流水线、数据处理和微服务。

+

增强 Ray 通信

+

为 Ray Actor 增加流式、发现和跨集群通信能力。Ray 负责调度,Pulsing 负责通信。

{% else %} -

Distributed Computing

-

Replace Ray for lightweight distributed workloads. Perfect for ML pipelines, data processing, and microservices.

+

Enhance Ray Communication

+

Add streaming, discovery, and cross-cluster calls to Ray actors. Ray handles scheduling, Pulsing handles communication.

{% endif %}
@@ -874,12 +876,12 @@

LLM Inference Ready

# Start OpenAI-compatible Router
-pulsing actor pulsing.actors.Router \
+pulsing actor pulsing.serving.Router \
     --addr 0.0.0.0:8000 \
     --http_port 8080 --model_name my-llm
 
 # Start vLLM Worker
-pulsing actor pulsing.actors.VllmWorker \
+pulsing actor pulsing.serving.VllmWorker \
     --model Qwen/Qwen2.5-0.5B \
     --addr 0.0.0.0:8001 --seeds 127.0.0.1:8000
 
@@ -904,19 +906,19 @@ 

Join the Community

Pulsing is open source and community-driven. Get involved!

{% endif %} @@ -104,7 +108,7 @@ async def main(): asyncio.run(main()) ``` -[:octicons-arrow-right-24: Getting Started](quickstart/index.md){ .md-button } +[:octicons-arrow-right-24: Quick Start](quickstart/index.md){ .md-button } --- @@ -112,11 +116,13 @@ asyncio.run(main()) | Goal | Link | |------|------| -| Understand the Actor model | [Guide: Actors](guide/actors.md) | -| Build a cluster | [Guide: Remote Actors](guide/remote_actors.md) | -| Operate your system | [Guide: CLI Operations](guide/operations.md) | -| Deep dive into design | [Design Documents](design/architecture.md) | -| API details | [API Reference](api/overview.md) | +| What is Pulsing / who is it for? | [Overview](overview.md) | +| Understand the Actor model | [Actor Basics](guide/actors.md) | +| Build a cluster | [Remote Actors](guide/remote_actors.md) | +| Operate your system | [CLI Operations](guide/operations.md) | +| Architecture and design | [Architecture & Design](design/architecture.md) | +| API details | [API Overview](api/overview.md) | +| Full API contract | [Complete Reference](api_reference.md) | --- diff --git a/docs/src/index.zh.md b/docs/src/index.zh.md index a23275b83..db78f0dd8 100644 --- a/docs/src/index.zh.md +++ b/docs/src/index.zh.md @@ -1,7 +1,7 @@ --- template: home.html -title: Pulsing - 轻量级分布式 Actor 框架 -description: Pulsing 是一个分布式 actor 框架,为构建分布式系统提供通信骨干,并为 AI 应用提供专门支持。 +title: Pulsing - 分布式 AI 系统的通信骨干 +description: "Pulsing:分布式 AI 系统的通信骨干。Actor 运行时。流式优先。零依赖。内置发现。" hide: toc --- @@ -9,35 +9,39 @@ hide: toc # Pulsing -一个**分布式 actor 框架**,为构建分布式系统提供通信骨干,并为 AI 应用提供专门支持。 +**分布式 AI 系统的通信骨干。** + +Actor 运行时。流式优先。零依赖。内置发现。 + +用 Rust 构建、为 Python 设计的分布式 Actor 运行时。跨机器连接 AI Agent 和服务——不需要 Redis,不需要 etcd,不需要 YAML。 ## 为什么选择 Pulsing?
-- :material-package-variant-closed:{ .lg .middle } **零外部依赖** +- :material-package-variant-closed:{ .lg .middle } **零依赖** --- - 纯 Rust + Tokio 实现。无需 etcd、NATS、Redis 或 Consul。 + 纯 Rust + Tokio 实现。无需 etcd、NATS、Redis 或 Consul。只需 `pip install pulsing`。 -- :material-radar:{ .lg .middle } **内置集群发现** +- :material-lightning-bolt:{ .lg .middle } **流式优先** --- - SWIM/Gossip 协议实现自动节点发现和故障检测。 + 原生流式支持,为 LLM token 生成和实时通信而设计。 -- :material-lightning-bolt:{ .lg .middle } **高性能** +- :material-radar:{ .lg .middle } **内置发现** --- - 异步运行时 + HTTP/2 传输 + 原生流式支持。 + SWIM/Gossip 协议实现自动节点发现和故障检测。无需配置。 -- :material-language-python:{ .lg .middle } **Python 优先** +- :material-language-python:{ .lg .middle } **Rust 构建,Python 设计** --- - 通过 PyO3 提供完整 Python API。`@remote` 装饰器将任意类变成 Actor。 + 通过 PyO3 提供完整异步 Python API。`@remote` 装饰器将任意类变成分布式 Actor。
@@ -63,13 +67,13 @@ hide: toc [:octicons-arrow-right-24: 分布式 Agent](quickstart/agent.zh.md) -- :material-swap-horizontal:{ .lg .middle } **替代 Ray** +- :material-swap-horizontal:{ .lg .middle } **与 Ray 协同** --- - 兼容 API,一行导入即可从 Ray 迁移。 + 用 Pulsing 作为 Ray Actor 的通信层。流式、发现、跨集群调用——开箱即用。 - [:octicons-arrow-right-24: 从 Ray 迁移](quickstart/migrate_from_ray.zh.md) + [:octicons-arrow-right-24: Ray + Pulsing](quickstart/migrate_from_ray.zh.md)
@@ -112,11 +116,13 @@ asyncio.run(main()) | 目标 | 链接 | |------|------| -| 理解 Actor 模型 | [指南:Actor](guide/actors.zh.md) | -| 构建集群 | [指南:远程 Actor](guide/remote_actors.zh.md) | -| 运维系统 | [指南:CLI 操作](guide/operations.zh.md) | -| 深入设计 | [设计文档](design/architecture.md) | -| API 详情 | [API 参考](api_reference.md) | +| Pulsing 是什么 / 适合谁? | [概述](overview.zh.md) | +| 理解 Actor 模型 | [Actor 基础](guide/actors.zh.md) | +| 构建集群 | [远程 Actor](guide/remote_actors.zh.md) | +| 运维系统 | [CLI 运维](guide/operations.zh.md) | +| 架构与设计 | [架构与设计](design/architecture.zh.md) | +| API 详情 | [API 概述](api/overview.zh.md) | +| 完整 API 契约 | [完整参考](api_reference.zh.md) | --- diff --git a/docs/src/overview.md b/docs/src/overview.md new file mode 100644 index 000000000..8ba0896aa --- /dev/null +++ b/docs/src/overview.md @@ -0,0 +1,48 @@ +# Pulsing Overview + +## What is Pulsing? + +**Pulsing: Backbone for distributed AI systems.** + +Pulsing is a distributed actor runtime built in Rust, designed for Python. Actor runtime. Streaming-first. Zero dependencies. Built-in discovery. + +In one sentence: turn any Python class into a distributed Actor with `@remote` — no etcd, NATS, or Redis required. Same API for local and remote, with native streaming support. + +--- + +## What Can You Do with Pulsing? + +| Use case | What you get | +|----------|----------------| +| **LLM inference services** | Scalable backends with streaming, OpenAI-compatible API, and optional vLLM/Transformers workers. | +| **Distributed agents** | Multi-agent systems with native integration for AutoGen and LangGraph; same code runs locally or across machines. | +| **Enhance Ray communication** | Add streaming, actor discovery, and cross-cluster calls to Ray actors via `pul.mount()`. Use Ray for scheduling, Pulsing for communication. | +| **Custom distributed apps** | Build services and workers that discover each other via built-in gossip or a head node, over a single HTTP/2 port. | + +--- + +## Who Is It For? 
+ +| Role | Benefit | +|------|---------| +| **AI / ML application developers** | One-line scaling: add `addr` and `seeds` (or use init-in-Ray) to run agents and inference across nodes without learning a new paradigm. | +| **Distributed systems engineers** | Zero external coordination stores; built-in SWIM/gossip and optional head-node topology; single-port networking. | +| **Ray users** | Use Pulsing as a communication layer alongside Ray: `pul.mount()` bridges Ray actors onto the Pulsing network for streaming, discovery, and cross-cluster calls. | + +You don't need to be a distributed systems expert to get value — the API is designed to stay simple from single process to multi-node. + +--- + +## Design Principles + +- **Zero external dependencies** — Pure Rust core + Tokio; no etcd, NATS, or Redis. Cluster discovery uses built-in gossip or an optional head node. +- **Location transparency** — Same API for local and remote actors: `await actor.method()` whether the actor is on this process or another machine. +- **Python first** — `@pul.remote` turns a class into an Actor; `spawn()` and `resolve()` for creation and discovery; native async/await and streaming. +- **Single port** — Actor RPC and cluster protocol share one HTTP/2 port per node, simplifying deployment and firewalls. + +--- + +## Next Steps + +- **[Quick Start](quickstart/index.md)** — Run your first Actor in minutes, then go stateful and distributed. +- **[Ray + Pulsing](quickstart/migrate_from_ray.md)** — Use Pulsing as Ray's communication layer, or use the standalone API. diff --git a/docs/src/overview.zh.md b/docs/src/overview.zh.md new file mode 100644 index 000000000..0bf8eb32b --- /dev/null +++ b/docs/src/overview.zh.md @@ -0,0 +1,48 @@ +# Pulsing 概述 + +## 什么是 Pulsing? + +**Pulsing:分布式 AI 系统的通信骨干。** + +Pulsing 是一个用 Rust 构建、为 Python 设计的分布式 Actor 运行时。流式优先。零依赖。内置发现。 + +一句话:用 `@remote` 把任意 Python 类变成分布式 Actor,无需 etcd、NATS 或 Redis。本地和远程使用同一套 API,原生支持流式通信。 + +--- + +## 你能用 Pulsing 做什么? 
+ +| 场景 | 你能得到什么 | +|------|----------------| +| **LLM 推理服务** | 可扩展的推理后端、流式输出、OpenAI 兼容 API,以及可选的 vLLM/Transformers Worker。 | +| **分布式 Agent** | 多智能体系统,原生集成 AutoGen 与 LangGraph;同一套代码可在本机或跨机运行。 | +| **增强 Ray 通信** | 通过 `pul.mount()` 为 Ray Actor 增加流式、发现和跨集群调用能力。Ray 负责调度,Pulsing 负责通信。 | +| **自定义分布式应用** | 通过内置 Gossip 或 Head 节点组网,单端口 HTTP/2,构建服务与 Worker。 | + +--- + +## 适合谁用? + +| 角色 | 收益 | +|------|------| +| **AI / ML 应用开发者** | 一行级扩展:加上 `addr` 和 `seeds`(或用 init-in-Ray),即可在多节点跑 Agent 与推理,无需学习新范式。 | +| **分布式系统工程师** | 零外部协调存储;内置 SWIM/Gossip 与可选 Head 拓扑;单端口组网。 | +| **Ray 用户** | 用 Pulsing 作为 Ray 的通信层:`pul.mount()` 将 Ray Actor 接入 Pulsing 网络,获得流式、发现和跨集群调用能力。 | + +你不需要成为分布式系统专家也能用好 —— 从单进程到多节点,API 保持简洁。 + +--- + +## 设计理念 + +- **零外部依赖** — 核心纯 Rust + Tokio;不依赖 etcd、NATS、Redis。集群发现采用内置 Gossip 或可选 Head 节点。 +- **位置透明** — 本地与远程 Actor 同一套 API:`await actor.method()` 无论 Actor 在本进程还是远程。 +- **Python 优先** — `@pul.remote` 将类变成 Actor;`spawn()` / `resolve()` 用于创建与发现;原生 async/await 与流式。 +- **单端口** — 每节点一个 HTTP/2 端口同时承载 Actor RPC 与集群协议,便于部署与防火墙配置。 + +--- + +## 下一步 + +- **[快速开始](quickstart/index.zh.md)** — 几分钟内跑起第一个 Actor,再进阶到有状态与分布式。 +- **[Ray + Pulsing](quickstart/migrate_from_ray.zh.md)** — 用 Pulsing 作为 Ray 的通信层,或使用 Pulsing 独立 API。 diff --git a/docs/src/quickstart/agent.md b/docs/src/quickstart/agent.md index 6481b7a46..ce7b65a6f 100644 --- a/docs/src/quickstart/agent.md +++ b/docs/src/quickstart/agent.md @@ -61,7 +61,7 @@ agent = AssistantAgent("assistant", model_client=model) Replace `SingleThreadedAgentRuntime` with `PulsingRuntime`: ```python -from pulsing.autogen import PulsingRuntime +from pulsing.integrations.autogen import PulsingRuntime # Single process (default) runtime = PulsingRuntime() @@ -90,7 +90,7 @@ import asyncio from autogen_agentchat.agents import AssistantAgent from autogen_core import AgentId from autogen_ext.models.openai import OpenAIChatCompletionClient -from pulsing.autogen import PulsingRuntime +from pulsing.integrations.autogen import PulsingRuntime async def 
main(): model = OpenAIChatCompletionClient(model="gpt-4o-mini") @@ -136,7 +136,7 @@ app = graph.compile() ### Step 2: Wrap with Pulsing ```python -from pulsing.langgraph import with_pulsing +from pulsing.integrations.langgraph import with_pulsing distributed_app = with_pulsing( app, diff --git a/docs/src/quickstart/agent.zh.md b/docs/src/quickstart/agent.zh.md index a542f0680..45a6cbd42 100644 --- a/docs/src/quickstart/agent.zh.md +++ b/docs/src/quickstart/agent.zh.md @@ -61,7 +61,7 @@ agent = AssistantAgent("assistant", model_client=model) 用 `PulsingRuntime` 替换 `SingleThreadedAgentRuntime`: ```python -from pulsing.autogen import PulsingRuntime +from pulsing.integrations.autogen import PulsingRuntime # 单进程(默认) runtime = PulsingRuntime() @@ -90,7 +90,7 @@ import asyncio from autogen_agentchat.agents import AssistantAgent from autogen_core import AgentId from autogen_ext.models.openai import OpenAIChatCompletionClient -from pulsing.autogen import PulsingRuntime +from pulsing.integrations.autogen import PulsingRuntime async def main(): model = OpenAIChatCompletionClient(model="gpt-4o-mini") @@ -136,7 +136,7 @@ app = graph.compile() ### 步骤 2:用 Pulsing 包装 ```python -from pulsing.langgraph import with_pulsing +from pulsing.integrations.langgraph import with_pulsing distributed_app = with_pulsing( app, diff --git a/docs/src/quickstart/cluster_networking.md b/docs/src/quickstart/cluster_networking.md new file mode 100644 index 000000000..256b755e9 --- /dev/null +++ b/docs/src/quickstart/cluster_networking.md @@ -0,0 +1,206 @@ +# Cluster Networking (How to Use) + +This page describes **how to form and use** a Pulsing cluster. For protocol and implementation details, see [Cluster Networking (Design)](../design/cluster-networking.md). 
+ +--- + +## Three modes + +| Mode | What you configure | Best for | +|------|--------------------|----------| +| **Gossip + seed** | Bind address + optional seed addresses to join | Kubernetes, VMs, bare metal; no single point of failure | +| **Head node** | One node as head, others with head address | Simple ops; one fixed coordinator address | +| **Init in Ray** | `init_in_ray()` in each process; no seeds | Already using Ray; automatic seed discovery | + +All modes use a **single HTTP/2 port** per node. No etcd, NATS, or Redis. + +--- + +## Mode 1: Gossip + seed + +### Configuration + +**Python** + +```python +import pulsing as pul + +# First node +await pul.init(addr="0.0.0.0:8000") + +# Later nodes — join via seeds +await pul.init(addr="0.0.0.0:8001", seeds=["192.168.1.10:8000"]) +``` + +**Rust** + +```rust +use pulsing_actor::prelude::*; +use std::net::SocketAddr; + +// First node +let config = SystemConfig::with_addr("0.0.0.0:8000".parse()?); +let system = ActorSystem::new(config).await?; + +// Later nodes +let config = SystemConfig::with_addr("0.0.0.0:8001".parse()?) + .with_seeds(vec!["192.168.1.10:8000".parse()?]); +let system = ActorSystem::new(config).await?; +``` + +With multiple seeds (e.g. a Kubernetes Service), pass a list; the node probes until it gets a member list. + +### Kubernetes + +Use the Service name as the seed so new pods can join: + +```python +await pul.init(addr="0.0.0.0:8080", seeds=["actor-cluster.default.svc.cluster.local:8080"]) +``` + +### When to use + +- No single point of failure for discovery +- You run on K8s, VMs, or bare metal and can expose at least one address (or Service) as seed +- Eventual consistency of membership is acceptable (typically hundreds of ms) + +--- + +## Mode 2: Head node + +### Configuration + +**Rust** + +```rust +use pulsing_actor::prelude::*; +use std::net::SocketAddr; + +// Head node +let config = SystemConfig::with_addr("0.0.0.0:8000".parse()?) 
+ .with_head_node(); +let system = ActorSystem::new(config).await?; + +// Worker nodes +let head_addr: SocketAddr = "192.168.1.10:8000".parse()?; +let config = SystemConfig::with_addr("0.0.0.0:8001".parse()?) + .with_head_addr(head_addr); +let system = ActorSystem::new(config).await?; +``` + +**Python** + +```python +import pulsing as pul + +# Head node +await pul.init(addr="0.0.0.0:8000", is_head_node=True) + +# Worker nodes +await pul.init(addr="0.0.0.0:8001", head_addr="192.168.1.10:8000") +``` + +You can also use `SystemConfig.with_head_node()` / `.with_head_addr(addr)` and pass the config to `ActorSystem.create(config, loop)` for advanced use. + +### Head parameters (Rust) + +- **Sync interval**: how often workers pull from head (default 5s) +- **Heartbeat interval**: worker → head (default 10s) +- **Heartbeat timeout**: head marks worker dead after (default 30s) + +### When to use + +- One fixed address (the head) for firewalls and monitoring +- You accept a single point of failure for coordination until head recovers +- You want the head as the single source of truth for membership/registry + +--- + +## Mode 3: Init in Ray + +### Requirements + +- Ray installed and `ray.init()` called before `init_in_ray()` +- Every process that uses Pulsing (driver and workers) must call `init_in_ray()` in that process + +### Usage + +```python +import ray +from pulsing.integrations.ray import init_in_ray + +# Recommended: hook so every worker runs init_in_ray at startup +ray.init(runtime_env={"worker_process_setup_hook": init_in_ray}) + +# Driver must also init +init_in_ray() + +# Use Pulsing as usual +import pulsing as pul +@pul.remote +class MyActor: + def run(self): return "ok" + +actor = await MyActor.spawn(name="my_actor") +``` + +**Async** (e.g. async Ray actors): + +```python +from pulsing.integrations.ray import async_init_in_ray +await async_init_in_ray() +``` + +**Cleanup** (e.g. 
tests): + +```python +from pulsing.integrations.ray import cleanup +cleanup() +``` + +### When to use + +- You already run Ray and want Pulsing on the same nodes as one cluster +- You want one-line cluster formation per process without managing seeds or head address +- You are okay depending on Ray’s KV only for bootstrap; after that Pulsing uses its own gossip + +### Limitations + +- Requires Ray and its internal KV +- Every process must call `init_in_ray()` (driver explicitly; workers via hook) +- One Pulsing cluster per Ray cluster (one KV key) + +--- + +## Comparison and choice + +| Criterion | Gossip + seed | Head node | Init in Ray | +|-----------|----------------|-----------|-------------| +| External deps | None | None | Ray | +| Single point of failure | No | Yes (head) | No | +| Config | addr + optional seeds | addr + head addr or head role | None (Ray KV) | +| Best environment | K8s, VMs, bare metal | One coordinator OK | Existing Ray cluster | +| Python `init()` | `addr`, `seeds` | Via SystemConfig if exposed | `init_in_ray()` | + +**Suggested choice:** + +- **Already on Ray** → **Init in Ray** +- **No SPOF, no Ray** → **Gossip + seed** (use a K8s Service as seed when on K8s) +- **One fixed coordinator, simple ops** → **Head node** + +--- + +## Best practices + +1. **Gossip + seed**: In K8s use a Service as seed; keep one port open for all nodes (actor + gossip). +2. **Head node**: Run head on a stable host/port; tune heartbeat timeout under load. +3. **Init in Ray**: Call `init_in_ray()` in the driver and set `worker_process_setup_hook`; use `cleanup()` in tests if needed. +4. **Security**: For any mode, enable TLS (e.g. passphrase) for cluster traffic — see [Security](../guide/security.md). 
+ +--- + +## See also + +- [Cluster Networking (Design)](../design/cluster-networking.md) — how the protocols and backends work +- [Remote Actors](../guide/remote_actors.md) — resolve, named actors, multi-node +- [Ray + Pulsing](migrate_from_ray.md) — use Pulsing as Ray's communication layer diff --git a/docs/src/quickstart/cluster_networking.zh.md b/docs/src/quickstart/cluster_networking.zh.md new file mode 100644 index 000000000..d3cfe5174 --- /dev/null +++ b/docs/src/quickstart/cluster_networking.zh.md @@ -0,0 +1,206 @@ +# 集群组网(如何使用) + +本页说明**如何组建和使用** Pulsing 集群。协议与实现细节见 [集群组网(设计)](../design/cluster-networking.zh.md)。 + +--- + +## 三种方式 + +| 方式 | 你需要配置什么 | 适用场景 | +|------|----------------|----------| +| **Gossip + seed** | 绑定地址 + 可选 seed 地址以加入 | Kubernetes、VM、裸机;无单点故障 | +| **Head 节点** | 一个节点作 Head,其余填 Head 地址 | 运维简单;一个固定协调地址 | +| **Init in Ray** | 每个进程调用 `init_in_ray()`,无需 seeds | 已在用 Ray;自动发现 seed | + +所有方式每节点**单一 HTTP/2 端口**,不依赖 etcd、NATS、Redis。 + +--- + +## 方式一:Gossip + seed + +### 配置 + +**Python** + +```python +import pulsing as pul + +# 首节点 +await pul.init(addr="0.0.0.0:8000") + +# 后续节点:通过 seeds 加入 +await pul.init(addr="0.0.0.0:8001", seeds=["192.168.1.10:8000"]) +``` + +**Rust** + +```rust +use pulsing_actor::prelude::*; +use std::net::SocketAddr; + +// 首节点 +let config = SystemConfig::with_addr("0.0.0.0:8000".parse()?); +let system = ActorSystem::new(config).await?; + +// 后续节点 +let config = SystemConfig::with_addr("0.0.0.0:8001".parse()?) 
+ .with_seeds(vec!["192.168.1.10:8000".parse()?]); +let system = ActorSystem::new(config).await?; +``` + +多个 seed(如 Kubernetes Service)时传入列表即可,节点会探测直到获得成员列表。 + +### Kubernetes + +用 Service 名作为 seed,新 Pod 即可加入: + +```python +await pul.init(addr="0.0.0.0:8080", seeds=["actor-cluster.default.svc.cluster.local:8080"]) +``` + +### 何时选用 + +- 发现逻辑无单点故障 +- 运行在 K8s、VM 或裸机,能提供至少一个地址(或 Service)作 seed +- 能接受成员关系的最终一致性(通常几百毫秒内收敛) + +--- + +## 方式二:Head 节点 + +### 配置 + +**Rust** + +```rust +use pulsing_actor::prelude::*; +use std::net::SocketAddr; + +// Head 节点 +let config = SystemConfig::with_addr("0.0.0.0:8000".parse()?) + .with_head_node(); +let system = ActorSystem::new(config).await?; + +// Worker 节点 +let head_addr: SocketAddr = "192.168.1.10:8000".parse()?; +let config = SystemConfig::with_addr("0.0.0.0:8001".parse()?) + .with_head_addr(head_addr); +let system = ActorSystem::new(config).await?; +``` + +**Python** + +```python +import pulsing as pul + +# Head 节点 +await pul.init(addr="0.0.0.0:8000", is_head_node=True) + +# Worker 节点 +await pul.init(addr="0.0.0.0:8001", head_addr="192.168.1.10:8000") +``` + +也可使用 `SystemConfig.with_head_node()` / `.with_head_addr(addr)` 后传给 `ActorSystem.create(config, loop)` 做高级用法。 + +### Head 参数(Rust) + +- **同步间隔**:Worker 从 Head 拉取的周期(默认 5s) +- **心跳间隔**:Worker 向 Head 发送心跳的周期(默认 10s) +- **心跳超时**:Head 将 Worker 判为死亡的时间(默认 30s) + +### 何时选用 + +- 希望一个固定地址(Head)做防火墙与监控 +- 可接受协调单点(Head 宕机期间无法新加入直到恢复) +- 希望以 Head 为成员/注册表的唯一真相源 + +--- + +## 方式三:Init in Ray + +### 前置条件 + +- 已安装 Ray,且先执行 `ray.init()` 再调用 `init_in_ray()` +- 每个使用 Pulsing 的进程(driver 与 worker)都必须在该进程中调用 `init_in_ray()` + +### 用法 + +```python +import ray +from pulsing.integrations.ray import init_in_ray + +# 推荐:用 hook 让每个 worker 启动时执行 init_in_ray +ray.init(runtime_env={"worker_process_setup_hook": init_in_ray}) + +# driver 也必须初始化 +init_in_ray() + +# 按常规使用 Pulsing +import pulsing as pul +@pul.remote +class MyActor: + def run(self): return "ok" + +actor = await MyActor.spawn(name="my_actor") 
+```
+
+**异步**(如 async Ray actor):
+
+```python
+from pulsing.integrations.ray import async_init_in_ray
+await async_init_in_ray()
+```
+
+**清理**(如测试):
+
+```python
+from pulsing.integrations.ray import cleanup
+cleanup()
+```
+
+### 何时选用
+
+- 已在用 Ray,希望 Pulsing 在同一批节点上组成一个集群
+- 希望每个进程一行代码完成组网,无需自己维护 seed 或 Head 地址
+- 能接受仅在启动阶段依赖 Ray 的 KV;之后仅用 Pulsing 自己的 gossip
+
+### 限制
+
+- 依赖 Ray 及其 internal KV
+- 每个进程都必须调用 `init_in_ray()`(driver 显式;worker 通过 hook)
+- 一个 Ray 集群对应一个 Pulsing 集群(一个 KV key)
+
+---
+
+## 对比与选型
+
+| 维度 | Gossip + seed | Head 节点 | Init in Ray |
+|------|----------------|-----------|-------------|
+| 外部依赖 | 无 | 无 | Ray |
+| 单点故障 | 无 | 有(Head) | 无 |
+| 配置 | addr + 可选 seeds | addr + Head 地址或 Head 角色 | 无(Ray KV) |
+| 适用环境 | K8s、VM、裸机 | 可接受单一协调节点 | 已有 Ray 集群 |
+| Python init() | `addr`、`seeds` | `addr` + `is_head_node` / `head_addr` | `init_in_ray()` |
+
+**选型建议:**
+
+- **已有 Ray** → **Init in Ray**
+- **不要单点且不用 Ray** → **Gossip + seed**(K8s 下用 Service 作 seed)
+- **一个固定协调节点、运维简单** → **Head 节点**
+
+---
+
+## 最佳实践
+
+1. **Gossip + seed**:K8s 下用 Service 作 seed;各节点开放同一端口(Actor + Gossip)。
+2. **Head 节点**:Head 部署在稳定主机/端口;根据负载调整心跳超时。
+3. **Init in Ray**:Driver 中调用 `init_in_ray()` 并设置 `worker_process_setup_hook`;测试中如需可调用 `cleanup()`。
+4. **安全**:任意方式均可为集群流量开启 TLS(如 passphrase),见 [安全](../guide/security.zh.md)。
+
+---
+
+## 相关文档
+
+- [集群组网(设计)](../design/cluster-networking.zh.md) — 协议与后端如何实现
+- [远程 Actor](../guide/remote_actors.zh.md) — resolve、命名 Actor、多节点
+- [Ray + Pulsing](migrate_from_ray.zh.md) — 用 Pulsing 作为 Ray 的通信层
diff --git a/docs/src/quickstart/index.md b/docs/src/quickstart/index.md
index fb8c02316..4eff8930c 100644
--- a/docs/src/quickstart/index.md
+++ b/docs/src/quickstart/index.md
@@ -1,6 +1,8 @@
-# Getting Started
+# Quick Start
 
-Get Pulsing running in **5 minutes**.
+Go from zero to a **distributed Actor** in about **10 minutes** with three steps: your first Actor, a stateful Actor, then the same code across two nodes.
+ +--- ## Installation @@ -10,7 +12,35 @@ pip install pulsing --- -## Your First Actor +## 1. Your First Actor (~2 minutes) + +Define a class, add `@pul.remote`, then spawn and call it. + +```python +import asyncio +import pulsing as pul + +@pul.remote +class Greeter: + def greet(self, name: str) -> str: + return f"Hello, {name}!" + +async def main(): + await pul.init() + greeter = await Greeter.spawn() + print(await greeter.greet("World")) # Hello, World! + await pul.shutdown() + +asyncio.run(main()) +``` + +The `@pul.remote` decorator turns the class into a distributed Actor. `spawn()` creates an instance; method calls use normal `await`. + +--- + +## 2. Stateful Actor (~3 minutes) + +Actors hold state. Here, a counter keeps a value and exposes `inc` and `get`. ```python import asyncio @@ -18,24 +48,81 @@ import pulsing as pul @pul.remote class Counter: - def __init__(self, value=0): + def __init__(self, value: int = 0): self.value = value - def inc(self): - self.value += 1 + def inc(self, n: int = 1) -> int: + self.value += n + return self.value + + def get(self) -> int: return self.value async def main(): await pul.init() counter = await Counter.spawn(value=0) - print(await counter.inc()) # 1 - print(await counter.inc()) # 2 + print(await counter.inc()) # 1 + print(await counter.inc(2)) # 3 + print(await counter.get()) # 3 + await pul.shutdown() + +asyncio.run(main()) +``` + +Same idea: one Actor instance, private state, messages via method calls. No shared memory, no locks. + +--- + +## 3. Distributed: Same Code, Two Nodes (~5 minutes) + +Run the same Actor type on two processes. Only the **initialization** changes: bind an address on the first node, join with `seeds` on the second. 
+ +**Node 1 (seed):** + +```python +import asyncio +import pulsing as pul + +@pul.remote +class Counter: + def __init__(self, value: int = 0): + self.value = value + def inc(self, n: int = 1) -> int: + self.value += n + return self.value + +async def main(): + await pul.init(addr="0.0.0.0:8000") + await Counter.spawn(value=0, name="counter") + await asyncio.Event().wait() # keep running + +asyncio.run(main()) +``` + +**Node 2 (join cluster, then resolve and call):** + +```python +import asyncio +import pulsing as pul + +@pul.remote +class Counter: + def __init__(self, value: int = 0): + self.value = value + def inc(self, n: int = 1) -> int: + self.value += n + return self.value + +async def main(): + await pul.init(addr="0.0.0.0:8001", seeds=["127.0.0.1:8000"]) + counter = await Counter.resolve("counter") + print(await counter.inc(10)) # 10 — same API, remote actor await pul.shutdown() asyncio.run(main()) ``` -The `@pul.remote` decorator turns any Python class into a distributed Actor. +**What changed:** `init(addr=..., seeds=...)` and `Counter.resolve("counter")` instead of `spawn()`. The rest of your code stays the same — **location transparency**. --- @@ -59,11 +146,11 @@ The `@pul.remote` decorator turns any Python class into a distributed Actor. [:octicons-arrow-right-24: ~10 minutes](agent.md) -- :material-swap-horizontal:{ .lg .middle } **Migrate from Ray** +- :material-swap-horizontal:{ .lg .middle } **Use with Ray** --- - Replace Ray with one import change. Zero external dependencies. + Bridge Ray actors onto the Pulsing network with `pul.mount()`. Add streaming and discovery to your Ray cluster. [:octicons-arrow-right-24: ~5 minutes](migrate_from_ray.md) @@ -75,6 +162,9 @@ The `@pul.remote` decorator turns any Python class into a distributed Actor. 
| Goal | Link | |------|------| -| Understand the Actor model | [Guide: Actors](../guide/actors.md) | -| Build a cluster | [Guide: Remote Actors](../guide/remote_actors.md) | -| Operate your system | [Guide: Operations](../guide/operations.md) | +| Named actors and ask vs tell | [Actor Patterns](patterns.md) | +| Form a cluster (Gossip / Head / Ray) | [Cluster Setup](cluster_networking.md) | +| Actor basics and patterns | [Actor Guide](../guide/actors.md) | +| When to use ask / tell / streaming | [Communication Patterns](../guide/communication_patterns.md) | +| Cluster setup and resolve | [Remote Actors](../guide/remote_actors.md) | +| Operate and inspect | [Operations](../guide/operations.md) | diff --git a/docs/src/quickstart/index.zh.md b/docs/src/quickstart/index.zh.md index 503e970bd..56123092e 100644 --- a/docs/src/quickstart/index.zh.md +++ b/docs/src/quickstart/index.zh.md @@ -1,6 +1,8 @@ # 快速开始 -**5 分钟**让 Pulsing 跑起来。 +用三个步骤、约 **10 分钟** 从零到**分布式 Actor**:第一个 Actor、有状态 Actor,再到同一套代码跑在两个节点上。 + +--- ## 安装 @@ -10,7 +12,35 @@ pip install pulsing --- -## 第一个 Actor +## 1. 第一个 Actor(约 2 分钟) + +定义一个类,加上 `@pul.remote`,然后 spawn 并调用。 + +```python +import asyncio +import pulsing as pul + +@pul.remote +class Greeter: + def greet(self, name: str) -> str: + return f"Hello, {name}!" + +async def main(): + await pul.init() + greeter = await Greeter.spawn() + print(await greeter.greet("World")) # Hello, World! + await pul.shutdown() + +asyncio.run(main()) +``` + +`@pul.remote` 把类变成分布式 Actor;`spawn()` 创建实例,方法调用就是普通的 `await`。 + +--- + +## 2. 
有状态 Actor(约 3 分钟) + +Actor 自带状态。下面这个计数器维护一个值,并暴露 `inc` 和 `get`。 ```python import asyncio @@ -18,24 +48,81 @@ import pulsing as pul @pul.remote class Counter: - def __init__(self, value=0): + def __init__(self, value: int = 0): self.value = value - def inc(self): - self.value += 1 + def inc(self, n: int = 1) -> int: + self.value += n + return self.value + + def get(self) -> int: return self.value async def main(): await pul.init() counter = await Counter.spawn(value=0) - print(await counter.inc()) # 1 - print(await counter.inc()) # 2 + print(await counter.inc()) # 1 + print(await counter.inc(2)) # 3 + print(await counter.get()) # 3 + await pul.shutdown() + +asyncio.run(main()) +``` + +同样的思路:一个 Actor 实例、私有状态、通过方法调用发消息。无共享内存、无锁。 + +--- + +## 3. 分布式:同一套代码,两个节点(约 5 分钟) + +在两个进程里跑同一种 Actor。**只有初始化不同**:第一个节点绑定地址,第二个节点用 `seeds` 加入集群。 + +**节点 1(seed):** + +```python +import asyncio +import pulsing as pul + +@pul.remote +class Counter: + def __init__(self, value: int = 0): + self.value = value + def inc(self, n: int = 1) -> int: + self.value += n + return self.value + +async def main(): + await pul.init(addr="0.0.0.0:8000") + await Counter.spawn(value=0, name="counter") + await asyncio.Event().wait() # 保持运行 + +asyncio.run(main()) +``` + +**节点 2(加入集群后 resolve 并调用):** + +```python +import asyncio +import pulsing as pul + +@pul.remote +class Counter: + def __init__(self, value: int = 0): + self.value = value + def inc(self, n: int = 1) -> int: + self.value += n + return self.value + +async def main(): + await pul.init(addr="0.0.0.0:8001", seeds=["127.0.0.1:8000"]) + counter = await Counter.resolve("counter") + print(await counter.inc(10)) # 10 — 同一套 API,远程 Actor await pul.shutdown() asyncio.run(main()) ``` -`@pul.remote` 装饰器将任意 Python 类变成分布式 Actor。 +**变化只有:** `init(addr=..., seeds=...)` 和用 `Counter.resolve("counter")` 代替 `spawn()`。其余代码不变 —— **位置透明**。 --- @@ -59,11 +146,11 @@ asyncio.run(main()) [:octicons-arrow-right-24: ~10 分钟](agent.zh.md) -- :material-swap-horizontal:{ .lg 
.middle } **从 Ray 迁移** +- :material-swap-horizontal:{ .lg .middle } **与 Ray 配合使用** --- - 一行导入替换 Ray。零外部依赖。 + 通过 `pul.mount()` 将 Ray Actor 接入 Pulsing 网络。为 Ray 集群增加流式和发现能力。 [:octicons-arrow-right-24: ~5 分钟](migrate_from_ray.zh.md) @@ -75,6 +162,9 @@ asyncio.run(main()) | 目标 | 链接 | |------|------| -| 理解 Actor 模型 | [指南:Actor](../guide/actors.zh.md) | -| 构建集群 | [指南:远程 Actor](../guide/remote_actors.zh.md) | -| 运维系统 | [指南:运维操作](../guide/operations.zh.md) | +| 命名 Actor 与 ask/tell | [Actor 模式](patterns.zh.md) | +| 组建集群(Gossip / Head / Ray) | [集群组网](cluster_networking.zh.md) | +| Actor 基础与模式 | [Actor 指南](../guide/actors.zh.md) | +| 何时用 ask / tell / streaming | [通信范式](../guide/communication_patterns.zh.md) | +| 集群搭建与 resolve | [远程 Actor](../guide/remote_actors.zh.md) | +| 运维与巡检 | [运维操作](../guide/operations.zh.md) | diff --git a/docs/src/quickstart/llm_inference.md b/docs/src/quickstart/llm_inference.md index 99a49f6d3..2b2536ba9 100644 --- a/docs/src/quickstart/llm_inference.md +++ b/docs/src/quickstart/llm_inference.md @@ -2,6 +2,16 @@ Build a **scalable LLM inference backend** with Pulsing in 10 minutes. +**Before / After:** + +| | Before (single process or ad‑hoc script) | After (Pulsing) | +|---|------------------------------------------|-----------------| +| **API** | Your own HTTP or in-process only | OpenAI-compatible HTTP API (`/v1/chat/completions`) | +| **Scaling** | One process, one model | Router + N workers; add nodes and workers as needed | +| **Streaming** | Hand-rolled if any | Native streaming from Router to client | + +You get a **Router** (HTTP API + load balancing) and **Workers** (model backends). Same Actor model; add more workers or nodes without changing client code. 
+ **What you'll build:** - A Router that exposes an **OpenAI-compatible HTTP API** @@ -43,7 +53,7 @@ Choose a backend: Open **Terminal A**: ```bash -pulsing actor pulsing.actors.Router \ +pulsing actor pulsing.serving.Router \ --addr 0.0.0.0:8000 \ --http_port 8080 \ --model_name my-llm @@ -64,7 +74,7 @@ Open **Terminal B**: === "Transformers (CPU)" ```bash - pulsing actor pulsing.actors.TransformersWorker \ + pulsing actor pulsing.serving.TransformersWorker \ --model_name gpt2 \ --device cpu \ --addr 0.0.0.0:8001 \ @@ -74,7 +84,7 @@ Open **Terminal B**: === "vLLM (GPU)" ```bash - pulsing actor pulsing.actors.VllmWorker \ + pulsing actor pulsing.serving.VllmWorker \ --model Qwen/Qwen2.5-0.5B \ --addr 0.0.0.0:8002 \ --seeds 127.0.0.1:8000 @@ -135,10 +145,10 @@ Add more workers to handle more load: ```bash # Terminal C -pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --addr 0.0.0.0:8003 --seeds 127.0.0.1:8000 +pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --addr 0.0.0.0:8003 --seeds 127.0.0.1:8000 # Terminal D -pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --addr 0.0.0.0:8004 --seeds 127.0.0.1:8000 +pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --addr 0.0.0.0:8004 --seeds 127.0.0.1:8000 ``` The Router automatically load-balances across all workers. 
diff --git a/docs/src/quickstart/llm_inference.zh.md b/docs/src/quickstart/llm_inference.zh.md index d42dd2982..f38a56efb 100644 --- a/docs/src/quickstart/llm_inference.zh.md +++ b/docs/src/quickstart/llm_inference.zh.md @@ -2,6 +2,16 @@ 10 分钟内用 Pulsing 构建一个**可扩展的 LLM 推理后端**。 +**前后对比:** + +| | 之前(单进程或临时脚本) | 之后(Pulsing) | +|---|--------------------------|-----------------| +| **API** | 自建 HTTP 或仅进程内 | OpenAI 兼容 HTTP API(`/v1/chat/completions`) | +| **扩展** | 单进程、单模型 | Router + N 个 Worker;按需增加节点与 Worker | +| **流式** | 若有则手写 | Router 到客户端的原生流式 | + +你会得到一个 **Router**(HTTP API + 负载均衡)和若干 **Worker**(模型后端)。同一套 Actor 模型;增加 Worker 或节点无需改客户端代码。 + **你将构建:** - 一个暴露 **OpenAI 兼容 HTTP API** 的 Router @@ -43,7 +53,7 @@ pip install pulsing 打开**终端 A**: ```bash -pulsing actor pulsing.actors.Router \ +pulsing actor pulsing.serving.Router \ --addr 0.0.0.0:8000 \ --http_port 8080 \ --model_name my-llm @@ -64,7 +74,7 @@ pulsing actor pulsing.actors.Router \ === "Transformers (CPU)" ```bash - pulsing actor pulsing.actors.TransformersWorker \ + pulsing actor pulsing.serving.TransformersWorker \ --model_name gpt2 \ --device cpu \ --addr 0.0.0.0:8001 \ @@ -74,7 +84,7 @@ pulsing actor pulsing.actors.Router \ === "vLLM (GPU)" ```bash - pulsing actor pulsing.actors.VllmWorker \ + pulsing actor pulsing.serving.VllmWorker \ --model Qwen/Qwen2.5-0.5B \ --addr 0.0.0.0:8002 \ --seeds 127.0.0.1:8000 @@ -135,10 +145,10 @@ curl -N http://localhost:8080/v1/chat/completions \ ```bash # 终端 C -pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --addr 0.0.0.0:8003 --seeds 127.0.0.1:8000 +pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --addr 0.0.0.0:8003 --seeds 127.0.0.1:8000 # 终端 D -pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --addr 0.0.0.0:8004 --seeds 127.0.0.1:8000 +pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --addr 0.0.0.0:8004 --seeds 127.0.0.1:8000 ``` Router 会自动在所有 Worker 间负载均衡。 diff --git 
a/docs/src/quickstart/migrate_from_ray.md b/docs/src/quickstart/migrate_from_ray.md index df909833b..0a13092ed 100644 --- a/docs/src/quickstart/migrate_from_ray.md +++ b/docs/src/quickstart/migrate_from_ray.md @@ -1,38 +1,64 @@ -# Tutorial: Migrate from Ray +# Tutorial: Ray + Pulsing -Replace Ray with Pulsing in **5 minutes**. One import change, zero external dependencies. +Use Pulsing as the communication backbone for your Ray actors — add streaming, actor discovery, and cross-cluster calls without replacing Ray. --- -## Why Migrate? +## Two Ways to Use Pulsing with Ray -| | Ray | Pulsing | -|---|-----|---------| -| **Dependencies** | Ray cluster, Redis, GCS | None | -| **Startup time** | Seconds | Milliseconds | -| **Memory overhead** | High | Low | -| **Actor model** | Stateful remote objects | Classical (mailbox, FIFO) | -| **Streaming** | Manual | Native | +1. **Bridge mode** — Keep your Ray actors, add Pulsing communication via `pul.mount()` +2. **Standalone mode** — Use Pulsing's native API directly (for new projects or full migration) --- -## Step 1: Change the Import +## Bridge Mode: Add Pulsing to Ray Actors + +The simplest path — keep Ray for scheduling, add Pulsing for communication: ```python -# Before (Ray) import ray +import pulsing as pul + +@ray.remote +class Worker: + def __init__(self, name): + pul.mount(self, name=name) # One line: join the Pulsing network + + async def call_peer(self, peer_name, msg): + proxy = (await pul.resolve(peer_name, timeout=30)).as_any() + return await proxy.greet(msg) # Cross-process Pulsing call -# After (Pulsing) -from pulsing.compat import ray + async def greet(self, msg): + return f"hello: {msg}" + +ray.init() +workers = [Worker.remote(f"w{i}") for i in range(3)] +ray.get(workers[0].call_peer.remote("w1", "hi")) # => "hello: hi" +pul.cleanup_ray() ``` -**That's it.** Your existing code works. +**What you get:** Ray handles process scheduling and resource management. 
Pulsing adds streaming, named actor discovery, and direct actor-to-actor communication — without going through Ray's object store. --- -## Step 2: Run Your Code +## Standalone Mode: Pulsing Native API -### Before (Ray) +For new projects or when you want Pulsing's full feature set: + +### API Mapping (Ray -> Pulsing) + +| Ray | Pulsing | +|---|---| +| `ray.init()` | `await pul.init()` | +| `ray.shutdown()` | `await pul.shutdown()` | +| `@ray.remote` | `@pul.remote` | +| `Actor.remote(args...)` | `await Actor.spawn(args...)` | +| `ray.get(actor.method.remote(args...))` | `await actor.method(args...)` | +| `ray.get_actor(name)` | `await Actor.resolve(name)` or `await pul.resolve(name)` | + +### Minimal Example + +**Ray:** ```python import ray @@ -43,136 +69,81 @@ ray.init() class Counter: def __init__(self): self.value = 0 - def inc(self): self.value += 1 return self.value counter = Counter.remote() -print(ray.get(counter.inc.remote())) # 1 -print(ray.get(counter.inc.remote())) # 2 - +print(ray.get(counter.inc.remote())) ray.shutdown() ``` -### After (Pulsing) +**Pulsing:** ```python -from pulsing.compat import ray # ← only this line changed - -ray.init() +import pulsing as pul -@ray.remote +@pul.remote class Counter: def __init__(self): self.value = 0 - def inc(self): self.value += 1 return self.value -counter = Counter.remote() -print(ray.get(counter.inc.remote())) # 1 -print(ray.get(counter.inc.remote())) # 2 - -ray.shutdown() +async def main(): + await pul.init() + counter = await Counter.spawn(name="counter") + print(await counter.inc()) + await pul.shutdown() ``` ---- +**Key differences:** -## Supported APIs +| Aspect | Ray | Pulsing | +|--------|-----|---------| +| Create actor | `Counter.remote()` | `await Counter.spawn()` — native async | +| Call method | `ray.get(counter.inc.remote())` | `await counter.inc()` — direct await | +| Get by name | `ray.get_actor("counter")` | `await Counter.resolve("counter")` — typed proxy | +| Streaming | Not built-in | Native 
`async for chunk in actor.stream()` | +| Discovery | Needs GCS | Built-in gossip, zero external deps | -| API | Status | -|-----|--------| -| `ray.init()` | ✅ | -| `ray.shutdown()` | ✅ | -| `@ray.remote` (class) | ✅ | -| `@ray.remote` (function) | ✅ | -| `ray.get()` | ✅ | -| `ray.put()` | ✅ | -| `ray.wait()` | ✅ | -| `ActorClass.remote()` | ✅ | -| `actor.method.remote()` | ✅ | +Same mental model (remote class, spawn, method calls). Pulsing adds native async, streaming, and self-contained clustering. --- -## Distributed Mode - -Ray requires a cluster. Pulsing just needs `--addr` and `--seeds`: +## Distributed Mode Mapping ### Node 1 (seed) ```python -from pulsing.compat import ray - -ray.init(address="0.0.0.0:8000") +import pulsing as pul -@ray.remote +@pul.remote class Worker: - def process(self, data): + def process(self, data: str) -> str: return f"processed: {data}" -worker = Worker.remote() -# Keep running... -``` - -### Node 2 (join) - -```python -from pulsing.compat import ray - -ray.init(address="0.0.0.0:8001", seeds=["192.168.1.1:8000"]) - -# Find remote actor -worker = ray.get_actor("Worker") -result = ray.get(worker.process.remote("hello")) +await pul.init(addr="0.0.0.0:8000") +await Worker.spawn(name="worker") ``` ---- - -## Native Async API (Optional) - -For new code, consider the native async API: +### Node 2 (join + resolve) ```python import pulsing as pul -@pul.remote -class Counter: - def __init__(self): - self.value = 0 - - def inc(self): - self.value += 1 - return self.value - -async def main(): - await pul.init() - counter = await Counter.spawn() - print(await counter.inc()) # 1 - await pul.shutdown() +await pul.init(addr="0.0.0.0:8001", seeds=["192.168.1.1:8000"]) +worker = await Worker.resolve("worker") +result = await worker.process("hello") ``` -**Benefits:** - -- Cleaner `async/await` syntax -- No `ray.get()` boilerplate -- IDE autocompletion works -- Access to streaming messages - --- -## Limitations - -The Ray-compatible API does not 
support: - -- Ray Serve -- Ray Tune -- Ray Data -- Object Store (large objects) -- Placement Groups +## Notes -For these features, continue using Ray. Pulsing focuses on the Actor model. +- Prefer typed proxy: `await Class.resolve(name)`. +- If only a runtime name is available: `ref = await pul.resolve(name)` then `ref.as_type(Class)` / `ref.as_any()`. --- diff --git a/docs/src/quickstart/migrate_from_ray.zh.md b/docs/src/quickstart/migrate_from_ray.zh.md index c6083b156..5eea3bfa8 100644 --- a/docs/src/quickstart/migrate_from_ray.zh.md +++ b/docs/src/quickstart/migrate_from_ray.zh.md @@ -1,38 +1,64 @@ -# 教程:从 Ray 迁移 +# 教程:Ray + Pulsing -**5 分钟**内用 Pulsing 替换 Ray。一行导入改动,零外部依赖。 +用 Pulsing 作为 Ray Actor 的通信骨干——增加流式、Actor 发现和跨集群调用能力,无需替换 Ray。 --- -## 为什么迁移? +## 两种使用方式 -| | Ray | Pulsing | -|---|-----|---------| -| **依赖** | Ray 集群、Redis、GCS | 无 | -| **启动时间** | 秒级 | 毫秒级 | -| **内存开销** | 高 | 低 | -| **Actor 模型** | 带状态的远程对象 | 经典模型(邮箱、FIFO) | -| **流式消息** | 手动实现 | 原生支持 | +1. **桥接模式** — 保留 Ray Actor,通过 `pul.mount()` 接入 Pulsing 通信 +2. 
**独立模式** — 直接使用 Pulsing 原生 API(适合新项目或完全迁移) --- -## 步骤 1:修改导入 +## 桥接模式:为 Ray Actor 增加 Pulsing 通信 + +最简单的路径——Ray 负责调度,Pulsing 负责通信: ```python -# 之前 (Ray) import ray +import pulsing as pul + +@ray.remote +class Worker: + def __init__(self, name): + pul.mount(self, name=name) # 一行代码:接入 Pulsing 网络 + + async def call_peer(self, peer_name, msg): + proxy = (await pul.resolve(peer_name, timeout=30)).as_any() + return await proxy.greet(msg) # 跨进程 Pulsing 调用 -# 之后 (Pulsing) -from pulsing.compat import ray + async def greet(self, msg): + return f"hello: {msg}" + +ray.init() +workers = [Worker.remote(f"w{i}") for i in range(3)] +ray.get(workers[0].call_peer.remote("w1", "hi")) # => "hello: hi" +pul.cleanup_ray() ``` -**完成了。** 现有代码直接可用。 +**你获得的能力:** Ray 处理进程调度和资源管理。Pulsing 增加流式、命名 Actor 发现和直接的 Actor 间通信——不经过 Ray 的对象存储。 --- -## 步骤 2:运行代码 +## 独立模式:Pulsing 原生 API -### 之前 (Ray) +适合新项目或需要 Pulsing 完整特性的场景: + +### API 对照表(Ray -> Pulsing) + +| Ray | Pulsing | +|---|---| +| `ray.init()` | `await pul.init()` | +| `ray.shutdown()` | `await pul.shutdown()` | +| `@ray.remote` | `@pul.remote` | +| `Actor.remote(args...)` | `await Actor.spawn(args...)` | +| `ray.get(actor.method.remote(args...))` | `await actor.method(args...)` | +| `ray.get_actor(name)` | `await Actor.resolve(name)` 或 `await pul.resolve(name)` | + +### 最小示例 + +**Ray:** ```python import ray @@ -43,136 +69,81 @@ ray.init() class Counter: def __init__(self): self.value = 0 - def inc(self): self.value += 1 return self.value counter = Counter.remote() -print(ray.get(counter.inc.remote())) # 1 -print(ray.get(counter.inc.remote())) # 2 - +print(ray.get(counter.inc.remote())) ray.shutdown() ``` -### 之后 (Pulsing) +**Pulsing:** ```python -from pulsing.compat import ray # ← 只改了这一行 - -ray.init() +import pulsing as pul -@ray.remote +@pul.remote class Counter: def __init__(self): self.value = 0 - def inc(self): self.value += 1 return self.value -counter = Counter.remote() -print(ray.get(counter.inc.remote())) # 1 
-print(ray.get(counter.inc.remote())) # 2 - -ray.shutdown() +async def main(): + await pul.init() + counter = await Counter.spawn(name="counter") + print(await counter.inc()) + await pul.shutdown() ``` ---- +**关键差异:** -## 支持的 API +| 方面 | Ray | Pulsing | +|------|-----|---------| +| 创建 Actor | `Counter.remote()` | `await Counter.spawn()` — 原生 async | +| 调用方法 | `ray.get(counter.inc.remote())` | `await counter.inc()` — 直接 await | +| 按名获取 | `ray.get_actor("counter")` | `await Counter.resolve("counter")` — 带类型代理 | +| 流式 | 非内置 | 原生 `async for chunk in actor.stream()` | +| 发现 | 需要 GCS | 内置 gossip,零外部依赖 | -| API | 状态 | -|-----|------| -| `ray.init()` | ✅ | -| `ray.shutdown()` | ✅ | -| `@ray.remote` (类) | ✅ | -| `@ray.remote` (函数) | ✅ | -| `ray.get()` | ✅ | -| `ray.put()` | ✅ | -| `ray.wait()` | ✅ | -| `ActorClass.remote()` | ✅ | -| `actor.method.remote()` | ✅ | +心智模型一致(远程类、spawn、方法调用)。Pulsing 增加了原生 async、流式和自包含集群能力。 --- -## 分布式模式 - -Ray 需要集群。Pulsing 只需要 `--addr` 和 `--seeds`: +## 分布式模式对照 ### 节点 1(种子) ```python -from pulsing.compat import ray - -ray.init(address="0.0.0.0:8000") +import pulsing as pul -@ray.remote +@pul.remote class Worker: - def process(self, data): + def process(self, data: str) -> str: return f"processed: {data}" -worker = Worker.remote() -# 保持运行... 
-``` - -### 节点 2(加入) - -```python -from pulsing.compat import ray - -ray.init(address="0.0.0.0:8001", seeds=["192.168.1.1:8000"]) - -# 查找远程 Actor -worker = ray.get_actor("Worker") -result = ray.get(worker.process.remote("hello")) +await pul.init(addr="0.0.0.0:8000") +await Worker.spawn(name="worker") ``` ---- - -## 原生异步 API(可选) - -新代码建议使用原生异步 API: +### 节点 2(加入 + 解析) ```python import pulsing as pul -@pul.remote -class Counter: - def __init__(self): - self.value = 0 - - def inc(self): - self.value += 1 - return self.value - -async def main(): - await pul.init() - counter = await Counter.spawn() - print(await counter.inc()) # 1 - await pul.shutdown() +await pul.init(addr="0.0.0.0:8001", seeds=["192.168.1.1:8000"]) +worker = await Worker.resolve("worker") +result = await worker.process("hello") ``` -**优势:** - -- 更简洁的 `async/await` 语法 -- 无需 `ray.get()` 样板代码 -- IDE 自动补全正常工作 -- 可使用流式消息 - --- -## 限制 - -Ray 兼容 API 不支持: - -- Ray Serve -- Ray Tune -- Ray Data -- Object Store(大对象) -- Placement Groups +## 说明 -这些功能请继续使用 Ray。Pulsing 专注于 Actor 模型。 +- 优先使用 typed proxy:`await Class.resolve(name)`。 +- 若只有运行时名称:`ref = await pul.resolve(name)`,再使用 `ref.as_type(Class)` / `ref.as_any()`。 --- diff --git a/docs/src/quickstart/patterns.md b/docs/src/quickstart/patterns.md new file mode 100644 index 000000000..115218399 --- /dev/null +++ b/docs/src/quickstart/patterns.md @@ -0,0 +1,48 @@ +# Actor Patterns + +Common patterns right after your first Actor: named actors, resolve, and when to use ask vs tell. 
+ +--- + +## Named actors and resolve + +Give an actor a **name** so other code can find it with **resolve** (same process or across the cluster): + +```python +import pulsing as pul + +@pul.remote +class Worker: + def process(self, data: str) -> str: + return f"processed: {data}" + +async def main(): + await pul.init() + # Spawn with a name — discoverable via resolve + await Worker.spawn(name="worker") + # Later (or on another node): get a proxy by name + worker = await Worker.resolve("worker") + result = await worker.process("hello") + await pul.shutdown() +``` + +Anonymous actors (no `name=`) are only reachable via the `ActorRef` returned by `spawn()`. + +--- + +## Ask vs tell + +| Pattern | Method | Use when | +|--------|--------|----------| +| **Request–response** | `await ref.ask(msg)` or `await proxy.method()` | You need a return value. | +| **Fire-and-forget** | `await ref.tell(msg)` | You don't need a reply; best-effort delivery. | + +For typed proxies, method calls are like **ask** (they return the result). Use **tell** when you have an `ActorRef` and want to send without waiting. 
+ +--- + +## Next steps + +- [Cluster Setup](cluster_networking.md) — form a cluster (Gossip / Head / Ray) +- [Actor Basics](../guide/actors.md) — deeper model and API +- [Communication Patterns](../guide/communication_patterns.md) — streaming, timeouts, and more diff --git a/docs/src/quickstart/patterns.zh.md b/docs/src/quickstart/patterns.zh.md new file mode 100644 index 000000000..189685161 --- /dev/null +++ b/docs/src/quickstart/patterns.zh.md @@ -0,0 +1,48 @@ +# Actor 模式 + +在写完第一个 Actor 之后常用的几种写法:命名 Actor、resolve,以及何时用 ask / tell。 + +--- + +## 命名 Actor 与 resolve + +给 Actor 起一个 **name**,其他代码(本进程或集群内)可以用 **resolve** 按名查找: + +```python +import pulsing as pul + +@pul.remote +class Worker: + def process(self, data: str) -> str: + return f"processed: {data}" + +async def main(): + await pul.init() + # 带名字 spawn,可通过 resolve 发现 + await Worker.spawn(name="worker") + # 之后(或另一节点):按名拿到 proxy + worker = await Worker.resolve("worker") + result = await worker.process("hello") + await pul.shutdown() +``` + +匿名 Actor(不传 `name=`)只能通过 `spawn()` 返回的 `ActorRef` 访问。 + +--- + +## Ask 与 tell + +| 模式 | 方法 | 适用场景 | +|------|------|----------| +| **请求–响应** | `await ref.ask(msg)` 或 `await proxy.method()` | 需要返回值。 | +| **发送即忘** | `await ref.tell(msg)` | 不需要回复;尽力而为投递。 | + +有类型 proxy 时,方法调用相当于 **ask**(会返回结果)。只有在手头是 `ActorRef` 且不想等待回复时再用 **tell**。 + +--- + +## 下一步 + +- [集群组网](cluster_networking.zh.md) — 组建集群(Gossip / Head / Ray) +- [Actor 基础](../guide/actors.zh.md) — 模型与 API 深入 +- [通信范式](../guide/communication_patterns.zh.md) — 流式、超时等 diff --git a/examples/agent/autogen/distributed.py b/examples/agent/autogen/distributed.py index 2b70c59e1..1d6bf2257 100644 --- a/examples/agent/autogen/distributed.py +++ b/examples/agent/autogen/distributed.py @@ -115,7 +115,7 @@ async def run_with_rank( rank: int, world_size: int, master_addr: str, pulsing_base_port: int ): """Run corresponding role based on rank""" - from pulsing.autogen import PulsingRuntime + from pulsing.integrations.autogen import 
PulsingRuntime my_addr, seeds = get_pulsing_config(rank, master_addr, pulsing_base_port) role_name, agent_class = ROLE_MAP.get(rank, (f"worker_{rank}", None)) @@ -171,7 +171,7 @@ async def run_manager_logic(runtime): async def run_standalone(): """Standalone mode""" - from pulsing.autogen import PulsingRuntime + from pulsing.integrations.autogen import PulsingRuntime print("Running in standalone mode") runtime = PulsingRuntime() @@ -190,7 +190,7 @@ async def run_standalone(): async def run_writer(): - from pulsing.autogen import PulsingRuntime + from pulsing.integrations.autogen import PulsingRuntime runtime = PulsingRuntime(addr="0.0.0.0:8001", seeds=[]) await runtime.start() @@ -200,7 +200,7 @@ async def run_writer(): async def run_editor(): - from pulsing.autogen import PulsingRuntime + from pulsing.integrations.autogen import PulsingRuntime runtime = PulsingRuntime(addr="0.0.0.0:8002", seeds=["127.0.0.1:8001"]) await runtime.start() @@ -210,7 +210,7 @@ async def run_editor(): async def run_manager(): - from pulsing.autogen import PulsingRuntime + from pulsing.integrations.autogen import PulsingRuntime runtime = PulsingRuntime(addr="0.0.0.0:8003", seeds=["127.0.0.1:8001"]) await runtime.start() diff --git a/examples/agent/autogen/simple.py b/examples/agent/autogen/simple.py index 27b8798c9..7a21711f0 100644 --- a/examples/agent/autogen/simple.py +++ b/examples/agent/autogen/simple.py @@ -18,7 +18,7 @@ SingleThreadedAgentRuntime, message_handler, ) -from pulsing.autogen import PulsingRuntime +from pulsing.integrations.autogen import PulsingRuntime # Define message types diff --git a/examples/agent/langgraph/distributed.py b/examples/agent/langgraph/distributed.py index d413f97b8..7eaa071da 100644 --- a/examples/agent/langgraph/distributed.py +++ b/examples/agent/langgraph/distributed.py @@ -66,7 +66,7 @@ def build_graph(): async def run_distributed(): """Distributed main program""" - from pulsing.langgraph import with_pulsing + from pulsing.integrations.langgraph 
import with_pulsing print("=" * 50) print("LangGraph + Pulsing Distributed Mode") @@ -100,7 +100,7 @@ async def run_distributed(): async def run_worker(node_name: str, port: int, seed_port: int | None = None): """Start Worker""" - from pulsing.langgraph import start_worker + from pulsing.integrations.langgraph import start_worker nodes = {"llm": llm_node, "tool": tool_node} if node_name not in nodes: diff --git a/examples/agent/langgraph/parallel_ideas.py b/examples/agent/langgraph/parallel_ideas.py index 13a57e00f..7463c3538 100644 --- a/examples/agent/langgraph/parallel_ideas.py +++ b/examples/agent/langgraph/parallel_ideas.py @@ -760,7 +760,7 @@ async def main(): os.environ["LLM_MODEL"] = args.model try: - from pulsing.langgraph import with_pulsing + from pulsing.integrations.langgraph import with_pulsing except ImportError: with_pulsing = None diff --git a/examples/agent/langgraph/simple.py b/examples/agent/langgraph/simple.py index b3fd9cad6..239b0dbca 100644 --- a/examples/agent/langgraph/simple.py +++ b/examples/agent/langgraph/simple.py @@ -58,7 +58,7 @@ def build_graph(): async def main(): - from pulsing.langgraph import with_pulsing + from pulsing.integrations.langgraph import with_pulsing print("=" * 50) print("LangGraph + Pulsing Standalone Mode") diff --git a/examples/agent/pulsing/mbti_discussion.py b/examples/agent/pulsing/mbti_discussion.py index 638b2ab30..ae2af3b83 100644 --- a/examples/agent/pulsing/mbti_discussion.py +++ b/examples/agent/pulsing/mbti_discussion.py @@ -1,13 +1,7 @@ """ Multi-Agent Discussion and Voting Example Based on MBTI Personality Types -Demonstrates the difference between @remote and @agent: - - @remote: Basic Actor decorator - - @agent: Actor with metadata (for visualization/debugging) - -In this example: - - ModeratorActor: Uses @remote (regular Actor) - - MBTIAgent: Uses @agent (with MBTI role metadata) +This example uses `@pul.remote` for all actors. 
Usage: python mbti_discussion.py --mock --topic "Remote work vs On-site work" @@ -21,8 +15,8 @@ import random from collections import Counter -from pulsing.actor import remote, resolve -from pulsing.agent import agent, runtime, llm, parse_json, list_agents +import pulsing as pul +from pulsing.agent import llm, parse_json # ============================================================================ # MBTI Personality Configuration @@ -154,13 +148,13 @@ def sample_mbti_group(size: int) -> list[str]: # ============================================================================ -# Moderator - Uses @remote (Regular Actor, no metadata) +# Moderator Actor # ============================================================================ -@remote +@pul.remote class ModeratorActor: - """Moderator Actor: Coordinates the entire discussion process (uses @remote)""" + """Moderator Actor: Coordinates the entire discussion process.""" def __init__(self, topic: str, rounds: int, debate_time: float, mock: bool): self.topic = topic @@ -200,7 +194,7 @@ async def start_discussion(self) -> dict: print(f"{'=' * 60}") for agent_info in self.agents: - proxy = await resolve(agent_info["name"]) + proxy = await MBTIAgent.resolve(agent_info["name"]) await proxy.form_opinion(self.opinions[-10:]) print(f"\n{'=' * 60}") @@ -228,7 +222,7 @@ async def start_discussion(self) -> dict: continue target = random.choice(opponents) - proxy = await resolve(agent_info["name"]) + proxy = await MBTIAgent.resolve(agent_info["name"]) result = await proxy.debate(target) if result.get("success"): @@ -244,7 +238,7 @@ async def start_discussion(self) -> dict: print(f"{'=' * 60}") for agent_info in self.agents: - proxy = await resolve(agent_info["name"]) + proxy = await MBTIAgent.resolve(agent_info["name"]) await proxy.vote() return self._summarize() @@ -277,17 +271,13 @@ def _summarize(self) -> dict: # ============================================================================ -# MBTI Agent - Uses @agent (with 
metadata, can be used for visualization) +# MBTI Agent # ============================================================================ -@agent( - role="MBTI Participant", - goal="Participate in discussion based on personality traits", - backstory="Express views according to MBTI personality type", -) +@pul.remote class MBTIAgent: - """MBTI Agent: Autonomous Actor participating in discussion (uses @agent, with metadata)""" + """MBTI Agent: Autonomous actor participating in discussion.""" def __init__( self, agent_name: str, mbti: str, topic: str, moderator: str, mock: bool @@ -336,7 +326,7 @@ async def form_opinion(self, others: list[dict]) -> dict: self.stance = data.get("stance", "Neutral") self.argument = data.get("argument", "Needs discussion") - moderator = await resolve(self.moderator_name) + moderator = await ModeratorActor.resolve(self.moderator_name) await moderator.submit_opinion(self.name, self.mbti, self.stance, self.argument) return {"mbti": self.mbti, "stance": self.stance} @@ -390,7 +380,7 @@ async def debate(self, target: dict) -> dict: async def vote(self) -> dict: if self.mock: await asyncio.sleep(random.uniform(0.02, 0.05)) - moderator = await resolve(self.moderator_name) + moderator = await ModeratorActor.resolve(self.moderator_name) await moderator.submit_vote(self.mbti, self.stance or "Abstain") return {"mbti": self.mbti, "vote": self.stance} @@ -416,14 +406,15 @@ async def run( ) print(f"Mode: {'Mock' if mock else 'LLM'}") - async with runtime(): + await pul.init() + try: mbti_group = sample_mbti_group(group_size) dist = Counter(mbti_group) print("\nGroup:") for mbti, count in sorted(dist.items(), key=lambda x: -x[1]): print(f" {mbti} ({MBTI_TYPES[mbti]['name']}): {count}") - # Create moderator (@remote) + # Create moderator moderator = await ModeratorActor.spawn( topic=topic, rounds=rounds, @@ -432,7 +423,7 @@ async def run( name="moderator", ) - # Create participants (@agent, with metadata) + # Create participants for i, mbti in 
enumerate(mbti_group): agent_name = f"agent_{i}_{mbti}" await MBTIAgent.spawn( @@ -445,14 +436,13 @@ async def run( ) await moderator.register_agent(agent_name, mbti) - # Show @agent metadata functionality - print("\nRegistered Agents (via metadata):") - for name, meta in list_agents().items(): - print(f" {name}: {meta.role}") + print(f"\nRegistered Agents: {len(mbti_group)}") # Start discussion result = await moderator.start_discussion() return result + finally: + await pul.shutdown() if __name__ == "__main__": diff --git a/examples/agent/pulsing/parallel_ideas_async.py b/examples/agent/pulsing/parallel_ideas_async.py index 5afc1415d..336a4c78a 100644 --- a/examples/agent/pulsing/parallel_ideas_async.py +++ b/examples/agent/pulsing/parallel_ideas_async.py @@ -15,8 +15,8 @@ import json import random import time -from pulsing.actor import remote, resolve -from pulsing.agent import runtime, llm, parse_json +import pulsing as pul +from pulsing.agent import llm, parse_json # ============================================================================ # Configuration @@ -86,7 +86,7 @@ async def get_llm(): # ============================================================================ -@remote +@pul.remote class JudgeActor: def __init__(self, timeout: float, mock: bool): self.timeout = timeout @@ -132,7 +132,7 @@ async def _timer(self): # Stop all agents for name in self._agents: try: - agent = await resolve(name) + agent = await IdeaAgent.resolve(name) await agent.stop() except Exception as e: print(f"[Judge] Error stopping agent '{name}': {e}") @@ -173,7 +173,7 @@ async def get_result(self) -> dict: # ============================================================================ -@remote +@pul.remote class IdeaAgent: def __init__( self, @@ -293,7 +293,7 @@ async def _run(self) -> dict: elapsed = time.time() - start print(f" [{self.persona}] 📤 Submitted ({elapsed:.1f}s)") - judge = await resolve(self.judge_name) + judge = await JudgeActor.resolve(self.judge_name) result = 
await judge.submit(self.idea, self.iterations) return { @@ -460,7 +460,7 @@ async def _collaborate(self, experts: list[str]) -> list[dict]: print(f" [{self.persona}] 🤝 Requesting [{expert}]") try: - peer = await asyncio.wait_for(resolve(peer_name), timeout=5) + peer = await asyncio.wait_for(IdeaAgent.resolve(peer_name), timeout=5) resp = await asyncio.wait_for( peer.assist(from_agent=self.persona, context={"idea": self.idea}), timeout=10, @@ -511,7 +511,8 @@ async def run( ) print("=" * 50) - async with runtime(): + await pul.init() + try: # Create Judge judge = await JudgeActor.spawn(timeout=timeout, mock=mock, name="judge") @@ -563,6 +564,8 @@ async def run( ) return {"final": final, "agents": results} + finally: + await pul.shutdown() if __name__ == "__main__": diff --git a/examples/agent/pulsing/runtime_lifecycle_example.py b/examples/agent/pulsing/runtime_lifecycle_example.py index 687364875..b11fb3207 100644 --- a/examples/agent/pulsing/runtime_lifecycle_example.py +++ b/examples/agent/pulsing/runtime_lifecycle_example.py @@ -6,10 +6,10 @@ import asyncio -from pulsing.agent import agent, cleanup, runtime +import pulsing as pul -@agent(role="Counter", goal="Accumulate numbers") +@pul.remote class Counter: def __init__(self, initial: int = 0): self.value = initial @@ -25,11 +25,14 @@ async def get_value(self) -> int: async def example_simple(): """Example 1: Simple scenario (no cleanup needed)""" print("\n=== Example 1: Simple Scenario ===") - async with runtime(): + await pul.init() + try: counter = await Counter.spawn(name="counter", initial=0) for _ in range(5): value = await counter.increment() print(f"Current value: {value}") + finally: + await pul.shutdown() async def example_repeated_with_cleanup(): @@ -38,31 +41,29 @@ async def example_repeated_with_cleanup(): for i in range(3): try: - async with runtime(): - counter = await Counter.spawn(name=f"counter_{i}", initial=i * 10) - value = await counter.increment() - print(f"Task {i}: result = {value}") + 
await pul.init() + counter = await Counter.spawn(name=f"counter_{i}", initial=i * 10) + value = await counter.increment() + print(f"Task {i}: result = {value}") finally: - cleanup() # ⭐ Ensure cleanup each time + await pul.shutdown() print(f"Task {i}: cleaned up") async def example_batch_processing(): """Example 3: Batch processing (shared runtime)""" print("\n=== Example 3: Batch Processing (shared runtime) ===") + await pul.init() try: - async with runtime(): - # Create multiple counters - counters = [] - for i in range(5): - counter = await Counter.spawn(name=f"counter_{i}", initial=i) - counters.append(counter) - - # Concurrent processing - results = await asyncio.gather(*[c.increment() for c in counters]) - print(f"Results: {results}") + counters = [] + for i in range(5): + counter = await Counter.spawn(name=f"counter_{i}", initial=i) + counters.append(counter) + + results = await asyncio.gather(*[c.increment() for c in counters]) + print(f"Results: {results}") finally: - cleanup() + await pul.shutdown() async def example_error_handling(): @@ -71,19 +72,18 @@ async def example_error_handling(): for i in range(2): try: - async with runtime(): - counter = await Counter.spawn(name=f"counter_{i}", initial=i) - await counter.increment() + await pul.init() + counter = await Counter.spawn(name=f"counter_{i}", initial=i) + await counter.increment() - if i == 0: - # Simulate error - raise ValueError("Simulated error") + if i == 0: + raise ValueError("Simulated error") - print(f"Task {i} succeeded") + print(f"Task {i} succeeded") except ValueError as e: print(f"Task {i} failed: {e}") finally: - cleanup() # ⭐ Clean up even on error + await pul.shutdown() print(f"Task {i} cleaned up") @@ -92,15 +92,15 @@ async def example_helper_pattern(): print("\n=== Example 5: Helper Function Pattern ===") async def run_counter_task(task_id: int, increments: int) -> int: - """Encapsulated task function (auto cleanup)""" + """Encapsulated task function""" try: - async with runtime(): - 
counter = await Counter.spawn(name=f"task_{task_id}", initial=0) - for _ in range(increments): - await counter.increment() - return await counter.get_value() + await pul.init() + counter = await Counter.spawn(name=f"task_{task_id}", initial=0) + for _ in range(increments): + await counter.increment() + return await counter.get_value() finally: - cleanup() + await pul.shutdown() # Run multiple tasks tasks = [run_counter_task(i, i + 1) for i in range(3)] diff --git a/examples/inspect/demo_service.py b/examples/inspect/demo_service.py index 4a56651eb..3c8be3129 100644 --- a/examples/inspect/demo_service.py +++ b/examples/inspect/demo_service.py @@ -24,6 +24,7 @@ import pulsing as pul +@pul.remote class WorkerActor: """A simple worker actor that processes tasks""" @@ -31,25 +32,17 @@ def __init__(self, worker_id: str): self.worker_id = worker_id self.tasks_processed = 0 - def on_start(self, actor_id): - print(f"[Worker {self.worker_id}] Started") + async def process(self, task: str) -> dict[str, str | int]: + self.tasks_processed += 1 + result = f"Processed: {task} (total: {self.tasks_processed})" + print(f"[Worker {self.worker_id}] {result}") + return {"result": result, "worker": self.worker_id} - async def receive(self, msg): - action = msg.get("action") if isinstance(msg, dict) else None - - if action == "process": - task = msg.get("task", "") - self.tasks_processed += 1 - result = f"Processed: {task} (total: {self.tasks_processed})" - print(f"[Worker {self.worker_id}] {result}") - return {"result": result, "worker": self.worker_id} - - if action == "stats": - return {"worker_id": self.worker_id, "tasks": self.tasks_processed} - - return {"error": "unknown action"} + def stats(self) -> dict[str, str | int]: + return {"worker_id": self.worker_id, "tasks": self.tasks_processed} +@pul.remote class DispatcherActor: """A dispatcher actor that distributes tasks to workers (for demo purposes)""" @@ -57,56 +50,36 @@ def __init__(self): self.workers = [] 
self.tasks_dispatched = 0 - def on_start(self, actor_id): - print("[Dispatcher] Started") - - async def receive(self, msg): - action = msg.get("action") if isinstance(msg, dict) else None - - if action == "route": - self.tasks_dispatched += 1 - task = msg.get("task", "") - # Simulate routing logic - worker_id = f"worker-{random.randint(1, 3)}" - return { - "task": task, - "worker": worker_id, - "dispatched": self.tasks_dispatched, - } + def route(self, task: str) -> dict[str, str | int | bool]: + self.tasks_dispatched += 1 + worker_id = f"worker-{random.randint(1, 3)}" + return { + "task": task, + "worker": worker_id, + "dispatched": self.tasks_dispatched, + } - if action == "stats": - return {"dispatcher": True, "tasks_dispatched": self.tasks_dispatched} - - return {"error": "unknown action"} + def stats(self) -> dict[str, int | bool]: + return {"dispatcher": True, "tasks_dispatched": self.tasks_dispatched} +@pul.remote class CacheActor: """A cache actor that stores key-value pairs""" def __init__(self): self.cache = {} - def on_start(self, actor_id): - print("[Cache] Started") - - async def receive(self, msg): - action = msg.get("action") if isinstance(msg, dict) else None - - if action == "get": - key = msg.get("key", "") - value = self.cache.get(key, None) - return {"key": key, "value": value, "found": value is not None} - - if action == "set": - key = msg.get("key", "") - value = msg.get("value", "") - self.cache[key] = value - return {"key": key, "success": True} + def get(self, key: str) -> dict[str, object]: + value = self.cache.get(key, None) + return {"key": key, "value": value, "found": value is not None} - if action == "stats": - return {"cache_size": len(self.cache)} + def set(self, key: str, value: object) -> dict[str, str | bool]: + self.cache[key] = value + return {"key": key, "success": True} - return {"error": "unknown action"} + def stats(self) -> dict[str, int]: + return {"cache_size": len(self.cache)} async def run_node(port: int, seed: str | 
None): @@ -118,8 +91,9 @@ async def run_node(port: int, seed: str | None): addr = f"127.0.0.1:{port}" seeds = [seed] if seed else None - system = await pul.actor_system(addr, seeds=seeds) - print(f"✓ System started: {system.node_id} @ {system.addr}") + await pul.init(addr=addr, seeds=seeds) + system = pul.ActorSystem(pul.get_system()) + print(f"✓ System started: {addr}") if seed: print(f" Joined via: {seed}") print() @@ -128,12 +102,12 @@ async def run_node(port: int, seed: str | None): if seed is None: # Node 1: Create dispatcher and some workers print("Creating actors on node 1...") - await system.spawn(DispatcherActor(), name="dispatcher") + await DispatcherActor.spawn(name="dispatcher") print(" ✓ actors/dispatcher") for i in range(1, 3): worker_name = f"worker-{i}" - await system.spawn(WorkerActor(worker_name), name=worker_name) + await WorkerActor.spawn(worker_name, name=worker_name) print(f" ✓ actors/{worker_name}") print("\n✓ Node 1 ready!") @@ -156,7 +130,7 @@ async def run_node(port: int, seed: str | None): print("Creating actors on node 2...") for i in range(3, 5): worker_name = f"worker-{i}" - await system.spawn(WorkerActor(worker_name), name=worker_name) + await WorkerActor.spawn(worker_name, name=worker_name) print(f" ✓ actors/{worker_name}") print("\n✓ Node 2 ready!") @@ -164,7 +138,7 @@ async def run_node(port: int, seed: str | None): # Node 3: Add cache await asyncio.sleep(1) print("Creating actors on node 3...") - await system.spawn(CacheActor(), name="cache") + await CacheActor.spawn(name="cache") print(" ✓ actors/cache") print("\n✓ Node 3 ready!") @@ -182,7 +156,7 @@ async def run_node(port: int, seed: str | None): except KeyboardInterrupt: print("\n\nShutting down...") finally: - await system.shutdown() + await pul.shutdown() print("✓ Shutdown complete") diff --git a/examples/python/cluster.py b/examples/python/cluster.py index 1c04a96b4..f219be4e8 100644 --- a/examples/python/cluster.py +++ b/examples/python/cluster.py @@ -15,23 +15,19 @@ 
import pulsing as pul +@pul.remote class SharedCounter: def __init__(self, node_id: str): self.count = 0 self.node_id = node_id - def on_start(self, actor_id): - print(f"[{actor_id}] Started on {self.node_id}") + def get(self) -> dict[str, int | str]: + return {"count": self.count, "from_node": self.node_id} - async def receive(self, msg): - if msg.get("action") == "get": - return {"count": self.count, "from_node": self.node_id} - elif msg.get("action") == "incr": - n = msg.get("n", 1) - self.count += n - print(f"[{self.node_id}] +{n} -> {self.count}") - return {"count": self.count, "from_node": self.node_id} - return {"error": "unknown action"} + def incr(self, n: int = 1) -> dict[str, int | str]: + self.count += n + print(f"[{self.node_id}] +{n} -> {self.count}") + return {"count": self.count, "from_node": self.node_id} async def run_node(port: int, seed: str | None): @@ -40,59 +36,52 @@ async def run_node(port: int, seed: str | None): addr = f"127.0.0.1:{port}" seeds = [seed] if seed else None - system = await pul.actor_system(addr, seeds=seeds) - print(f"✓ Started: {system.node_id} @ {system.addr}") - if seed: - print(f" Joined via: {seed}") - print() - - if seed is None: - # Node 1: Create actor - await system.spawn( - SharedCounter(str(system.node_id)), - name="counter", - ) - print("✓ Created: counter") - print("Start node 2: python cluster.py --port 8001 --seed 127.0.0.1:8000\n") - - try: - while True: - await asyncio.sleep(5) - members = await system.members() - print(f"Cluster: {len(members)} members") - except asyncio.CancelledError: - pass - await system.shutdown() - else: - # Node 2: Join and interact - await asyncio.sleep(2) - - # Resolve remote actor - actor = None - for _ in range(10): - try: - actor = await system.resolve("counter") - break - except Exception: - print(".", end="", flush=True) - await asyncio.sleep(0.5) - - if not actor: - print("\n✗ Failed to resolve actor") - return - - print("✓ Resolved\n") - - # Interact using simple Python 
dicts - resp = await actor.ask({"action": "get"}) - print(f"Initial: {resp['count']} (from {resp['from_node']})") + await pul.init(addr=addr, seeds=seeds) + try: + print(f"✓ Started: {addr}") + if seed: + print(f" Joined via: {seed}") + print() - for i in range(1, 4): - resp = await actor.ask({"action": "incr", "n": i * 10}) - print(f"After +{i * 10}: {resp['count']} (from {resp['from_node']})") + if seed is None: + await SharedCounter.spawn(str(port), name="counter") + print("✓ Created: counter") + print("Start node 2: python cluster.py --port 8001 --seed 127.0.0.1:8000\n") - print("\n✓ Done!") - await system.shutdown() + try: + while True: + await asyncio.sleep(5) + print("Cluster running... (press Ctrl+C to stop)") + except asyncio.CancelledError: + pass + else: + await asyncio.sleep(2) + + actor = None + for _ in range(10): + try: + actor = await SharedCounter.resolve("counter") + break + except Exception: + print(".", end="", flush=True) + await asyncio.sleep(0.5) + + if not actor: + print("\n✗ Failed to resolve actor") + return + + print("✓ Resolved\n") + + resp = await actor.get() + print(f"Initial: {resp['count']} (from {resp['from_node']})") + + for i in range(1, 4): + resp = await actor.incr(i * 10) + print(f"After +{i * 10}: {resp['count']} (from {resp['from_node']})") + + print("\n✓ Done!") + finally: + await pul.shutdown() def main(): diff --git a/examples/python/distributed_queue.py b/examples/python/distributed_queue.py index f666dd6b1..236a8e4b0 100644 --- a/examples/python/distributed_queue.py +++ b/examples/python/distributed_queue.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Distributed memory queue example -Demonstrates how to use system.queue.write/read for basic data read/write operations. +Demonstrates how to use pul.queue.write/read for basic data read/write operations. 
Architecture features: - Each bucket corresponds to an independent BucketStorage Actor @@ -23,13 +23,12 @@ async def main(): """Main function""" logger.info("=== Distributed Memory Queue Example ===\n") - # Create Actor system - system = await pul.actor_system() - logger.info("✓ Actor system started\n") + await pul.init() + logger.info("✓ Global system initialized\n") try: # Producer: open queue for writing - writer = await system.queue.write( + writer = await pul.queue.write( "my_queue", bucket_column="user_id", # Bucket by user_id num_buckets=4, @@ -38,7 +37,7 @@ async def main(): logger.info("✓ Queue created (one Actor per bucket)\n") # Consumer: open queue for reading - reader = await system.queue.read("my_queue") + reader = await pul.queue.read("my_queue") logger.info("✓ Queue opened\n") # Write data (data immediately visible to consumers, no need to wait for persistence) @@ -66,7 +65,7 @@ async def main(): logger.info("✓ Example completed!") finally: - await system.shutdown() + await pul.shutdown() logger.info("System shutdown") diff --git a/examples/python/message_patterns.py b/examples/python/message_patterns.py index 80c6fac16..373e675c7 100644 --- a/examples/python/message_patterns.py +++ b/examples/python/message_patterns.py @@ -10,42 +10,13 @@ import pulsing as pul -class PatternDemo: - """Base Actor with various message patterns.""" - - def __init__(self): - self.value = 0 - - async def receive(self, msg): - # Pattern 1: Simple object messaging (dict, list, string, etc.) - if isinstance(msg, dict): - if msg.get("action") == "add": - self.value += msg.get("n", 1) - return {"value": self.value} - if msg.get("action") == "get": - return {"value": self.value} - - # Pattern 2: Streaming response - just return a generator! 
- if msg == "stream": - - async def generate(): - for token in ["Hello", " ", "World", "!"]: - yield {"token": token} - await asyncio.sleep(0.1) - - return generate() - - return f"unknown: {msg}" - - @pul.remote -class RemotePatternDemo: - """@pul.remote Actor with cleaner API (recommended).""" +class PatternDemo: + """Actor with various message patterns.""" def __init__(self): self.value = 0 - # Sync method - simple request/response def add(self, n: int = 1) -> dict: self.value += n return {"value": self.value} @@ -61,44 +32,35 @@ async def stream(self): async def main(): - system = await pul.actor_system() - - print("=" * 50) - print("Pattern 1: Base Actor with dict messages") - print("=" * 50) - - actor = await system.spawn(PatternDemo(), name="demo") - - print(await actor.ask({"action": "add", "n": 10})) # {'value': 10} - print(await actor.ask({"action": "add", "n": 5})) # {'value': 15} - print(await actor.ask({"action": "get"})) # {'value': 15} + await pul.init() + try: + print("=" * 50) + print("Pattern 1: Simple method calls") + print("=" * 50) - print("\n" + "=" * 50) - print("Pattern 2: Base Actor streaming (return generator)") - print("=" * 50) + demo = await PatternDemo.spawn(name="demo") - response = await actor.ask("stream") - async for chunk in response.stream_reader(): - print(chunk["token"], end="") - print() + print(await demo.add(10)) # {'value': 10} + print(await demo.add(5)) # {'value': 15} + print(await demo.get()) # {'value': 15} - print("\n" + "=" * 50) - print("Pattern 3: @pul.remote (recommended)") - print("=" * 50) + print("\n" + "=" * 50) + print("Pattern 2: Typed resolve") + print("=" * 50) - service = await RemotePatternDemo.local(system) + resolved = await PatternDemo.resolve("demo") + print(await resolved.get()) - # Direct method calls - no need for ask/tell! 
- print(await service.add(10)) # {'value': 10} - print(await service.add(5)) # {'value': 15} - print(await service.get()) # {'value': 15} + print("\n" + "=" * 50) + print("Pattern 3: Async generator streaming") + print("=" * 50) - print("\n--- Async generator streaming ---") - async for chunk in service.stream(): - print(chunk["token"], end="") - print() + async for chunk in demo.stream(): + print(chunk["token"], end="") + print() + finally: + await pul.shutdown() - await system.shutdown() print("\n✓ Done!") diff --git a/examples/python/named_actors.py b/examples/python/named_actors.py index d47af1274..1c66d99cb 100644 --- a/examples/python/named_actors.py +++ b/examples/python/named_actors.py @@ -13,15 +13,11 @@ import pulsing as pul +@pul.remote class EchoActor: """Simple echo actor that can be discovered by name.""" - def on_start(self, actor_id): - print(f"[{actor_id}] Started") - - async def receive(self, msg): - # Accept dict messages - message = msg.get("message", "") if isinstance(msg, dict) else str(msg) + def echo(self, message: str) -> dict[str, str]: print(f"[Echo] {message}") return {"echo": message} @@ -29,27 +25,21 @@ async def receive(self, msg): async def main(): print("=== Pulsing Named Actors ===\n") - system = await pul.actor_system() - print(f"✓ System started: {system.node_id}\n") - - # Create named actor (named actors are discoverable via resolve) - await system.spawn(EchoActor(), name="echo") - print("✓ Created: echo (named, discoverable)\n") + await pul.init() + try: + print("✓ System started\n") - # Resolve by name - print("--- Resolve by name ---") - actor = await system.resolve("echo") - resp = await actor.ask({"message": "Hello!"}) - print(f"Response: {resp['echo']}\n") + await EchoActor.spawn(name="echo") + print("✓ Created: echo (named, discoverable)\n") - # List instances - instances = await system.get_named_instances("actors/echo") - print(f"Instances of 'actors/echo': {len(instances)}") - for i in instances: - print(f" 
{i['node_id']} @ {i['addr']} ({i['status']})") + print("--- Resolve by name ---") + actor = await EchoActor.resolve("echo") + resp = await actor.echo("Hello!") + print(f"Response: {resp['echo']}\n") - print("\n✓ Done!") - await system.shutdown() + print("✓ Done!") + finally: + await pul.shutdown() if __name__ == "__main__": diff --git a/examples/python/native_async_example.py b/examples/python/native_async_example.py index c9ebc6399..fc5801f5d 100644 --- a/examples/python/native_async_example.py +++ b/examples/python/native_async_example.py @@ -9,11 +9,10 @@ import asyncio -# Pulsing native API -from pulsing.actor import init, shutdown, remote +import pulsing as pul -@remote +@pul.remote class Counter: """Distributed counter""" @@ -28,7 +27,7 @@ def increment(self, n: int = 1) -> int: return self.value -@remote +@pul.remote class Calculator: """Distributed calculator""" @@ -39,7 +38,7 @@ def multiply(self, a: int, b: int) -> int: return a * b -@remote +@pul.remote class AsyncWorker: """Async Worker""" @@ -64,7 +63,7 @@ async def main(): print("=" * 60) # Initialize (simple!) 
- await init() + await pul.init() print("✓ Pulsing initialized") # --- Counter --- @@ -99,7 +98,7 @@ async def main(): print(f"Process result: {result}") # --- Shutdown --- - await shutdown() + await pul.shutdown() print("\n✓ Done!") @@ -113,12 +112,12 @@ async def main(): # # | Operation | Pulsing Native (async) | Ray Compat Layer (sync) | # |----------------|-----------------------------|-----------------------------| -# | Initialize | await init() | ray.init() | -# | Decorator | @remote | @ray.remote | +# | Initialize | await pul.init() | ray.init() | +# | Decorator | @pul.remote | @ray.remote | # | Create actor | await Counter.spawn() | Counter.remote() | # | Call method | await counter.incr() | counter.incr.remote() | # | Get result | Direct return | ray.get(ref) | -# | Shutdown | await shutdown() | ray.shutdown() | +# | Shutdown | await pul.shutdown() | ray.shutdown() | # # Recommended to use native API: # - More Pythonic (standard async/await) diff --git a/examples/python/ping_pong.py b/examples/python/ping_pong.py index 12085be6d..cef44f1b1 100644 --- a/examples/python/ping_pong.py +++ b/examples/python/ping_pong.py @@ -9,23 +9,26 @@ import pulsing as pul +@pul.remote class PingPong: - async def receive(self, msg): - if msg == "ping": - return "pong" + def ping(self) -> str: + return "pong" + + def echo(self, msg: str) -> str: return f"echo: {msg}" async def main(): - system = await pul.actor_system() - actor = await system.spawn(PingPong()) + await pul.init() + try: + actor = await PingPong.spawn() - # Simple string message - print(await actor.ask("ping")) # -> pong - print(await actor.ask("hello")) # -> echo: hello + print(await actor.ping()) # -> pong + print(await actor.echo("hello")) # -> echo: hello - await asyncio.sleep(1) # Allow background tasks to complete - await system.shutdown() + await asyncio.sleep(1) # Allow background tasks to complete + finally: + await pul.shutdown() if __name__ == "__main__": diff --git 
a/examples/python/ray_compat_example.py b/examples/python/ray_compat_example.py deleted file mode 100644 index ea38a857f..000000000 --- a/examples/python/ray_compat_example.py +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env python3 -""" -Ray Compatibility Layer Example (for Migration) - -Demonstrates how to use pulsing.compat.ray to migrate from Ray to Pulsing. -Migration only requires changing one import line! - -Usage: python examples/python/ray_compat_example.py -""" - -# ============================================ -# Migrate from Ray: Just change this line! -# ============================================ -# Before: import ray -# After: -from pulsing.compat import ray - - -@ray.remote -class Counter: - """Distributed counter (Ray style)""" - - def __init__(self, init_value: int = 0): - self.value = init_value - - def get(self) -> int: - return self.value - - def increment(self, n: int = 1) -> int: - self.value += n - return self.value - - -@ray.remote -class Calculator: - """Distributed calculator (Ray style)""" - - def add(self, a: int, b: int) -> int: - return a + b - - def multiply(self, a: int, b: int) -> int: - return a * b - - -def main(): - print("=" * 60) - print("Ray Compatibility Layer Example (from pulsing.compat import ray)") - print("=" * 60) - - # Initialize (Ray style) - ray.init() - print("✓ Pulsing (Ray compat) initialized") - - # --- Counter --- - print("\n--- Counter ---") - counter = Counter.remote(init_value=10) - - # Ray style calls - print(f"Initial value: {ray.get(counter.get.remote())}") - print(f"increment(5): {ray.get(counter.increment.remote(5))}") - print(f"Final value: {ray.get(counter.get.remote())}") - - # --- Calculator --- - print("\n--- Calculator ---") - calc = Calculator.remote() - - print(f"add(10, 20): {ray.get(calc.add.remote(10, 20))}") - print(f"multiply(5, 6): {ray.get(calc.multiply.remote(5, 6))}") - - # --- Batch get --- - print("\n--- Batch Get ---") - refs = [ - calc.add.remote(1, 2), - calc.add.remote(3, 4), - 
calc.multiply.remote(5, 6), - ] - results = ray.get(refs) - print(f"Batch results: {results}") - - # --- Object Store --- - print("\n--- put/get ---") - ref = ray.put({"message": "Hello from pulsing.compat.ray!"}) - print(f"Result: {ray.get(ref)}") - - # Shutdown (Ray style) - ray.shutdown() - print("\n✓ Done!") - - -if __name__ == "__main__": - main() - - -# ============================================================================= -# Migration Guide -# ============================================================================= -# -# Step 1: Change import -# ------------------- -# Before: -# import ray -# -# After: -# from pulsing.compat import ray -# -# Step 2: Rest of the code remains unchanged! -# ------------------------- -# ray.init() -# @ray.remote -# Counter.remote() -# counter.incr.remote() -# ray.get(ref) -# ray.shutdown() -# -# ============================================================================= -# Next Step: Migrate to Native API (Optional, Better Performance) -# ============================================================================= -# -# from pulsing.actor import init, shutdown, remote -# -# await init() -# -# @remote -# class Counter: -# ... -# -# counter = await Counter.spawn() -# result = await counter.incr() # No need for .remote() + get()! 
-# -# await shutdown() -# diff --git a/examples/python/remote_actor_example.py b/examples/python/remote_actor_example.py index 3e9b0a013..11d1d5970 100644 --- a/examples/python/remote_actor_example.py +++ b/examples/python/remote_actor_example.py @@ -14,13 +14,13 @@ import asyncio import logging -from pulsing.actor import init, shutdown, remote +import pulsing as pul logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -@remote +@pul.remote class Counter: """Distributed counter""" @@ -39,7 +39,7 @@ def decrement(self, n: int = 1) -> int: return self.value -@remote +@pul.remote class KeyValueStore: """Distributed key-value store""" @@ -56,7 +56,7 @@ def keys(self) -> list: return list(self.store.keys()) -@remote +@pul.remote class AsyncWorker: """Supports async methods""" @@ -79,7 +79,7 @@ async def main(): print("=" * 60) # Simple initialization - await init() + await pul.init() # --- Counter --- print("\n--- Counter ---") @@ -125,7 +125,7 @@ async def main(): print("\n✓ Done!") # Shutdown - await shutdown() + await pul.shutdown() if __name__ == "__main__": diff --git a/examples/python/sync_queue_example.py b/examples/python/sync_queue_example.py index 79d451af8..ba05d3ec8 100644 --- a/examples/python/sync_queue_example.py +++ b/examples/python/sync_queue_example.py @@ -23,14 +23,13 @@ async def main(): """Main function""" logger.info("=== Distributed Memory Queue Example (Synchronous Version) ===\n") - # Create Actor system - system = await pul.actor_system() - logger.info("✓ Actor system started\n") + await pul.init() + logger.info("✓ Global system initialized\n") try: # Producer: open queue for writing, get synchronous wrapper writer = ( - await system.queue.write( + await pul.queue.write( "my_queue", bucket_column="user_id", # Bucket by user_id num_buckets=4, @@ -40,7 +39,7 @@ async def main(): logger.info("✓ Queue created (synchronous writer)\n") # Consumer: open queue for reading, get synchronous wrapper - reader = (await 
system.queue.read("my_queue")).sync() + reader = (await pul.queue.read("my_queue")).sync() logger.info("✓ Queue opened (synchronous reader)\n") # Synchronously write data @@ -68,7 +67,7 @@ async def main(): logger.info("✓ Example completed!") finally: - await system.shutdown() + await pul.shutdown() logger.info("System shutdown") diff --git a/examples/quickstart/ai_chat_room.py b/examples/quickstart/ai_chat_room.py index 003f37f06..e08fe4a29 100644 --- a/examples/quickstart/ai_chat_room.py +++ b/examples/quickstart/ai_chat_room.py @@ -13,8 +13,7 @@ import argparse import asyncio import random -from pulsing.actor import remote -from pulsing.agent import runtime +import pulsing as pul # AI persona configuration AI_PERSONAS = { @@ -57,7 +56,7 @@ } -@remote +@pul.remote class ChatRoom: """Chat room - coordinates agent conversations""" @@ -94,7 +93,7 @@ def get_history(self) -> list[dict]: return self.messages -@remote +@pul.remote class ChatAgent: """AI agent in the chat room""" @@ -141,7 +140,8 @@ async def main(topic: str, rounds: int): print(f"🔄 Discussion rounds: {rounds}") print("\n--- Participants entering ---\n") - async with runtime(): + await pul.init() + try: # Create chat room room = await ChatRoom.spawn(topic=topic, name="chat_room") @@ -181,6 +181,8 @@ async def main(topic: str, rounds: int): print(f" Total messages: {len(history)}") print(f" Participants: {len(agents)} AIs") print(f" Discussion rounds: {rounds}") + finally: + await pul.shutdown() print("\n" + "=" * 60) print("✅ Chat ended!") diff --git a/examples/quickstart/chaos_proof.py b/examples/quickstart/chaos_proof.py index 3cee8a892..d3c550980 100644 --- a/examples/quickstart/chaos_proof.py +++ b/examples/quickstart/chaos_proof.py @@ -4,11 +4,10 @@ import asyncio import random -from pulsing.actor import remote -from pulsing.agent import runtime +import pulsing as pul -@remote(restart_policy="on_failure", max_restarts=50) +@pul.remote(restart_policy="on_failure", max_restarts=50) class FlakyWorker: 
def __init__(self): self.call_count = 0 @@ -21,7 +20,8 @@ def work(self, x: int) -> int: async def main(): - async with runtime(): + await pul.init() + try: w = await FlakyWorker.spawn(name="flaky") results, retries = [], 0 @@ -50,6 +50,8 @@ async def main(): else: print(f"⚠️ {50 - ok} tasks failed") print("=" * 50 + "\n") + finally: + await pul.shutdown() if __name__ == "__main__": diff --git a/examples/quickstart/function_to_fleet.py b/examples/quickstart/function_to_fleet.py index cac61c78a..eb799e1a6 100644 --- a/examples/quickstart/function_to_fleet.py +++ b/examples/quickstart/function_to_fleet.py @@ -1,11 +1,10 @@ import asyncio import os import time -from pulsing.actor import remote -from pulsing.agent import runtime +import pulsing as pul -@remote +@pul.remote class Worker: async def run(self, x: int) -> int: await asyncio.sleep(0.02) # simulate I/O @@ -15,7 +14,8 @@ async def run(self, x: int) -> int: async def main(): n = int(os.getenv("WORKERS", "8")) m = int(os.getenv("ITEMS", "200")) - async with runtime(): + await pul.init() + try: ws = [await Worker.spawn(name=f"w{i}") for i in range(n)] t0 = time.perf_counter() res = await asyncio.gather(*(ws[i % n].run(i) for i in range(m))) @@ -30,6 +30,8 @@ async def main(): print("=" * 50) print("✅ Same code, more workers = higher throughput") print("=" * 50 + "\n") + finally: + await pul.shutdown() if __name__ == "__main__": diff --git a/examples/quickstart/hello_agent.py b/examples/quickstart/hello_agent.py index e3e1098ab..1196a47e9 100644 --- a/examples/quickstart/hello_agent.py +++ b/examples/quickstart/hello_agent.py @@ -7,11 +7,10 @@ """ import asyncio -from pulsing.actor import remote -from pulsing.agent import runtime +import pulsing as pul -@remote +@pul.remote class Greeter: """A simple greeting agent""" @@ -39,7 +38,8 @@ async def main(): print("🎉 Pulsing Multi-Agent Quick Start") print("=" * 50) - async with runtime(): + await pul.init() + try: # Create two agents alice = await 
Greeter.spawn(display_name="Alice", name="alice") bob = await Greeter.spawn(display_name="Bob", name="bob") @@ -53,6 +53,8 @@ async def main(): # Bob greets Alice await bob.say_hello_to("alice") + finally: + await pul.shutdown() print("\n" + "=" * 50) print("✅ Done! You've created your first Multi-Agent application") diff --git a/llms.binding.md b/llms.binding.md index 87b4720b2..a2c22ac19 100644 --- a/llms.binding.md +++ b/llms.binding.md @@ -2,117 +2,162 @@ ## Overview -`Pulsing` is a distributed actor framework that provides a communication backbone for building distributed systems, with specialized support for AI applications. +**Pulsing: Backbone for distributed AI systems.** -## Python 接口 +Pulsing is a distributed actor runtime built in Rust, designed for Python. Actor runtime. Streaming-first. Zero dependencies. Built-in discovery. Connect AI agents and services across machines — no Redis, no etcd, no YAML. -### Actor System风格接口 +## Quick Start -```Python +```python import pulsing as pul -system = await pul.actor_system( +await pul.init() + +@pul.remote +class Counter: + def __init__(self): self.value = 0 + def incr(self): self.value += 1; return self.value + +# Create actor +counter = await Counter.spawn(name="counter") +print(await counter.incr()) # 1 + +# Resolve from another process / node +counter2 = await Counter.resolve("counter") +print(await counter2.incr()) # 2 + +await pul.shutdown() +``` + +## Python API + +You must call `await pul.init()` before using `spawn`, `resolve`, or other APIs. + +```python +import pulsing as pul + +# ── Lifecycle ── + +await pul.init( addr: str | None = None, *, seeds: list[str] | None = None, passphrase: str | None = None -) -> ActorSystem +) -await system.shutdown() +await pul.shutdown() -class MyActor: - async def receive(self, msg: Any) -> Any: - ... 
+# ── Define actor with @pul.remote ── -actorref = await system.spawn( - actor: Actor, # MyActor() - *, - name: str | None = None, - public: bool = False, - restart_policy: str = "never", - max_restarts: int = 3, - min_backoff: float = 0.1, - max_backoff: float = 30.0 -) -> ActorRef +@pul.remote +class Counter: + def __init__(self, init=0): self.value = init -actorref = await system.refer(actorid: ActorId | str) -> ActorRef + def incr(self): # sync method + self.value += 1 + return self.value -actorref = await system.resolve( - name: str, - *, - node_id: int | None = None -) -> ActorRef + async def fetch_and_add(self, url): # async method + data = await http_get(url) + self.value += data + return self.value -response = await actorref.ask(request: Any) -> Any +# ── Create and call ── -await actorref.tell(msg: Any) -> None +counter = await Counter.spawn(name="counter") # create actor, returns typed proxy +result = await counter.incr() # call method directly +# ── Resolve existing actor (e.g. from another process / node) ── +# Prefer typed proxy via Counter.resolve() when you know the actor type. +# Fall back to ref.as_any() when the remote type is unknown. -@pul.remote -class Counter: - # 同步处理函数 - def incr(self): - ... +# 1. Typed proxy (recommended) +proxy = await Counter.resolve("counter") +result = await proxy.incr() - # 异步处理函数 - async def desc(self): - ... +# 2. Typed proxy — manual bind +ref = await pul.resolve("counter", timeout=30) +proxy = ref.as_type(Counter) +result = await proxy.incr() -# 使用 -counter = await Counter.spawn(name="counter") -result = await counter.incr() # 返回 ActorProxy,直接调用方法 +# 3. 
Untyped proxy — when remote type is unknown +ref = await pul.resolve("service_name") +proxy = ref.as_any() +result = await proxy.any_method(args) -# 队列接口 -writer = await system.queue.write( - topic: str, - *, - bucket_column: str = "id", - num_buckets: int = 4, - batch_size: int = 100, - storage_path: str | None = None, - backend: str = "memory", -) -> QueueWriter +``` + +### Ray Integration -await writer.put(record: dict | list[dict]) -> None -await writer.flush() -> None +`pul.mount` registers any Python object as a Pulsing actor, enabling tight integration between Ray actors and Pulsing. -reader = await system.queue.read( - topic: str, +**Running Pulsing in a Ray cluster:** Every process (driver and workers) must initialize Pulsing. Use `pulsing.ray.init_in_ray()` and pass it in `ray.init(runtime_env={"worker_process_setup_hook": init_in_ray})` so workers call it on startup; the driver must call `init_in_ray()` once in code. See the `pulsing.ray` module for details. + +```python +import pulsing as pul + +# Mount object onto Pulsing network (sync, can be called in __init__) +pul.mount( + instance: Any, # Object to mount *, - bucket_id: int | None = None, - bucket_ids: list[int] | None = None, - rank: int | None = None, - world_size: int | None = None, - num_buckets: int = 4, -) -> QueueReader + name: str, # Pulsing name, used for resolve discovery + public: bool = True, # Whether discoverable by other cluster nodes +) -> None +# Internally: +# 1. Initialize Pulsing (if not yet initialized in this process) +# 2. Wrap instance as a Pulsing actor +# 3. 
Register on Pulsing network, gossip broadcasts the name -records = await reader.get(limit: int = 100, wait: bool = False) -> list[dict] +# Unmount (call when actor is destroyed) +pul.unmount(name: str) -> None -# 队列使用示例 -writer = await system.queue.write("my_queue", bucket_column="user_id") +# Cleanup Pulsing state in Ray environment (call before ray.shutdown()) +pul.cleanup_ray() -> None +``` +Example: Ray handles process scheduling, Pulsing handles inter-actor communication. -reader = await system.queue.read("my_queue") -records = await reader.get(limit=10) +```python +import ray, pulsing as pul + +@ray.remote +class Worker: + def __init__(self, name): + self.name = name + pul.mount(self, name=name) # One line to join Pulsing + + async def call_peer(self, peer_name, msg): + proxy = (await pul.resolve(peer_name, timeout=30)).as_any() + return await proxy.greet(msg) # Cross-process Pulsing call + + async def greet(self, msg): + return f"hello from {self.name}: {msg}" + +ray.init() +workers = [Worker.remote(f"w{i}") for i in range(3)] +ray.get(workers[0].call_peer.remote("w1", "hi")) # => "hello from w1: hi" +pul.cleanup_ray() ```
```python import pulsing as pul -# 初始化全局系统 -await pul.init( +# ── Explicit ActorSystem ── + +system = await pul.actor_system( addr: str | None = None, *, seeds: list[str] | None = None, passphrase: str | None = None ) -> ActorSystem -await pul.shutdown() +await system.shutdown() + +# ── Low-level spawn (actor must have receive method) ── -# 生成 Actor(使用全局系统) -actorref = await pul.spawn( +actorref = await pul.spawn( # global system actor: Actor, *, name: str | None = None, @@ -123,127 +168,63 @@ actorref = await pul.spawn( max_backoff: float = 30.0 ) -> ActorRef -# 通过 ActorId 获取引用(使用全局系统) -actorref = await pul.refer(actorid: ActorId | str) -> ActorRef - -# 通过名称解析 Actor(使用全局系统) -actorref = await pul.resolve( - name: str, - *, - node_id: int | None = None +actorref = await system.spawn( # explicit system, same signature + actor: Actor, ... ) -> ActorRef -# 发送消息并等待响应 -response = await actorref.ask(request: Any) -> Any - -# 发送消息(不等待响应) -await actorref.tell(msg: Any) -> None - -# 将 ActorRef 绑定到类型,生成 ActorProxy -proxy = Counter.resolve(name) - -@pul.remote -class Counter: - def __init__(self, init=0): self.value = init - - # 同步处理函数 - def incr(self): - ... - - # 异步处理函数 - async def desc(self): - ... - -# 使用方式1:通过 spawn 创建 -counter = await Counter.spawn(name="counter") -result = await counter.incr() # 返回 ActorProxy,直接调用方法 - -# 使用方式2:通过 resolve 解析已有 actor -proxy = await Counter.resolve("counter") -result = await proxy.incr() - -``` +# ── Low-level resolve / refer ── -### Ray风格兼容接口 - -```python -from pulsing.compat import ray - -# 初始化(同步接口,内部使用异步) -ray.init( - address: str | None = None, - *, - ignore_reinit_error: bool = False, - **kwargs -) -> None - -# 关闭系统 -ray.shutdown() -> None - -# 检查是否已初始化 -ray.is_initialized() -> bool - -# 装饰器:将类转换为 Actor -@ray.remote -class MyActor: - def __init__(self, ...): ... - def method(self, ...): ... 
+actorref = await pul.refer(actorid: ActorId | str) -> ActorRef +actorref = await pul.resolve(name: str, *, node_id=None, timeout=None) -> ActorRef +actorref = await system.resolve(name: str, *, node_id=None) -> ActorRef -# 创建 Actor(同步接口) -actor_handle = MyActor.remote(...) -> _ActorHandle +# ── ActorRef message passing ── -# 调用方法(返回 ObjectRef) -result_ref = actor_handle.method.remote(...) -> ObjectRef +response = await actorref.ask(request: Any) -> Any +await actorref.tell(msg: Any) -> None -# 获取结果(同步接口,支持单个或列表) -result = ray.get( - refs: ObjectRef | list[ObjectRef], - *, - timeout: float | None = None -) -> Any | list[Any] +# ── @pul.remote with explicit system ── -# 将值包装为 ObjectRef(用于 API 兼容) -ref = ray.put(value: Any) -> ObjectRef +counter = await Counter.local(system, name="counter") # spawn on explicit system +result = await counter.incr() -# 等待多个 ObjectRef 完成 -ready, remaining = ray.wait( - refs: list[ObjectRef], - *, - num_returns: int = 1, - timeout: float | None = None -) -> tuple[list[ObjectRef], list[ObjectRef]] +# Queue / Topic on explicit system (same API as pul.queue / pul.topic) +writer = await system.queue.write("my_queue") +reader = await system.queue.read("my_queue") +writer = await system.topic.write("events") +reader = await system.topic.read("events") ``` -### Actor 行为 +### Actor Behavior -#### 基础 Actor(使用 `receive` 方法) +#### Basic Actor (using `receive` method) ```python from pulsing.actor import Actor class EchoActor(Actor): - """receive 方法 - 同步或异步均可,框架自动检测""" + """receive method - sync or async, framework auto-detects""" - # 方式1:同步方法 + # Option 1: Synchronous def receive(self, msg): return msg - # 方式2:异步方法(需要 await 时使用) + # Option 2: Asynchronous (use when you need await) async def receive(self, msg): result = await some_async_operation() return result class FireAndForget(Actor): - """无返回值(适合 tell 调用)""" + """No return value (suitable for tell calls)""" def receive(self, msg): print(f"Received: {msg}") - # 无返回值 + # No return value ``` 
-**注意:** `receive` 方法可以是 `def` 或 `async def`,Pulsing 会自动检测并正确处理。 -只有当方法内部需要 `await` 其他协程时,才需要使用 `async def`。 +**Note:** `receive` can be `def` or `async def`, Pulsing auto-detects and handles both correctly. +Only use `async def` when the method body needs to `await` other coroutines. -#### @pul.remote 装饰器(推荐) +#### @pul.remote Decorator (Recommended) ```python import pulsing as pul @@ -253,110 +234,216 @@ class Counter: def __init__(self, init=0): self.value = init - # 同步方法 - 阻塞处理,请求按顺序执行 - # 适合:快速计算、状态修改 + # Sync method - blocks actor, requests execute sequentially + # Best for: fast computation, state mutation def incr(self): self.value += 1 return self.value - # 异步方法 - 非阻塞,可并发处理多个请求 - # 适合:IO 密集型操作(网络请求、数据库查询) + # Async method - non-blocking, can handle other requests during await + # Best for: IO-bound operations (network, database) async def fetch_and_add(self, url): - data = await http_get(url) # 等待期间可处理其他请求 + data = await http_get(url) # Other requests served during await self.value += data return self.value - # 无返回值方法 - 适合 tell() 调用 + # No return value - suitable for tell() calls def reset(self): self.value = 0 -# 同步 vs 异步方法的并发行为: -# - def method(): 阻塞 Actor,请求排队顺序执行 -# - async def method(): 非阻塞,await 期间可处理其他请求(并发) +# Sync vs async concurrency behavior: +# - def method(): Blocks actor, requests queued sequentially +# - async def method(): Non-blocking, concurrent during await -# 使用 +# Usage counter = await Counter.spawn(name="counter") -result = await counter.incr() # ask 模式,等待返回 -await counter.reset() # 无返回值,但仍等待完成 +result = await counter.incr() # ask mode, waits for return +await counter.reset() # No return value, but still waits for completion ``` -#### 消息传递模式 +#### Message Passing Patterns ```python -# ask - 发送消息并等待响应 +# ask - send message and wait for response response = await actorref.ask({"action": "get"}) -# tell - 发送消息,不等待响应(fire-and-forget) +# tell - send message, don't wait (fire-and-forget) await actorref.tell({"action": "log", "data": 
"hello"}) ``` -#### Actor 生命周期 +#### Optional Zerocopy Descriptor Protocol + +Pulsing supports an optional zerocopy fast path to bypass pickle serialization for eligible +Python objects. If the object does not provide the protocol, Pulsing falls back to existing +pickle-based transport automatically. + +```python +from pulsing.core import ZeroCopyDescriptor + +class MyTensorLike: + def __zerocopy__(self, ctx): + return ZeroCopyDescriptor( + buffers=[memoryview(self.buffer)], + dtype="float32", + shape=[1024], + strides=[4], + transport="inline", # e.g. inline/shm + checksum=None, # optional + version=1, + ) +``` + +Rules: + +- `__zerocopy__(ctx)` is optional; missing protocol means fallback to pickle. +- Descriptor is the single source of truth (no separate `__metadata__`). +- Zerocopy is an optimization path for reduced serialization and buffer copies. +- `buffers` should provide contiguous Python buffer views (e.g. `memoryview`, tensor buffer, `bytearray`) to avoid extra Python-side copy. +- Payload validation failure or unsupported descriptor always falls back to pickle unless explicitly forced by runtime config. + +**Automatic stream transfer for large payloads:** + +When the total buffer size exceeds a threshold (default 64 KB), Pulsing automatically uses a descriptor-first stream transfer instead of packing everything into a single message: + +1. A lightweight descriptor header (dtype, shape, strides, buffer lengths) is sent as the first stream frame. +2. Buffer data follows as a sequence of raw chunk frames, each up to `PULSING_ZEROCOPY_CHUNK_BYTES` (default 1 MB). +3. The receiver pre-allocates buffers based on the descriptor and fills them incrementally as chunks arrive. + +Small payloads below the threshold are still sent as a single message with descriptor + data packed together. This is transparent to the user — `actor.receive()` always gets a `ZeroCopyDescriptor` regardless of the transfer mode. 
+ +Environment variables: +- `PULSING_ZEROCOPY`: `auto` (default) / `off` / `force` +- `PULSING_ZEROCOPY_STREAM_THRESHOLD`: minimum buffer size in bytes to trigger stream transfer (default 65536) +- `PULSING_ZEROCOPY_CHUNK_BYTES`: chunk size in bytes for stream transfer (default 1048576, minimum 4096) + +#### Actor Lifecycle ```python from pulsing.actor import Actor, ActorId class MyActor(Actor): def on_start(self, actor_id: ActorId): - """Actor 启动时调用""" + """Called when actor starts""" print(f"Started: {actor_id}") def on_stop(self): - """Actor 停止时调用""" + """Called when actor stops""" print("Stopping...") def metadata(self) -> dict[str, str]: - """返回 Actor 元数据(用于诊断)""" + """Return actor metadata (for diagnostics)""" return {"type": "worker", "version": "1.0"} async def receive(self, msg): return msg ``` -#### 监督与重启策略 +#### Supervision and Restart Policies ```python @pul.remote( restart_policy="on_failure", # "never" | "on_failure" | "always" - max_restarts=3, # 最大重启次数 - min_backoff=0.1, # 最小退避时间(秒) - max_backoff=30.0, # 最大退避时间(秒) + max_restarts=3, # Maximum restart attempts + min_backoff=0.1, # Minimum backoff time (seconds) + max_backoff=30.0, # Maximum backoff time (seconds) ) class ResilientWorker: def process(self, data): - # 如果抛出异常,Actor 会自动重启 + # Actor auto-restarts on exception return heavy_computation(data) ``` -#### 流式响应 +#### Streaming Responses ```python @pul.remote class StreamingService: - # 直接返回 generator,Pulsing 自动处理为流式响应 + # Return a generator, Pulsing auto-handles as streaming response async def generate_stream(self, n): for i in range(n): yield f"chunk_{i}" - # 同步 generator 也支持 + # Sync generators also supported def sync_stream(self, n): for i in range(n): yield f"item_{i}" -# 使用 +# Usage service = await StreamingService.spawn() -# 客户端消费流 +# Client consumes stream async for chunk in service.generate_stream(10): print(chunk) # chunk_0, chunk_1, ... 
``` -**注意:** 对于 `@pul.remote` 类,直接返回 generator(同步或异步)即可,Pulsing 会自动检测并按流式响应处理。 +**Note:** For `@pul.remote` classes, simply return a generator (sync or async) and Pulsing auto-detects and handles it as a streaming response. + +### Queue API + +Distributed queue with bucket-based partitioning, for data pipelines: + +```python +import pulsing as pul + +await pul.init() + +# ── Write ── +writer = await pul.queue.write( + "my_queue", + *, + bucket_column: str = "id", # Column for partitioning + num_buckets: int = 4, + batch_size: int = 100, + storage_path: str | None = None, + backend: str = "memory", # Pluggable: "memory" or custom +) -> QueueWriter + +await writer.put({"id": "u1", "data": "hello"}) +await writer.put([{"id": "u1", "data": "a"}, {"id": "u2", "data": "b"}]) +await writer.flush() + +# ── Read ── +reader = await pul.queue.read( + "my_queue", + *, + bucket_id: int | None = None, + bucket_ids: list[int] | None = None, + rank: int | None = None, # For distributed consumption + world_size: int | None = None, + num_buckets: int = 4, +) -> QueueReader + +records = await reader.get(limit=100, wait=False) +``` + +### Topic API + +Lightweight pub/sub for real-time message distribution: + +```python +import pulsing as pul + +await pul.init() + +# ── Publish ── +writer = await pul.topic.write("events") +await writer.publish({"type": "user_login", "user": "alice"}) + +# ── Subscribe ── +reader = await pul.topic.read("events") + +@reader.on_message +async def handle(msg): + print(f"Received: {msg}") + +await reader.start() +``` -## Rust 接口 +## Rust API -Rust API 通过 trait 定义契约,分为三层: +Rust API defines contracts via traits, organized in three layers: -### 快速入门 +### Quick Start ```rust use pulsing_actor::prelude::*; @@ -381,11 +468,11 @@ impl Actor for Echo { async fn main() -> anyhow::Result<()> { let system = ActorSystem::builder().build().await?; - // 命名 actor(可通过 resolve 发现,使用 namespace/name 格式) + // Named actor (discoverable via resolve, uses namespace/name format) 
let actor = system.spawn_named("services/echo", Echo).await?; let Pong(x): Pong = actor.ask(Ping(1)).await?; - // 匿名 actor(仅通过 ActorRef 访问) + // Anonymous actor (accessible only via ActorRef) let worker = system.spawn(Worker::new()).await?; system.shutdown().await?; @@ -393,55 +480,55 @@ async fn main() -> anyhow::Result<()> { } ``` -### Trait 分层 +### Trait Layers -#### ActorSystemCoreExt(主路径,prelude 自动导入) +#### ActorSystemCoreExt (Main path, auto-imported via prelude) -核心 spawn 与 resolve 能力: +Core spawn and resolve capabilities: ```rust -// Spawn - 简洁 API -system.spawn(actor).await?; // 匿名 actor(不可 resolve) -system.spawn_named(name, actor).await?; // 命名 actor(可 resolve) +// Spawn - Simple API +system.spawn(actor).await?; // Anonymous actor (not resolvable) +system.spawn_named(name, actor).await?; // Named actor (resolvable) -// Spawn - Builder 模式(高级配置) +// Spawn - Builder pattern (advanced configuration) system.spawning() - .name("services/counter") // 可选:有 name = 可 resolve + .name("services/counter") // Optional: named = resolvable .supervision(SupervisionSpec::on_failure().max_restarts(3)) .mailbox_capacity(256) .spawn(actor).await?; -// Resolve - 简单方式 -system.actor_ref(&actor_id).await?; // 按 ActorId 获取 -system.resolve(name).await?; // 按名称解析 +// Resolve - Simple +system.actor_ref(&actor_id).await?; // By ActorId +system.resolve(name).await?; // By name -// Resolve - Builder 模式(高级配置) +// Resolve - Builder pattern (advanced configuration) system.resolving() - .node(node_id) // 可选:指定目标节点 - .policy(RoundRobinPolicy::new()) // 可选:负载均衡策略 - .filter_alive(true) // 可选:只选存活节点 - .resolve(name).await?; // 解析单个 + .node(node_id) // Optional: target node + .policy(RoundRobinPolicy::new()) // Optional: load balancing + .filter_alive(true) // Optional: alive nodes only + .resolve(name).await?; // Resolve single system.resolving() - .list(name).await?; // 获取所有实例 + .list(name).await?; // Get all instances system.resolving() - .lazy(name)?; // 懒解析 + .lazy(name)?; // Lazy resolve 
```
 
-#### ActorSystemAdvancedExt(高级:可重启 supervision)
+#### ActorSystemAdvancedExt (Advanced: restartable supervision)
 
-Factory 模式 spawn,支持 supervision 重启(仅命名 actor):
+Factory-pattern spawn with supervision restart (named actors only):
 
 ```rust
-// 命名 actor + factory(可重启 + 可 resolve)
-// 注意:匿名 actor 不支持 supervision,因为无法重新解析
+// Named actor + factory (restartable + resolvable)
+// Note: anonymous actors don't support supervision (cannot re-resolve)
 system.spawn_named_factory(name, || Ok(Service::new()), options).await?;
 ```
 
-#### ActorSystemOpsExt(运维/诊断/生命周期)
+#### ActorSystemOpsExt (Operations / Diagnostics / Lifecycle)
 
-系统信息、集群成员、停止/关闭等:
+System info, cluster membership, stop/shutdown:
 
 ```rust
 system.node_id();
@@ -452,22 +539,23 @@ system.stop(name).await?;
 system.shutdown().await?;
 ```
 
-### 关键约定
+### Key Conventions
 
-- **消息编码**:`Message::pack(&T)` 使用 bincode + `type_name::<T>()`;跨版本协议建议 `Message::single("TypeV1", bytes)`。
-- **命名与解析**:
-  - `spawn_named(name, actor)`:创建可发现 actor,name 即为解析路径
-  - `resolve(name)`:一次性解析(迁移后可能 stale)
-  - `resolve_lazy(name)`:懒解析 + 自动刷新(~5s TTL)
-- **流式**:返回 `Message::Stream`,取消语义 best-effort。
-- **监督**:只有 `spawn_named_factory` 支持失败重启,匿名 actor 不支持 supervision。
+- **Message encoding**: `Message::pack(&T)` uses bincode + `type_name::<T>()`; for cross-version protocols use `Message::single("TypeV1", bytes)`.
+- **Optional zerocopy**: when payload objects implement `__zerocopy__(ctx)`, Pulsing may bypass pickle and send descriptor + buffers directly; otherwise it uses normal pickle/bytes paths.
+- **Naming and resolution**:
+  - `spawn_named(name, actor)`: Creates a discoverable actor, name is the resolution path
+  - `resolve(name)`: One-shot resolve (may become stale after migration)
+  - `resolve_lazy(name)`: Lazy resolve + auto-refresh (~5s TTL)
+- **Streaming**: Return `Message::Stream`, cancellation is best-effort.
+- **Supervision**: Only `spawn_named_factory` supports failure restart; anonymous actors do not support supervision.
 

-### Behavior(类型安全,Akka Typed 风格)
+### Behavior (Type-safe, Akka Typed style)
 
-- **核心**:`Behavior<M>` + `TypedRef<M>` + `BehaviorAction (Same/Become/Stop)`
-- **约定**:`TypedRef<M>` 要求 `M: Serialize + DeserializeOwned + Send + 'static`
+- **Core**: `Behavior<M>` + `TypedRef<M>` + `BehaviorAction (Same/Become/Stop)`
+- **Constraint**: `TypedRef<M>` requires `M: Serialize + DeserializeOwned + Send + 'static`
 
-除了定义时候使用函数语法以外,其他与 Actor 完全相同:
+Defined using function syntax, otherwise identical to Actor:
 
 ```rust
 fn counter(init: i32) -> Behavior<i32> {
@@ -477,8 +565,8 @@ fn counter(init: i32) -> Behavior<i32> {
     })
 }
 
-// Behavior 实现 IntoActor trait,可以直接传给 spawn/spawn_named
-// 无需手动包装,系统会自动转换
+// Behavior implements IntoActor, can be passed directly to spawn/spawn_named
+// No manual wrapping needed, system converts automatically
 let counter = system.spawn(counter(0)).await?;
 let counter = system.spawn_named("actors/counter", counter(0)).await?;
 ```
diff --git a/pyproject.toml b/pyproject.toml
index 45c0a712f..c20cbf98b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "pulsing"
 version = "0.1.0"
-description = "Pulsing - Distributed Actor Framework"
+description = "Pulsing: Backbone for distributed AI systems. Actor runtime with streaming, zero dependencies, and built-in discovery."
 
readme = "README.md" authors = [ { name = "Reiase", email = "reiase@gmail.com" } @@ -31,11 +31,11 @@ classifiers = [ keywords = ["actor", "distributed", "async", "llm", "inference"] [project.urls] -Homepage = "https://github.com/reiase/pulsing" -Repository = "https://github.com/reiase/pulsing" -Documentation = "https://github.com/reiase/pulsing#readme" -Issues = "https://github.com/reiase/pulsing/issues" -Changelog = "https://github.com/reiase/pulsing/blob/main/CHANGELOG.md" +Homepage = "https://github.com/DeepLink-org/Pulsing" +Repository = "https://github.com/DeepLink-org/Pulsing" +Documentation = "https://github.com/DeepLink-org/Pulsing#readme" +Issues = "https://github.com/DeepLink-org/Pulsing/issues" +Changelog = "https://github.com/DeepLink-org/Pulsing/blob/main/CHANGELOG.md" [project.optional-dependencies] storage = [ @@ -45,6 +45,7 @@ storage = [ dev = [ "pytest>=8.0", "pytest-asyncio>=0.23", + "pytest-cov>=5.0", "ruff>=0.8", "maturin>=1.0", "pylance", diff --git a/python/pulsing/__init__.py b/python/pulsing/__init__.py index d6a55eddf..36d432799 100644 --- a/python/pulsing/__init__.py +++ b/python/pulsing/__init__.py @@ -1,24 +1,7 @@ """ Pulsing - Distributed Actor Framework -Two API styles: - -1. Actor System style (explicit system management): - import pulsing as pul - - system = await pul.actor_system() - - @pul.remote - class Counter: - def __init__(self, init=0): self.value = init - def incr(self): self.value += 1; return self.value - - counter = await Counter.spawn(name="counter") - result = await counter.incr() - - await system.shutdown() - -2. Ray-style async API (global system): +Usage: import pulsing as pul await pul.init() @@ -32,25 +15,6 @@ def incr(self): self.value += 1; return self.value result = await counter.incr() await pul.shutdown() - -3. 
Ray-compatible sync API (for migration): - from pulsing.compat import ray - - ray.init() - - @ray.remote - class Counter: - def __init__(self, init=0): self.value = init - def incr(self): self.value += 1; return self.value - - counter = Counter.remote(init=10) - result = ray.get(counter.incr.remote()) - - ray.shutdown() - -Submodules: -- pulsing.actor: Native async API (recommended) -- pulsing.compat.ray: Ray-compatible sync API (for migration) """ import asyncio @@ -58,8 +22,8 @@ def incr(self): self.value += 1; return self.value __version__ = "0.1.0" -# Import from pulsing.actor -from pulsing.actor import ( +# Import from pulsing.core +from pulsing.core import ( # Global system functions init, shutdown, @@ -70,11 +34,13 @@ def incr(self): self.value += 1; return self.value # Resolve function resolve, as_any, + # Mount (attach existing object to Pulsing network) + mount, + unmount, # Types Actor, ActorSystem as _ActorSystem, ActorRef, - ActorRefView, ActorId, ActorProxy, Message, @@ -85,6 +51,27 @@ def incr(self): self.value += 1; return self.value PYTHON_ACTOR_SERVICE_NAME, ) + +# Ray integration (lazy import — only available in Ray environment) +def init_inside_ray(): + """Initialize Pulsing in Ray worker and join cluster (async version). + + Usage:: + + await pul.init_inside_ray() + """ + from pulsing.integrations.ray import async_init_in_ray + + return async_init_in_ray() + + +def cleanup_ray(): + """Clean up Pulsing state in Ray KV store""" + from pulsing.integrations.ray import cleanup + + return cleanup() + + # Import exceptions from pulsing.exceptions import ( PulsingError, @@ -98,17 +85,18 @@ def incr(self): self.value += 1; return self.value class ActorSystem: - """ActorSystem wrapper with queue API + """ActorSystem wrapper with queue/topic API This wraps the Rust ActorSystem and adds Python-level extensions - like the queue API. + like queue and topic APIs. 
""" def __init__(self, inner: _ActorSystem): self._inner = inner - from pulsing.queue import QueueAPI + from pulsing.streaming import QueueAPI, TopicAPI self.queue = QueueAPI(inner) + self.topic = TopicAPI(inner) async def refer(self, actorid: ActorId | str) -> ActorRef: """Get actor reference by ID @@ -263,6 +251,42 @@ async def refer(actorid: ActorId | str) -> ActorRef: return await system.refer(actorid) +class _GlobalQueueAPI: + """Lazy proxy for pul.queue that uses the global system.""" + + async def write(self, topic, **kwargs): + """Open queue for writing. See QueueAPI.write() for args.""" + from pulsing.streaming import QueueAPI + + return await QueueAPI(get_system()).write(topic, **kwargs) + + async def read(self, topic, **kwargs): + """Open queue for reading. See QueueAPI.read() for args.""" + from pulsing.streaming import QueueAPI + + return await QueueAPI(get_system()).read(topic, **kwargs) + + +class _GlobalTopicAPI: + """Lazy proxy for pul.topic that uses the global system.""" + + async def write(self, topic, **kwargs): + """Open topic for writing. See TopicAPI.write() for args.""" + from pulsing.streaming import TopicAPI + + return await TopicAPI(get_system()).write(topic, **kwargs) + + async def read(self, topic, **kwargs): + """Open topic for reading. 
See TopicAPI.read() for args.""" + from pulsing.streaming import TopicAPI + + return await TopicAPI(get_system()).read(topic, **kwargs) + + +queue = _GlobalQueueAPI() +topic = _GlobalTopicAPI() + + # Export all public APIs __all__ = [ # Version @@ -280,11 +304,19 @@ async def refer(actorid: ActorId | str) -> ActorRef: "is_initialized", # Decorator "remote", + # Mount (attach existing object to Pulsing network) + "mount", + "unmount", + # Queue & Topic (global entry points) + "queue", + "topic", + # Ray integration + "init_inside_ray", + "cleanup_ray", # Types "Actor", "ActorSystem", "ActorRef", - "ActorRefView", "ActorId", "ActorProxy", "Message", diff --git a/python/pulsing/admin.py b/python/pulsing/admin.py index 76f0da00f..d17a7faad 100644 --- a/python/pulsing/admin.py +++ b/python/pulsing/admin.py @@ -1,14 +1,14 @@ """ Administrative / diagnostic APIs for Pulsing. -These APIs are intentionally kept out of `pulsing.actor`'s top-level `__all__` +These APIs are intentionally kept out of `pulsing.core`'s top-level `__all__` to minimize the main public surface. Import explicitly: from pulsing.admin import list_actors, get_metrics, get_node_info from pulsing.admin import health_check, ping """ -from pulsing.actor.remote import ( +from pulsing.core.remote import ( get_metrics, get_node_info, health_check, diff --git a/python/pulsing/agent/__init__.py b/python/pulsing/agent/__init__.py index be4fdb1ad..25a8f1c84 100644 --- a/python/pulsing/agent/__init__.py +++ b/python/pulsing/agent/__init__.py @@ -1,7 +1,7 @@ """ Pulsing Agent Toolbox -Lightweight multi-agent development tools, fully compatible with pulsing.actor. +Lightweight multi-agent development tools, fully compatible with pulsing.core. 
Core APIs: - runtime(): Actor system lifecycle management @@ -10,7 +10,7 @@ - parse_json(): JSON parsing Example: - from pulsing.actor import remote, resolve + from pulsing.core import remote, resolve from pulsing.agent import agent, runtime, llm, get_agent_meta # @remote: Basic Actor diff --git a/python/pulsing/agent/base.py b/python/pulsing/agent/base.py index 1f540234f..3787c8aa9 100644 --- a/python/pulsing/agent/base.py +++ b/python/pulsing/agent/base.py @@ -18,7 +18,7 @@ from dataclasses import dataclass, field from typing import Any, Callable, TypeVar -from pulsing.actor import remote +from pulsing.core import remote T = TypeVar("T") diff --git a/python/pulsing/agent/runtime.py b/python/pulsing/agent/runtime.py index 9a84123b6..3dd6f8524 100644 --- a/python/pulsing/agent/runtime.py +++ b/python/pulsing/agent/runtime.py @@ -4,7 +4,7 @@ from contextlib import asynccontextmanager -from pulsing.actor import get_system, init, shutdown +from pulsing.core import get_system, init, shutdown @asynccontextmanager @@ -13,9 +13,17 @@ async def runtime( addr: str | None = None, seeds: list[str] | None = None, passphrase: str | None = None, + head_addr: str | None = None, + is_head_node: bool = False, ): """Actor system runtime context manager.""" - await init(addr=addr, seeds=seeds, passphrase=passphrase) + await init( + addr=addr, + seeds=seeds, + passphrase=passphrase, + head_addr=head_addr, + is_head_node=is_head_node, + ) try: yield get_system() finally: diff --git a/python/pulsing/cli/__main__.py b/python/pulsing/cli/__main__.py index 1b32f4514..3baf04f48 100644 --- a/python/pulsing/cli/__main__.py +++ b/python/pulsing/cli/__main__.py @@ -5,7 +5,7 @@ @hp.param("actor") def actor( - actor_type: str, # Positional argument: full class path (e.g., 'pulsing.actors.worker.TransformersWorker') + actor_type: str, # Positional argument: full class path (e.g., 'pulsing.serving.worker.TransformersWorker') addr: str | None = None, seeds: str | None = None, name: str = "worker", # 
Actor name (default: "worker") @@ -18,9 +18,9 @@ def actor( Actor type must be a full class path: - Format: 'module.path.ClassName' - - Example: 'pulsing.actors.Router' - - Example: 'pulsing.actors.TransformersWorker' - - Example: 'pulsing.actors.VllmWorker' + - Example: 'pulsing.serving.Router' + - Example: 'pulsing.serving.TransformersWorker' + - Example: 'pulsing.serving.VllmWorker' - Example: 'my_module.my_actor.MyCustomActor' Pass constructor parameters directly as command-line arguments. @@ -29,7 +29,7 @@ def actor( Note: To list actors, use 'pulsing inspect actors' instead. Args: - actor_type: Full class path (positional argument), e.g., 'pulsing.actors.worker.TransformersWorker' + actor_type: Full class path (positional argument), e.g., 'pulsing.serving.worker.TransformersWorker' addr: Actor System bind address (e.g., '0.0.0.0:8000') seeds: Comma-separated list of seed nodes (e.g., '192.168.1.1:8000,192.168.1.2:8000') name: Actor name. Default: 'worker'. Use different names to run multiple workers in the same cluster. 
@@ -38,17 +38,17 @@ def actor( Examples: # Start a Transformers worker - pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --device cpu --name my-worker + pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --device cpu --name my-worker # Start a vLLM worker - pulsing actor pulsing.actors.VllmWorker --model Qwen/Qwen2 --role aggregated --max_new_tokens 512 --name vllm-worker + pulsing actor pulsing.serving.VllmWorker --model Qwen/Qwen2 --role aggregated --max_new_tokens 512 --name vllm-worker # Start a Router with OpenAI-compatible API - pulsing actor pulsing.actors.Router --http_host 0.0.0.0 --http_port 8080 --model_name my-llm --worker_name worker + pulsing actor pulsing.serving.Router --http_host 0.0.0.0 --http_port 8080 --model_name my-llm --worker_name worker # Start multiple workers with different names - pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --name worker-1 --seeds 127.0.0.1:8000 - pulsing actor pulsing.actors.TransformersWorker --model_name gpt2 --name worker-2 --seeds 127.0.0.1:8000 + pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --name worker-1 --seeds 127.0.0.1:8000 + pulsing actor pulsing.serving.TransformersWorker --model_name gpt2 --name worker-2 --seeds 127.0.0.1:8000 """ from .actors import start_generic_actor @@ -63,9 +63,9 @@ def actor( # Check if actor_type is a valid class path (must contain dots) if "." 
not in actor_type:
         raise ValueError(
-            f"Error: Actor type must be a full class path (e.g., 'pulsing.actors.worker.TransformersWorker').\n"
+            f"Error: Actor type must be a full class path (e.g., 'pulsing.serving.worker.TransformersWorker').\n"
             f"Received: '{actor_type}'\n"
-            f"Example: pulsing actor pulsing.actors.worker.TransformersWorker --model_name gpt2"
+            f"Example: pulsing actor pulsing.serving.worker.TransformersWorker --model_name gpt2"
         )
 
     # Parse seeds
@@ -257,7 +257,67 @@ def bench(
     )
 
 
+@hp.param("examples")
+def examples(name: str | None = None):
+    """
+    List or view Pulsing built-in examples.
+
+    Lists all available examples when called without arguments;
+    shows detailed description, usage, and source path when given a name.
+
+    Args:
+        name: Example name (optional). If omitted, lists all examples.
+
+    Examples:
+        # List all examples
+        pulsing examples
+
+        # View details of a specific example
+        pulsing examples counting_game
+    """
+    from pulsing.examples import get_example_detail, list_examples
+
+    if name is None:
+        all_examples = list_examples()
+        if not all_examples:
+            print("No examples available.")
+            return
+        print("Available examples:\n")
+        max_name_len = max(len(n) for n, _, _ in all_examples)
+        for n, summary, filepath in all_examples:
+            print(f"  {n:<{max_name_len}}  {summary}")
+        print("\nUse 'pulsing examples <name>' for details.")
+        return
+
+    detail = get_example_detail(name)
+    if detail is None:
+        print(f"Unknown example: '{name}'")
+        print("Use 'pulsing examples' to see all available examples.")
+        return
+
+    summary, docstring, filepath = detail
+    print(f"{'=' * 60}")
+    print(f"  {summary}")
+    print(f"{'=' * 60}\n")
+    if docstring:
+        print(docstring)
+    print()
+    print(f"Source path:\n  {filepath}\n")
+    print(f"Quick run:\n  python -m pulsing.examples.{name}")
+
+
 def main():
+    import sys
+
+    # Make `pulsing examples <name>` work with positional arguments
+    # hp framework treats params with default values as --name options, so we convert here
+    if (
+        
len(sys.argv) >= 3 + and sys.argv[1] == "examples" + and not sys.argv[2].startswith("-") + ): + sys.argv = [sys.argv[0], "examples", "--name", sys.argv[2]] + sys.argv[3:] + hp.launch() diff --git a/python/pulsing/cli/actor_loader.py b/python/pulsing/cli/actor_loader.py index bbdff162a..001073951 100644 --- a/python/pulsing/cli/actor_loader.py +++ b/python/pulsing/cli/actor_loader.py @@ -4,14 +4,14 @@ import json from typing import Any -from pulsing.actor import Actor +from pulsing.core import Actor def load_actor_class(class_path: str) -> type[Actor]: """Load Actor class from module path Args: - class_path: Full class path, e.g., 'pulsing.actors.worker.TransformersWorker' + class_path: Full class path, e.g., 'pulsing.serving.worker.TransformersWorker' Returns: Actor class @@ -23,7 +23,7 @@ def load_actor_class(class_path: str) -> type[Actor]: if "." not in class_path: raise ValueError( f"Invalid class path '{class_path}'. Expected format: 'module.path.ClassName'\n" - f"Example: pulsing.actors.worker.TransformersWorker" + f"Example: pulsing.serving.worker.TransformersWorker" ) # Split module path and class name @@ -57,7 +57,7 @@ def load_actor_class(class_path: str) -> type[Actor]: if not isinstance(actor_class, type) or not issubclass(actor_class, Actor): raise ValueError( f"'{class_name}' is not an Actor subclass.\n" - f"Expected a class that inherits from pulsing.actor.Actor" + f"Expected a class that inherits from pulsing.core.Actor" ) return actor_class diff --git a/python/pulsing/cli/actors.py b/python/pulsing/cli/actors.py index be39117e2..1278ddc98 100644 --- a/python/pulsing/cli/actors.py +++ b/python/pulsing/cli/actors.py @@ -15,14 +15,14 @@ def start_generic_actor( """Start a generic Actor class by full module path Args: - actor_type: Full class path, e.g., 'pulsing.actors.worker.TransformersWorker' + actor_type: Full class path, e.g., 'pulsing.serving.worker.TransformersWorker' addr: Actor System bind address seeds: List of seed node addresses name: 
Actor name extra_kwargs: Additional CLI arguments to pass to Actor constructor """ import inspect - from pulsing.actor.helpers import spawn_and_run + from pulsing.core.helpers import spawn_and_run print(f"Loading Actor class: {actor_type}") diff --git a/python/pulsing/compat/__init__.py b/python/pulsing/compat/__init__.py deleted file mode 100644 index 84da33c38..000000000 --- a/python/pulsing/compat/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -""" -Pulsing Compatibility Layer - -Provides Ray-compatible API for easy migration. - -Usage: - from pulsing.compat import ray - - ray.init() - - @ray.remote - class Counter: - def __init__(self, init=0): self.value = init - def incr(self): self.value += 1; return self.value - - counter = Counter.remote(init=10) - result = ray.get(counter.incr.remote()) - - ray.shutdown() -""" - -from . import ray - -__all__ = ["ray"] diff --git a/python/pulsing/actor/__init__.py b/python/pulsing/core/__init__.py similarity index 86% rename from python/pulsing/actor/__init__.py rename to python/pulsing/core/__init__.py index bea6a7dad..da67eee19 100644 --- a/python/pulsing/actor/__init__.py +++ b/python/pulsing/core/__init__.py @@ -1,8 +1,8 @@ """ -Pulsing Actor System - Python bindings for distributed actor framework +Pulsing Core - Python bindings for distributed actor framework Simple API: - from pulsing.actor import init, shutdown, remote + from pulsing.core import init, shutdown, remote await init() @@ -17,7 +17,7 @@ def incr(self): self.value += 1; return self.value await shutdown() Advanced API: - from pulsing.actor import ActorSystem, Actor, Message, SystemConfig + from pulsing.core import ActorSystem, Actor, Message, SystemConfig """ import asyncio @@ -28,14 +28,14 @@ def incr(self): self.value += 1; return self.value ActorId, ActorRef, ActorSystem, - Message, NodeId, SealedPyMessage, - StreamMessage, + ZeroCopyDescriptor, StreamReader, StreamWriter, SystemConfig, ) +from .messaging import Message, StreamMessage # 
============================================================================= @@ -50,13 +50,17 @@ async def init( *, seeds: list[str] = None, passphrase: str = None, + head_addr: str = None, + is_head_node: bool = False, ) -> ActorSystem: """Initialize Pulsing actor system Args: addr: Bind address (e.g., "0.0.0.0:8000"). None for standalone mode. - seeds: Seed nodes to join cluster - passphrase: Enable TLS with this passphrase + seeds: Seed nodes to join cluster (Gossip mode). + passphrase: Enable TLS with this passphrase. + head_addr: Address of head node (worker mode). Mutually exclusive with is_head_node. + is_head_node: If True, this node runs as head. Mutually exclusive with head_addr. Returns: ActorSystem instance @@ -65,17 +69,23 @@ async def init( # Standalone mode await init() - # Cluster mode with TLS - await init(addr="0.0.0.0:8000", passphrase="my-secret") - - # Join existing cluster + # Cluster mode (Gossip + seed) await init(addr="0.0.0.0:8001", seeds=["192.168.1.1:8000"]) + + # Head node + await init(addr="0.0.0.0:8000", is_head_node=True) + + # Worker node + await init(addr="0.0.0.0:8001", head_addr="192.168.1.1:8000") """ global _global_system if _global_system is not None: return _global_system + if is_head_node and head_addr: + raise ValueError("Cannot set both is_head_node and head_addr") + # Build config if addr: config = SystemConfig.with_addr(addr) @@ -84,6 +94,10 @@ async def init( if seeds: config = config.with_seeds(seeds) + if is_head_node: + config = config.with_head_node() + elif head_addr: + config = config.with_head_addr(head_addr) if passphrase: config = config.with_passphrase(passphrase) @@ -190,7 +204,6 @@ async def tell_with_timeout( PYTHON_ACTOR_SERVICE_NAME, ActorClass, ActorProxy, - ActorRefView, PythonActorService, PythonActorServiceProxy, SystemActorProxy, @@ -201,6 +214,8 @@ async def tell_with_timeout( get_system_actor, health_check, list_actors, + mount, + unmount, ping, remote, resolve, @@ -213,34 +228,29 @@ async def 
tell_with_timeout( PulsingActorError, ) -# NOTE: `__all__` is the *public, stable surface* for `from pulsing.actor import *`. -# We intentionally keep it minimal. Advanced/diagnostic APIs may still be -# importable by name, but are not part of the stable top-level contract. __all__ = [ - # Minimal async API (recommended) "init", "shutdown", "remote", "resolve", + "mount", + "unmount", "get_system", "get_system_actor", "is_initialized", - # Minimal core types commonly used in docs/examples "Actor", "Message", "StreamMessage", "SystemConfig", "ActorSystem", "ActorRef", - "ActorRefView", "ActorId", "ActorProxy", "as_any", "SystemActorProxy", - # Service (for actor_system function) "PythonActorService", "PYTHON_ACTOR_SERVICE_NAME", - # Exceptions + "ZeroCopyDescriptor", "PulsingError", "PulsingRuntimeError", "PulsingActorError", diff --git a/python/pulsing/actor/helpers.py b/python/pulsing/core/helpers.py similarity index 100% rename from python/pulsing/actor/helpers.py rename to python/pulsing/core/helpers.py diff --git a/python/pulsing/core/messaging.py b/python/pulsing/core/messaging.py new file mode 100644 index 000000000..f77935c38 --- /dev/null +++ b/python/pulsing/core/messaging.py @@ -0,0 +1,5 @@ +"""Message types - re-export from Rust _core.""" + +from pulsing._core import Message, StreamMessage + +__all__ = ["Message", "StreamMessage"] diff --git a/python/pulsing/actor/remote.py b/python/pulsing/core/remote.py similarity index 81% rename from python/pulsing/actor/remote.py rename to python/pulsing/core/remote.py index 4b07a9be9..de8fe3fb3 100644 --- a/python/pulsing/actor/remote.py +++ b/python/pulsing/core/remote.py @@ -3,7 +3,6 @@ import asyncio import inspect import logging -import os import random import uuid from abc import ABC, abstractmethod @@ -12,15 +11,6 @@ from pulsing._core import ActorRef, ActorSystem, Message, StreamMessage from pulsing.exceptions import PulsingActorError, PulsingRuntimeError -# Protocol version configuration -# Default to v1 
for backward compatibility -_DEFAULT_PROTOCOL_VERSION = int(os.getenv("PULSING_PROTOCOL_VERSION", "1")) - - -def _get_protocol_version() -> int: - """Get protocol version from environment or default to v1.""" - return _DEFAULT_PROTOCOL_VERSION - def _consume_task_exception(task: asyncio.Task) -> None: """Consume exception from background task to avoid 'Task exception was never retrieved'.""" @@ -37,60 +27,22 @@ def _consume_task_exception(task: asyncio.Task) -> None: logging.getLogger(__name__).exception("Stream task failed") -def _detect_protocol_version(msg: dict) -> int: - """Auto-detect protocol version from message. - - Returns: - 1 for v1 protocol, 2 for v2 protocol - """ - if "__pulsing_proto__" in msg: - version_str = msg["__pulsing_proto__"] - if isinstance(version_str, str) and version_str.startswith("v"): - return int(version_str[1:]) - return int(version_str) - # v1 compatibility: check for __call__ field - if "__call__" in msg: - return 1 - return 1 # default to v1 +# Wire format version (single protocol) +_PULSING_WIRE_VERSION = "1" -def _wrap_call_v1(method: str, args: tuple, kwargs: dict, is_async: bool) -> dict: - """v1 protocol: legacy format (backward compatible). +def _wrap_call(method: str, args: tuple, kwargs: dict, is_async: bool) -> dict: + """Wrap method call for wire format (namespace isolation). Format: { - "__call__": method_name, - "args": args, - "kwargs": kwargs, - "__async__": is_async + "__pulsing_proto__": version, + "__pulsing__": { "call": method_name, "async": is_async }, + "user_data": { "args": args, "kwargs": kwargs } } """ return { - "__call__": method, - "args": args, - "kwargs": kwargs, - "__async__": is_async, - } - - -def _wrap_call_v2(method: str, args: tuple, kwargs: dict, is_async: bool) -> dict: - """v2 protocol: namespace isolation. 
- - Format: - { - "__pulsing_proto__": "v2", - "__pulsing__": { - "call": method_name, - "async": is_async - }, - "user_data": { - "args": args, - "kwargs": kwargs - } - } - """ - return { - "__pulsing_proto__": "v2", + "__pulsing_proto__": _PULSING_WIRE_VERSION, "__pulsing__": { "call": method, "async": is_async, @@ -103,70 +55,53 @@ def _wrap_call_v2(method: str, args: tuple, kwargs: dict, is_async: bool) -> dic def _unwrap_call(msg: dict) -> tuple[str, tuple, dict, bool]: - """Unwrap call message, supporting both v1 and v2 protocols. - - Returns: - (method_name, args, kwargs, is_async) - """ - version = _detect_protocol_version(msg) - - if version == 2: - pulsing = msg.get("__pulsing__", {}) - user_data = msg.get("user_data", {}) - return ( - pulsing.get("call", ""), - tuple(user_data.get("args", ())), - dict(user_data.get("kwargs", {})), - pulsing.get("async", False), - ) - else: # v1 - return ( - msg.get("__call__", ""), - tuple(msg.get("args", ())), - dict(msg.get("kwargs", {})), - msg.get("__async__", False), - ) - - -def _wrap_response_v1(result: Any = None, error: str | None = None) -> dict: - """v1 protocol response format.""" - if error: - return {"__error__": error} - return {"__result__": result} + """Unwrap call message. 
Returns (method_name, args, kwargs, is_async).""" + pulsing = msg.get("__pulsing__", {}) + user_data = msg.get("user_data", {}) + return ( + pulsing.get("call", ""), + tuple(user_data.get("args", ())), + dict(user_data.get("kwargs", {})), + pulsing.get("async", False), + ) -def _wrap_response_v2(result: Any = None, error: str | None = None) -> dict: - """v2 protocol response format.""" +def _wrap_response(result: Any = None, error: str | None = None) -> dict: + """Wrap response for wire format.""" if error: return { - "__pulsing_proto__": "v2", + "__pulsing_proto__": _PULSING_WIRE_VERSION, "__pulsing__": {"error": error}, "user_data": {}, } return { - "__pulsing_proto__": "v2", + "__pulsing_proto__": _PULSING_WIRE_VERSION, "__pulsing__": {"result": result}, "user_data": {}, } def _unwrap_response(resp: dict) -> tuple[Any, str | None]: - """Unwrap response, supporting both v1 and v2 protocols. + """Unwrap response. Returns (result, error) - one of them will be None. - Returns: - (result, error) - one of them will be None + Accepts: wire format (__pulsing__.result/error), legacy (__result__/__error__), + and top-level "result"/"error" (e.g. from Message payload JSON). 
""" - version = _detect_protocol_version(resp) - - if version == 2: - pulsing = resp.get("__pulsing__", {}) + pulsing = resp.get("__pulsing__", {}) + if isinstance(pulsing, dict): if "error" in pulsing: return (None, pulsing["error"]) - return (pulsing.get("result"), None) - else: # v1 - if "__error__" in resp: - return (None, resp["__error__"]) - return (resp.get("__result__"), None) + if "result" in pulsing: + return (pulsing["result"], None) + if "__error__" in resp: + return (None, resp["__error__"]) + if "__result__" in resp: + return (resp["__result__"], None) + if "error" in resp: + return (None, resp["error"]) + if "result" in resp: + return (resp["result"], None) + return (None, None) _PULSING_ERROR_PREFIX = "__PULSING_ERROR__:" @@ -301,25 +236,36 @@ def get_actor_metadata(name: str) -> dict[str, str] | None: return _actor_metadata_registry.get(name) -class ActorRefView: - """Wrapper around ActorRef that adds .as_any() for an untyped proxy. +def _extract_methods(cls: type) -> tuple[list[str], set[str]]: + """Extract public method names and async method set from a class. - Returned by resolve(name). Delegates .ask(), .tell(), and other - ActorRef attributes to the underlying ref. Use .as_any() to get - a proxy that forwards any method call to the remote actor. + Handles @pul.remote ActorClass and Ray-wrapped classes by unwrapping first. 
""" + # If it's an ActorClass (@pul.remote decorated), extract the original class + if isinstance(cls, ActorClass): + cls = cls._cls - __slots__ = ("_ref",) - - def __init__(self, ref: ActorRef): - self._ref = ref - - def as_any(self) -> "ActorProxy": - """Return an untyped proxy that forwards any method call to the remote actor.""" - return ActorProxy(self._ref, method_names=None, async_methods=None) + # If it's a Ray ActorClass, extract the original class + try: + from ray.actor import ActorClass as RayActorClass + + if isinstance(cls, RayActorClass): + if hasattr(cls, "__ray_metadata__"): + meta = cls.__ray_metadata__ + if hasattr(meta, "modified_class"): + cls = meta.modified_class + except ImportError: + pass - def __getattr__(self, name: str): - return getattr(self._ref, name) + methods = [] + async_methods = set() + for name, method in inspect.getmembers(cls, predicate=inspect.isfunction): + if name.startswith("_"): + continue + methods.append(name) + if inspect.iscoroutinefunction(method) or inspect.isasyncgenfunction(method): + async_methods.add(name) + return methods, async_methods PYTHON_ACTOR_SERVICE_NAME = "system/python_actor_service" @@ -369,7 +315,10 @@ def from_ref( class _MethodCaller: - """Method caller.""" + """Method caller. 
Supports two usage patterns: + - await proxy.method(args) — method call + - await proxy.attr — attribute access (no args) + """ def __init__(self, actor_ref: ActorRef, method_name: str, is_async: bool = False): self._ref = actor_ref @@ -382,14 +331,13 @@ def __call__(self, *args, **kwargs): else: return self._sync_call(*args, **kwargs) + def __await__(self): + """Support await proxy.attr for direct attribute access""" + return self().__await__() + async def _sync_call(self, *args, **kwargs) -> Any: """Synchronous method call.""" - # Use configured protocol version (default v1) - protocol_version = _get_protocol_version() - if protocol_version == 2: - call_msg = _wrap_call_v2(self._method, args, kwargs, False) - else: - call_msg = _wrap_call_v1(self._method, args, kwargs, False) + call_msg = _wrap_call(self._method, args, kwargs, False) resp = await _ask_convert_errors(self._ref, call_msg) @@ -409,12 +357,21 @@ async def _sync_call(self, *args, **kwargs) -> Any: if resp.is_stream: return _SyncGeneratorStreamReader(resp) data = resp.to_json() + if not isinstance(data, dict): + return resp if resp.msg_type == "Error": - # Actor execution error raise PulsingActorError( data.get("error", "Remote call failed"), actor_name=str(self._ref.actor_id.id), ) + result, error = _unwrap_response(data) + if error: + raise PulsingActorError( + error, + actor_name=str(self._ref.actor_id.id), + ) + if result is not None: + return result return data.get("result") return resp @@ -445,12 +402,7 @@ def __init__( async def _get_stream(self): """Get stream (lazy initialization)""" if self._stream_reader is None: - # Use configured protocol version (default v1) - protocol_version = _get_protocol_version() - if protocol_version == 2: - call_msg = _wrap_call_v2(self._method, self._args, self._kwargs, True) - else: - call_msg = _wrap_call_v1(self._method, self._args, self._kwargs, True) + call_msg = _wrap_call(self._method, self._args, self._kwargs, True) resp = await 
_ask_convert_errors(self._ref, call_msg) # Response may be PyMessage (streaming) or direct Python object @@ -484,20 +436,32 @@ async def __anext__(self): reader = await self._get_stream() try: item = await reader.__anext__() - # Check if it's the final result if isinstance(item, dict): + # Wire format (__pulsing__.result/error) or legacy (__result__/__error__) + result, error = _unwrap_response(item) + if error is not None: + raise PulsingActorError( + error, actor_name=str(self._ref.actor_id.id) + ) + if ( + result is not None + and "__yield__" not in item + and "__final__" not in item + ): + # Single-value response (non-streaming) + self._final_result = result + self._got_result = True + raise StopAsyncIteration if "__final__" in item: self._final_result = item.get("__result__") self._got_result = True raise StopAsyncIteration if "__error__" in item: - # Actor execution error raise PulsingActorError( item["__error__"], actor_name=str(self._ref.actor_id.id) ) if "__yield__" in item: return item["__yield__"] - # Single-value response (non-streaming): {"__result__": value} if "__result__" in item: self._final_result = item.get("__result__") self._got_result = True @@ -586,7 +550,7 @@ def __getattr__(self, name: str): raise AttributeError(name) def caller(*args, **kwargs): - msg = _wrap_call_v1(name, args, kwargs, is_async=True) + msg = _wrap_call(name, args, kwargs, is_async=True) delay = max(0.0, self._delay_sec) async def _send(): @@ -638,25 +602,28 @@ def on_stop(self) -> None: if hasattr(self._instance, "on_stop"): self._instance.on_stop() + def metadata(self) -> dict[str, str]: + if hasattr(self._instance, "metadata") and callable(self._instance.metadata): + return self._instance.metadata() + return {} + async def receive(self, msg) -> Any: - # Handle dict-based call format (supporting both v1 and v2) + # Handle dict-based call format if isinstance(msg, dict): - # Detect protocol version - version = _detect_protocol_version(msg) method, args, kwargs, 
is_async_call = _unwrap_call(msg) if not method or method.startswith("_"): - error_msg = f"Invalid method: {method}" - if version == 2: - return _wrap_response_v2(error=error_msg) - return _wrap_response_v1(error=error_msg) + return _wrap_response(error=f"Invalid method: {method}") - func = getattr(self._instance, method, None) - if func is None or not callable(func): - error_msg = f"Not found: {method}" - if version == 2: - return _wrap_response_v2(error=error_msg) - return _wrap_response_v1(error=error_msg) + _MISSING = object() + attr = getattr(self._instance, method, _MISSING) + if attr is _MISSING: + return _wrap_response(error=f"Not found: {method}") + + if not callable(attr): + return _wrap_response(result=attr) + + func = attr # Detect if it's an async method (including async generators) is_async_method = ( @@ -684,15 +651,9 @@ async def receive(self, msg) -> Any: return self._handle_generator_result(result) if asyncio.iscoroutine(result): result = await result - # Use same protocol version as request - if version == 2: - return _wrap_response_v2(result=result) - return _wrap_response_v1(result=result) + return _wrap_response(result=result) except Exception as e: - error_msg = str(e) - if version == 2: - return _wrap_response_v2(error=error_msg) - return _wrap_response_v1(error=error_msg) + return _wrap_response(error=str(e)) # Handle legacy Message-based call format (for Rust actor compatibility) if isinstance(msg, Message): @@ -921,6 +882,19 @@ class ActorClass: counter = await Counter.local(system, init=10) """ + @staticmethod + def _unwrap_ray_class(cls): + """Extract original user class if cls is a Ray ActorClass""" + try: + from ray.actor import ActorClass as RayActorClass + except ImportError: + return cls + if isinstance(cls, RayActorClass): + for base in type(cls).__bases__: + if base is not RayActorClass and base.__name__ != "Generic": + return base + return cls + def __init__( self, cls: type, @@ -929,6 +903,10 @@ def __init__( min_backoff: float 
= 0.1, max_backoff: float = 30.0, ): + unwrapped = self._unwrap_ray_class(cls) + # Keep Ray handle so .remote() remains available + self._ray_cls = cls if unwrapped is not cls else None + cls = unwrapped self._cls = cls self._class_name = f"{cls.__module__}.{cls.__name__}" self._restart_policy = restart_policy @@ -953,6 +931,10 @@ def __init__( # Register class _actor_class_registry[self._class_name] = cls + # If original class was decorated with @ray.remote, override with Ray's .remote() method + if self._ray_cls is not None: + self.remote = self._ray_cls.remote + async def spawn( self, *args, @@ -971,7 +953,7 @@ async def spawn( **kwargs: Keyword arguments for the class constructor Example: - from pulsing.actor import init, remote + from pulsing.core import init, remote await init() @@ -1178,6 +1160,7 @@ async def resolve( *, system: ActorSystem | None = None, node_id: int | None = None, + timeout: float | None = None, ) -> ActorProxy: """Resolve actor by name, return typed ActorProxy @@ -1185,6 +1168,8 @@ async def resolve( name: Actor name system: ActorSystem instance, uses global system if not provided node_id: Target node ID, searches in cluster if not provided + timeout: Seconds to wait for the name to appear (gossip convergence). + None means no wait (error immediately if not found). Returns: ActorProxy: Proxy with method type information @@ -1217,7 +1202,7 @@ async def generate(self, prompt): ... # async method, streaming response ) system = _global_system - actor_ref = await system.resolve_named(name, node_id=node_id) + actor_ref = await system.resolve_named(name, node_id=node_id, timeout=timeout) return ActorProxy(actor_ref, self._methods, self._async_methods) @@ -1484,23 +1469,27 @@ async def resolve( name: str, *, node_id: int | None = None, + timeout: float | None = None, ): """Resolve a named actor by name. - Returns an object that supports .ask(), .tell(), and .as_any(). + Returns an ActorRef that supports .ask(), .tell(), .as_any(), and .as_type(). 
Use .as_any() to get an untyped proxy that forwards any method call. + Use .as_type(Counter) to get a typed proxy with method validation. For typed ActorProxy with method calls, use Counter.resolve(name) instead. Args: name: Actor name node_id: Target node ID, searches in cluster if not provided + timeout: Seconds to wait for the name to appear (gossip convergence). + None means no wait (error immediately if not found). Returns: - ActorRefView: Ref-like object with .as_any() for untyped proxy. + ActorRef: Actor reference with .as_any() / .as_type() for proxy generation. Example: - from pulsing.actor import init, remote, resolve + from pulsing.core import init, remote, resolve await init() @@ -1509,6 +1498,9 @@ async def resolve( proxy = ref.as_any() await proxy.send_text(chat_id, content) + # Wait for name to appear (gossip convergence) + ref = await resolve("peer_node", timeout=30) + # Low-level ask ref = await resolve("my_counter") result = await ref.ask({"__call__": "increment", "args": [], "kwargs": {}}) @@ -1519,29 +1511,142 @@ async def resolve( raise RuntimeError("Actor system not initialized. Call 'await init()' first.") try: - ref = await _global_system.resolve(name, node_id=node_id) - return ActorRefView(ref) + return await _global_system.resolve(name, node_id=node_id, timeout=timeout) except RuntimeError as e: raise _convert_rust_error(e) from e -def as_any(ref: ActorRef | ActorRefView) -> ActorProxy: +def as_any(ref: ActorRef) -> ActorProxy: """Return an untyped proxy that forwards any method call to the remote actor. - Use when you have an ActorRef (or ref from resolve()) and want to call - methods by name without the typed class. + Use when you have an ActorRef and want to call methods by name + without the typed class. Args: - ref: ActorRef from resolve(name), or raw ActorRef from system.resolve_named(). + ref: ActorRef from resolve(name). 
Example: ref = await resolve("channel.discord") proxy = as_any(ref) # or proxy = ref.as_any() await proxy.send_text(chat_id, content) """ - if isinstance(ref, ActorRefView): - return ref.as_any() - return ActorProxy(ref, method_names=None, async_methods=None) + return ref.as_any() + + +def mount(instance: Any, *, name: str, public: bool = True) -> None: + """Mount an existing Python object to the Pulsing communication network. + + Synchronous interface, can be called in ``__init__``. Automatically: + 1. Initialize Pulsing (if not already, auto-detects Ray environment) + 2. Wrap instance as a Pulsing actor + 3. Register to Pulsing network, other nodes can discover via ``pul.resolve(name)`` + + Args: + instance: Object to mount (any Python instance) + name: Pulsing name, other nodes resolve via this name + public: Whether discoverable by other cluster nodes (default True) + + Example:: + + @ray.remote + class Counter: + def __init__(self, name, peers): + self.name = name + self.peers = sorted(peers) + pul.mount(self, name=name) + + async def greet(self, msg): + return f"Hello from {self.name}: {msg}" + """ + from . import _global_system + + # Auto-initialize Pulsing + if _global_system is None: + _auto_init_pulsing() + + from . import _global_system as system + + if system is None: + raise RuntimeError( + "Pulsing initialization failed. Please call pul.init() or run in Ray environment." + ) + + actor_name = name if "/" in name else f"actors/{name}" + wrapped = _WrappedActor(instance) + + async def _do_mount(): + ref = await system.spawn(wrapped, name=actor_name, public=public) + return ref + + actor_ref = _run_sync_on_pulsing_loop(_do_mount()) + wrapped._inject_delayed(actor_ref) + _register_actor_metadata(actor_name, type(instance)) + + +def unmount(name: str) -> None: + """Unmount a previously mounted actor from the Pulsing network. + + Args: + name: Name used during mounting + """ + from . 
import _global_system + + if _global_system is None: + return + + actor_name = name if "/" in name else f"actors/{name}" + + async def _do_unmount(): + await _global_system.stop(actor_name) + + _run_sync_on_pulsing_loop(_do_unmount()) + + +def _auto_init_pulsing(): + """Auto-detect environment and initialize Pulsing.""" + try: + import ray + + if ray.is_initialized(): + from pulsing.integrations.ray import init_in_ray + + init_in_ray() + return + except ImportError: + pass + + raise RuntimeError( + "Pulsing not initialized. Please call await pul.init() or run in Ray environment." + ) + + +def _run_sync_on_pulsing_loop(coro): + """Execute coroutine synchronously on Pulsing's background event loop.""" + import asyncio + import concurrent.futures + + # Try to use pulsing.integrations.ray's background loop (Ray environment) + try: + from pulsing.integrations.ray import _loop + + if _loop is not None: + fut = asyncio.run_coroutine_threadsafe(coro, _loop) + return fut.result(timeout=30) + except ImportError: + pass + + # Non-Ray environment: try to create new loop in current thread + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = None + + if loop is None: + return asyncio.run(coro) + + # Already have running loop (e.g., async context), run in new thread + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: + return pool.submit(asyncio.run, coro).result(timeout=30) RemoteClass = ActorClass diff --git a/python/pulsing/examples/__init__.py b/python/pulsing/examples/__init__.py new file mode 100644 index 000000000..6f4cf8037 --- /dev/null +++ b/python/pulsing/examples/__init__.py @@ -0,0 +1,33 @@ +""" +pulsing.examples — Pulsing built-in examples collection + +Each submodule is a standalone runnable example, also importable for testing. 
+""" + +import importlib +import inspect +from pathlib import Path + +# Register all examples: module name -> one-line summary +_EXAMPLES = { + "counting_game": "Pulsing + Ray distributed counting game", +} + + +def list_examples(): + """Return [(name, summary, module_path)] list""" + result = [] + examples_dir = Path(__file__).parent + for name, summary in _EXAMPLES.items(): + filepath = examples_dir / f"{name}.py" + result.append((name, summary, str(filepath))) + return result + + +def get_example_detail(name): + """Return (summary, docstring, filepath), or None if not found""" + if name not in _EXAMPLES: + return None + mod = importlib.import_module(f"pulsing.examples.{name}") + filepath = inspect.getfile(mod) + return (_EXAMPLES[name], (mod.__doc__ or "").strip(), filepath) diff --git a/python/pulsing/examples/counting_game.py b/python/pulsing/examples/counting_game.py new file mode 100644 index 000000000..2a33ea138 --- /dev/null +++ b/python/pulsing/examples/counting_game.py @@ -0,0 +1,125 @@ +"""Counting Game — Pulsing distributed counting game + +20 nodes count in sequence and broadcast, demonstrating Pulsing's actor messaging capability. +Ray is only used for multiprocessing; counting logic is entirely handled by Pulsing actors. 
+ +Usage: + python -m pulsing.examples.counting_game + python -m pulsing.examples.counting_game --num-workers 10 +""" + +import os +import time + +import ray + +import pulsing as pul + + +# ── Counting Actor ─────────────────────────────────────────── + + +@ray.remote +class Counter: + """Each node holds its name, ordered peer list, and counting log.""" + + def __init__(self, name, peers): + self.name = name + self.peers = sorted(peers) + self.log = [] + pul.mount(self, name=name) # One line to join Pulsing network + + async def yield_number(self): + """Yield number: broadcast own number to all nodes""" + num = self.peers.index(self.name) + 1 + for peer in self.peers: + proxy = (await pul.resolve(peer, timeout=30)).as_type(Counter) + await proxy.on_number(num, self.name) + + async def on_number(self, num, from_who): + """Receive number: log it, relay if previous node finished""" + self.log.append({"number": num, "from": from_who}) + idx = self.peers.index(self.name) + if idx > 0 and from_who == self.peers[idx - 1]: + await self.yield_number() + + def get_pid(self): + return os.getpid() + + def get_log(self): + return list(self.log) + + +# ── Run ───────────────────────────────────────────────── + + +def run(num_workers=20): + """Run counting game (requires Ray initialized). 
Returns logs from all nodes, raises on failure.""" + names = [f"node_{i:02d}" for i in range(num_workers)] + t0 = time.time() + + # 1) Create Ray actors (auto pul.mount in __init__ to join Pulsing) + print(f"[counting_game] Starting {num_workers} nodes ...") + actors = [Counter.remote(name, names) for name in names] + pids = ray.get([a.get_pid.remote() for a in actors]) + assert len(set(pids)) == num_workers, "Not enough worker processes" + print(f"[counting_game] {num_workers} nodes ready ({time.time()-t0:.1f}s)") + + # 2) node_00 yields -> auto relays to node_19 + print("[counting_game] node_00 starting count ...") + ray.get(actors[0].yield_number.remote()) + + # 3) Wait for all nodes to collect complete logs + deadline = time.time() + 30 + while time.time() < deadline: + logs = ray.get([a.get_log.remote() for a in actors]) + done = sum(1 for lg in logs if len(lg) == num_workers) + print( + f"\r[counting_game] Collecting logs {done}/{num_workers}", + end="", + flush=True, + ) + if done == num_workers: + break + time.sleep(0.5) + else: + raise TimeoutError("Counting timeout") + print() + + # 4) Verify: each log entry's 'from' should match the number + for entries in logs: + for e in entries: + assert e["from"] == f"node_{e['number']-1:02d}" + + # 5) Print results + order = " → ".join(f"{i+1}:{names[i]}" for i in range(min(5, num_workers))) + if num_workers > 5: + order += f" → ... → {num_workers}:{names[-1]}" + elapsed = time.time() - t0 + print(f"[counting_game] Counting order: {order}") + print( + f"[counting_game] Passed! 
{num_workers}x{num_workers}={num_workers**2} messages, {elapsed:.1f}s" + ) + pul.cleanup_ray() + return logs + + +# ── CLI ────────────────────────────────────────────────── + + +def main(): + import argparse + + p = argparse.ArgumentParser(description="Pulsing distributed counting game") + p.add_argument("--num-workers", type=int, default=20) + args = p.parse_args() + + ray.init(num_cpus=args.num_workers + 1) + try: + run(args.num_workers) + finally: + ray.shutdown() + + +if __name__ == "__main__": + main() diff --git a/python/pulsing/integrations/__init__.py b/python/pulsing/integrations/__init__.py new file mode 100644 index 000000000..3560c582b --- /dev/null +++ b/python/pulsing/integrations/__init__.py @@ -0,0 +1,5 @@ +"""Third-party framework integrations.""" + +from . import ray_compat + +__all__ = ["ray_compat"] diff --git a/python/pulsing/autogen/__init__.py b/python/pulsing/integrations/autogen/__init__.py similarity index 91% rename from python/pulsing/autogen/__init__.py rename to python/pulsing/integrations/autogen/__init__.py index dfcbbbca6..93b39e3a8 100644 --- a/python/pulsing/autogen/__init__.py +++ b/python/pulsing/integrations/autogen/__init__.py @@ -2,7 +2,7 @@ Pulsing AutoGen Runtime - Unified Standalone/Distributed Runtime Usage: - from pulsing.autogen import PulsingRuntime + from pulsing.integrations.autogen import PulsingRuntime # Standalone mode runtime = PulsingRuntime() diff --git a/python/pulsing/autogen/agent_wrapper.py b/python/pulsing/integrations/autogen/agent_wrapper.py similarity index 99% rename from python/pulsing/autogen/agent_wrapper.py rename to python/pulsing/integrations/autogen/agent_wrapper.py index 5f200208e..440ea3ef6 100644 --- a/python/pulsing/autogen/agent_wrapper.py +++ b/python/pulsing/integrations/autogen/agent_wrapper.py @@ -6,7 +6,7 @@ import logging from typing import TYPE_CHECKING, Any -from pulsing.actor import Actor, ActorId +from pulsing.core import Actor, ActorId if TYPE_CHECKING: from .runtime import 
PulsingRuntime diff --git a/python/pulsing/autogen/runtime.py b/python/pulsing/integrations/autogen/runtime.py similarity index 99% rename from python/pulsing/autogen/runtime.py rename to python/pulsing/integrations/autogen/runtime.py index a7625dad7..e02782739 100644 --- a/python/pulsing/autogen/runtime.py +++ b/python/pulsing/integrations/autogen/runtime.py @@ -30,14 +30,14 @@ cast, ) -from pulsing.actor import ( +from pulsing.core import ( Actor, ActorRef, ActorSystem, Message, SystemConfig, ) -from pulsing.actor.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService +from pulsing.core.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService logger = logging.getLogger("pulsing.autogen") T = TypeVar("T") diff --git a/python/pulsing/langgraph/__init__.py b/python/pulsing/integrations/langgraph/__init__.py similarity index 92% rename from python/pulsing/langgraph/__init__.py rename to python/pulsing/integrations/langgraph/__init__.py index d6c74b063..5e37eb904 100644 --- a/python/pulsing/langgraph/__init__.py +++ b/python/pulsing/integrations/langgraph/__init__.py @@ -3,7 +3,7 @@ Usage: from langgraph.graph import StateGraph - from pulsing.langgraph import with_pulsing + from pulsing.integrations.langgraph import with_pulsing # Original LangGraph code graph = StateGraph(MyState) diff --git a/python/pulsing/langgraph/executor.py b/python/pulsing/integrations/langgraph/executor.py similarity index 98% rename from python/pulsing/langgraph/executor.py rename to python/pulsing/integrations/langgraph/executor.py index a91df119c..36b26689e 100644 --- a/python/pulsing/langgraph/executor.py +++ b/python/pulsing/integrations/langgraph/executor.py @@ -15,8 +15,8 @@ from concurrent.futures import ThreadPoolExecutor from typing import Any, Callable, Dict -from pulsing.actor import Actor, ActorId, ActorRef, ActorSystem, SystemConfig -from pulsing.actor.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService +from pulsing.core import Actor, ActorId, ActorRef, 
ActorSystem, SystemConfig +from pulsing.core.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService logger = logging.getLogger("pulsing.langgraph") diff --git a/python/pulsing/langgraph/wrapper.py b/python/pulsing/integrations/langgraph/wrapper.py similarity index 97% rename from python/pulsing/langgraph/wrapper.py rename to python/pulsing/integrations/langgraph/wrapper.py index aac08b991..faac82954 100644 --- a/python/pulsing/langgraph/wrapper.py +++ b/python/pulsing/integrations/langgraph/wrapper.py @@ -8,8 +8,8 @@ import logging from typing import Any, AsyncIterator, Dict, Optional, Union -from pulsing.actor import ActorSystem, SystemConfig -from pulsing.actor.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService +from pulsing.core import ActorSystem, SystemConfig +from pulsing.core.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService from .executor import NodeExecutorPool logger = logging.getLogger("pulsing.langgraph") diff --git a/python/pulsing/integrations/ray.py b/python/pulsing/integrations/ray.py new file mode 100644 index 000000000..4978ea131 --- /dev/null +++ b/python/pulsing/integrations/ray.py @@ -0,0 +1,163 @@ +""" +pulsing.ray - Initialize Pulsing in Ray cluster + +Each Ray worker process can call init_in_ray() to start Pulsing and auto-join the cluster. +Uses Ray's internal KV store to coordinate seed node discovery. + +Recommended usage: + import ray + from pulsing.integrations.ray import init_in_ray + + ray.init(runtime_env={"worker_process_setup_hook": init_in_ray}) + init_in_ray() # driver process also needs initialization +""" + +try: + import ray + from ray.experimental.internal_kv import ( + _internal_kv_get, + _internal_kv_put, + _internal_kv_del, + ) +except ImportError: + raise ImportError( + "pulsing.integrations.ray requires Ray. 
Install with: pip install 'ray[default]'" + ) + +import asyncio +import threading + +_SEED_KEY = "pulsing:seed_addr" + +# Background event loop (for sync init) +_loop = None +_thread = None + + +def _get_node_ip(): + """Get current Ray node IP""" + ctx = ray.get_runtime_context() + node_id = ctx.get_node_id() + for node in ray.nodes(): + if node["NodeID"] == node_id and node["Alive"]: + return node["NodeManagerAddress"] + raise RuntimeError("Cannot get current Ray node IP") + + +def _start_background_loop(): + """Start background event loop thread""" + global _loop, _thread + if _thread is not None: + return + + ready = threading.Event() + + def _run(): + global _loop + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + _loop = loop + ready.set() + loop.run_forever() + + _thread = threading.Thread(target=_run, daemon=True, name="pulsing-event-loop") + _thread.start() + ready.wait() + + +def _run_sync(coro): + """Execute coroutine synchronously in background event loop""" + fut = asyncio.run_coroutine_threadsafe(coro, _loop) + return fut.result(timeout=30) + + +async def _do_init(addr, seeds=None): + from pulsing.core import init + + return await init(addr=addr, seeds=seeds) + + +async def _do_shutdown(): + from pulsing.core import shutdown + + await shutdown() + + +def _get_seed(): + """Get seed address from Ray KV store""" + data = _internal_kv_get(_SEED_KEY) + return data.decode() if data else None + + +def _try_set_seed(addr): + """Atomically write seed address, returns True if write succeeded (I am seed). + + _internal_kv_put(overwrite=False) return value semantics: + False = key doesn't exist, written (success) + True = key already exists, not overwritten (failure) + """ + already_exists = _internal_kv_put(_SEED_KEY, addr.encode(), overwrite=False) + return not already_exists + + +def init_in_ray(): + """Initialize Pulsing in current process and join cluster. 
+ + Can be called directly or used as Ray worker_process_setup_hook: + + ray.init(runtime_env={"worker_process_setup_hook": init_in_ray}) + init_in_ray() # driver also needs this + """ + if not ray.is_initialized(): + raise RuntimeError("Ray not initialized, please call ray.init() first") + + node_ip = _get_node_ip() + _start_background_loop() + + # Seed exists -> join directly + seed_addr = _get_seed() + if seed_addr is not None: + return _run_sync(_do_init(f"{node_ip}:0", seeds=[seed_addr])) + + # Start as potential seed + system = _run_sync(_do_init(f"{node_ip}:0")) + my_addr = str(system.addr) + + if _try_set_seed(my_addr): + return system # Write succeeded, I am seed + + # Race lost (rare), re-join with actual seed + _run_sync(_do_shutdown()) + return _run_sync(_do_init(f"{node_ip}:0", seeds=[_get_seed()])) + + +async def async_init_in_ray(): + """Initialize Pulsing in current process and join cluster (async version). + + Suitable for async Ray actors. + """ + if not ray.is_initialized(): + raise RuntimeError("Ray not initialized, please call ray.init() first") + + node_ip = _get_node_ip() + + seed_addr = _get_seed() + if seed_addr is not None: + return await _do_init(f"{node_ip}:0", seeds=[seed_addr]) + + system = await _do_init(f"{node_ip}:0") + my_addr = str(system.addr) + + if _try_set_seed(my_addr): + return system + + await _do_shutdown() + return await _do_init(f"{node_ip}:0", seeds=[_get_seed()]) + + +def cleanup(): + """Clean up Pulsing state in Ray KV store""" + _internal_kv_del(_SEED_KEY) + + +__all__ = ["init_in_ray", "async_init_in_ray", "cleanup", "_get_seed", "_loop"] diff --git a/python/pulsing/compat/ray.py b/python/pulsing/integrations/ray_compat.py similarity index 94% rename from python/pulsing/compat/ray.py rename to python/pulsing/integrations/ray_compat.py index 5f9329915..b8737bd5e 100644 --- a/python/pulsing/compat/ray.py +++ b/python/pulsing/integrations/ray_compat.py @@ -2,7 +2,7 @@ Ray-compatible API for Pulsing This module provides 
a Ray-like synchronous API for easy migration. -For new projects, we recommend using the native async API in pulsing.actor. +For new projects, we recommend using the native async API in pulsing.core. Migration from Ray: # Before (Ray) @@ -19,7 +19,7 @@ def incr(self): self.value += 1; return self.value ray.shutdown() # After (Pulsing compat) - from pulsing.compat import ray # Only change this line! + from pulsing.integrations.ray_compat import ray # Only change this line! ray.init() @@ -33,7 +33,7 @@ def incr(self): self.value += 1; return self.value ray.shutdown() Note: This is a synchronous wrapper around async Pulsing. -For better performance in async environments, use pulsing.actor directly. +For better performance in async environments, use pulsing.core directly. """ import asyncio @@ -186,7 +186,7 @@ def __init__(self, cls: type): def _ensure_wrapped(self): if self._pulsing_class is None: - from pulsing.actor import remote + from pulsing.core import remote self._pulsing_class = remote(self._cls) @@ -226,15 +226,15 @@ def init( ignore_reinit_error: If True, ignore if already initialized Example: - from pulsing.compat import ray + from pulsing.integrations.ray_compat import ray ray.init() """ global _system, _loop _ensure_not_initialized(ignore_reinit_error) - from pulsing.actor import ActorSystem, SystemConfig - from pulsing.actor.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService + from pulsing.core import ActorSystem, SystemConfig + from pulsing.core.remote import PYTHON_ACTOR_SERVICE_NAME, PythonActorService # If we're already inside a running event loop (e.g., Jupyter/pytest-asyncio), # we must not call run_until_complete() on it. Use a dedicated background loop. 
@@ -354,6 +354,11 @@ def wait( return ready, remaining +import sys + +# Self-reference so that "from pulsing.integrations.ray_compat import ray" works +ray = sys.modules[__name__] + __all__ = [ "init", "shutdown", @@ -363,4 +368,5 @@ def wait( "put", "wait", "ObjectRef", + "ray", ] diff --git a/python/pulsing/queue/README.md b/python/pulsing/queue/README.md deleted file mode 100644 index a0d6f9f6d..000000000 --- a/python/pulsing/queue/README.md +++ /dev/null @@ -1,352 +0,0 @@ -# 分布式内存队列 - -基于 Pulsing Actor 架构实现的分布式内存队列系统。 - -**支持可插拔存储后端**,可根据需求选择不同的实现。 - -## 架构概览 - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ 应用层 │ -│ │ -│ Queue / QueueWriter / QueueReader │ -│ │ │ -│ │ get_bucket_ref(topic, bucket_id) │ -│ ▼ │ -└─────────────────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────────────┐ -│ StorageManager (每节点一个) │ -│ │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ GetBucket(topic, bucket_id) │ │ -│ │ │ │ │ -│ │ ├─ owner = hash(topic:bucket_id) % nodes ← 一致性哈希 │ │ -│ │ │ │ │ -│ │ ├─ owner == self? │ │ -│ │ │ ├─ Yes → 创建/返回 BucketStorage │ │ -│ │ │ │ → BucketReady(actor_id, node_id) │ │ -│ │ │ │ │ │ -│ │ │ └─ No → Redirect(owner_node_id) │ │ -│ │ │ 客户端重定向到正确节点 │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ - │ - ┌───────────────────────────┼───────────────────────────┐ - ▼ ▼ ▼ -┌──────────────┐ ┌──────────────┐ ┌──────────────┐ -│BucketStorage │ │BucketStorage │ │BucketStorage │ -│ bucket_0 │ │ bucket_1 │ │ bucket_2 │ -│ │ │ │ │ │ -│ - buffer[] │ │ - buffer[] │ │ - buffer[] │ -│ - backend │ │ - backend │ │ - backend │ -│ - Condition │ │ - Condition │ │ - Condition │ -└──────────────┘ └──────────────┘ └──────────────┘ - Node A Node B Node A -``` - -## 核心组件 - -### 1. 
StorageManager(存储管理器) - -**每个节点一个实例**,负责管理两类资源: - -| 资源类型 | 请求消息 | Actor 类型 | 用途 | -|---------|---------|-----------|------| -| Queue Bucket | `GetBucket` | `BucketStorage` | 生产者-消费者队列 | -| Topic Broker | `GetTopic` | `TopicBroker` | 发布-订阅 | - -核心职责: -- 使用**一致性哈希**判断资源的 owner 节点 -- Owner 节点:创建并返回对应 Actor -- 非 Owner 节点:返回 `Redirect`,指向正确节点 - -### 2. BucketStorage(桶存储) - -每个 bucket 一个实例,负责: -- 数据缓冲(内存) -- 数据持久化(由后端实现) -- 消费者阻塞/唤醒(asyncio.Condition) - -### 3. Queue / QueueWriter / QueueReader - -高级 API,对用户隐藏底层复杂性。 - -## 设计特点 - -| 特性 | 说明 | -|------|------| -| **集群唯一性** | StorageManager + 一致性哈希,确保每个 bucket 在集群中只有一个 Actor | -| **智能路由** | 错误请求自动重定向到正确节点 | -| **独立锁/条件变量** | 每个 bucket 独立,无跨 bucket 竞争 | -| **流式传输** | 消费者通过 StreamMessage 接收,内存友好 | -| **实时通知** | 新数据通过 condition + 流推送,无轮询 | - -## 请求流程 - -### Bucket 获取流程 - -``` -Queue.put(record) - │ - ├─ bucket_id = hash(record[bucket_column]) % num_buckets - │ - ▼ -get_bucket_ref(system, topic, bucket_id) - │ - ├─ 本地 StorageManager.GetBucket(...) - │ │ - │ ├─ owner == self? - │ │ ├─ Yes → BucketReady(actor_id) → 返回 ActorRef - │ │ └─ No → Redirect(owner_node_id) - │ │ │ - │ │ ▼ - │ │ owner 节点的 StorageManager.GetBucket(...) 
- │ │ │ - │ │ └─ BucketReady → 返回 ActorRef - │ - └─ bucket_ref.ask(Put, {record}) -``` - -### 数据传输流程 - -``` -生产者 BucketStorage 消费者 (wait=True) - │ │ │ - │── Put ───────────────────▶│ │ - │ │ buffer.append() │ - │ │ condition.notify_all() ──────▶│ 唤醒 - │◀── PutResponse ───────────│ │ - │ │ │ - │ │◀── GetStream ─────────────────│ - │ │ │ - │ │── StreamMessage chunk ───────▶│ 流式发送 -``` - -## 数据可见性模型 - -``` -┌─────────────────────────────────────────────────────┐ -│ 总数据视图 │ -├─────────────────────────┬───────────────────────────┤ -│ 持久化(若后端支持) │ 内存缓冲 │ -│ [0, persisted_count) │ [persisted_count, total) │ -└─────────────────────────┴───────────────────────────┘ - ↑ - 两部分同时可见 -``` - -- 写入后数据**立即**对消费者可见(在内存缓冲中) -- 达到 `batch_size` 后由后端决定是否持久化 -- 调用 `flush()` 可强制持久化 - -## 快速开始 - -```python -import asyncio -import pulsing as pul - -async def main(): - system = await pul.actor_system() - - # 生产者 - writer = await system.queue.write( - "my_queue", - bucket_column="user_id", - num_buckets=4, - ) - - # 写入数据(立即对消费者可见) - await writer.put({"user_id": "u1", "message": "Hello"}) - - # 消费者 - reader = await system.queue.read("my_queue") - - # 读取数据(内存 + 持久化同时可见) - records = await reader.get(limit=100) - - # 阻塞等待新数据 - records = await reader.get(limit=100, wait=True, timeout=10.0) - - await system.shutdown() - -asyncio.run(main()) -``` - -### 同步 API - -通过 `.sync()` 获取同步包装器,可与异步混用: - -```python -# 生产者用同步,消费者用异步 -sync_writer = writer.sync() -sync_writer.put({"user_id": "u1", "message": "Hello"}) -sync_writer.flush() - -records = await reader.get(limit=100) # 异步读取 - -# 或者反过来 -await writer.put({"user_id": "u2", "message": "World"}) - -sync_reader = reader.sync() -records = sync_reader.get(limit=100) # 同步读取 -``` - -## API - -### `system.queue.write(topic, ...)` - -打开队列用于写入。 - -```python -writer = await system.queue.write( - "my_queue", - bucket_column="user_id", # 分桶列 - num_buckets=4, # 桶数量 - batch_size=100, # 批处理大小 -) - -await writer.put({"user_id": "u1", "msg": "hello"}) -await 
writer.put([record1, record2, ...]) # 批量写入 -await writer.flush() # 强制持久化 -``` - -### `system.queue.read(topic, ...)` - -打开队列用于读取。支持三种模式: - -```python -# 1. 读取所有 bucket -reader = await system.queue.read("my_queue") - -# 2. 读取指定 bucket -reader = await system.queue.read("my_queue", bucket_id=0) -reader = await system.queue.read("my_queue", bucket_ids=[0, 2]) - -# 3. 分布式消费:通过 rank/world_size 自动分配 bucket -reader0 = await system.queue.read("q", rank=0, world_size=2, num_buckets=4) # bucket 0, 2 -reader1 = await system.queue.read("q", rank=1, world_size=2, num_buckets=4) # bucket 1, 3 - -# 读取数据 -records = await reader.get(limit=100) -records = await reader.get(limit=100, wait=True, timeout=10.0) # 阻塞等待 -``` - -## 分布式消费 - -通过 `rank` 和 `world_size` 实现多消费者并行消费: - -``` -num_buckets=4, world_size=2: - -Consumer (rank=0) Consumer (rank=1) - │ │ - ├─▶ bucket_0 ├─▶ bucket_1 - └─▶ bucket_2 └─▶ bucket_3 -``` - -## 可插拔存储后端 - -队列仅内置 `memory` 后端;持久化等能力通过**插件**以 `register_backend()` 接入,不在 Pulsing 内直接依赖具体实现。 - -### 内置后端 - -| 后端 | 说明 | 适用场景 | -|------|------|----------| -| `memory` | 纯内存,无持久化(默认) | 测试、临时数据 | - -### 插件后端 - -持久化或其它后端由第三方包提供,通过 `register_backend()` 注册后使用: - -```python -# 默认内存后端 -writer = await system.queue.write("my_queue") - -# 使用插件提供的后端(示例) -from my_plugin import MyBackend -from pulsing.queue import register_backend - -register_backend("my_backend", MyBackend) -writer = await system.queue.write("my_queue", backend="my_backend") -``` - -### 自定义后端 - -实现 `StorageBackend` 协议即可: - -```python -class MyBackend: - def __init__(self, bucket_id: int, storage_path: str, **kwargs): - ... - - async def put(self, record: dict) -> None: ... - async def put_batch(self, records: list[dict]) -> None: ... - async def get(self, limit: int, offset: int) -> list[dict]: ... - async def get_stream(self, limit, offset, wait, timeout) -> AsyncIterator: ... - async def flush(self) -> None: ... - async def stats(self) -> dict: ... - def total_count(self) -> int: ... 
-``` - ---- - -## 设计点评 - -### ✅ 优点 - -1. **集群唯一性保证** - - StorageManager 使用一致性哈希确定 bucket owner - - 非 owner 节点返回 Redirect,避免创建重复 Actor - - 解决了分布式环境下的竞态条件问题 - -2. **架构清晰** - - 三层架构:应用层 (Queue) → 管理层 (StorageManager) → 存储层 (BucketStorage) - - 职责分离,每层只关注自己的逻辑 - -3. **智能路由** - - 客户端无需知道 bucket 在哪个节点 - - 自动重定向到正确节点 - -4. **高并发支持** - - 每个 bucket 独立的锁和条件变量 - - 无跨 bucket 竞争 - -5. **数据实时可见** - - 写入后立即可读(内存缓冲) - - 无需等待持久化 - -6. **流式传输** - - 大数据量传输内存友好 - - 支持阻塞等待新数据 - -### ⚠️ 潜在改进点 - -1. **节点变化处理** - - 当前一致性哈希在节点加入/退出时可能导致 bucket 重分布 - - 可以考虑虚拟节点或一致性哈希环来减少影响 - -2. **元数据持久化** - - Queue 配置(bucket_column, num_buckets)目前不持久化 - - 消费者需要知道这些参数 - - 可以考虑将元数据存储在集群中 - -3. **故障恢复** - - 节点故障时,其 bucket 数据可能丢失(内存部分) - - 可以考虑副本或 WAL 机制 - -4. **性能优化** - - `get_bucket_ref` 每次都查询 StorageManager - - 可以增加客户端缓存,减少 RPC 调用 - -### 📊 适用场景 - -- ✅ 分布式数据管道 -- ✅ 生产者-消费者模式 -- ✅ 分布式训练数据分发 -- ✅ 实时数据流处理 -- ⚠️ 不适合需要强一致性的场景 -- ⚠️ 不适合需要事务的场景 diff --git a/python/pulsing/queue/storage.py b/python/pulsing/queue/storage.py deleted file mode 100644 index d3e70f2c9..000000000 --- a/python/pulsing/queue/storage.py +++ /dev/null @@ -1,142 +0,0 @@ -"""Bucket Storage Actor - Using Pluggable Backend""" - -import asyncio -import logging -from typing import Any, AsyncIterator - -from pulsing.actor import ActorId, StreamMessage, remote - -from .backend import StorageBackend, get_backend_class - -logger = logging.getLogger(__name__) - - -@remote -class BucketStorage: - """Storage Actor for a Single Bucket - - Uses pluggable StorageBackend for data storage. 
- - Args: - bucket_id: Bucket ID - storage_path: Storage path - batch_size: Batch size - backend: Backend name or backend class - - "memory": Pure in-memory backend (default) - - Custom name/class: Use register_backend() or pass class - backend_options: Additional parameters passed to backend - """ - - def __init__( - self, - bucket_id: int, - storage_path: str, - batch_size: int = 100, - backend: str | type = "memory", - backend_options: dict[str, Any] | None = None, - ): - self.bucket_id = bucket_id - self.storage_path = storage_path - self.batch_size = batch_size - self._backend_type = backend - self._backend_options = backend_options or {} - - # Backend instance (initialized in on_start) - self._backend: StorageBackend | None = None - - def on_start(self, actor_id: ActorId) -> None: - # Create backend instance - backend_class = get_backend_class(self._backend_type) - self._backend = backend_class( - bucket_id=self.bucket_id, - storage_path=self.storage_path, - batch_size=self.batch_size, - **self._backend_options, - ) - backend_name = getattr(backend_class, "__name__", str(self._backend_type)) - logger.info( - f"BucketStorage[{self.bucket_id}] started with {backend_name} at {self.storage_path}" - ) - - def on_stop(self) -> None: - logger.info(f"BucketStorage[{self.bucket_id}] stopping") - - # ========== Public Remote Methods ========== - - async def put(self, record: dict) -> dict: - """Put a single record. - - Args: - record: Record to store - - Returns: - {"status": "ok"} - """ - if not record: - raise ValueError("Missing 'record'") - await self._backend.put(record) - return {"status": "ok"} - - async def put_batch(self, records: list[dict]) -> dict: - """Put multiple records. 
- - Args: - records: List of records to store - - Returns: - {"status": "ok", "count": N} - """ - if not records: - raise ValueError("Missing 'records'") - await self._backend.put_batch(records) - return {"status": "ok", "count": len(records)} - - async def get(self, limit: int = 100, offset: int = 0) -> list[dict]: - """Get records. - - Args: - limit: Maximum number of records to return - offset: Starting offset - - Returns: - List of records - """ - return await self._backend.get(limit, offset) - - async def get_stream( - self, - limit: int = 100, - offset: int = 0, - wait: bool = False, - timeout: float | None = None, - ) -> AsyncIterator[list[dict]]: - """Get records as a stream. - - Args: - limit: Maximum number of records to return - offset: Starting offset - wait: Whether to wait for new records - timeout: Timeout in seconds - - Yields: - Batches of records - """ - async for records in self._backend.get_stream(limit, offset, wait, timeout): - yield records - - async def flush(self) -> dict: - """Flush pending writes. - - Returns: - {"status": "ok"} - """ - await self._backend.flush() - return {"status": "ok"} - - async def stats(self) -> dict: - """Get storage statistics. 
- - Returns: - Statistics dict from backend - """ - return await self._backend.stats() diff --git a/python/pulsing/actors/__init__.py b/python/pulsing/serving/__init__.py similarity index 100% rename from python/pulsing/actors/__init__.py rename to python/pulsing/serving/__init__.py diff --git a/python/pulsing/actors/load_stream.py b/python/pulsing/serving/load_stream.py similarity index 99% rename from python/pulsing/actors/load_stream.py rename to python/pulsing/serving/load_stream.py index 293a357c1..3f6ae554e 100644 --- a/python/pulsing/actors/load_stream.py +++ b/python/pulsing/serving/load_stream.py @@ -25,7 +25,7 @@ from collections.abc import Callable from dataclasses import dataclass -from pulsing.actor import ActorRef, Message +from pulsing.core import ActorRef, Message @dataclass diff --git a/python/pulsing/actors/router.py b/python/pulsing/serving/router.py similarity index 98% rename from python/pulsing/actors/router.py rename to python/pulsing/serving/router.py index 991192750..d17dfcc1d 100644 --- a/python/pulsing/actors/router.py +++ b/python/pulsing/serving/router.py @@ -8,7 +8,7 @@ from aiohttp import web -from pulsing.actor import Actor, ActorId, ActorSystem, Message, get_system +from pulsing.core import Actor, ActorId, ActorSystem, Message, get_system @dataclass @@ -431,7 +431,7 @@ class Router(Actor): """Router Actor - OpenAI-compatible HTTP API router as an Actor This actor wraps the start_router/stop_router functions to provide - a CLI-compatible entry point via `pulsing actor pulsing.actors.Router`. + a CLI-compatible entry point via `pulsing actor pulsing.serving.Router`. 
Args: http_host: HTTP listen address (default: "0.0.0.0") @@ -447,7 +447,7 @@ class Router(Actor): Example: # Start via CLI - pulsing actor pulsing.actors.Router \\ + pulsing actor pulsing.serving.Router \\ --http_host 0.0.0.0 \\ --http_port 8080 \\ --model_name my-llm \\ diff --git a/python/pulsing/actors/scheduler.py b/python/pulsing/serving/scheduler.py similarity index 100% rename from python/pulsing/actors/scheduler.py rename to python/pulsing/serving/scheduler.py diff --git a/python/pulsing/actors/vllm/__init__.py b/python/pulsing/serving/vllm/__init__.py similarity index 100% rename from python/pulsing/actors/vllm/__init__.py rename to python/pulsing/serving/vllm/__init__.py diff --git a/python/pulsing/actors/vllm/handlers.py b/python/pulsing/serving/vllm/handlers.py similarity index 100% rename from python/pulsing/actors/vllm/handlers.py rename to python/pulsing/serving/vllm/handlers.py diff --git a/python/pulsing/actors/vllm/sampling.py b/python/pulsing/serving/vllm/sampling.py similarity index 100% rename from python/pulsing/actors/vllm/sampling.py rename to python/pulsing/serving/vllm/sampling.py diff --git a/python/pulsing/actors/vllm/utils.py b/python/pulsing/serving/vllm/utils.py similarity index 100% rename from python/pulsing/actors/vllm/utils.py rename to python/pulsing/serving/vllm/utils.py diff --git a/python/pulsing/actors/vllm/worker.py b/python/pulsing/serving/vllm/worker.py similarity index 99% rename from python/pulsing/actors/vllm/worker.py rename to python/pulsing/serving/vllm/worker.py index e59f47f38..16a70fa77 100644 --- a/python/pulsing/actors/vllm/worker.py +++ b/python/pulsing/serving/vllm/worker.py @@ -17,7 +17,7 @@ import uuid from typing import Any -from pulsing.actor import Actor, ActorId, Message, StreamMessage +from pulsing.core import Actor, ActorId, Message, StreamMessage from .handlers import BaseWorkerHandler, DecodeWorkerHandler, PrefillWorkerHandler from .utils import _is_macos, _setup_macos_metal_env diff --git 
a/python/pulsing/actors/vllm_worker.py b/python/pulsing/serving/vllm_worker.py similarity index 99% rename from python/pulsing/actors/vllm_worker.py rename to python/pulsing/serving/vllm_worker.py index 574e5055e..ae0c66476 100644 --- a/python/pulsing/actors/vllm_worker.py +++ b/python/pulsing/serving/vllm_worker.py @@ -17,7 +17,7 @@ import uuid from typing import Any -from pulsing.actor import Actor, ActorId, Message, StreamMessage +from pulsing.core import Actor, ActorId, Message, StreamMessage from .vllm_handlers import BaseWorkerHandler, DecodeWorkerHandler, PrefillWorkerHandler from .vllm_utils import _is_macos, _setup_macos_metal_env diff --git a/python/pulsing/actors/worker.py b/python/pulsing/serving/worker.py similarity index 99% rename from python/pulsing/actors/worker.py rename to python/pulsing/serving/worker.py index 7d1dd9769..992c2e1af 100644 --- a/python/pulsing/actors/worker.py +++ b/python/pulsing/serving/worker.py @@ -5,7 +5,7 @@ import uuid from dataclasses import dataclass -from pulsing.actor import Actor, ActorId, Message, StreamMessage +from pulsing.core import Actor, ActorId, Message, StreamMessage @dataclass diff --git a/python/pulsing/queue/__init__.py b/python/pulsing/streaming/__init__.py similarity index 50% rename from python/pulsing/queue/__init__.py rename to python/pulsing/streaming/__init__.py index 0a4184b87..f236fc79c 100644 --- a/python/pulsing/queue/__init__.py +++ b/python/pulsing/streaming/__init__.py @@ -1,25 +1,12 @@ -"""Distributed In-Memory Queue - Based on Pulsing Actor Architecture +"""Streaming - Queue (point-to-point) and Pub/Sub (topic) APIs -Architecture Features: -- Each node has a StorageManager Actor that manages all buckets on this node -- StorageManager uses consistent hashing to determine the owner node for each bucket -- Ensures only one Actor per bucket across the entire cluster -- Supports pluggable storage backends +Queue: + writer = await system.streaming.write("my_queue") # or system.queue + reader = 
await system.streaming.read("my_queue") -Storage Backends: -- "memory": Pure in-memory backend (built-in default) -- Custom backends: register_backend() or pass class to write_queue() - -Example: - system = await pul.actor_system() - - # Write to queue - writer = await system.queue.write("my_queue") - await writer.put({"id": "1", "data": "hello"}) - - # Read from queue - reader = await system.queue.read("my_queue") - records = await reader.get(limit=10) +Topic: + writer = await system.topic.write("events") + reader = await system.topic.read("events") """ from typing import TYPE_CHECKING, Any @@ -37,6 +24,15 @@ get_storage_manager, get_topic_broker, ) +from .pubsub import ( + PublishMode, + PublishResult, + TopicReader, + TopicWriter, + read_topic, + subscribe_to_topic, + write_topic, +) from .queue import Queue, QueueReader, QueueWriter, read_queue, write_queue from .storage import BucketStorage from .sync_queue import SyncQueue, SyncQueueReader, SyncQueueWriter @@ -46,19 +42,7 @@ class QueueAPI: - """Queue API entry point via system.queue - - Example: - system = await pul.actor_system() - - # Write - writer = await system.queue.write("my_queue") - await writer.put({"id": "1", "data": "hello"}) - - # Read - reader = await system.queue.read("my_queue") - records = await reader.get(limit=10) - """ + """Queue API entry point via system.queue""" def __init__(self, system: "ActorSystem"): self._system = system @@ -74,20 +58,7 @@ async def write( backend: str | type = "memory", backend_options: dict[str, Any] | None = None, ) -> QueueWriter: - """Open queue for writing - - Args: - topic: Queue topic name - bucket_column: Column used for bucketing (default: "id") - num_buckets: Number of buckets (default: 4) - batch_size: Batch size for writes (default: 100) - storage_path: Storage path (default: ./queue_storage/{topic}) - backend: Storage backend ("memory" or custom) - backend_options: Additional backend options - - Returns: - QueueWriter for put/flush operations - """ + 
"""Open queue for writing""" return await write_queue( self._system, topic, @@ -112,22 +83,7 @@ async def read( backend: str | type = "memory", backend_options: dict[str, Any] | None = None, ) -> QueueReader: - """Open queue for reading - - Args: - topic: Queue topic name - bucket_id: Single bucket to read from - bucket_ids: List of buckets to read from - rank: Consumer rank for distributed consumption - world_size: Total consumers for distributed consumption - num_buckets: Number of buckets (default: 4) - storage_path: Storage path - backend: Storage backend (must match writer) - backend_options: Additional backend options - - Returns: - QueueReader for get operations - """ + """Open queue for reading""" return await read_queue( self._system, topic, @@ -142,29 +98,60 @@ async def read( ) +class TopicAPI: + """Topic API entry point via system.topic""" + + def __init__(self, system: "ActorSystem"): + self._system = system + + async def write( + self, + topic: str, + *, + writer_id: str | None = None, + ) -> TopicWriter: + """Open topic for writing""" + return await write_topic(self._system, topic, writer_id=writer_id) + + async def read( + self, + topic: str, + *, + reader_id: str | None = None, + auto_start: bool = False, + ) -> TopicReader: + """Open topic for reading""" + return await read_topic( + self._system, topic, reader_id=reader_id, auto_start=auto_start + ) + + __all__ = [ - # High-level API "QueueAPI", - # Async API + "TopicAPI", "Queue", "QueueWriter", "QueueReader", "write_queue", "read_queue", - # Sync wrapper (obtained via .sync()) "SyncQueue", "SyncQueueWriter", "SyncQueueReader", - # Low-level components "StorageManager", "BucketStorage", "get_storage_manager", "get_bucket_ref", "get_topic_broker", - # Backend related "StorageBackend", "MemoryBackend", "register_backend", "get_backend_class", "list_backends", + "write_topic", + "read_topic", + "subscribe_to_topic", + "TopicWriter", + "TopicReader", + "PublishMode", + "PublishResult", ] diff --git 
a/python/pulsing/queue/backend.py b/python/pulsing/streaming/backend.py similarity index 71% rename from python/pulsing/queue/backend.py rename to python/pulsing/streaming/backend.py index 74d88c5f5..d1e7383a4 100644 --- a/python/pulsing/queue/backend.py +++ b/python/pulsing/streaming/backend.py @@ -10,7 +10,7 @@ # Custom backend (e.g. from a plugin package) from some_plugin import MyBackend - from pulsing.queue import register_backend + from pulsing.streaming import register_backend register_backend("my_backend", MyBackend) writer = await write_queue(system, "topic", backend="my_backend") @@ -76,6 +76,25 @@ def total_count(self) -> int: """Total record count""" ... + async def put_tensor(self, data: Any, **kwargs: Any) -> Any: + """Optional tensor-native put API.""" + raise NotImplementedError + + async def get_data(self, batch_meta: Any, fields: list[str] | None = None) -> Any: + """Optional tensor-native batch data API.""" + raise NotImplementedError + + async def get_meta( + self, + fields: list[str], + batch_size: int, + task_name: str = "default", + sampler: Any = None, + **sampling_kwargs: Any, + ) -> Any: + """Optional tensor-native metadata API.""" + raise NotImplementedError + class MemoryBackend: """Pure In-Memory Backend - Built-in Default Implementation @@ -163,6 +182,59 @@ async def stats(self) -> dict[str, Any]: def total_count(self) -> int: return len(self.buffer) + async def put_tensor(self, data: Any, **kwargs: Any) -> Any: + if isinstance(data, list): + await self.put_batch(data) + return {"size": len(data)} + if isinstance(data, dict): + await self.put(data) + return {"size": 1} + raise TypeError("MemoryBackend.put_tensor expects dict or list[dict]") + + async def get_data(self, batch_meta: Any, fields: list[str] | None = None) -> Any: + if isinstance(batch_meta, dict): + indexes = batch_meta.get("global_indexes", []) + else: + indexes = getattr(batch_meta, "global_indexes", []) + rows = [self.buffer[i] for i in indexes if 0 <= i < 
len(self.buffer)] + if not fields: + return rows + return [{k: v for k, v in row.items() if k in fields} for row in rows] + + async def get_meta( + self, + fields: list[str], + batch_size: int, + task_name: str = "default", + sampler: Any = None, + **sampling_kwargs: Any, + ) -> Any: + total = len(self.buffer) + ready = list(range(total)) + if sampler is not None: + sampled, _ = sampler.sample(ready, batch_size, **sampling_kwargs) + else: + sampled = ready[:batch_size] + return { + "samples": [ + { + "partition_id": sampling_kwargs.get("partition_id", "default"), + "global_index": idx, + "fields": { + field: { + "name": field, + "dtype": None, + "shape": None, + "production_status": "ready", + } + for field in fields + }, + } + for idx in sampled + ], + "global_indexes": sampled, + } + # ============================================================ # Backend Registry diff --git a/python/pulsing/topic/broker.py b/python/pulsing/streaming/broker.py similarity index 99% rename from python/pulsing/topic/broker.py rename to python/pulsing/streaming/broker.py index 4210e7f32..82ae9c4fa 100644 --- a/python/pulsing/topic/broker.py +++ b/python/pulsing/streaming/broker.py @@ -9,9 +9,9 @@ from typing import TYPE_CHECKING, Any if TYPE_CHECKING: - from pulsing.actor import ActorRef, ActorSystem + from pulsing.core import ActorRef, ActorSystem -from pulsing.actor import ActorId, remote +from pulsing.core import ActorId, remote logger = logging.getLogger(__name__) diff --git a/python/pulsing/queue/manager.py b/python/pulsing/streaming/manager.py similarity index 98% rename from python/pulsing/queue/manager.py rename to python/pulsing/streaming/manager.py index bf1fc5894..3f48ac1dc 100644 --- a/python/pulsing/queue/manager.py +++ b/python/pulsing/streaming/manager.py @@ -5,12 +5,12 @@ import logging from typing import TYPE_CHECKING, Any -from pulsing.actor import ActorId, ActorRef, ActorSystem, remote +from pulsing.core import ActorId, ActorRef, ActorSystem, remote from .storage 
import BucketStorage if TYPE_CHECKING: - from pulsing.actor.remote import ActorProxy + from pulsing.core.remote import ActorProxy logger = logging.getLogger(__name__) @@ -190,7 +190,7 @@ async def _get_or_create_topic_broker(self, topic_name: str) -> ActorRef: self._topics[topic_name] = await self.system.resolve_named(actor_name) logger.debug(f"Resolved existing topic broker: {actor_name}") except Exception: - from pulsing.topic.broker import TopicBroker + from pulsing.streaming.broker import TopicBroker proxy = await TopicBroker.local( self.system, topic_name, self.system, name=actor_name, public=True @@ -504,7 +504,7 @@ async def get_topic_broker( topic: Topic name max_redirects: Maximum redirect count """ - from pulsing.topic.broker import TopicBroker + from pulsing.streaming.broker import TopicBroker manager = await get_storage_manager(system) diff --git a/python/pulsing/topic/topic.py b/python/pulsing/streaming/pubsub.py similarity index 98% rename from python/pulsing/topic/topic.py rename to python/pulsing/streaming/pubsub.py index 03774c79c..ba54d7cf3 100644 --- a/python/pulsing/topic/topic.py +++ b/python/pulsing/streaming/pubsub.py @@ -10,10 +10,10 @@ from typing import TYPE_CHECKING, Any, Callable, Coroutine if TYPE_CHECKING: - from pulsing.actor import ActorRef - from pulsing.actor.remote import ActorProxy + from pulsing.core import ActorRef + from pulsing.core.remote import ActorProxy -from pulsing.actor import Actor, ActorId, ActorSystem, Message +from pulsing.core import Actor, ActorId, ActorSystem, Message logger = logging.getLogger(__name__) @@ -47,7 +47,7 @@ class PublishResult: async def _get_broker(system: ActorSystem, topic: str) -> "ActorProxy": """Get topic broker proxy (reuses queue/manager infrastructure)""" - from pulsing.queue.manager import get_topic_broker + from pulsing.streaming.manager import get_topic_broker # get_topic_broker already returns ActorProxy (via TopicBroker.resolve) return await get_topic_broker(system, topic) diff --git 
a/python/pulsing/queue/queue.py b/python/pulsing/streaming/queue.py similarity index 99% rename from python/pulsing/queue/queue.py rename to python/pulsing/streaming/queue.py index d1756431b..7f1a7a283 100644 --- a/python/pulsing/queue/queue.py +++ b/python/pulsing/streaming/queue.py @@ -8,8 +8,8 @@ import logging from typing import TYPE_CHECKING, Any -from pulsing.actor import ActorSystem -from pulsing.actor.remote import ActorProxy +from pulsing.core import ActorSystem +from pulsing.core.remote import ActorProxy from .manager import get_bucket_ref, get_storage_manager @@ -334,7 +334,7 @@ async def write_queue( # Custom backend from a plugin from my_plugin import MyBackend - from pulsing.queue import register_backend + from .backend import register_backend register_backend("my_backend", MyBackend) writer = await write_queue(system, "my_queue", backend="my_backend") """ diff --git a/python/pulsing/streaming/storage.py b/python/pulsing/streaming/storage.py new file mode 100644 index 000000000..35fc33fd3 --- /dev/null +++ b/python/pulsing/streaming/storage.py @@ -0,0 +1,263 @@ +"""Bucket Storage Actor - Using Pluggable Backend""" + +import asyncio +import logging +from typing import Any, AsyncIterator + +from pulsing.core import ActorId, StreamMessage, remote + +from .backend import StorageBackend, get_backend_class + +logger = logging.getLogger(__name__) + + +@remote +class BucketStorage: + """Storage Actor for a Single Bucket + + Uses pluggable StorageBackend for data storage. 
+ + Args: + bucket_id: Bucket ID + storage_path: Storage path + batch_size: Batch size + backend: Backend name or backend class + - "memory": Pure in-memory backend (default) + - Custom name/class: Use register_backend() or pass class + backend_options: Additional parameters passed to backend + """ + + def __init__( + self, + bucket_id: int, + storage_path: str, + batch_size: int = 100, + backend: str | type = "memory", + backend_options: dict[str, Any] | None = None, + ): + self.bucket_id = bucket_id + self.storage_path = storage_path + self.batch_size = batch_size + self._backend_type = backend + self._backend_options = backend_options or {} + + # Backend instance (initialized in on_start) + self._backend: StorageBackend | None = None + self._production_status: dict[int, dict[str, str]] = {} + self._consumption_status: dict[str, set[int]] = {} + self._key_to_index: dict[str, int] = {} + + def on_start(self, actor_id: ActorId) -> None: + # Create backend instance + backend_class = get_backend_class(self._backend_type) + self._backend = backend_class( + bucket_id=self.bucket_id, + storage_path=self.storage_path, + batch_size=self.batch_size, + **self._backend_options, + ) + backend_name = getattr(backend_class, "__name__", str(self._backend_type)) + logger.info( + f"BucketStorage[{self.bucket_id}] started with {backend_name} at {self.storage_path}" + ) + + def on_stop(self) -> None: + logger.info(f"BucketStorage[{self.bucket_id}] stopping") + + # ========== Public Remote Methods ========== + + async def put(self, record: dict) -> dict: + """Put a single record. 
+ + Args: + record: Record to store + + Returns: + {"status": "ok"} + """ + if not record: + raise ValueError("Missing 'record'") + before = self._backend.total_count() + await self._backend.put(record) + fields = [k for k in record.keys() if not str(k).startswith("_")] + self._production_status[before] = {field: "ready" for field in fields} + return {"status": "ok"} + + async def put_batch(self, records: list[dict]) -> dict: + """Put multiple records. + + Args: + records: List of records to store + + Returns: + {"status": "ok", "count": N} + """ + if not records: + raise ValueError("Missing 'records'") + start = self._backend.total_count() + await self._backend.put_batch(records) + for i, record in enumerate(records): + fields = [k for k in record.keys() if not str(k).startswith("_")] + self._production_status[start + i] = {field: "ready" for field in fields} + return {"status": "ok", "count": len(records)} + + async def put_tensor( + self, data: Any, partition_id: str = "default", **kwargs: Any + ) -> dict: + if hasattr(self._backend, "put_tensor"): + meta = await self._backend.put_tensor( + data, partition_id=partition_id, **kwargs + ) + if hasattr(meta, "global_indexes") and hasattr(meta, "field_names"): + for idx in meta.global_indexes: + self._production_status[idx] = { + field: "ready" for field in meta.field_names + } + return {"status": "ok"} + raise NotImplementedError("Backend does not support put_tensor") + + async def get(self, limit: int = 100, offset: int = 0) -> list[dict]: + """Get records. + + Args: + limit: Maximum number of records to return + offset: Starting offset + + Returns: + List of records + """ + return await self._backend.get(limit, offset) + + async def get_stream( + self, + limit: int = 100, + offset: int = 0, + wait: bool = False, + timeout: float | None = None, + ) -> AsyncIterator[list[dict]]: + """Get records as a stream. 
+ + Args: + limit: Maximum number of records to return + offset: Starting offset + wait: Whether to wait for new records + timeout: Timeout in seconds + + Yields: + Batches of records + """ + async for records in self._backend.get_stream(limit, offset, wait, timeout): + yield records + + async def flush(self) -> dict: + """Flush pending writes. + + Returns: + {"status": "ok"} + """ + await self._backend.flush() + return {"status": "ok"} + + async def stats(self) -> dict: + """Get storage statistics. + + Returns: + Statistics dict from backend + """ + return await self._backend.stats() + + async def get_meta( + self, + fields: list[str], + batch_size: int, + task_name: str, + sampler: Any = None, + **sampling_kwargs: Any, + ) -> dict: + if hasattr(self._backend, "get_meta"): + meta = await self._backend.get_meta( + fields=fields, + batch_size=batch_size, + task_name=task_name, + sampler=sampler, + **sampling_kwargs, + ) + if hasattr(meta, "to_dict"): + return meta.to_dict() + return meta + + consumed = self._consumption_status.setdefault(task_name, set()) + ready = [] + for idx in sorted(self._production_status): + if idx in consumed: + continue + status = self._production_status[idx] + if all(status.get(field) == "ready" for field in fields): + ready.append(idx) + + if sampler is not None: + sampled, marked = sampler.sample(ready, batch_size, **sampling_kwargs) + else: + sampled = ready[:batch_size] + marked = sampled + consumed.update(marked) + return { + "samples": [ + { + "partition_id": sampling_kwargs.get("partition_id", "default"), + "global_index": idx, + "fields": { + field: { + "name": field, + "dtype": None, + "shape": None, + "production_status": "ready", + } + for field in fields + }, + } + for idx in sampled + ], + "global_indexes": sampled, + } + + async def get_data(self, batch_meta: dict, fields: list[str] | None = None) -> Any: + if hasattr(self._backend, "get_data"): + return await self._backend.get_data(batch_meta, fields=fields) + + indexes = 
batch_meta.get("global_indexes") or [ + sample.get("global_index", -1) for sample in batch_meta.get("samples", []) + ] + if hasattr(self._backend, "get_by_indices"): + rows = await self._backend.get_by_indices(indexes) + else: + rows = [] + for idx in indexes: + rows.extend(await self._backend.get(limit=1, offset=idx)) + if fields: + return [{k: v for k, v in row.items() if k in fields} for row in rows] + return rows + + async def mark_consumed(self, task_name: str, global_indexes: list[int]) -> dict: + self._consumption_status.setdefault(task_name, set()).update(global_indexes) + if hasattr(self._backend, "mark_consumed"): + await self._backend.mark_consumed(task_name, global_indexes) + return {"status": "ok"} + + async def reset_consumption(self, task_name: str) -> dict: + self._consumption_status.pop(task_name, None) + if hasattr(self._backend, "reset_consumption"): + await self._backend.reset_consumption(task_name) + return {"status": "ok"} + + async def clear(self, global_indexes: list[int]) -> dict: + if hasattr(self._backend, "clear"): + await self._backend.clear(global_indexes) + return {"status": "ok"} + + async def kv_register(self, key: str, global_index: int) -> dict: + self._key_to_index[key] = global_index + return {"status": "ok"} + + async def kv_resolve(self, keys: list[str]) -> dict: + return {"indexes": [self._key_to_index.get(key, -1) for key in keys]} diff --git a/python/pulsing/queue/sync_queue.py b/python/pulsing/streaming/sync_queue.py similarity index 100% rename from python/pulsing/queue/sync_queue.py rename to python/pulsing/streaming/sync_queue.py diff --git a/python/pulsing/topic/README.md b/python/pulsing/topic/README.md deleted file mode 100644 index 99568a631..000000000 --- a/python/pulsing/topic/README.md +++ /dev/null @@ -1,107 +0,0 @@ -# Pulsing Topic - 轻量级 Pub/Sub 模块 - -## 概述 - -Topic 模块提供轻量级的 Pub/Sub(发布/订阅)功能,**复用 `queue/manager` 的 StorageManager 进行一致性哈希和集群路由**,确保每个 topic 在集群中只有一个 broker。 - -## 架构 - -``` 
-┌─────────────────────────────────────────────────────────┐ -│ StorageManager │ -│ (queue/manager.py - 每节点一个实例) │ -│ │ -│ ┌─────────────────┐ ┌─────────────────────────────┐ │ -│ │ GetBucket 处理 │ │ GetTopic 处理 │ │ -│ │ (队列 bucket) │ │ (topic broker) │ │ -│ └─────────────────┘ └─────────────────────────────┘ │ -│ │ -│ 一致性哈希 → 确定 owner 节点 → 创建/返回 ActorRef │ -└─────────────────────────────────────────────────────────┘ - │ - ┌────────────────┴────────────────┐ - │ │ - ▼ ▼ - ┌───────────────┐ ┌───────────────┐ - │ BucketStorage │ │ TopicBroker │ - │ (queue 存储) │ │ (pub/sub) │ - └───────────────┘ └───────────────┘ -``` - -## 使用方式 - -### 发布消息 - -```python -from pulsing.topic import write_topic - -writer = await write_topic(system, "events") -await writer.publish({"type": "user_login", "user_id": 123}) -``` - -### 订阅消息 - -```python -from pulsing.topic import read_topic - -reader = await read_topic(system, "events") - -@reader.on_message -async def handle(msg): - print(f"Received: {msg}") - -await reader.start() - -# 停止订阅 -await reader.stop() -``` - -### 发布模式 - -```python -from pulsing.topic import write_topic, PublishMode - -writer = await write_topic(system, "events") - -# 1. Fire-and-forget(默认)- 发送后立即返回 -result = await writer.publish(data) - -# 2. Wait all acks - 等待所有订阅者响应 -result = await writer.publish(data, mode=PublishMode.WAIT_ALL_ACKS) - -# 3. Wait any ack - 等待任一订阅者响应 -result = await writer.publish(data, mode=PublishMode.WAIT_ANY_ACK) - -# 4. 
Best effort - 尝试发送,记录失败 -result = await writer.publish(data, mode=PublishMode.BEST_EFFORT) -``` - -## 与 Queue 的关系 - -| 特性 | Queue | Topic | -|------|-------|-------| -| 消息模式 | 点对点(生产者-消费者) | 广播(发布-订阅) | -| 消息存储 | 持久化(可配置后端) | 无持久化(内存) | -| 消费语义 | 每条消息只被消费一次 | 每条消息被所有订阅者消费 | -| 管理方式 | StorageManager | StorageManager(复用) | - -## 公开 API - -模块只导出以下必要的 API: - -```python -from pulsing.topic import ( - write_topic, # 获取写入句柄 - read_topic, # 获取读取句柄 - TopicWriter, # 写入句柄类型 - TopicReader, # 读取句柄类型 - PublishMode, # 发布模式枚举 - PublishResult, # 发布结果 -) -``` - -## 内部实现 - -- `TopicBroker`: Broker Actor,管理订阅者和消息分发 -- `_SubscriberActor`: 订阅者 Actor,接收消息并调用用户回调 -- `StorageManager.GetTopic`: 处理 topic broker 的创建和路由 diff --git a/python/pulsing/topic/__init__.py b/python/pulsing/topic/__init__.py deleted file mode 100644 index 759aab6dd..000000000 --- a/python/pulsing/topic/__init__.py +++ /dev/null @@ -1,41 +0,0 @@ -"""Topic - Lightweight Pub/Sub Module - -Reuses queue/manager's StorageManager for consistent hashing and redirection, -ensuring only one broker per topic in the cluster. 
- -Usage: - from pulsing.topic import write_topic, read_topic - - # Publish message - writer = await write_topic(system, "events") - await writer.publish({"type": "user_login"}) - - # Subscribe to messages - reader = await read_topic(system, "events") - - @reader.on_message - async def handle(msg): - print(f"Received: {msg}") - - await reader.start() -""" - -from pulsing.topic.topic import ( - PublishMode, - PublishResult, - TopicReader, - TopicWriter, - read_topic, - subscribe_to_topic, - write_topic, -) - -__all__ = [ - "write_topic", - "read_topic", - "subscribe_to_topic", - "TopicWriter", - "TopicReader", - "PublishMode", - "PublishResult", -] diff --git a/tests/python/agent/__init__.py b/tests/python/agent/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/python/test_agent_runtime_lifecycle.py b/tests/python/agent/test_agent_runtime_lifecycle.py similarity index 99% rename from tests/python/test_agent_runtime_lifecycle.py rename to tests/python/agent/test_agent_runtime_lifecycle.py index 0201f5309..22dfd6939 100644 --- a/tests/python/test_agent_runtime_lifecycle.py +++ b/tests/python/agent/test_agent_runtime_lifecycle.py @@ -13,7 +13,7 @@ import pytest -from pulsing.actor import get_system, remote +from pulsing.core import get_system, remote from pulsing.agent import ( agent, cleanup, diff --git a/tests/python/apis/actor/test_actor_behavior.py b/tests/python/apis/actor/test_actor_behavior.py index 88c115942..b9f2ea262 100644 --- a/tests/python/apis/actor/test_actor_behavior.py +++ b/tests/python/apis/actor/test_actor_behavior.py @@ -1,5 +1,5 @@ """ -Tests for Actor Behavior as defined in llms.binding.md (Actor 行为 section). +Tests for Actor Behavior as defined in llms.binding.md (Actor Behavior section). Tests cover: 1. 
Base Actor with receive method (sync/async) @@ -14,7 +14,7 @@ import pytest import pulsing as pul -from pulsing.actor import Actor, ActorId +from pulsing.core import Actor, ActorId # ============================================================================ @@ -417,3 +417,67 @@ async def test_base_actor_async_generator_stream(system): items.append(response) assert len(items) >= 1 # At least one item + + +# ============================================================================ +# Test: on_stop lifecycle hook +# ============================================================================ + + +@pytest.mark.asyncio +async def test_actor_on_stop(): + """Test on_stop lifecycle hook is called when actor system shuts down.""" + # Use a separate system so shutdown doesn't affect other tests + sys = await pul.actor_system() + + LifecycleActor.stopped = False + await sys.spawn(LifecycleActor(), name="on_stop_actor") + await asyncio.sleep(0.1) + + await sys.shutdown() + await asyncio.sleep(0.1) + + assert LifecycleActor.stopped is True + + +# ============================================================================ +# Test: @pul.remote metadata() delegation via _WrappedActor +# ============================================================================ + + +@pul.remote +class _MetadataService: + """Remote service with custom metadata.""" + + def metadata(self) -> dict[str, str]: + return {"service": "metadata_test", "version": "2.0"} + + def ping(self): + return "pong" + + +@pytest.mark.asyncio +async def test_remote_metadata_delegation(): + """_WrappedActor delegates metadata() to user instance.""" + from pulsing.core.remote import _WrappedActor + + # Create raw instance and wrap it + instance = object.__new__(_MetadataService._cls) + instance.__init__() + wrapped = _WrappedActor(instance) + meta = wrapped.metadata() + assert meta == {"service": "metadata_test", "version": "2.0"} + + +@pytest.mark.asyncio +async def test_remote_metadata_delegation_no_metadata(): + 
"""_WrappedActor returns empty dict when user instance has no metadata().""" + from pulsing.core.remote import _WrappedActor + + class _NoMeta: + def ping(self): + return "pong" + + wrapped = _WrappedActor(_NoMeta()) + meta = wrapped.metadata() + assert meta == {} diff --git a/tests/python/apis/actor_system/test_actor_system_api.py b/tests/python/apis/actor_system/test_actor_system_api.py index bd0408555..bfcda1ef4 100644 --- a/tests/python/apis/actor_system/test_actor_system_api.py +++ b/tests/python/apis/actor_system/test_actor_system_api.py @@ -16,7 +16,7 @@ import pytest import pulsing as pul -from pulsing.actor import Actor, ActorId +from pulsing.core import Actor, ActorId # ============================================================================ diff --git a/tests/python/apis/ray_compat/test_ray_compat_api.py b/tests/python/apis/ray_compat/test_ray_compat_api.py index 78986d0d0..b467bd849 100644 --- a/tests/python/apis/ray_compat/test_ray_compat_api.py +++ b/tests/python/apis/ray_compat/test_ray_compat_api.py @@ -15,7 +15,7 @@ import pytest import time -from pulsing.compat import ray +from pulsing.integrations.ray_compat import ray # ============================================================================ diff --git a/tests/python/apis/ray_like/test_ray_like_api.py b/tests/python/apis/ray_like/test_ray_like_api.py index d3716fd25..998c9ea31 100644 --- a/tests/python/apis/ray_like/test_ray_like_api.py +++ b/tests/python/apis/ray_like/test_ray_like_api.py @@ -12,7 +12,7 @@ import pytest import pulsing as pul -from pulsing.actor import Actor +from pulsing.core import Actor # ============================================================================ diff --git a/tests/python/cli/__init__.py b/tests/python/cli/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/python/test_cli_actor.py b/tests/python/cli/test_cli_actor.py similarity index 93% rename from tests/python/test_cli_actor.py rename to 
tests/python/cli/test_cli_actor.py index 0bca36913..570407c7b 100644 --- a/tests/python/test_cli_actor.py +++ b/tests/python/cli/test_cli_actor.py @@ -33,4 +33,4 @@ def test_actor_invalid_class_path_message(self): with pytest.raises(ValueError) as exc_info: actor_cli(actor_type="router") assert "full class path" in str(exc_info.value) - assert "pulsing.actors.worker.TransformersWorker" in str(exc_info.value) + assert "pulsing.serving.worker.TransformersWorker" in str(exc_info.value) diff --git a/tests/python/test_cli_inspect.py b/tests/python/cli/test_cli_inspect.py similarity index 100% rename from tests/python/test_cli_inspect.py rename to tests/python/cli/test_cli_inspect.py diff --git a/tests/python/conftest.py b/tests/python/conftest.py index 03aba5d9b..b9030aa13 100644 --- a/tests/python/conftest.py +++ b/tests/python/conftest.py @@ -22,7 +22,7 @@ async def cleanup_global_system(): # Clean up after test try: - from pulsing.actor import _global_system, shutdown + from pulsing.core import _global_system, shutdown if _global_system is not None: await shutdown() diff --git a/tests/python/core/test_helpers.py b/tests/python/core/test_helpers.py new file mode 100644 index 000000000..1855c29ff --- /dev/null +++ b/tests/python/core/test_helpers.py @@ -0,0 +1,288 @@ +""" +Tests for core/helpers.py and streaming utilities. 
+ +Focus on: +- Protocol unwrapping functions +- Response handling +- Stream message handling +""" + +import asyncio + +import pytest + +import pulsing as pul +from pulsing.core import remote, init, shutdown + + +# ============================================================================ +# Protocol unwrap tests +# ============================================================================ + + +@pytest.mark.asyncio +async def test_unwrap_call(): + """Test wrap/unwrap call message.""" + from pulsing.core.remote import _wrap_call, _unwrap_call + + msg = _wrap_call("my_method", (1, 2, 3), {"key": "val"}, False) + method, args, kwargs, is_async = _unwrap_call(msg) + assert method == "my_method" + assert args == (1, 2, 3) + assert kwargs == {"key": "val"} + assert is_async is False + + msg_async = _wrap_call("async_method", (), {"param": 42}, True) + method, args, kwargs, is_async = _unwrap_call(msg_async) + assert method == "async_method" + assert args == () + assert kwargs == {"param": 42} + assert is_async is True + + +@pytest.mark.asyncio +async def test_unwrap_response(): + """Test wrap/unwrap response message.""" + from pulsing.core.remote import _wrap_response, _unwrap_response + + resp = _wrap_response(result={"data": "success"}) + result, error = _unwrap_response(resp) + assert error is None + assert result == {"data": "success"} + + err = _wrap_response(error="something failed") + result, error = _unwrap_response(err) + assert result is None + assert "something failed" in error + + +# ============================================================================ +# Single value iterator +# ============================================================================ + + +@pytest.mark.asyncio +async def test_single_value_iterator(): + """Test _SingleValueIterator yields one value then stops.""" + from pulsing.core.remote import _SingleValueIterator + + it = _SingleValueIterator("single_value") + results = [] + async for v in it: + results.append(v) + assert 
results == ["single_value"] + + +# ============================================================================ +# Delayed call proxy +# ============================================================================ + + +@pytest.mark.asyncio +async def test_delayed_call_proxy_cancel(): + """Test that DelayedCallProxy tasks can be cancelled.""" + from pulsing.core.remote import _DelayedCallProxy + + await init() + try: + + @remote + class TestActor: + def ping(self): + return "pong" + + actor = await TestActor.spawn() + # Get the ref through the ref property + ref = actor.ref + + proxy = _DelayedCallProxy(ref, 0.1) + # This returns a task that can be cancelled + task = proxy.ping() + task.cancel() + + # Wait a bit + await asyncio.sleep(0.2) + + finally: + await shutdown() + + +# ============================================================================ +# Exception consuming +# ============================================================================ + + +@pytest.mark.asyncio +async def test_consume_task_exception(): + """Test _consume_task_exception handles various exception types.""" + from pulsing.core.remote import _consume_task_exception + + async def raise_cancelled(): + raise asyncio.CancelledError() + + async def raise_runtime(): + raise RuntimeError("stream closed") + + async def raise_value(): + raise ValueError("bad value") + + # CancelledError should be silently consumed + task = asyncio.create_task(raise_cancelled()) + try: + await task + except asyncio.CancelledError: + pass + _consume_task_exception(task) + + # RuntimeError should be logged but not raise + task = asyncio.create_task(raise_runtime()) + try: + await task + except RuntimeError: + pass + _consume_task_exception(task) + + # ValueError should be logged + task = asyncio.create_task(raise_value()) + try: + await task + except ValueError: + pass + _consume_task_exception(task) + + +# ============================================================================ +# Error path tests +# 
============================================================================ + + +@pytest.mark.asyncio +async def test_actor_error_in_async_generator(): + """Test error handling in async generator.""" + + @remote + class FailingAsyncGenActor: + async def failing_gen(self, fail_at): + for i in range(10): + if i == fail_at: + raise RuntimeError(f"Failed at {fail_at}") + yield i + + await init() + try: + actor = await FailingAsyncGenActor.spawn() + results = [] + with pytest.raises(Exception): + async for v in actor.failing_gen(3): + results.append(v) + # May or may not have results depending on when exception raised + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_sync_generator_error(): + """Test error in sync generator.""" + + @remote + class FailingSyncGenActor: + def failing_gen(self, fail_at): + for i in range(10): + if i == fail_at: + raise RuntimeError(f"Failed at {fail_at}") + yield i + + await init() + try: + actor = await FailingSyncGenActor.spawn() + result = await actor.failing_gen(3) + items = [] + with pytest.raises(Exception): + if hasattr(result, "__aiter__"): + async for v in result: + items.append(v) + elif hasattr(result, "__iter__"): + for v in result: + items.append(v) + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_actor_error_in_on_start(): + """Test error in on_start - actor spawn should still succeed but method calls fail.""" + # Note: When on_start raises, the actor's mailbox may close + # This test verifies the error path is exercised + + @remote + class FailingOnStartActor: + def __init__(self): + self.started = False + + def on_start(self, actor_id): + raise ValueError("on_start error") + + def ping(self): + return "pong" + + await init() + try: + # Actor spawn should succeed + actor = await FailingOnStartActor.spawn() + # The on_start error may cause the actor to stop + # This tests the error handling path + finally: + await shutdown() + + +# 
============================================================================ +# Actor lifecycle +# ============================================================================ + + +@pytest.mark.asyncio +async def test_actor_lifecycle(): + """Test actor lifecycle - on_start callback.""" + + lifecycle_events = [] + + @remote + class LifecycleActor: + def __init__(self, events): + self.events = events + self.value = 0 + + def on_start(self, actor_id): + self.events.append(("on_start", str(actor_id))) + + def on_stop(self): + self.events.append(("on_stop", None)) + + def metadata(self): + return {"type": "lifecycle"} + + def increment(self): + self.value += 1 + return self.value + + def get_value(self): + return self.value + + await init() + try: + actor = await LifecycleActor.spawn(lifecycle_events) + + # Ensure actor has started (on_start runs before first message is processed) + _ = await actor.get_value() + assert any(e[0] == "on_start" for e in lifecycle_events) + + # Use the actor + assert await actor.increment() == 1 + assert await actor.increment() == 2 + + finally: + await shutdown() + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/python/core/test_remote_edge_cases.py b/tests/python/core/test_remote_edge_cases.py new file mode 100644 index 000000000..4b4b8e772 --- /dev/null +++ b/tests/python/core/test_remote_edge_cases.py @@ -0,0 +1,584 @@ +""" +Tests for core/remote.py edge cases and uncovered paths. 
+ +Focus areas: +- _WrappedActor edge cases +- Protocol wire format (call/response) +- Attribute access +- Sync generator handling +- on_start/on_stop callbacks +- metadata method +- Error paths +""" + +import asyncio + +import pytest + +import pulsing as pul +from pulsing.core import remote, init, shutdown, get_system + + +# ============================================================================ +# Fixtures +# ============================================================================ + + +@pytest.fixture +async def system(): + """Create a standalone ActorSystem for testing.""" + sys = await pul.actor_system() + yield sys + await sys.shutdown() + + +# ============================================================================ +# _WrappedActor: on_start/on_stop callbacks +# ============================================================================ + + +@pytest.mark.asyncio +async def test_wrapped_actor_on_start_callback(): + """Test that on_start is called when actor starts.""" + + @remote + class ActorWithOnStart: + def __init__(self): + self.started = False + self.actor_id = None + + def on_start(self, actor_id): + self.started = True + self.actor_id = actor_id + + def is_started(self): + return self.started, str(self.actor_id) if self.actor_id else None + + await init() + try: + actor = await ActorWithOnStart.spawn() + started, aid = await actor.is_started() + assert started is True + assert aid is not None + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_wrapped_actor_on_stop_callback(): + """Test that on_stop is called when actor stops.""" + results = [] + + @remote + class ActorWithOnStop: + def __init__(self, results_list): + self.results = results_list + + def on_stop(self): + self.results.append("stopped") + + def ping(self): + return "pong" + + await init() + try: + actor_name = "on_stop_test_actor" + actor = await ActorWithOnStop.spawn(results, name=actor_name) + assert await actor.ping() == "pong" + # Stop the actor by 
name - this should trigger on_stop + await get_system().stop(actor_name) + await asyncio.sleep(0.1) + assert "stopped" in results + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_wrapped_actor_no_on_start_method(): + """Test actor without on_start works normally.""" + + @remote + class NoOnStartActor: + def __init__(self): + self.value = 42 + + def get_value(self): + return self.value + + await init() + try: + actor = await NoOnStartActor.spawn() + assert await actor.get_value() == 42 + finally: + await shutdown() + + +# ============================================================================ +# _WrappedActor: metadata method +# ============================================================================ + + +@pytest.mark.asyncio +async def test_wrapped_actor_metadata(): + """Test that metadata method is called and returned.""" + + @remote + class ActorWithMetadata: + def metadata(self): + return {"version": "1.0", "type": "test"} + + def ping(self): + return "pong" + + await init() + try: + actor = await ActorWithMetadata.spawn(name="metadata_test") + # Metadata should be accessible + ref = actor.ref + # The metadata is stored during spawn + assert await actor.ping() == "pong" + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_wrapped_actor_no_metadata(): + """Test actor without metadata method returns empty dict.""" + + @remote + class NoMetadataActor: + def ping(self): + return "pong" + + await init() + try: + actor = await NoMetadataActor.spawn() + assert await actor.ping() == "pong" + finally: + await shutdown() + + +# ============================================================================ +# _WrappedActor: attribute access +# ============================================================================ + + +@pytest.mark.asyncio +async def test_attribute_access(): + """Test accessing public attributes through protocol.""" + + @remote + class AttributeActor: + def __init__(self): + self.counter = 0 + self.name 
= "test_actor" + + def increment(self): + self.counter += 1 + return self.counter + + await init() + try: + actor = await AttributeActor.spawn() + # Method call + assert await actor.increment() == 1 + assert await actor.increment() == 2 + finally: + await shutdown() + + +# ============================================================================ +# _WrappedActor: sync generator handling +# ============================================================================ + + +@pytest.mark.asyncio +async def test_sync_generator_method(): + """Test sync generator method returns sequence of values.""" + + @remote + class GeneratorActor: + def count_up(self, n): + for i in range(n): + yield i + + await init() + try: + actor = await GeneratorActor.spawn() + # Sync generator methods need await then iterate + result = await actor.count_up(5) + items = [] + if hasattr(result, "__aiter__"): + async for item in result: + items.append(item) + elif hasattr(result, "__iter__"): + for item in result: + items.append(item) + else: + items.append(result) + assert len(items) >= 1 + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_sync_generator_with_exception(): + """Test sync generator that raises exception.""" + + @remote + class FailingGeneratorActor: + def failing_gen(self, fail_at): + for i in range(10): + if i == fail_at: + raise ValueError(f"Failed at {fail_at}") + yield i + + await init() + try: + actor = await FailingGeneratorActor.spawn() + result = await actor.failing_gen(3) + items = [] + with pytest.raises(Exception): + if hasattr(result, "__aiter__"): + async for item in result: + items.append(item) + elif hasattr(result, "__iter__"): + for item in result: + items.append(item) + finally: + await shutdown() + + +# ============================================================================ +# Protocol wire format +# ============================================================================ + + +@pytest.mark.asyncio +async def 
test_protocol_call_format(): + """Test protocol call/response format (single wire format).""" + from pulsing.core.remote import ( + _wrap_call, + _wrap_response, + _unwrap_call, + _unwrap_response, + ) + + msg = _wrap_call("test_method", (1, 2), {"key": "value"}, True) + assert msg["__pulsing_proto__"] == "1" + assert msg["__pulsing__"]["call"] == "test_method" + assert msg["__pulsing__"]["async"] is True + assert msg["user_data"]["args"] == (1, 2) + + method, args, kwargs, is_async = _unwrap_call(msg) + assert method == "test_method" + assert args == (1, 2) + assert kwargs == {"key": "value"} + assert is_async is True + + resp = _wrap_response(result="success") + assert resp["__pulsing_proto__"] == "1" + assert resp["__pulsing__"]["result"] == "success" + result, error = _unwrap_response(resp) + assert result == "success" + assert error is None + + err_resp = _wrap_response(error="failed") + assert err_resp["__pulsing__"]["error"] == "failed" + result, error = _unwrap_response(err_resp) + assert result is None + assert error == "failed" + + +# ============================================================================ +# Invalid method handling +# ============================================================================ + + +@pytest.mark.asyncio +async def test_call_private_method(): + """Test that calling private methods (starting with _) returns error.""" + + @remote + class PrivateMethodActor: + def public_method(self): + return "public" + + def _private_method(self): + return "private" + + await init() + try: + actor = await PrivateMethodActor.spawn() + # Public method should work + assert await actor.public_method() == "public" + # Private method should be blocked (AttributeError on proxy) + with pytest.raises(AttributeError): + _ = actor._private_method + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_call_nonexistent_method(): + """Test that calling non-existent method returns error.""" + + @remote + class SimpleActor: + def 
existing_method(self): + return "exists" + + await init() + try: + actor = await SimpleActor.spawn() + # Existing method works + assert await actor.existing_method() == "exists" + # Non-existent method raises AttributeError + with pytest.raises(AttributeError): + _ = actor.nonexistent_method + finally: + await shutdown() + + +# ============================================================================ +# Message protocol edge cases +# ============================================================================ + + +@pytest.mark.asyncio +async def test_unknown_message_type(): + """Test actor behavior with unknown message type.""" + from pulsing._core import Message + + @remote + class MessageHandlingActor: + def ping(self): + return "pong" + + await init() + try: + actor = await MessageHandlingActor.spawn() + # Normal call works + assert await actor.ping() == "pong" + finally: + await shutdown() + + +# ============================================================================ +# Async generator edge cases +# ============================================================================ + + +@pytest.mark.asyncio +async def test_async_generator_immediate_break(): + """Test async generator when caller breaks immediately.""" + + @remote + class AsyncGenActor: + async def stream_values(self, n): + for i in range(n): + await asyncio.sleep(0.01) + yield i + + await init() + try: + actor = await AsyncGenActor.spawn() + # Break on first value + count = 0 + async for value in actor.stream_values(10): + count += 1 + if count >= 2: + break + assert count == 2 + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_async_generator_empty(): + """Test async generator that yields nothing.""" + + @remote + class EmptyGenActor: + async def empty_stream(self): + return + yield # Never reached + + await init() + try: + actor = await EmptyGenActor.spawn() + results = [] + async for value in actor.empty_stream(): + results.append(value) + assert results == [] + finally: 
+ await shutdown() + + +# ============================================================================ +# Complex scenarios +# ============================================================================ + + +@pytest.mark.asyncio +async def test_actor_with_both_sync_and_async_methods(): + """Test actor mixing sync methods, async methods, and generators.""" + + @remote + class MixedActor: + def __init__(self): + self.sync_count = 0 + self.async_count = 0 + + def sync_method(self, x): + self.sync_count += 1 + return x * 2 + + async def async_method(self, x): + await asyncio.sleep(0.01) + self.async_count += 1 + return x * 3 + + async def async_gen(self, n): + for i in range(n): + await asyncio.sleep(0.01) + yield i * 10 + + def get_counts(self): + return self.sync_count, self.async_count + + await init() + try: + actor = await MixedActor.spawn() + + # Sync method + assert await actor.sync_method(5) == 10 + + # Async method + assert await actor.async_method(5) == 15 + + # Async generator + async_gen_results = [v async for v in actor.async_gen(3)] + assert async_gen_results == [0, 10, 20] + + # Check counts + sc, ac = await actor.get_counts() + assert sc == 1 + assert ac == 1 + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_actor_exception_preserves_state(): + """Test that actor state is preserved after exception.""" + + @remote + class StatefulActor: + def __init__(self): + self.value = 0 + + def increment(self): + self.value += 1 + return self.value + + def fail(self): + raise ValueError("Intentional failure") + + def get_value(self): + return self.value + + await init() + try: + actor = await StatefulActor.spawn() + + # First increment + assert await actor.increment() == 1 + + # This fails but shouldn't corrupt state + with pytest.raises(Exception): + await actor.fail() + + # State should still be intact + assert await actor.get_value() == 1 + + # Can continue incrementing + assert await actor.increment() == 2 + finally: + await shutdown() + + 
+# ============================================================================ +# Delayed call advanced scenarios +# ============================================================================ + + +@pytest.mark.asyncio +async def test_delayed_call_with_args(): + """Test delayed call with arguments.""" + + @remote + class DelayedArgsActor: + def __init__(self): + self.messages = [] + + def schedule_message(self, delay, msg): + self.delayed(delay).record(msg) + + def record(self, msg): + self.messages.append(msg) + + def get_messages(self): + return list(self.messages) + + await init() + try: + actor = await DelayedArgsActor.spawn() + await actor.schedule_message(0.05, "hello") + + assert await actor.get_messages() == [] + await asyncio.sleep(0.1) + assert await actor.get_messages() == ["hello"] + finally: + await shutdown() + + +@pytest.mark.asyncio +async def test_multiple_delayed_calls(): + """Test multiple delayed calls execute in order.""" + + @remote + class MultiDelayedActor: + def __init__(self): + self.events = [] + + def schedule_all(self): + self.delayed(0.02).record("second") + self.delayed(0.01).record("first") + self.events.append("immediate") + + def record(self, msg): + self.events.append(msg) + + def get_events(self): + return list(self.events) + + await init() + try: + actor = await MultiDelayedActor.spawn() + await actor.schedule_all() + + # Immediate should be recorded + assert "immediate" in await actor.get_events() + + await asyncio.sleep(0.05) + + events = await actor.get_events() + assert "first" in events + assert "second" in events + finally: + await shutdown() + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/python/integrations/__init__.py b/tests/python/integrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/python/test_ray_compat_running_loop.py b/tests/python/integrations/test_ray_compat_running_loop.py similarity index 93% rename from 
tests/python/test_ray_compat_running_loop.py rename to tests/python/integrations/test_ray_compat_running_loop.py index f5b6b4d22..2af79e5c9 100644 --- a/tests/python/test_ray_compat_running_loop.py +++ b/tests/python/integrations/test_ray_compat_running_loop.py @@ -7,7 +7,7 @@ def test_ray_compat_init_inside_running_loop(): This covers environments like Jupyter or pytest-asyncio where an event loop is already running on the main thread. """ - from pulsing.compat import ray + from pulsing.integrations.ray_compat import ray async def main(): ray.init() diff --git a/tests/python/integrations/test_ray_init.py b/tests/python/integrations/test_ray_init.py new file mode 100644 index 000000000..5546e31bc --- /dev/null +++ b/tests/python/integrations/test_ray_init.py @@ -0,0 +1,322 @@ +""" +Tests for pulsing.ray - Pulsing initialization in Ray cluster + +Tests: +- init_in_ray() basic behavior +- Seed registration via Ray KV store +- Multi-actor cluster formation +- async_init_in_ray() +- cleanup() +- Error cases +""" + +import pytest + +# Skip all tests if ray is not installed +ray = pytest.importorskip("ray") + + +def _reset_pulsing_state(): + """Reset all Pulsing module state (system, background loop, KV).""" + import pulsing.core as pc + import pulsing.integrations.ray as pray + + # Shutdown Pulsing system via background loop + if pc._global_system is not None and pray._loop is not None: + try: + pray._run_sync(pray._do_shutdown()) + except Exception: + pass + + # Force clear global system (safety net) + pc._global_system = None + + # Stop background event loop + if pray._loop is not None: + try: + pray._loop.call_soon_threadsafe(pray._loop.stop) + except Exception: + pass + if pray._thread is not None: + try: + pray._thread.join(timeout=5) + except Exception: + pass + pray._loop = None + pray._thread = None + + # Clean KV store + try: + pray.cleanup() + except Exception: + pass + + +NUM_WORKERS = 20 + + +@pytest.fixture +def ray_env(): + """Initialize local Ray cluster 
with clean Pulsing state.""" + ray.init(num_cpus=NUM_WORKERS + 1) + _reset_pulsing_state() # ensure clean state before test + yield + _reset_pulsing_state() # cleanup after test + ray.shutdown() + + +# ============================================================================ +# Test: init_in_ray() basic +# ============================================================================ + + +def test_init_returns_system(ray_env): + """init_in_ray() returns a Pulsing ActorSystem.""" + from pulsing.integrations.ray import init_in_ray + + system = init_in_ray() + assert system is not None + assert system.addr is not None + + +def test_init_stores_seed_in_kv(ray_env): + """First caller's address is stored as seed in Ray KV.""" + from pulsing.integrations.ray import _get_seed, init_in_ray + + system = init_in_ray() + seed_addr = _get_seed() + assert seed_addr is not None + assert seed_addr == str(system.addr) + + +def test_init_sets_global_system(ray_env): + """init_in_ray() sets pulsing.actor global system.""" + from pulsing.core import is_initialized + from pulsing.integrations.ray import init_in_ray + + assert not is_initialized() + init_in_ray() + assert is_initialized() + + +# ============================================================================ +# Test: error cases +# ============================================================================ + + +def test_init_raises_without_ray(): + """init_in_ray() raises when Ray is not initialized.""" + from pulsing.integrations.ray import init_in_ray + + with pytest.raises(RuntimeError, match="Ray not initialized"): + init_in_ray() + + +async def test_async_init_raises_without_ray(): + """async_init_in_ray() raises when Ray is not initialized.""" + from pulsing.integrations.ray import async_init_in_ray + + with pytest.raises(RuntimeError, match="Ray not initialized"): + await async_init_in_ray() + + +# ============================================================================ +# Test: cleanup() +# 
============================================================================ + + +def test_cleanup_clears_kv(ray_env): + """cleanup() removes seed from KV store.""" + from pulsing.integrations.ray import _get_seed, cleanup, init_in_ray + + init_in_ray() + assert _get_seed() is not None + + cleanup() + assert _get_seed() is None + + +# ============================================================================ +# Test: Ray actor integration +# ============================================================================ + + +def test_init_in_ray_actor(ray_env): + """init_in_ray() works inside a Ray actor.""" + + @ray.remote + class Worker: + def setup(self): + from pulsing.integrations.ray import init_in_ray + + system = init_in_ray() + return str(system.addr) + + def ping(self): + return "pong" + + worker = Worker.remote() + addr = ray.get(worker.setup.remote()) + assert addr is not None + assert ":" in addr + + result = ray.get(worker.ping.remote()) + assert result == "pong" + + +def test_multi_actor_same_seed(ray_env): + """All workers in separate processes discover the same seed.""" + import os + + from pulsing.integrations.ray import _get_seed, init_in_ray + + driver_pid = os.getpid() + + # Driver becomes seed + init_in_ray() + seed_addr = _get_seed() + + @ray.remote + class Worker: + def setup(self): + import os + + from pulsing.integrations.ray import init_in_ray + + init_in_ray() + return os.getpid() + + def get_seed(self): + from pulsing.integrations.ray import _get_seed + + return _get_seed() + + workers = [Worker.remote() for _ in range(NUM_WORKERS)] + pids = ray.get([w.setup.remote() for w in workers]) + + # Verify multi-process: all PIDs different from driver + assert all( + pid != driver_pid for pid in pids + ), "Workers should run in separate processes" + + # Verify multi-process: workers are in distinct processes + unique_pids = set(pids) + assert ( + len(unique_pids) == NUM_WORKERS + ), f"Expected {NUM_WORKERS} distinct processes, got 
{len(unique_pids)}" + + # All workers see the same seed + seeds = ray.get([w.get_seed.remote() for w in workers]) + assert all(s == seed_addr for s in seeds) + + +def test_concurrent_init_without_driver(ray_env): + """20 processes concurrently call init_in_ray(), exactly one becomes seed.""" + import os + + @ray.remote + class Worker: + def setup(self): + import os + + from pulsing.integrations.ray import init_in_ray + + system = init_in_ray() + return os.getpid(), str(system.addr) + + def get_seed(self): + from pulsing.integrations.ray import _get_seed + + return _get_seed() + + # Launch all workers at once — they race to become seed + workers = [Worker.remote() for _ in range(NUM_WORKERS)] + results = ray.get([w.setup.remote() for w in workers]) + pids = [r[0] for r in results] + addrs = [r[1] for r in results] + + # Verify multi-process: all workers in distinct processes + unique_pids = set(pids) + assert ( + len(unique_pids) == NUM_WORKERS + ), f"Expected {NUM_WORKERS} distinct processes, got {len(unique_pids)}" + # None should be the driver + assert os.getpid() not in unique_pids + + # All workers got a valid address + assert len(addrs) == NUM_WORKERS + assert all(a and ":" in a for a in addrs) + + # All workers see the same seed + seeds = ray.get([w.get_seed.remote() for w in workers]) + unique_seeds = set(seeds) + assert ( + len(unique_seeds) == 1 + ), f"Expected 1 seed, got {len(unique_seeds)}: {unique_seeds}" + + # The seed must be one of the workers' addresses + seed = unique_seeds.pop() + assert seed in addrs + + +def test_actor_becomes_seed_without_driver(ray_env): + """When driver doesn't init, first actor becomes seed.""" + + @ray.remote + class Worker: + def setup(self): + from pulsing.integrations.ray import init_in_ray + + system = init_in_ray() + return str(system.addr) + + def get_seed(self): + from pulsing.integrations.ray import _get_seed + + return _get_seed() + + # First actor becomes seed + w1 = Worker.remote() + addr1 = 
ray.get(w1.setup.remote()) + seed = ray.get(w1.get_seed.remote()) + assert seed == addr1 + + # Second actor joins + w2 = Worker.remote() + ray.get(w2.setup.remote()) + seed2 = ray.get(w2.get_seed.remote()) + assert seed2 == seed + + +# ============================================================================ +# Test: async_init_in_ray() +# ============================================================================ + + +async def test_async_init_returns_system(ray_env): + """async_init_in_ray() returns a system.""" + from pulsing.integrations.ray import async_init_in_ray + + system = await async_init_in_ray() + assert system is not None + assert system.addr is not None + + +async def test_async_init_stores_seed(ray_env): + """async_init_in_ray() stores seed in KV.""" + from pulsing.integrations.ray import _get_seed, async_init_in_ray + + system = await async_init_in_ray() + assert _get_seed() == str(system.addr) + + +# ============================================================================ +# Test: counting game (end-to-end Pulsing messaging across Ray workers) +# ============================================================================ + + +def test_counting_game(ray_env): + """20 processes play counting game via Pulsing actor (reuses pulsing.examples).""" + from pulsing.examples.counting_game import run + + run(num_workers=NUM_WORKERS) diff --git a/tests/python/streaming/__init__.py b/tests/python/streaming/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/python/test_queue.py b/tests/python/streaming/test_queue.py similarity index 99% rename from tests/python/test_queue.py rename to tests/python/streaming/test_queue.py index 318fac679..ae9a0a624 100644 --- a/tests/python/test_queue.py +++ b/tests/python/streaming/test_queue.py @@ -24,7 +24,7 @@ import pytest import pulsing as pul -from pulsing.queue import ( +from pulsing.streaming import ( BucketStorage, Queue, QueueReader, @@ -1054,7 +1054,7 @@ def 
test_sync_queue_standalone(): # Setup in background loop async def setup(): import pulsing as pul - from pulsing.queue import write_queue, read_queue + from pulsing.streaming import write_queue, read_queue system = await pul.actor_system() writer = await write_queue( @@ -1121,7 +1121,7 @@ def test_sync_writer_reader_standalone(): async def setup(): import pulsing as pul - from pulsing.queue import write_queue, read_queue + from pulsing.streaming import write_queue, read_queue system = await pul.actor_system() writer = await write_queue( @@ -1188,7 +1188,7 @@ def test_sync_reader_offset_standalone(): async def setup(): import pulsing as pul - from pulsing.queue import write_queue, read_queue + from pulsing.streaming import write_queue, read_queue system = await pul.actor_system() writer = await write_queue( diff --git a/tests/python/test_queue_backends.py b/tests/python/streaming/test_queue_backends.py similarity index 97% rename from tests/python/test_queue_backends.py rename to tests/python/streaming/test_queue_backends.py index 67d20b69c..1f87310b2 100644 --- a/tests/python/test_queue_backends.py +++ b/tests/python/streaming/test_queue_backends.py @@ -19,7 +19,7 @@ import pytest import pulsing as pul -from pulsing.queue import ( +from pulsing.streaming import ( BucketStorage, MemoryBackend, Queue, @@ -448,6 +448,24 @@ async def stats(self) -> dict[str, Any]: def total_count(self) -> int: return len(self.data) + async def put_tensor(self, data: Any, **kwargs: Any) -> Any: + raise NotImplementedError + + async def get_data( + self, batch_meta: Any, fields: list[str] | None = None + ) -> Any: + raise NotImplementedError + + async def get_meta( + self, + fields: list[str], + batch_size: int, + task_name: str = "default", + sampler: Any = None, + **sampling_kwargs: Any, + ) -> Any: + raise NotImplementedError + # Verify it satisfies protocol (duck typing) backend = MinimalBackend(bucket_id=0) assert isinstance(backend, StorageBackend) diff --git 
a/tests/python/test_queue_topic_chaos.py b/tests/python/streaming/test_queue_topic_chaos.py similarity index 89% rename from tests/python/test_queue_topic_chaos.py rename to tests/python/streaming/test_queue_topic_chaos.py index a47e5dbba..8e4f75495 100644 --- a/tests/python/test_queue_topic_chaos.py +++ b/tests/python/streaming/test_queue_topic_chaos.py @@ -1,12 +1,12 @@ """ -Queue & Topic 混沌测试 +Queue & Topic Chaos Testing  -在随机延迟、高并发、动态加入/退出、随机参数等混沌场景下验证: -- Queue: 数据不丢、不重(按 rank/world_size 分桶)、无死锁 -- Topic: 订阅者动态变化时发布不崩溃、交付语义可区分、慢/失败订阅者被踢或超时 -- 与 StorageManager 共享资源时无阻塞、无竞态 +Validates under chaotic scenarios with random delays, high concurrency, dynamic join/leave, random parameters: +- Queue: no data loss, no duplication (bucketed by rank/world_size), no deadlock +- Topic: no crash during publish when subscribers dynamically change, distinguishable delivery semantics, slow/failed subscribers kicked or timeout +- No blocking or race conditions when sharing resources with StorageManager  -运行: pytest tests/python/test_queue_topic_chaos.py -v -s +Run: pytest tests/python/streaming/test_queue_topic_chaos.py -v -s """ from __future__ import annotations @@ -20,12 +20,17 @@ import pytest import pulsing as pul -from pulsing.queue import read_queue, write_queue -from pulsing.topic import PublishMode, read_topic, write_topic +from pulsing.streaming import ( + read_queue, + write_queue, + PublishMode, + read_topic, + write_topic, +) # ============================================================================= -# Fixtures & 随机负载工具 +# Fixtures & Random Load Utilities # ============================================================================= @@ -44,21 +49,21 @@ def temp_storage_path(): def _random_sleep(max_ms: int = 20): - """短随机延迟,模拟混沌.""" + """Short random delay to simulate chaos.""" return asyncio.sleep(random.uniform(0, max_ms) / 1000.0) def _chaos_sleep( min_ms: int = 0, max_ms: int = 50, occasional_long_ms: int | None = 120 ): - """随机延迟:常规 min~max_ms,小概率长延迟(模拟抖动)。""" + 
"""Random delay: normally min~max_ms, small chance of long delay (simulating jitter).""" if occasional_long_ms and random.random() < 0.08: return asyncio.sleep(random.uniform(max_ms, occasional_long_ms) / 1000.0) return asyncio.sleep(random.uniform(min_ms, max_ms) / 1000.0) # ============================================================================= -# Queue 混沌 +# Queue Chaos # ============================================================================= @@ -66,7 +71,7 @@ def _chaos_sleep( async def test_queue_chaos_concurrent_producer_consumer( actor_system, temp_storage_path ): - """混沌:多生产者 + 多消费者(rank/world_size),随机 put/get/延迟,验证不丢不重.""" + """Chaos: multiple producers + multiple consumers (rank/world_size), random put/get/delay, verify no loss or duplication.""" random.seed(42) topic = "chaos_q_concurrent" num_buckets = random.choice([3, 4, 5, 6]) @@ -139,7 +144,7 @@ async def consumer(rank: int): async def test_queue_chaos_many_buckets_parallel_handles( actor_system, temp_storage_path ): - """混沌:多桶、多 writer 并行写,多 reader 并行读;用单 reader 收齐后校验总数(多 reader 会瓜分数据).""" + """Chaos: many buckets, multiple writers in parallel, multiple readers in parallel; use single reader to collect all and verify total (multiple readers would split data).""" random.seed(43) topic = "chaos_q_many_buckets" num_buckets = random.randint(4, 12) @@ -174,7 +179,7 @@ async def write_batch(wid: int): await asyncio.gather(*[write_batch(w) for w in range(num_writers)]) - # 单 reader 读全量,避免多 reader 瓜分导致并集不足 expected_count + # Single reader reads full data, avoiding multiple readers splitting data resulting in insufficient union r = await read_queue( actor_system, topic=topic, @@ -198,7 +203,7 @@ async def write_batch(wid: int): @pytest.mark.asyncio async def test_queue_chaos_reader_reset_and_reread(actor_system, temp_storage_path): - """混沌:同一 reader 多次 reset + get,与间歇写入交错,随机 limit/延迟.""" + """Chaos: same reader multiple reset + get, interleaved with intermittent writes, random limit/delay.""" 
random.seed(44) topic = "chaos_q_reset" num_buckets = random.choice([2, 3, 4]) @@ -238,13 +243,13 @@ async def test_queue_chaos_reader_reset_and_reread(actor_system, temp_storage_pa # ============================================================================= -# Topic 混沌 +# Topic Chaos # ============================================================================= @pytest.mark.asyncio async def test_topic_chaos_subscribers_join_leave_during_publish(actor_system): - """混沌:发布过程中订阅者动态加入/退出,随机阶段数/每阶段消息数/模式/延迟.""" + """Chaos: subscribers dynamically join/leave during publishing, random phases/messages per phase/mode/delay.""" random.seed(45) topic_name = "chaos_t_join_leave" writer = await write_topic(actor_system, topic_name) @@ -299,7 +304,7 @@ async def on_msg(msg): @pytest.mark.asyncio async def test_topic_chaos_many_publishers_many_subscribers(actor_system): - """混沌:多发布者 + 多订阅者,随机发布模式/条数/延迟,验证每人收到预期条数.""" + """Chaos: multiple publishers + multiple subscribers, random publish mode/count/delay, verify each receives expected count.""" random.seed(46) topic_name = "chaos_t_many" num_publishers = random.randint(3, 6) @@ -348,7 +353,7 @@ async def publish_batch(pid: int): @pytest.mark.asyncio async def test_topic_chaos_slow_callback_best_effort(actor_system): - """混沌:部分订阅者 callback 很慢,随机条数/延迟/超时,best_effort 验证不崩溃.""" + """Chaos: some subscriber callbacks are slow, random count/delay/timeout, best_effort verify no crash.""" random.seed(47) topic_name = "chaos_t_slow" writer = await write_topic(actor_system, topic_name) @@ -388,13 +393,13 @@ async def slow_cb(m): # ============================================================================= -# 混合:Queue + Topic 同时混沌 +# Mixed: Queue + Topic Chaos Simultaneously # ============================================================================= @pytest.mark.asyncio async def test_chaos_mixed_queue_and_topic_same_loop(actor_system, temp_storage_path): - """混沌:同一 loop 内 queue + topic 并发,随机条数/桶数/延迟.""" + """Chaos: queue + topic 
concurrent in same loop, random count/buckets/delay.""" random.seed(48) q_topic = "chaos_mixed_q" t_topic = "chaos_mixed_t" @@ -448,7 +453,7 @@ async def topic_chaos(): @pytest.mark.asyncio async def test_chaos_rapid_open_close_handles(actor_system, temp_storage_path): - """混沌:快速反复创建/丢弃 queue writer 和 topic reader,随机次数/延迟.""" + """Chaos: rapidly create/discard queue writer and topic reader repeatedly, random times/delay.""" random.seed(49) n_writes = random.randint(6, 12) n_readers = random.randint(4, 10) @@ -480,13 +485,13 @@ async def test_chaos_rapid_open_close_handles(actor_system, temp_storage_path): assert result.subscriber_count >= 0 # ------------------------------------------------------------------------- - # 新增:高复杂度 / 随机负载风暴 + # Added: High Complexity / Random Load Storm # ------------------------------------------------------------------------- @pytest.mark.asyncio async def test_queue_chaos_storm_random_params(actor_system, temp_storage_path): - """混沌风暴:全随机参数(桶数/消费者数/生产者数/条数/get limit/延迟),验证不丢不重.""" + """Chaos storm: fully random parameters (buckets/consumers/producers/count/get limit/delay), verify no loss or duplication.""" random.seed(100) topic = "chaos_q_storm" num_buckets = random.randint(2, 8) @@ -565,7 +570,7 @@ async def consumer(rank: int): @pytest.mark.asyncio async def test_topic_chaos_storm_random_params(actor_system): - """混沌风暴:全随机 topic 参数(发布者/订阅者数量、条数、模式、延迟),验证交付.""" + """Chaos storm: fully random topic parameters (publishers/subscribers count, messages, mode, delay), verify delivery.""" random.seed(101) topic_name = "chaos_t_storm" num_publishers = random.randint(2, 5) @@ -611,7 +616,7 @@ async def pub(pid: int): @pytest.mark.asyncio async def test_chaos_storm_multi_queue_multi_topic(actor_system, temp_storage_path): - """混沌风暴:多 queue + 多 topic 同时跑,各自随机负载,验证无死锁、数据一致.""" + """Chaos storm: multiple queues + multiple topics running simultaneously, each with random load, verify no deadlock and data consistency.""" random.seed(102) q_topics 
= ["chaos_storm_q1", "chaos_storm_q2"] t_topics = ["chaos_storm_t1", "chaos_storm_t2"] diff --git a/tests/python/test_topic.py b/tests/python/streaming/test_topic.py similarity index 97% rename from tests/python/test_topic.py rename to tests/python/streaming/test_topic.py index c21ce051b..08f64e195 100644 --- a/tests/python/test_topic.py +++ b/tests/python/streaming/test_topic.py @@ -16,7 +16,7 @@ import pytest import pulsing as pul -from pulsing.topic import ( +from pulsing.streaming import ( PublishMode, PublishResult, TopicReader, @@ -728,7 +728,7 @@ async def test_double_start_stop(actor_system): @pytest.mark.asyncio async def test_topic_broker_via_storage_manager(actor_system): """Test that topic broker is created via StorageManager.""" - from pulsing.queue.manager import get_storage_manager + from pulsing.streaming.manager import get_storage_manager # Ensure StorageManager exists manager = await get_storage_manager(actor_system) @@ -748,7 +748,7 @@ async def test_topic_broker_via_storage_manager(actor_system): @pytest.mark.asyncio async def test_list_topics(actor_system): """Test listing topics via StorageManager.""" - from pulsing.queue.manager import get_storage_manager + from pulsing.streaming.manager import get_storage_manager # Create some topics await write_topic(actor_system, "list_topic_1") @@ -868,7 +868,7 @@ async def handle_slow(msg): @pytest.mark.asyncio async def test_publish_timeout_error(actor_system): """Test that publish raises TimeoutError when timeout expires.""" - from pulsing.actor import Actor, ActorId + from pulsing.core import Actor, ActorId # Create an intentionally slow subscriber class SlowSubscriber(Actor): @@ -891,7 +891,7 @@ async def receive(self, msg): await actor_system.spawn(slow_actor, name=actor_name, public=True) # Register with broker using helper function - from pulsing.topic import subscribe_to_topic + from pulsing.streaming import subscribe_to_topic await subscribe_to_topic( actor_system, "timeout_error_topic", 
"slow_sub", actor_name @@ -909,7 +909,7 @@ async def receive(self, msg): @pytest.mark.asyncio async def test_ask_with_timeout_success(actor_system): """Test ask_with_timeout helper function (success case).""" - from pulsing.actor import Actor, ActorId, ask_with_timeout + from pulsing.core import Actor, ActorId, ask_with_timeout class EchoActor(Actor): def on_start(self, actor_id: ActorId) -> None: @@ -932,7 +932,7 @@ async def receive(self, msg): @pytest.mark.asyncio async def test_ask_with_timeout_error(actor_system): """Test ask_with_timeout raises TimeoutError when timeout expires.""" - from pulsing.actor import Actor, ActorId, ask_with_timeout + from pulsing.core import Actor, ActorId, ask_with_timeout class SlowActor(Actor): def on_start(self, actor_id: ActorId) -> None: @@ -956,7 +956,7 @@ async def receive(self, msg): @pytest.mark.asyncio async def test_tell_with_timeout_success(actor_system): """Test tell_with_timeout helper function (success case).""" - from pulsing.actor import Actor, ActorId, tell_with_timeout + from pulsing.core import Actor, ActorId, tell_with_timeout received = [] @@ -985,7 +985,7 @@ async def receive(self, msg): @pytest.mark.asyncio async def test_default_publish_timeout(): """Test that DEFAULT_PUBLISH_TIMEOUT is reasonable.""" - from pulsing.topic.topic import DEFAULT_PUBLISH_TIMEOUT + from pulsing.streaming.pubsub import DEFAULT_PUBLISH_TIMEOUT # Default timeout should be a reasonable value (30 seconds) assert DEFAULT_PUBLISH_TIMEOUT == 30.0 @@ -994,7 +994,7 @@ async def test_default_publish_timeout(): @pytest.mark.asyncio async def test_default_ask_timeout(): """Test that DEFAULT_ASK_TIMEOUT is reasonable.""" - from pulsing.actor import DEFAULT_ASK_TIMEOUT + from pulsing.core import DEFAULT_ASK_TIMEOUT # Default timeout should be a reasonable value (30 seconds) assert DEFAULT_ASK_TIMEOUT == 30.0 @@ -1011,8 +1011,8 @@ async def test_subscriber_failure_threshold_eviction(actor_system): Verify P0-3 fix: Subscribers are automatically 
evicted after 3 consecutive failures. """ - from pulsing.actor import Actor, ActorId - from pulsing.topic.broker import MAX_CONSECUTIVE_FAILURES + from pulsing.core import Actor, ActorId + from pulsing.streaming.broker import MAX_CONSECUTIVE_FAILURES # Verify configuration constants assert MAX_CONSECUTIVE_FAILURES == 3 @@ -1035,7 +1035,7 @@ async def receive(self, msg): await actor_system.spawn(failing_actor, name=actor_name, public=True) # Register failing subscriber with broker using helper function - from pulsing.topic import subscribe_to_topic + from pulsing.streaming import subscribe_to_topic await subscribe_to_topic( actor_system, "eviction_test_topic", "failing_sub", actor_name @@ -1069,7 +1069,7 @@ async def test_subscriber_ttl_config(): Verify P0-3 fix: TTL re-resolve configuration. """ - from pulsing.topic.broker import REF_TTL_SECONDS, MAX_CONSECUTIVE_FAILURES + from pulsing.streaming.broker import REF_TTL_SECONDS, MAX_CONSECUTIVE_FAILURES # Verify configuration is reasonable assert REF_TTL_SECONDS == 60.0, "TTL should be 60 seconds" @@ -1120,7 +1120,7 @@ async def test_default_mailbox_capacity_config(): Verify P1-1 fix: SystemConfig's default mailbox capacity. """ # Python side uses through Rust bindings, verify default value exists - from pulsing.actor import SystemConfig + from pulsing.core import SystemConfig config = SystemConfig.standalone() # Verify config can be created normally @@ -1138,7 +1138,7 @@ async def test_resolve_named_returns_actor(actor_system): Verify P1-2 fix: resolve_named basic functionality. """ - from pulsing.actor import Actor, ActorId + from pulsing.core import Actor, ActorId class TestActor(Actor): def on_start(self, actor_id: ActorId) -> None: @@ -1170,7 +1170,7 @@ async def test_resolve_named_multiple_calls(actor_system): Verify P1-2 fix: Multiple resolves should return valid ActorRefs. Note: RoundRobin cannot be verified in single-node environment, but basic functionality can be verified. 
""" - from pulsing.actor import Actor, ActorId + from pulsing.core import Actor, ActorId class CounterActor(Actor): def __init__(self): diff --git a/tests/python/test_actor_list.py b/tests/python/test_actor_list.py index 8c117fad3..b8ebbca63 100644 --- a/tests/python/test_actor_list.py +++ b/tests/python/test_actor_list.py @@ -3,7 +3,7 @@ import asyncio import pytest import json -from pulsing.actor import init, remote, get_system, list_actors +from pulsing.core import init, remote, get_system, list_actors from pulsing.cli.inspect import _print_actors_table import io import sys diff --git a/tests/python/test_chaos.py b/tests/python/test_chaos.py index 9a2ac89ba..7af7cf615 100644 --- a/tests/python/test_chaos.py +++ b/tests/python/test_chaos.py @@ -1,7 +1,7 @@ import asyncio import random import pytest -from pulsing.actor import ( +from pulsing.core import ( Actor, ActorId, Message, diff --git a/tests/python/test_receive_error_behavior.py b/tests/python/test_receive_error_behavior.py index 7415cb5c1..3ac21295a 100644 --- a/tests/python/test_receive_error_behavior.py +++ b/tests/python/test_receive_error_behavior.py @@ -1,15 +1,15 @@ """ -Tests for receive error behavior (业务错误不杀 actor、panic 停止不恢复). +Tests for receive error behavior (business errors don't kill actor, panic stops without recovery). Covers: -1. receive 返回/抛出错误时:错误返回给调用者,actor 不退出,可继续处理下一条消息 -2. 多次 receive 错误:每次错误只回传调用方,actor 始终存活 +1. When receive returns/raises error: error returned to caller, actor doesn't exit, can process next message +2. 
Multiple receive errors: each error only returned to caller, actor stays alive """ import pytest import pulsing as pul -from pulsing.actor import Actor +from pulsing.core import Actor # ============================================================================ @@ -26,12 +26,12 @@ async def system(): # ============================================================================ -# Actor: 对特定消息返回错误,其它消息正常处理 +# Actor: returns error for specific message, processes other messages normally # ============================================================================ class ErrorOnBadMessageActor(Actor): - """收到 'bad' 时 raise,其它消息 echo.""" + """Raises when receiving 'bad', echoes other messages.""" async def receive(self, msg): if msg == "bad": @@ -40,27 +40,27 @@ async def receive(self, msg): # ============================================================================ -# Test: receive 出错只回传调用者,actor 不退出 +# Test: receive error only returned to caller, actor doesn't exit # ============================================================================ @pytest.mark.asyncio async def test_receive_error_returned_to_caller_actor_stays_alive(system): - """receive 返回/抛出错误时:调用者收到错误,actor 不退出,下一条消息正常处理。""" + """When receive returns/raises error: caller receives error, actor doesn't exit, next message processed normally.""" ref = await system.spawn(ErrorOnBadMessageActor(), name="error_on_bad") - # 第一条:触发错误,应收到异常 + # 1st message: trigger error, should receive exception with pytest.raises(Exception): await ref.ask("bad") - # 第二条:actor 仍存活,应正常返回 + # 2nd message: actor still alive, should return normally result = await ref.ask("ok") assert result == "ok" @pytest.mark.asyncio async def test_receive_multiple_errors_then_success(system): - """多次 receive 出错:每次错误只回传调用方,actor 始终存活,最后一条正常。""" + """Multiple receive errors: each error only returned to caller, actor stays alive, final message succeeds.""" ref = await system.spawn(ErrorOnBadMessageActor(), name="multi_error") for _ in range(3): 
diff --git a/tests/python/test_remote_decorator.py b/tests/python/test_remote_decorator.py index 58100a8ea..aca7d26ab 100644 --- a/tests/python/test_remote_decorator.py +++ b/tests/python/test_remote_decorator.py @@ -25,7 +25,7 @@ @pytest.mark.asyncio async def test_proxy_method_validation(): """Test that proxy validates method names when methods list is provided.""" - from pulsing.actor import init, shutdown, remote, ActorProxy, get_system + from pulsing.core import init, shutdown, remote, ActorProxy, get_system @remote class Service: @@ -66,7 +66,7 @@ def valid_method(self): @pytest.mark.asyncio async def test_sync_method_error_handling(): """Test error handling in sync methods.""" - from pulsing.actor import init, shutdown, remote + from pulsing.core import init, shutdown, remote @remote class ErrorService: @@ -90,7 +90,7 @@ def will_fail(self): @pytest.mark.asyncio async def test_async_method_error_handling(): """Test error handling in async methods.""" - from pulsing.actor import init, shutdown, remote + from pulsing.core import init, shutdown, remote @remote class AsyncErrorService: @@ -120,7 +120,7 @@ async def will_fail(self): @pytest.mark.asyncio async def test_actor_proxy_from_ref_dynamic_mode(): """Test ActorProxy.from_ref in dynamic mode (no method list).""" - from pulsing.actor import init, shutdown, remote, ActorProxy, get_system + from pulsing.core import init, shutdown, remote, ActorProxy, get_system @remote class DynamicService: @@ -152,7 +152,7 @@ def method_b(self): @pytest.mark.asyncio async def test_actor_proxy_from_ref_with_async_methods(): """Test ActorProxy.from_ref with explicit async_methods set.""" - from pulsing.actor import init, shutdown, remote, ActorProxy, get_system + from pulsing.core import init, shutdown, remote, ActorProxy, get_system @remote class HybridService: @@ -197,7 +197,7 @@ async def async_method(self): @pytest.mark.asyncio async def test_remote_delayed_call(): """Test self.delayed(sec).method(...) 
schedules a tell after delay.""" - from pulsing.actor import init, shutdown, remote + from pulsing.core import init, shutdown, remote @remote class DelayedCallService: @@ -237,7 +237,7 @@ def get_received(self): @pytest.mark.asyncio async def test_remote_delayed_call_cancel(): """Test that the task returned by delayed().method() can be cancelled.""" - from pulsing.actor import init, shutdown, remote + from pulsing.core import init, shutdown, remote @remote class DelayedCancelService: @@ -278,7 +278,7 @@ def get_received(self): @pytest.mark.asyncio async def test_async_method_does_not_block_actor(): """Test that async methods don't block the actor from receiving new messages.""" - from pulsing.actor import init, shutdown, remote + from pulsing.core import init, shutdown, remote @remote class NonBlockingService: diff --git a/tests/python/test_resolve_as_any.py b/tests/python/test_resolve_as_any.py index 858ae4d3b..2a408887a 100644 --- a/tests/python/test_resolve_as_any.py +++ b/tests/python/test_resolve_as_any.py @@ -1,9 +1,10 @@ """ -Tests for resolve().as_any() and as_any(ref): untyped proxy that forwards any method call. +Tests for resolve().as_any() / .as_type() and as_any(ref): proxy generation on ActorRef. Covers: -- resolve(name) returns an object with .as_any() -- ref.as_any() returns a proxy; await proxy.method(...) 
works without knowing the actor type +- resolve(name) returns ActorRef with .as_any() and .as_type() +- ref.as_any() returns an untyped proxy +- ref.as_type(cls) returns a typed proxy - as_any(ref) function works with ref from resolve() or raw ActorRef - typed_proxy.as_any() returns an any proxy with the same underlying ref - ref.ask() / ref.tell() still work (backward compatibility) @@ -14,7 +15,7 @@ import pytest import pulsing as pul -from pulsing.actor import Actor, ActorRefView, as_any, remote +from pulsing.core import Actor, ActorRef, as_any, remote # ============================================================================ @@ -55,12 +56,14 @@ async def test_resolve_returns_ref_view_with_as_any(initialized_pul): @pytest.mark.asyncio -async def test_resolve_returns_actor_ref_view(initialized_pul): - """resolve(name) returns ActorRefView (or equivalent with .as_any()).""" +async def test_resolve_returns_actor_ref(initialized_pul): + """resolve(name) returns ActorRef with .as_any() and .as_type().""" await pul.spawn(_EchoActor(), name="ref_view_echo", public=True) ref = await pul.resolve("ref_view_echo") - assert isinstance(ref, ActorRefView) + assert isinstance(ref, ActorRef) + assert hasattr(ref, "as_any") + assert hasattr(ref, "as_type") # ============================================================================ @@ -163,7 +166,7 @@ async def test_as_any_function_with_ref_from_resolve(initialized_pul): @pytest.mark.asyncio async def test_as_any_function_with_raw_ref(initialized_pul): """as_any(ref) works when ref is raw ActorRef from system.resolve().""" - from pulsing.actor import get_system + from pulsing.core import get_system await _ServiceWithMethods.spawn(name="as_any_raw_svc", public=True) @@ -204,7 +207,7 @@ async def test_typed_proxy_as_any(initialized_pul): @pytest.mark.asyncio async def test_resolve_ref_ask_still_works(initialized_pul): - """After resolve(), ref.ask(msg) still works (ActorRefView delegates to _ref).""" + """After resolve(), 
ref.ask(msg) still works.""" await pul.spawn(_EchoActor(), name="compat_ask_echo", public=True) ref = await pul.resolve("compat_ask_echo") @@ -234,3 +237,75 @@ async def receive(self, msg): await asyncio.sleep(0.05) result = await ref.ask("get") assert result == 3 + + +# ============================================================================ +# Test: ref.as_type(cls) — typed proxy from ActorRef +# ============================================================================ + + +@pytest.mark.asyncio +async def test_as_type_on_actor_ref(initialized_pul): + """ref.as_type(cls) returns a typed proxy with method validation.""" + await _ServiceWithMethods.spawn(name="as_type_svc", public=True) + + ref = await pul.resolve("as_type_svc") + proxy = ref.as_type(_ServiceWithMethods) + + result = await proxy.get_value() + assert result == 0 + + result = await proxy.set_value(99) + assert result == 99 + + result = await proxy.get_value() + assert result == 99 + + +@pytest.mark.asyncio +async def test_as_type_rejects_invalid_method(initialized_pul): + """Typed proxy from as_type() rejects methods not on the class.""" + await _ServiceWithMethods.spawn(name="as_type_reject_svc", public=True) + + ref = await pul.resolve("as_type_reject_svc") + proxy = ref.as_type(_ServiceWithMethods) + + with pytest.raises(AttributeError, match="No method"): + proxy.nonexistent_method # Access triggers __getattr__ validation + + +@pytest.mark.asyncio +async def test_as_type_async_method(initialized_pul): + """as_type() proxy correctly handles async methods.""" + await _ServiceWithMethods.spawn(name="as_type_async_svc", public=True) + + ref = await pul.resolve("as_type_async_svc") + proxy = ref.as_type(_ServiceWithMethods) + + result = await proxy.async_incr() + assert result == 1 + result = await proxy.async_incr() + assert result == 2 + + +# ============================================================================ +# Test: Counter.resolve(name, timeout=...) 
+# ============================================================================ + + +@pytest.mark.asyncio +async def test_counter_resolve_with_timeout(initialized_pul): + """Counter.resolve(name, timeout=...) passes timeout to underlying resolve.""" + await _ServiceWithMethods.spawn(name="timeout_svc", public=True) + + # Should succeed with timeout (actor already exists) + proxy = await _ServiceWithMethods.resolve("timeout_svc", timeout=5) + result = await proxy.get_value() + assert result == 0 + + +@pytest.mark.asyncio +async def test_counter_resolve_timeout_not_found(initialized_pul): + """Counter.resolve(name, timeout=...) raises after timeout if not found.""" + with pytest.raises(RuntimeError): + await _ServiceWithMethods.resolve("nonexistent_actor", timeout=0.3) diff --git a/tests/python/test_sealed_message.py b/tests/python/test_sealed_message.py index f8fbbcdb9..aa1206ee0 100644 --- a/tests/python/test_sealed_message.py +++ b/tests/python/test_sealed_message.py @@ -10,13 +10,15 @@ """ import asyncio +import os from dataclasses import dataclass import pytest -from pulsing.actor import ( +from pulsing.core import ( Actor, Message, SealedPyMessage, + ZeroCopyDescriptor, ) import pulsing as pul @@ -143,6 +145,54 @@ async def receive(self, msg): return {"received": msg} +class ZeroCopyPayload: + """Object implementing Pulsing zerocopy descriptor protocol.""" + + def __init__(self, raw: bytes): + self.raw = raw + + def __zerocopy__(self, _ctx): + return ZeroCopyDescriptor( + buffers=[memoryview(self.raw)], + dtype="u8", + shape=[len(self.raw)], + strides=[1], + transport="inline", + checksum=None, + version=1, + ) + + +class ZeroCopyInspectorActor(Actor): + async def receive(self, msg): + if isinstance(msg, ZeroCopyDescriptor): + buffers = msg.buffers + return { + "is_descriptor": True, + "buffer_count": len(buffers), + "size": len(buffers[0]), + "dtype": msg.dtype, + } + return {"is_descriptor": False, "type": type(msg).__name__} + + +class 
NonContiguousZeroCopyPayload: + def __init__(self, raw: bytes): + self.raw = raw + + def __zerocopy__(self, _ctx): + view = memoryview(self.raw)[::2] + return ZeroCopyDescriptor( + buffers=[view], + dtype="u8", + shape=[len(view)], + strides=[2], + transport="inline", + checksum=None, + version=1, + ) + + # ============================================================================ # Fixtures # ============================================================================ @@ -412,6 +462,117 @@ async def test_tell_with_dict(actor_system): assert response["value"] == 8 +@pytest.mark.asyncio +async def test_ask_with_zerocopy_descriptor(actor_system): + """ask() should use zerocopy when object defines __zerocopy__.""" + os.environ["PULSING_ZEROCOPY"] = "auto" + actor_ref = await actor_system.spawn(ZeroCopyInspectorActor(), name="zc-inspector") + response = await actor_ref.ask(ZeroCopyPayload(b"abcdef")) + assert response["is_descriptor"] is True + assert response["buffer_count"] == 1 + assert response["size"] == 6 + assert response["dtype"] == "u8" + + +@pytest.mark.asyncio +async def test_ask_with_zerocopy_force(actor_system): + """force mode should reject payloads without __zerocopy__.""" + os.environ["PULSING_ZEROCOPY"] = "force" + actor_ref = await actor_system.spawn(EchoAnyActor(), name="zc-force") + with pytest.raises(Exception): + await actor_ref.ask({"not": "zerocopy"}) + os.environ["PULSING_ZEROCOPY"] = "auto" + + +@pytest.mark.asyncio +async def test_ask_with_zerocopy_large_buffer(actor_system): + """Large payload (>= stream threshold) goes through descriptor-first stream path.""" + os.environ["PULSING_ZEROCOPY"] = "auto" + os.environ["PULSING_ZEROCOPY_STREAM_THRESHOLD"] = "65536" + os.environ["PULSING_ZEROCOPY_CHUNK_BYTES"] = "65536" + try: + actor_ref = await actor_system.spawn(ZeroCopyInspectorActor(), name="zc-large") + payload = bytearray(8 * 1024 * 1024) + response = await actor_ref.ask(ZeroCopyPayload(payload)) + assert response["is_descriptor"] is True 
+ assert response["buffer_count"] == 1 + assert response["size"] == len(payload) + finally: + os.environ.pop("PULSING_ZEROCOPY_CHUNK_BYTES", None) + os.environ.pop("PULSING_ZEROCOPY_STREAM_THRESHOLD", None) + + +@pytest.mark.asyncio +async def test_ask_with_zerocopy_small_buffer_single_path(actor_system): + """Small payload (< stream threshold) stays on single-message path.""" + os.environ["PULSING_ZEROCOPY"] = "auto" + os.environ["PULSING_ZEROCOPY_STREAM_THRESHOLD"] = "1048576" + try: + actor_ref = await actor_system.spawn(ZeroCopyInspectorActor(), name="zc-small") + payload = b"small_payload_1234" + response = await actor_ref.ask(ZeroCopyPayload(payload)) + assert response["is_descriptor"] is True + assert response["buffer_count"] == 1 + assert response["size"] == len(payload) + assert response["dtype"] == "u8" + finally: + os.environ.pop("PULSING_ZEROCOPY_STREAM_THRESHOLD", None) + + +@pytest.mark.asyncio +async def test_ask_with_zerocopy_stream_threshold_boundary(actor_system): + """Payload exactly at stream threshold goes through stream path.""" + threshold = 4096 + os.environ["PULSING_ZEROCOPY"] = "auto" + os.environ["PULSING_ZEROCOPY_STREAM_THRESHOLD"] = str(threshold) + os.environ["PULSING_ZEROCOPY_CHUNK_BYTES"] = "4096" + try: + actor_ref = await actor_system.spawn( + ZeroCopyInspectorActor(), name="zc-boundary" + ) + payload = bytearray(threshold) + response = await actor_ref.ask(ZeroCopyPayload(payload)) + assert response["is_descriptor"] is True + assert response["buffer_count"] == 1 + assert response["size"] == threshold + finally: + os.environ.pop("PULSING_ZEROCOPY_STREAM_THRESHOLD", None) + os.environ.pop("PULSING_ZEROCOPY_CHUNK_BYTES", None) + + +@pytest.mark.asyncio +async def test_ask_with_zerocopy_stream_multi_chunk(actor_system): + """Large buffer is transmitted in multiple chunks and reassembled correctly.""" + os.environ["PULSING_ZEROCOPY"] = "auto" + os.environ["PULSING_ZEROCOPY_STREAM_THRESHOLD"] = "4096" + 
os.environ["PULSING_ZEROCOPY_CHUNK_BYTES"] = "4096" + try: + actor_ref = await actor_system.spawn( + ZeroCopyInspectorActor(), name="zc-multichunk" + ) + # 5 chunks worth of data + payload = bytearray(range(256)) * 80 # 20480 bytes + response = await actor_ref.ask(ZeroCopyPayload(bytes(payload))) + assert response["is_descriptor"] is True + assert response["buffer_count"] == 1 + assert response["size"] == len(payload) + finally: + os.environ.pop("PULSING_ZEROCOPY_STREAM_THRESHOLD", None) + os.environ.pop("PULSING_ZEROCOPY_CHUNK_BYTES", None) + + +@pytest.mark.asyncio +async def test_zerocopy_force_rejects_non_contiguous_buffer(actor_system): + """Force mode rejects non-contiguous buffer views.""" + os.environ["PULSING_ZEROCOPY"] = "force" + actor_ref = await actor_system.spawn( + ZeroCopyInspectorActor(), name="zc-noncontiguous" + ) + with pytest.raises(Exception): + await actor_ref.ask(NonContiguousZeroCopyPayload(b"0123456789")) + os.environ["PULSING_ZEROCOPY"] = "auto" + + # ============================================================================ # Backward Compatibility Tests # ============================================================================ diff --git a/tests/python/test_system_actor.py b/tests/python/test_system_actor.py index 527bdaf05..92a1e2860 100644 --- a/tests/python/test_system_actor.py +++ b/tests/python/test_system_actor.py @@ -9,7 +9,7 @@ import asyncio import pytest import pulsing as pul -from pulsing.actor import ( +from pulsing.core import ( get_python_actor_service, get_system_actor, remote, diff --git a/tests/python/test_zerocopy_protocol.py b/tests/python/test_zerocopy_protocol.py new file mode 100644 index 000000000..46334d4b7 --- /dev/null +++ b/tests/python/test_zerocopy_protocol.py @@ -0,0 +1,117 @@ +import pytest +import pulsing as pul +from pulsing.core import Actor, ZeroCopyDescriptor + + +class _ZeroCopyPayload: + def __init__(self, payload: bytes): + self.payload = payload + + def __zerocopy__(self, _ctx): + return 
ZeroCopyDescriptor( + buffers=[memoryview(self.payload)], + dtype="u8", + shape=[len(self.payload)], + strides=[1], + transport="inline", + checksum=None, + version=1, + ) + + +class _Inspector(Actor): + async def receive(self, msg): + if isinstance(msg, ZeroCopyDescriptor): + buffers = msg.buffers + return { + "kind": "descriptor", + "version": msg.version, + "buffer_count": len(buffers), + "first_size": len(buffers[0]), + } + return {"kind": "normal", "type": type(msg).__name__} + + +@pytest.fixture +async def actor_system(): + system = await pul.actor_system() + yield system + await system.shutdown() + + +@pytest.mark.asyncio +async def test_zerocopy_auto_uses_descriptor(actor_system, monkeypatch): + monkeypatch.setenv("PULSING_ZEROCOPY", "auto") + ref = await actor_system.spawn(_Inspector(), name="zc-auto") + resp = await ref.ask(_ZeroCopyPayload(b"hello")) + assert resp["kind"] == "descriptor" + assert resp["version"] == 1 + assert resp["buffer_count"] == 1 + assert resp["first_size"] == 5 + + +@pytest.mark.asyncio +async def test_zerocopy_off_falls_back_pickle(actor_system, monkeypatch): + monkeypatch.setenv("PULSING_ZEROCOPY", "off") + ref = await actor_system.spawn(_Inspector(), name="zc-off") + resp = await ref.ask(_ZeroCopyPayload(b"hello")) + assert resp["kind"] == "normal" + assert resp["type"] == "_ZeroCopyPayload" + + +@pytest.mark.asyncio +async def test_zerocopy_force_rejects_non_descriptor(actor_system, monkeypatch): + monkeypatch.setenv("PULSING_ZEROCOPY", "force") + ref = await actor_system.spawn(_Inspector(), name="zc-force") + with pytest.raises(Exception): + await ref.ask({"x": 1}) + + +@pytest.mark.asyncio +async def test_zerocopy_small_payload_single_path(actor_system, monkeypatch): + """Small payload below stream threshold stays on single-message path.""" + monkeypatch.setenv("PULSING_ZEROCOPY", "auto") + monkeypatch.setenv("PULSING_ZEROCOPY_STREAM_THRESHOLD", "1048576") + ref = await actor_system.spawn(_Inspector(), name="zc-small") + resp 
= await ref.ask(_ZeroCopyPayload(b"tiny")) + assert resp["kind"] == "descriptor" + assert resp["first_size"] == 4 + + +@pytest.mark.asyncio +async def test_zerocopy_large_payload_stream_path(actor_system, monkeypatch): + """Large payload above stream threshold goes through descriptor-first stream.""" + monkeypatch.setenv("PULSING_ZEROCOPY", "auto") + monkeypatch.setenv("PULSING_ZEROCOPY_STREAM_THRESHOLD", "4096") + monkeypatch.setenv("PULSING_ZEROCOPY_CHUNK_BYTES", "4096") + ref = await actor_system.spawn(_Inspector(), name="zc-stream") + big = bytes(range(256)) * 64 # 16384 bytes, 4 chunks + resp = await ref.ask(_ZeroCopyPayload(big)) + assert resp["kind"] == "descriptor" + assert resp["first_size"] == len(big) + assert resp["buffer_count"] == 1 + + +@pytest.mark.asyncio +async def test_zerocopy_stream_data_integrity(actor_system, monkeypatch): + """Data transmitted via stream path arrives intact.""" + monkeypatch.setenv("PULSING_ZEROCOPY", "auto") + monkeypatch.setenv("PULSING_ZEROCOPY_STREAM_THRESHOLD", "4096") + monkeypatch.setenv("PULSING_ZEROCOPY_CHUNK_BYTES", "4096") + + class _DataVerifier(Actor): + async def receive(self, msg): + if isinstance(msg, ZeroCopyDescriptor): + data = bytes(msg.buffers[0]) + return { + "size": len(data), + "checksum": sum(data) % 65536, + } + return {} + + ref = await actor_system.spawn(_DataVerifier(), name="zc-verify") + payload = bytes(range(256)) * 80 # 20480 bytes + expected_checksum = sum(payload) % 65536 + resp = await ref.ask(_ZeroCopyPayload(payload)) + assert resp["size"] == len(payload) + assert resp["checksum"] == expected_checksum