diff --git a/.github/workflows/tag-release-image.yml b/.github/workflows/tag-release-image.yml index 30c0171443..fbb8623d39 100644 --- a/.github/workflows/tag-release-image.yml +++ b/.github/workflows/tag-release-image.yml @@ -6,7 +6,7 @@ on: workflow_dispatch: inputs: tag: - description: 'Release tag to build (e.g., v1.0.2-sglang)' + description: 'Release tag to build (e.g., v1.0.3-sglang)' required: true type: string diff --git a/README.md b/README.md index d721d7768c..31990191db 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,13 @@
| Paper | Documentation | 中文文档 | Ask DeepWiki | 🤗 Models & Data |
-
WeChat (微信) Group |
+
WeChat (微信) Group |
+
**AReaL** is a reinforcement learning (RL) infrastructure designed to bridge foundation
model training with modern agent-based applications. It was originally developed by
diff --git a/ROADMAP.md b/ROADMAP.md
index 94bf632d4d..233880add1 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -211,4 +211,4 @@ ______________________________________________________________________
**Questions about the roadmap?** Open a discussion in
[GitHub Discussions](https://github.com/inclusionAI/AReaL/discussions) or ask in our
-[WeChat group](./assets/wechat_qrcode.png).
+[WeChat group](./assets/figures/wechat_qrcode.png).
diff --git a/areal/engine/fsdp_utils/__init__.py b/areal/engine/fsdp_utils/__init__.py
index 6853251ba3..8620b34fda 100644
--- a/areal/engine/fsdp_utils/__init__.py
+++ b/areal/engine/fsdp_utils/__init__.py
@@ -65,16 +65,25 @@ def apply_fsdp2(model, fsdp_kwargs, wrap_policy):
"PyTorch version >= 2.4 is required for using fully_shard API (FSDP2)"
)
- default_transformer_cls_names_to_wrap = getattr(model, "_no_split_modules", list())
+ def _normalize_wrap_class_names(value):
+ if value is None:
+ return []
+ if isinstance(value, str):
+ return [value]
+ return value if isinstance(value, list) else list(value)
+
+ default_transformer_cls_names_to_wrap = _normalize_wrap_class_names(
+ getattr(model, "_no_split_modules", list())
+ )
fsdp_transformer_layer_cls_to_wrap = (
wrap_policy.transformer_layer_cls_to_wrap if wrap_policy is not None else list()
)
+ fsdp_transformer_layer_cls_to_wrap = _normalize_wrap_class_names(
+ fsdp_transformer_layer_cls_to_wrap
+ )
if not fsdp_transformer_layer_cls_to_wrap:
fsdp_transformer_layer_cls_to_wrap = default_transformer_cls_names_to_wrap
- if isinstance(fsdp_transformer_layer_cls_to_wrap, str):
- fsdp_transformer_layer_cls_to_wrap = [fsdp_transformer_layer_cls_to_wrap]
-
assert (
len(fsdp_transformer_layer_cls_to_wrap) > 0
and fsdp_transformer_layer_cls_to_wrap[0] is not None
diff --git a/assets/community/README.md b/assets/community/README.md
new file mode 100644
index 0000000000..2670bd0af5
--- /dev/null
+++ b/assets/community/README.md
@@ -0,0 +1,34 @@
+# Community Meeting Materials
+
+This folder stores materials for AReaL's community meetings and public presentations,
+including recordings, agendas, and presentation slides. Community meetings are open to
+everyone and serve as a space for project updates, feature discussions, and Q&A.
+
+For background on how the project is governed and how to participate, please see:
+
+- [Governance](../../GOVERNANCE.md) -- project roles, decision-making, and maintainers
+- [Code of Conduct](../../CODE_OF_CONDUCT.md) -- community standards and reporting
+- [Contributing Guide](../../CONTRIBUTING.md) -- how to get started with contributions
+
+## Upcoming Meetings
+
+| Date | Agenda | Slides | Recording |
+| ---- | ------ | ------ | --------- |
+| TBD | TBD | TBD | TBD |
+
+## Past Meetings
+
+| Date | Agenda | Slides | Recording |
+| ---- | ------ | ------ | --------- |
+| TBD | TBD | TBD | TBD |
+
+## How to Add Materials
+
+After each meeting, add a new row to the **Past Meetings** table with:
+
+- **Date** -- meeting date in `YYYY-MM-DD` format
+- **Agenda** -- link to the agenda document (e.g. Google Doc, Tencent Doc, Yuque Doc, or
+ a file in this folder)
+- **Slides** -- link to the slide deck or place the PDF/PPTX in this folder and link it
+ here
+- **Recording** -- link to the video recording (e.g. Zoom, Tencent Meeting, DingTalk)
diff --git a/assets/1.5b_time_n1n4n16.png b/assets/figures/1.5b_time_n1n4n16.png
similarity index 100%
rename from assets/1.5b_time_n1n4n16.png
rename to assets/figures/1.5b_time_n1n4n16.png
diff --git a/assets/7b_zero_eval_acc.png b/assets/figures/7b_zero_eval_acc.png
similarity index 100%
rename from assets/7b_zero_eval_acc.png
rename to assets/figures/7b_zero_eval_acc.png
diff --git a/assets/7b_zero_training_curve.png b/assets/figures/7b_zero_training_curve.png
similarity index 100%
rename from assets/7b_zero_training_curve.png
rename to assets/figures/7b_zero_training_curve.png
diff --git a/assets/algo_ablation.png b/assets/figures/algo_ablation.png
similarity index 100%
rename from assets/algo_ablation.png
rename to assets/figures/algo_ablation.png
diff --git a/assets/arch.png b/assets/figures/arch.png
similarity index 100%
rename from assets/arch.png
rename to assets/figures/arch.png
diff --git a/assets/areal_lite_layers.png b/assets/figures/areal_lite_layers.png
similarity index 100%
rename from assets/areal_lite_layers.png
rename to assets/figures/areal_lite_layers.png
diff --git a/assets/async_scaling_vs_verl.png b/assets/figures/async_scaling_vs_verl.png
similarity index 100%
rename from assets/async_scaling_vs_verl.png
rename to assets/figures/async_scaling_vs_verl.png
diff --git a/assets/async_timeline.png b/assets/figures/async_timeline.png
similarity index 100%
rename from assets/async_timeline.png
rename to assets/figures/async_timeline.png
diff --git a/assets/decoupled_ppo_obj.png b/assets/figures/decoupled_ppo_obj.png
similarity index 100%
rename from assets/decoupled_ppo_obj.png
rename to assets/figures/decoupled_ppo_obj.png
diff --git a/assets/distill_1.5b_24k_curve.png b/assets/figures/distill_1.5b_24k_curve.png
similarity index 100%
rename from assets/distill_1.5b_24k_curve.png
rename to assets/figures/distill_1.5b_24k_curve.png
diff --git a/assets/gen_scaling_trend.png b/assets/figures/gen_scaling_trend.png
similarity index 100%
rename from assets/gen_scaling_trend.png
rename to assets/figures/gen_scaling_trend.png
diff --git a/assets/gsm8k_2.5-1.5b-ins_training_curve.png b/assets/figures/gsm8k_2.5-1.5b-ins_training_curve.png
similarity index 100%
rename from assets/gsm8k_2.5-1.5b-ins_training_curve.png
rename to assets/figures/gsm8k_2.5-1.5b-ins_training_curve.png
diff --git a/assets/interrupt_gen_ablation.png b/assets/figures/interrupt_gen_ablation.png
similarity index 100%
rename from assets/interrupt_gen_ablation.png
rename to assets/figures/interrupt_gen_ablation.png
diff --git a/assets/logo.png b/assets/figures/logo.png
similarity index 100%
rename from assets/logo.png
rename to assets/figures/logo.png
diff --git a/assets/staleness_throughput.png b/assets/figures/staleness_throughput.png
similarity index 100%
rename from assets/staleness_throughput.png
rename to assets/figures/staleness_throughput.png
diff --git a/assets/sync_one_step_gen.png b/assets/figures/sync_one_step_gen.png
similarity index 100%
rename from assets/sync_one_step_gen.png
rename to assets/figures/sync_one_step_gen.png
diff --git a/assets/thpt_comparison.png b/assets/figures/thpt_comparison.png
similarity index 100%
rename from assets/thpt_comparison.png
rename to assets/figures/thpt_comparison.png
diff --git a/assets/wechat_icon.png b/assets/figures/wechat_icon.png
similarity index 100%
rename from assets/wechat_icon.png
rename to assets/figures/wechat_icon.png
diff --git a/assets/wechat_qrcode.png b/assets/figures/wechat_qrcode.png
similarity index 100%
rename from assets/wechat_qrcode.png
rename to assets/figures/wechat_qrcode.png
diff --git a/blog/AReaL_v0_1.md b/blog/AReaL_v0_1.md
index 476f528b01..da12bd58c1 100644
--- a/blog/AReaL_v0_1.md
+++ b/blog/AReaL_v0_1.md
@@ -30,7 +30,7 @@ We observe that the response length first **shrinks in the 8K training stage**,
Our experiments are conducted on 16 nodes, each equipped with 8 H800 GPUs. The results,
along with the associated training curves, are presented below.
-
+
*Figure 1. Training rewards and response lengths during RL training. The base model is
DeepSeek-R1-Distill-Qwen-1.5B. Curves are averaged with a window size of 25.*
@@ -82,7 +82,7 @@ accelerate RL training, significantly boosting research progress. We provide det
hardware requirements and environment setup guides for different configurations in
[our tutorials](/examples/README.md).
-
+
*Figure 2. Total RL training time for 10 epochs across different resource
configurations.*
@@ -96,14 +96,14 @@ rewards and response lengths gradually increasing during training. This **simult
growth of response lengths and rewards** suggests **emergent deep thinking
capabilities** in solving complex reasoning problems.
- *Figure 3.
+ *Figure 3.
Qwen2.5-7B-Zero RL training curve*
Evaluation of intermediate checkpoints on MATH500 and AIME24 datasets shows continuous
improvement in both accuracy and response length:
- *Figure 4. Test accuracy and
-response length on MATH500 and AIME24 datasets*
+ *Figure 4. Test accuracy
+and response length on MATH500 and AIME24 datasets*
Additional experiments on the
[DeepScaleR](https://github.com/agentica-project/deepscaler) dataset show similar
diff --git a/blog/AReaL_v0_2.md b/blog/AReaL_v0_2.md
index 6b2020fb3e..f297906dbc 100644
--- a/blog/AReaL_v0_2.md
+++ b/blog/AReaL_v0_2.md
@@ -53,7 +53,7 @@ we randomized the answer options.
### Training Speed Comparison
-
+
AReaL v0.2.0 features the following system optimizations:
diff --git a/blog/AReaL_v0_3.md b/blog/AReaL_v0_3.md
index b989a2f56a..270f0ef748 100644
--- a/blog/AReaL_v0_3.md
+++ b/blog/AReaL_v0_3.md
@@ -58,7 +58,7 @@ model version used for rollout generation is limited to only one or two steps ol
However, all these systems still follow a batched generation setting, the issue of
system inefficiency during the generation phase still remains unaddressed.
-
+
*Fig.1. Left: Execution timeline of a synchronous RL training. Right: Execution timeline
of one-step overlap RL system.*
@@ -69,7 +69,7 @@ Synchronous systems distribute generation across all devices, reducing the per-G
decoding batch size. This pushes the decoding process into a memory-IO-bound regime
where additional devices fail to improve throughput.
-
+
*Fig2. Left: Strong scaling of batched generation throughput for a 1.5B LRM. Right:
Generation becomes memory-IO bound as GPU count increases.*
@@ -82,7 +82,7 @@ flexibility for a customized RL workflow. We implement these principles in AReaL
presents the architecture and data flow of AREAL. The system comprises 4 core
components:
-
+
*Fig.3 The architecture featuring asynchronous generation and training components.*
@@ -117,7 +117,7 @@ components:
Fig.4. This asynchronous pipeline ensures continuous full utilization of both
generation and training resources.
-
+
*Fig 4. Execution timeline of our fully asynchronous RL system.*
@@ -156,7 +156,7 @@ To overcome these two challenges, we propose two solutions:
represents the policy used for sampling trajectories and the proxy policy is a
proximal policy serving as a recent target to regularize the update of online policy.
-
+
## Validating Asynchronous AReaL
@@ -200,7 +200,7 @@ Fig.5. Without interruptible generation, the controller must wait for the longes
response. In particular, interruptible generation leads to a 12% and 17% throughput
increase for 1.5B and 7B models respectively on 4 nodes.
-
+
*Fig.5 Ablation study of interruptible generation.*
@@ -214,7 +214,7 @@ final performance due to the improper clipping center and policy changes during
interruptible generation. Furthermore, increasing data staleness consistently degrades
learning performance.
-
+
*Fig.6 Ablation Study on Decoupled PPO Objective with DeepSeek-R1-Distill-Qwen-1.5B.
Left: Learning curves with naive PPO. Right: Learning curves with decoupled PPO
@@ -245,7 +245,7 @@ zero-staleness oracle. When properly constrained, moderate staleness (e.g., η
minimal impact on final performance while significantly accelerating training through
the asynchronous pipeline, as demonstrated in Tab.3 and Fig.7.
-
+
*Fig.7 The relationship between η and training throughput. Larger η leads to higher
throughput.*
diff --git a/docs/en/tutorial/installation.md b/docs/en/tutorial/installation.md
index 72dd57f8c0..399ec468b0 100644
--- a/docs/en/tutorial/installation.md
+++ b/docs/en/tutorial/installation.md
@@ -24,7 +24,7 @@ The following hardware configuration has been extensively tested:
| Git LFS | Required for downloading models, datasets, and AReaL code. See [installation guide](https://docs.github.com/en/repositories/working-with-files/managing-large-files/installing-git-large-file-storage) |
| Docker | 27.5.1 |
| NVIDIA Container Toolkit | See [installation guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) |
-| AReaL Image | `ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang` (default) or `v1.0.2-vllm`. Includes runtime dependencies and Ray components. |
+| AReaL Image | `ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang` (default) or `v1.0.3-vllm`. Includes runtime dependencies and Ray components. |
**Note**: This tutorial does not cover the installation of NVIDIA Drivers, CUDA, or
shared storage mounting, as these depend on your specific node configuration and system
@@ -42,11 +42,11 @@ We recommend using Docker with our provided image. The Dockerfile is available i
top-level directory of the AReaL repository.
```bash
-docker pull ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang
+docker pull ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang
docker run -it --name areal-node1 \
--privileged --gpus all --network host \
--shm-size 700g -v /path/to/mount:/path/to/mount \
- ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang \
+ ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang \
/bin/bash
git clone https://github.com/inclusionAI/AReaL /path/to/mount/AReaL
cd /path/to/mount/AReaL
@@ -54,7 +54,7 @@ uv pip install -e . --no-deps
```
A vLLM variant of the Docker image is also available at
-`ghcr.io/inclusionai/areal-runtime:v1.0.2-vllm`. Replace the image tag in the commands
+`ghcr.io/inclusionai/areal-runtime:v1.0.3-vllm`. Replace the image tag in the commands
above if you prefer vLLM as the inference backend.
### Option 2: Custom Environment Installation
diff --git a/docs/zh/tutorial/installation.md b/docs/zh/tutorial/installation.md
index eb7c7c9675..813c0b0af1 100644
--- a/docs/zh/tutorial/installation.md
+++ b/docs/zh/tutorial/installation.md
@@ -24,7 +24,7 @@
| Git LFS | 用于下载模型、数据集和 AReaL 代码。请参阅[安装指南](https://docs.github.com/en/repositories/working-with-files/managing-large-files/installing-git-large-file-storage) |
| Docker | 27.5.1 |
| NVIDIA Container Toolkit | 请参阅[安装指南](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) |
-| AReaL 镜像 | `ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang`(默认)或 `v1.0.2-vllm`。包含运行时依赖和 Ray 组件。 |
+| AReaL 镜像 | `ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang`(默认)或 `v1.0.3-vllm`。包含运行时依赖和 Ray 组件。 |
**注意**:本教程不涵盖 NVIDIA 驱动、CUDA 或共享存储挂载的安装,因为这些取决于您具体的节点配置和系统版本。请独立完成这些安装。
@@ -37,18 +37,18 @@
我们推荐使用 Docker 和提供的镜像。Dockerfile 位于 AReaL 仓库的顶级目录。
```bash
-docker pull ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang
+docker pull ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang
docker run -it --name areal-node1 \
--privileged --gpus all --network host \
--shm-size 700g -v /path/to/mount:/path/to/mount \
- ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang \
+ ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang \
/bin/bash
git clone https://github.com/inclusionAI/AReaL /path/to/mount/AReaL
cd /path/to/mount/AReaL
uv pip install -e . --no-deps
```
-vLLM 变体的 Docker 镜像也可使用: `ghcr.io/inclusionai/areal-runtime:v1.0.2-vllm`。如果您偏好使用 vLLM
+vLLM 变体的 Docker 镜像也可使用: `ghcr.io/inclusionai/areal-runtime:v1.0.3-vllm`。如果您偏好使用 vLLM
作为推理后端,请将上述命令中的镜像标签替换为该变体。
### 方式 2:自定义环境安装
diff --git a/examples/skypilot/README.md b/examples/skypilot/README.md
index eb8eef2543..2f26f1c51c 100644
--- a/examples/skypilot/README.md
+++ b/examples/skypilot/README.md
@@ -25,7 +25,7 @@ resources:
cpus: 8+
memory: 32GB+
disk_size: 256GB
- image_id: docker:ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang
+ image_id: docker:ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang
num_nodes: 1
@@ -78,7 +78,7 @@ Specify the resources and image used to run the experiment.
```yaml
resources:
accelerators: A100:8
- image_id: docker:ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang
+ image_id: docker:ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang
memory: 256+
cpus: 32+
diff --git a/examples/skypilot/ray_cluster.sky.yaml b/examples/skypilot/ray_cluster.sky.yaml
index 963e74dcea..28e6dacfcb 100644
--- a/examples/skypilot/ray_cluster.sky.yaml
+++ b/examples/skypilot/ray_cluster.sky.yaml
@@ -1,7 +1,7 @@
resources:
accelerators: A100:8
- image_id: docker:ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang
+ image_id: docker:ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang
memory: 32+
cpus: 8+
diff --git a/examples/skypilot/single_node.sky.yaml b/examples/skypilot/single_node.sky.yaml
index 6122a7fde7..99c46243ca 100644
--- a/examples/skypilot/single_node.sky.yaml
+++ b/examples/skypilot/single_node.sky.yaml
@@ -8,7 +8,7 @@ resources:
cpus: 8+
memory: 32GB+
disk_size: 256GB
- image_id: docker:ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang
+ image_id: docker:ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang
num_nodes: 1
diff --git a/pyproject.toml b/pyproject.toml
index 3d0bb2564f..c5738a4dec 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ description = "AReaL: A Large-Scale Asynchronous Reinforcement Learning System"
readme = "README.md"
license = {text = "Apache-2.0"}
requires-python = ">=3.11,<3.13"
-version = "1.0.2"
+version = "1.0.3"
authors = [
{name = "AReaL Team"},
]
diff --git a/pyproject.vllm.toml b/pyproject.vllm.toml
index 10bfefbf02..9a25771af5 100644
--- a/pyproject.vllm.toml
+++ b/pyproject.vllm.toml
@@ -24,7 +24,7 @@ description = "AReaL: A Large-Scale Asynchronous Reinforcement Learning System"
readme = "README.md"
license = {text = "Apache-2.0"}
requires-python = ">=3.11,<3.13"
-version = "1.0.2"
+version = "1.0.3"
authors = [
{name = "AReaL Team"},
]
diff --git a/uv.lock b/uv.lock
index 674263e5d6..f9e1309143 100644
--- a/uv.lock
+++ b/uv.lock
@@ -299,7 +299,7 @@ wheels = [
[[package]]
name = "areal"
-version = "1.0.2"
+version = "1.0.3"
source = { editable = "." }
dependencies = [
{ name = "aiofiles", marker = "(platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
diff --git a/uv.vllm.lock b/uv.vllm.lock
index 4854168c46..a815200775 100644
--- a/uv.vllm.lock
+++ b/uv.vllm.lock
@@ -286,7 +286,7 @@ wheels = [
[[package]]
name = "areal"
-version = "1.0.2"
+version = "1.0.3"
source = { editable = "." }
dependencies = [
{ name = "aiofiles", marker = "(platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },