Merged
2 changes: 1 addition & 1 deletion .github/workflows/tag-release-image.yml
@@ -6,7 +6,7 @@ on:
workflow_dispatch:
inputs:
tag:
description: 'Release tag to build (e.g., v1.0.2-sglang)'
description: 'Release tag to build (e.g., v1.0.3-sglang)'
required: true
type: string

4 changes: 2 additions & 2 deletions README.md
@@ -4,13 +4,13 @@

<p align="center">
| <a href="https://arxiv.org/pdf/2505.24298"><b>Paper</b></a> | <a href="https://inclusionai.github.io/AReaL/"><b>Documentation</b></a> | <a href="https://inclusionai.github.io/AReaL/zh/"><b>中文文档</b></a> | <a href="https://deepwiki.com/inclusionAI/AReaL"><b>Ask DeepWiki</b></a> | <a href="https://huggingface.co/collections/inclusionAI/"><b>🤗 Models & Data</b></a> |
<a href="./assets/wechat_qrcode.png" target="_blank"><img src="./assets/wechat_icon.png" width="20" style="vertical-align: middle;"> <b>WeChat (微信) Group</b></a> |
<a href="./assets/figures/wechat_qrcode.png" target="_blank"><img src="./assets/figures/wechat_icon.png" width="20" style="vertical-align: middle;"> <b>WeChat (微信) Group</b></a> |
<a href="https://gitcgr.com/inclusionAI/AReaL">
<img src="https://gitcgr.com/badge/inclusionAI/AReaL.svg" alt="gitcgr" />
</a>
</p>

<img align="right" alt="ReaL" src="/assets/logo.png" width="20%">
<img align="right" alt="ReaL" src="/assets/figures/logo.png" width="20%">

**AReaL** is a reinforcement learning (RL) infrastructure designed to bridge foundation
model training with modern agent-based applications. It was originally developed by
2 changes: 1 addition & 1 deletion ROADMAP.md
@@ -211,4 +211,4 @@ ______________________________________________________________________

**Questions about the roadmap?** Open a discussion in
[GitHub Discussions](https://github.com/inclusionAI/AReaL/discussions) or ask in our
[WeChat group](./assets/wechat_qrcode.png).
[WeChat group](./assets/figures/wechat_qrcode.png).
17 changes: 13 additions & 4 deletions areal/engine/fsdp_utils/__init__.py
@@ -65,16 +65,25 @@ def apply_fsdp2(model, fsdp_kwargs, wrap_policy):
"PyTorch version >= 2.4 is required for using fully_shard API (FSDP2)"
)

default_transformer_cls_names_to_wrap = getattr(model, "_no_split_modules", list())
def _normalize_wrap_class_names(value):
if value is None:
return []
if isinstance(value, str):
return [value]
return value if isinstance(value, list) else list(value)

default_transformer_cls_names_to_wrap = _normalize_wrap_class_names(
getattr(model, "_no_split_modules", list())
)
fsdp_transformer_layer_cls_to_wrap = (
wrap_policy.transformer_layer_cls_to_wrap if wrap_policy is not None else list()
)
fsdp_transformer_layer_cls_to_wrap = _normalize_wrap_class_names(
fsdp_transformer_layer_cls_to_wrap
)
if not fsdp_transformer_layer_cls_to_wrap:
fsdp_transformer_layer_cls_to_wrap = default_transformer_cls_names_to_wrap

if isinstance(fsdp_transformer_layer_cls_to_wrap, str):
fsdp_transformer_layer_cls_to_wrap = [fsdp_transformer_layer_cls_to_wrap]

assert (
len(fsdp_transformer_layer_cls_to_wrap) > 0
and fsdp_transformer_layer_cls_to_wrap[0] is not None
34 changes: 34 additions & 0 deletions assets/community/README.md
@@ -0,0 +1,34 @@
# Community Meeting Materials

This folder stores materials for AReaL's community meetings and public presentations,
including recordings, agendas, and presentation slides. Community meetings are open to
everyone and serve as a space for project updates, feature discussions, and Q&A.

For background on how the project is governed and how to participate, please see:

- [Governance](../../GOVERNANCE.md) -- project roles, decision-making, and maintainers
- [Code of Conduct](../../CODE_OF_CONDUCT.md) -- community standards and reporting
- [Contributing Guide](../../CONTRIBUTING.md) -- how to get started with contributions

## Upcoming Meetings

| Date | Agenda | Slides | Recording |
| ---- | ------ | ------ | --------- |
| TBD | TBD | TBD | TBD |

## Past Meetings

| Date | Agenda | Slides | Recording |
| ---- | ------ | ------ | --------- |
| TBD | TBD | TBD | TBD |

## How to Add Materials

After each meeting, add a new row to the **Past Meetings** table with:

- **Date** -- meeting date in `YYYY-MM-DD` format
- **Agenda** -- link to the agenda document (e.g. Google Doc, Tencent Doc, Yuque Doc, or
a file in this folder)
- **Slides** -- link to the slide deck or place the PDF/PPTX in this folder and link it
here
- **Recording** -- link to the video recording (e.g. Zoom, Tencent Meeting, Dingding)
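For example, a filled-in row in the **Past Meetings** table might look like this (the date, file names, and link here are hypothetical placeholders):

```markdown
| 2025-01-15 | [Agenda](2025-01-15-agenda.md) | [Slides](2025-01-15-slides.pdf) | [Recording](https://example.com/areal-community-2025-01-15) |
```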
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
10 changes: 5 additions & 5 deletions blog/AReaL_v0_1.md
@@ -30,7 +30,7 @@ We observe that the response length first **shrinks in the 8K training stage**,
Our experiments are conducted on 16 nodes, each equipped with 8 H800 GPUs. The results,
along with the associated training curves, are presented below.

![16nodes_reward_length.png](/assets/distill_1.5b_24k_curve.png)
![16nodes_reward_length.png](/assets/figures/distill_1.5b_24k_curve.png)

*Figure 1. Training rewards and response lengths during RL training. The base model is
DeepSeek-R1-Distill-Qwen-1.5B. Curves are averaged with a window size of 25.*
@@ -82,7 +82,7 @@ accelerate RL training, significantly boosting research progress. We provide detailed
hardware requirements and environment setup guides for different configurations in
[our tutorials](/examples/README.md).

![hours.png](/assets/1.5b_time_n1n4n16.png)
![hours.png](/assets/figures/1.5b_time_n1n4n16.png)

*Figure 2. Total RL training time for 10 epochs across different resource
configurations.*
@@ -96,14 +96,14 @@ rewards and response lengths gradually increasing during training. This **simultaneous
growth of response lengths and rewards** suggests **emergent deep thinking
capabilities** in solving complex reasoning problems.

![7b_zero_training_curve.png](/assets/7b_zero_training_curve.png) *Figure 3.
![7b_zero_training_curve.png](/assets/figures/7b_zero_training_curve.png) *Figure 3.
Qwen2.5-7B-Zero RL training curve*

Evaluation of intermediate checkpoints on MATH500 and AIME24 datasets shows continuous
improvement in both accuracy and response length:

![7b_zero_eval_acc.png](/assets/7b_zero_eval_acc.png) *Figure 4. Test accuracy and
response length on MATH500 and AIME24 datasets*
![7b_zero_eval_acc.png](/assets/figures/7b_zero_eval_acc.png) *Figure 4. Test accuracy
and response length on MATH500 and AIME24 datasets*

Additional experiments on the
[DeepScaleR](https://github.com/agentica-project/deepscaler) dataset show similar
2 changes: 1 addition & 1 deletion blog/AReaL_v0_2.md
@@ -53,7 +53,7 @@ we randomized the answer options.

### Training Speed Comparison

![throughput_comparision_with_v0.1.0.png](/assets/thpt_comparison.png)
![throughput_comparison_with_v0.1.0.png](/assets/figures/thpt_comparison.png)

AReaL v0.2.0 features the following system optimizations:

16 changes: 8 additions & 8 deletions blog/AReaL_v0_3.md
@@ -58,7 +58,7 @@ model version used for rollout generation is limited to only one or two steps old.
However, all these systems still follow a batched generation setting, so the
inefficiency of the generation phase remains unaddressed.

![](/assets/sync_one_step_gen.png)
![](/assets/figures/sync_one_step_gen.png)

*Fig.1. Left: Execution timeline of a synchronous RL training. Right: Execution timeline
of one-step overlap RL system.*
@@ -69,7 +69,7 @@ Synchronous systems distribute generation across all devices, reducing the per-GPU
decoding batch size. This pushes the decoding process into a memory-IO-bound regime
where additional devices fail to improve throughput.

![](/assets/gen_scaling_trend.png)
![](/assets/figures/gen_scaling_trend.png)

*Fig2. Left: Strong scaling of batched generation throughput for a 1.5B LRM. Right:
Generation becomes memory-IO bound as GPU count increases.*
@@ -82,7 +82,7 @@ flexibility for a customized RL workflow. We implement these principles in AReaL. Fig.3
presents the architecture and data flow of AReaL. The system comprises 4 core
components:

![](/assets/arch.png)
![](/assets/figures/arch.png)

*Fig.3 The architecture featuring asynchronous generation and training components.*

@@ -117,7 +117,7 @@ components:
Fig.4. This asynchronous pipeline ensures continuous full utilization of both
generation and training resources.

![](/assets/async_timeline.png)
![](/assets/figures/async_timeline.png)

*Fig 4. Execution timeline of our fully asynchronous RL system.*

@@ -156,7 +156,7 @@ To overcome these two challenges, we propose two solutions:
represents the policy used for sampling trajectories and the proxy policy is a
proximal policy serving as a recent target to regularize the update of online policy.

![](/assets/decoupled_ppo_obj.png)
![](/assets/figures/decoupled_ppo_obj.png)
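
The image above embeds the objective; for readability, a hedged reconstruction of the decoupled PPO objective from the surrounding text (where $\pi_{\text{behav}}$ is the behavior policy that sampled the trajectory and $\pi_{\text{prox}}$ is the proxy policy) reads:

```latex
J(\theta) = \mathbb{E}_{a_t \sim \pi_{\text{behav}}}\!\left[
  \frac{\pi_{\text{prox}}(a_t \mid s_t)}{\pi_{\text{behav}}(a_t \mid s_t)}
  \min\!\Big( r_t(\theta)\,\hat{A}_t,\;
  \operatorname{clip}\big(r_t(\theta),\, 1-\varepsilon,\, 1+\varepsilon\big)\,\hat{A}_t \Big)
\right],
\quad
r_t(\theta) = \frac{\pi_\theta(a_t \mid s_t)}{\pi_{\text{prox}}(a_t \mid s_t)}
```

That is, importance weights and clipping are computed against the proxy policy rather than the (possibly stale) behavior policy; consult the linked paper for the authoritative formulation.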

## Validating Asynchronous AReaL

@@ -200,7 +200,7 @@ Fig.5. Without interruptible generation, the controller must wait for the longest
response. In particular, interruptible generation leads to a 12% and 17% throughput
increase for 1.5B and 7B models respectively on 4 nodes.

![](/assets/interrupt_gen_ablation.png)
![](/assets/figures/interrupt_gen_ablation.png)

*Fig.5 Ablation study of interruptible generation.*

@@ -214,7 +214,7 @@ final performance due to the improper clipping center and policy changes during
interruptible generation. Furthermore, increasing data staleness consistently degrades
learning performance.

![](/assets/algo_ablation.png)
![](/assets/figures/algo_ablation.png)

*Fig.6 Ablation Study on Decoupled PPO Objective with DeepSeek-R1-Distill-Qwen-1.5B.
Left: Learning curves with naive PPO. Right: Learning curves with decoupled PPO
@@ -245,7 +245,7 @@ zero-staleness oracle. When properly constrained, moderate staleness (e.g., η
minimal impact on final performance while significantly accelerating training through
the asynchronous pipeline, as demonstrated in Tab.3 and Fig.7.

![](/assets/staleness_throughput.png)
![](/assets/figures/staleness_throughput.png)

*Fig.7 The relationship between η and training throughput. Larger η leads to higher
throughput.*
8 changes: 4 additions & 4 deletions docs/en/tutorial/installation.md
@@ -24,7 +24,7 @@ The following hardware configuration has been extensively tested:
| Git LFS | Required for downloading models, datasets, and AReaL code. See [installation guide](https://docs.github.com/en/repositories/working-with-files/managing-large-files/installing-git-large-file-storage) |
| Docker | 27.5.1 |
| NVIDIA Container Toolkit | See [installation guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) |
| AReaL Image | `ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang` (default) or `v1.0.2-vllm`. Includes runtime dependencies and Ray components. |
| AReaL Image | `ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang` (default) or `v1.0.3-vllm`. Includes runtime dependencies and Ray components. |

**Note**: This tutorial does not cover the installation of NVIDIA Drivers, CUDA, or
shared storage mounting, as these depend on your specific node configuration and system
@@ -42,19 +42,19 @@ We recommend using Docker with our provided image. The Dockerfile is available in
top-level directory of the AReaL repository.

```bash
docker pull ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang
docker pull ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang
docker run -it --name areal-node1 \
--privileged --gpus all --network host \
--shm-size 700g -v /path/to/mount:/path/to/mount \
ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang \
ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang \
/bin/bash
git clone https://github.com/inclusionAI/AReaL /path/to/mount/AReaL
cd /path/to/mount/AReaL
uv pip install -e . --no-deps
```

A vLLM variant of the Docker image is also available at
`ghcr.io/inclusionai/areal-runtime:v1.0.2-vllm`. Replace the image tag in the commands
`ghcr.io/inclusionai/areal-runtime:v1.0.3-vllm`. Replace the image tag in the commands
above if you prefer vLLM as the inference backend.

### Option 2: Custom Environment Installation
8 changes: 4 additions & 4 deletions docs/zh/tutorial/installation.md
@@ -24,7 +24,7 @@
| Git LFS | 用于下载模型、数据集和 AReaL 代码。请参阅[安装指南](https://docs.github.com/en/repositories/working-with-files/managing-large-files/installing-git-large-file-storage) |
| Docker | 27.5.1 |
| NVIDIA Container Toolkit | 请参阅[安装指南](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) |
| AReaL 镜像 | `ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang`(默认)或 `v1.0.2-vllm`。包含运行时依赖和 Ray 组件。 |
| AReaL 镜像 | `ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang`(默认)或 `v1.0.3-vllm`。包含运行时依赖和 Ray 组件。 |

**注意**:本教程不涵盖 NVIDIA 驱动、CUDA 或共享存储挂载的安装,因为这些取决于您具体的节点配置和系统版本。请独立完成这些安装。

@@ -37,18 +37,18 @@
我们推荐使用 Docker 和提供的镜像。Dockerfile 位于 AReaL 仓库的顶级目录。

```bash
docker pull ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang
docker pull ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang
docker run -it --name areal-node1 \
--privileged --gpus all --network host \
--shm-size 700g -v /path/to/mount:/path/to/mount \
ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang \
ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang \
/bin/bash
git clone https://github.com/inclusionAI/AReaL /path/to/mount/AReaL
cd /path/to/mount/AReaL
uv pip install -e . --no-deps
```

vLLM 变体的 Docker 镜像也可使用: `ghcr.io/inclusionai/areal-runtime:v1.0.2-vllm`。如果您偏好使用 vLLM
vLLM 变体的 Docker 镜像也可使用: `ghcr.io/inclusionai/areal-runtime:v1.0.3-vllm`。如果您偏好使用 vLLM
作为推理后端,请将上述命令中的镜像标签替换为该变体。

### 方式 2:自定义环境安装
4 changes: 2 additions & 2 deletions examples/skypilot/README.md
@@ -25,7 +25,7 @@ resources:
cpus: 8+
memory: 32GB+
disk_size: 256GB
image_id: docker:ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang
image_id: docker:ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang

num_nodes: 1

@@ -78,7 +78,7 @@ Specify the resources and image used to run the experiment.
```yaml
resources:
accelerators: A100:8
image_id: docker:ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang
image_id: docker:ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang
memory: 256+
cpus: 32+

2 changes: 1 addition & 1 deletion examples/skypilot/ray_cluster.sky.yaml
@@ -1,7 +1,7 @@

resources:
accelerators: A100:8
image_id: docker:ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang
image_id: docker:ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang
memory: 32+
cpus: 8+

2 changes: 1 addition & 1 deletion examples/skypilot/single_node.sky.yaml
@@ -8,7 +8,7 @@ resources:
cpus: 8+
memory: 32GB+
disk_size: 256GB
image_id: docker:ghcr.io/inclusionai/areal-runtime:v1.0.2-sglang
image_id: docker:ghcr.io/inclusionai/areal-runtime:v1.0.3-sglang

num_nodes: 1

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -8,7 +8,7 @@ description = "AReaL: A Large-Scale Asynchronous Reinforcement Learning System"
readme = "README.md"
license = {text = "Apache-2.0"}
requires-python = ">=3.11,<3.13"
version = "1.0.2"
version = "1.0.3"
authors = [
{name = "AReaL Team"},
]
2 changes: 1 addition & 1 deletion pyproject.vllm.toml
@@ -24,7 +24,7 @@ description = "AReaL: A Large-Scale Asynchronous Reinforcement Learning System"
readme = "README.md"
license = {text = "Apache-2.0"}
requires-python = ">=3.11,<3.13"
version = "1.0.2"
version = "1.0.3"
authors = [
{name = "AReaL Team"},
]
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion uv.vllm.lock
@@ -286,7 +286,7 @@ wheels = [

[[package]]
name = "areal"
version = "1.0.2"
version = "1.0.3"
source = { editable = "." }
dependencies = [
{ name = "aiofiles", marker = "(platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },