Infini-AI-Lab · haizhongzheng · May 25, 2026 · May 25, 2026
@@ -234,7 +234,14 @@ def _create_rollout_dataloader(self, agent_config: AgentConfig) -> Any:
         """
         ds_cfg = agent_config.rollout_dataset
         tokenizer = self._load_tokenizer(agent_config)
-        dataset = _create_dataset_from_config(ds_cfg, tokenizer)
+        # For offline_dir derivation, the rollout's logical name is
+        # ``dataset_name`` if specified, else the dataset_fn module name
+        # (e.g. ``deepscaler`` from
+        # ``astraflow.dataflow.dataset.deepscaler:get_deepscaler_rl_dataset``).
+        name = ds_cfg.get("dataset_name") or _module_basename(ds_cfg.get("dataset_fn", ""))
+        dataset = _create_dataset_from_config(
+            ds_cfg, tokenizer, data_root=agent_config.data_root, name=name,
+        )
 
         batch_size = ds_cfg.get("batch_size", 1)
         return _create_dataloader(dataset, batch_size=batch_size)
@@ -294,7 +301,9 @@ def _create_eval_datasets(
                     f"and no legacy eval_workflow_specs fallback is available"
                 )
 
-            dataset = _create_dataset_from_config(ds_cfg, tokenizer)
+            dataset = _create_dataset_from_config(
+                ds_cfg, tokenizer, data_root=agent_config.data_root, name=name,
+            )
             eval_datasets[name] = (dataset, repeat, wf)
 
         return eval_datasets
@@ -1542,13 +1551,30 @@ def _import_function(import_path: str) -> Any:
     return getattr(module, func_name)
 
 
-def _create_dataset_from_config(ds_cfg: dict[str, Any], tokenizer: Any) -> Any:
+def _module_basename(dataset_fn_path: str) -> str | None:
+    """Return the last module component of a ``module.path:fn`` import path."""
+    module_path, _, _ = dataset_fn_path.rpartition(":")
+    if not module_path:
+        return None
+    return module_path.rsplit(".", 1)[-1]
+
+
+def _create_dataset_from_config(
+    ds_cfg: dict[str, Any],
+    tokenizer: Any,
+    data_root: str | None = None,
+    name: str | None = None,
+) -> Any:
     """Create a dataset from a config dict using ``dataset_fn``.
 
     The ``dataset_fn`` field is a Python import path like
     ``"astraflow.dataflow.dataset.deepscaler:get_deepscaler_rl_dataset"``.
     Extra fields in ``ds_cfg`` are forwarded as kwargs when supported by
     the target dataset function.
+
+    If ``data_root`` is set and ``ds_cfg`` does not specify ``offline_dir``,
+    one is auto-derived as ``f"{data_root}/{name}"`` — making it easy to
+    flip a recipe between online and offline by setting a single config field.
     """
     dataset_fn_path = ds_cfg.get("dataset_fn")
     if dataset_fn_path is None:
@@ -1564,6 +1590,13 @@ def _create_dataset_from_config(ds_cfg: dict[str, Any], tokenizer: Any) -> Any:
     }
     kwargs.setdefault("tokenizer", tokenizer)
 
+    if data_root and name and "offline_dir" not in kwargs:
+        kwargs["offline_dir"] = f"{data_root}/{name}"
+        logger.info(
+            "Auto-derived offline_dir for dataset %r: %s",
+            name, kwargs["offline_dir"],
+        )
+
     sig = inspect.signature(dataset_fn)
     accepts_var_kwargs = any(
         p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()
@@ -1572,7 +1605,7 @@ def _create_dataset_from_config(ds_cfg: dict[str, Any], tokenizer: Any) -> Any:
         return dataset_fn(**kwargs)
 
     filtered_kwargs = {
-        name: kwargs[name] for name in sig.parameters if name in kwargs
+        pname: kwargs[pname] for pname in sig.parameters if pname in kwargs
     }
     return dataset_fn(**filtered_kwargs)
 

@@ -37,6 +37,19 @@ class AgentConfig:
     tokenizer_path: str | None = None
     """Path to tokenizer (HuggingFace model name or local path)."""
 
+    data_root: str | None = None
+    """Root directory for pre-downloaded datasets (offline mode).
+
+    When set, every entry in ``rollout_dataset`` and ``eval_datasets``
+    that does not already specify ``offline_dir`` gets one auto-derived
+    as ``f"{data_root}/{name}"`` — where ``name`` is the dict key for
+    eval datasets, and the value of ``dataset_name`` (falling back to
+    the dataset_fn module name) for the rollout dataset.
+
+    Use ``examples/math/offline/download_math_datasets.py`` to populate
+    this directory.
+    """
+
     rollout_dataset: dict[str, Any] | None = None
     """Dataset config for rollout data acquisition.
 

@@ -26,6 +26,7 @@ on distributed GPU clusters.
    :caption: Recipes
 
    recipes/math
+   recipes/math-offline
    recipes/code
    recipes/multi-agent
    recipes/agentbench

@@ -0,0 +1,74 @@
+# Math (Offline)
+
+Run the math RL recipe on a node with **no internet access** by pre-downloading every training and evaluation dataset to a local directory.
+
+**Recipe**: [`examples/math/offline/qwen3-8b-m2po-full-offline/`](https://github.com/Infini-AI-Lab/astraflow/tree/main/examples/math/offline/qwen3-8b-m2po-full-offline)
+
+**Downloader**: [`examples/math/offline/download_math_datasets.py`](https://github.com/Infini-AI-Lab/astraflow/tree/main/examples/math/offline/download_math_datasets.py)
+
+This is the same Qwen3-8B / M2PO / TCP recipe as [Math](math.md), with one difference: at startup the AstraFlow service loads every dataset from disk instead of fetching from the HuggingFace Hub.
+
+## 1. One-time prep — download datasets
+
+From the repo root:
+
+```bash
+python examples/math/offline/download_math_datasets.py --root data-data/math
+```
+
+This writes 8 dataset directories under `data-data/math/` (~400 MB total) plus a `MANIFEST.json`:
+
+| Directory       | HF source                                   | Split | Use     |
+|-----------------|---------------------------------------------|-------|---------|
+| `deepscaler`    | `agentica-org/DeepScaleR-Preview-Dataset`   | train | rollout |
+| `dapo_filter`   | `aaabiao/dapo_filter`                       | train | rollout |
+| `aime24`        | `HuggingFaceH4/aime_2024`                   | train | eval    |
+| `aime25`        | `math-ai/aime25`                            | test  | eval    |
+| `amc`           | `rawsh/2024_AMC12`                          | train | eval    |
+| `math500`       | `HuggingFaceH4/MATH-500`                    | test  | eval    |
+| `minerva`       | `math-ai/minervamath`                       | test  | eval    |
+| `olympiadbench` | `math-ai/olympiadbench`                     | test  | eval    |
+
+Re-running is idempotent (skips populated dirs). Useful flags:
+
+- `--force` — re-download even if a directory exists
+- `--only deepscaler,aime24` — partial subset
+- `--verify` — skip download; just load each from disk and assert non-empty
+
+## 2. Run training
+
+```bash
+bash examples/math/offline/qwen3-8b-m2po-full-offline/scripts/run_qwen3-8b-m2po-full-offline.sh
+```
+
+You can confirm the offline path is active by looking for these lines in the AstraFlow service log:
+
+```text
+Auto-derived offline_dir for dataset 'deepscaler': data-data/math/deepscaler
+Loading DeepScaleR dataset from offline path: data-data/math/deepscaler
+Auto-derived offline_dir for dataset 'aime24': data-data/math/aime24
+... (same for aime25, amc, minerva, math500)
+```
+
+## How it works
+
+The recipe's `experiment.yaml` sets a single field under `dataflow`:
+
+```yaml
+dataflow:
+  data_root: data-data/math
+```
+
+At startup `astraflow.dataflow.service` walks every entry in `rollout_dataset` and `eval_datasets`; for each one that does not already specify `offline_dir`, it auto-derives `offline_dir = f"{data_root}/{name}"`. The `name` is:
+
+- the **dict key** for eval datasets (`aime24`, `aime25`, `amc`, `minerva`, `math500`)
+- the **`dataset_name`** value if the rollout entry sets one, otherwise the **`dataset_fn` module basename**, for the rollout dataset (`deepscaler` from `astraflow.dataflow.dataset.deepscaler:get_deepscaler_rl_dataset`)
+
+The downloader uses the same naming convention, so the two sides stay in sync. To opt a single dataset out — e.g. point one eval at a different snapshot — just set `offline_dir:` explicitly on that entry; explicit values always win.
+
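+To convert any other recipe to offline mode, add the same `dataflow.data_root` field and pre-download its datasets under that root using the directory names described above. Note that the derived `offline_dir` is only passed to dataset functions whose signature accepts an `offline_dir` parameter (or `**kwargs`); for any other `dataset_fn` it is dropped and the function is called without it.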
+
+## Caveats
+
+- **Model and tokenizer weights are *not* covered** by the dataset downloader. `model_path` / `tokenizer_path` still point at `Qwen/Qwen3-8B` and resolve via the HuggingFace cache. For a fully air-gapped run, pre-fetch them with `huggingface-cli download Qwen/Qwen3-8B --local-dir /local/models/Qwen3-8B` and edit the two paths in `experiment.yaml`.
+- The downloader needs internet at prep time. Once `data-data/math/` is populated, training itself works with `HF_HUB_OFFLINE=1` / `HF_DATASETS_OFFLINE=1`.
@@ -0,0 +1,61 @@
+# Offline math datasets
+
+Pre-download every dataset used by the math recipes so training can run on
+a node with no internet access.
+
+## 1. Download (one-time)
+
+From the repo root:
+
+```bash
+python examples/math/offline/download_math_datasets.py --root data-data/math
+```
+
+This writes 8 dataset directories under `data-data/math/` and a
+`MANIFEST.json` summary.  Re-running is a no-op (skips populated dirs);
+pass `--force` to re-download, or `--only deepscaler,aime24` for a subset.
+
+| dir              | HF source                                   | split | use     |
+|------------------|---------------------------------------------|-------|---------|
+| `deepscaler`     | `agentica-org/DeepScaleR-Preview-Dataset`   | train | rollout |
+| `dapo_filter`    | `aaabiao/dapo_filter`                       | train | rollout |
+| `aime24`         | `HuggingFaceH4/aime_2024`                   | train | eval    |
+| `aime25`         | `math-ai/aime25`                            | test  | eval    |
+| `amc`            | `rawsh/2024_AMC12`                          | train | eval    |
+| `math500`        | `HuggingFaceH4/MATH-500`                    | test  | eval    |
+| `minerva`        | `math-ai/minervamath`                       | test  | eval    |
+| `olympiadbench`  | `math-ai/olympiadbench`                     | test  | eval    |
+
+## 2. Verify
+
+```bash
+python examples/math/offline/download_math_datasets.py --verify
+```
+
+Loads every directory with `load_from_disk` and prints row counts; exits
+non-zero if any dataset is missing or empty.
+
+## 3. Run training with offline data
+
+The matching recipe is `examples/math/offline/qwen3-8b-m2po-full-offline/`.  Its
+`experiment.yaml` sets `dataflow.data_root: data-data/math`, which causes
+`astraflow.dataflow.service` to auto-derive each loader's `offline_dir`
+as `data-data/math/<name>` (the dict key for evals; `dataset_name` if set,
+else the `dataset_fn` module name, for the rollout).  No per-entry edits required.
+
+```bash
+bash examples/math/offline/qwen3-8b-m2po-full-offline/scripts/run_qwen3-8b-m2po-full-offline.sh
+```
+
+## Notes
+
+- **Model weights are *not* covered.**  `model_path` / `tokenizer_path`
+  still point at `Qwen/Qwen3-8B` and will be pulled from HF Hub on first
+  use.  Either let HF cache them once, or pre-fetch with
+  `huggingface-cli download Qwen/Qwen3-8B` and point the YAML at the
+  local snapshot for a fully air-gapped run.
+- Convention: a dataset directory name under `--root` must match the
+  `name` passed to `_create_dataset_from_config` (eval dict key; for the
+  rollout, `dataset_name` if set, else the `dataset_fn` module basename).
+  The download script's `MATH_DATASETS` table and the service's
+  derivation use the same names, so they stay in sync.