From 8c157596b7cc7894f3763a7a484f067bbb62a43f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= <yuze.zyz@alibaba-inc.com>
Date: Mon, 29 Jun 2026 19:21:30 +0800
Subject: [PATCH 1/7] wip

---
 .dev_scripts/ci_container_test.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.dev_scripts/ci_container_test.sh b/.dev_scripts/ci_container_test.sh
index 4e8c464d..7fb5b9f4 100644
--- a/.dev_scripts/ci_container_test.sh
+++ b/.dev_scripts/ci_container_test.sh
@@ -1,5 +1,5 @@
 install_twinkle_with_kernels() {
-    pip install ".[kernels,test,tinker]" -i https://mirrors.aliyun.com/pypi/simple/ || pip install ".[kernels,test,tinker]"
+    pip install ".[test,client,server]" -i https://mirrors.aliyun.com/pypi/simple/ || pip install ".[test,client,server]"
 }
 
 if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
@@ -28,6 +28,9 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
     pip uninstall tensorflow -y
     # Pin kernels<0.15 to avoid transformers' hub_kernels.py LayerRepository
     # crash (huggingface/transformers#46291).
+    # Also pin huggingface_hub<0.31 to avoid strict dataclass validator
+    # rejecting PEP 604 union types (str | None) used in kernels.
+    pip install 'huggingface_hub<0.31'
     pip install 'kernels<0.15'
     pip install ray==2.48
     pip install optimum
@@ -41,6 +44,7 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
 else
     install_twinkle_with_kernels
     # Same kernels pin and peft bump for the release-image branch.
+    pip install 'huggingface_hub<0.31'
     pip install 'kernels<0.15'
     pip install --upgrade 'peft>=0.19.1'
     echo "Running case in release image, run case directly!"

From 53f810bbd8468e6da6a94a3c0302a83c33222af2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= <yuze.zyz@alibaba-inc.com>
Date: Mon, 29 Jun 2026 19:25:01 +0800
Subject: [PATCH 2/7] fix

---
 .github/workflows/citest.yaml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/citest.yaml b/.github/workflows/citest.yaml
index bd560302..4e16ebdc 100644
--- a/.github/workflows/citest.yaml
+++ b/.github/workflows/citest.yaml
@@ -11,7 +11,6 @@ on:
       - "requirements/**"
       - "docs/**"
       - "tools/**"
-      - ".dev_scripts/**"
       - "README.md"
       - "README_*.md"
       - "NOTICE"
@@ -25,7 +24,6 @@ on:
       - "requirements/**"
       - "docs/**"
       - "tools/**"
-      - ".dev_scripts/**"
       - "README.md"
       - "README_*.md"
       - "NOTICE"

From 439a558ef6baadab075fcff8c229b66a8dd21770 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= <yuze.zyz@alibaba-inc.com>
Date: Mon, 29 Jun 2026 19:30:35 +0800
Subject: [PATCH 3/7] fix

---
 .dev_scripts/ci_container_test.sh | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/.dev_scripts/ci_container_test.sh b/.dev_scripts/ci_container_test.sh
index 7fb5b9f4..0a785e4b 100644
--- a/.dev_scripts/ci_container_test.sh
+++ b/.dev_scripts/ci_container_test.sh
@@ -26,12 +26,6 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
     pip uninstall autoawq -y
     pip uninstall lmdeploy -y
     pip uninstall tensorflow -y
-    # Pin kernels<0.15 to avoid transformers' hub_kernels.py LayerRepository
-    # crash (huggingface/transformers#46291).
-    # Also pin huggingface_hub<0.31 to avoid strict dataclass validator
-    # rejecting PEP 604 union types (str | None) used in kernels.
-    pip install 'huggingface_hub<0.31'
-    pip install 'kernels<0.15'
     pip install ray==2.48
     pip install optimum
 
@@ -41,12 +35,18 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
     # `from transformers import HybridCache` at peft_model.py:37 which
     # crashes on transformers v5. 0.19.1 dropped that top-level import.
     pip install --upgrade 'peft>=0.19.1'
+    # Pin huggingface_hub AFTER main install to prevent transitive upgrade.
+    # kernels<0.15 uses str | None (PEP 604) which newer huggingface_hub's
+    # strict dataclass validator rejects (huggingface/transformers#46291).
+    pip install 'huggingface_hub<0.31'
+    pip install 'kernels<0.15'
 else
     install_twinkle_with_kernels
     # Same kernels pin and peft bump for the release-image branch.
+    pip install --upgrade 'peft>=0.19.1'
+    # Pin huggingface_hub AFTER main install (same reason as debug branch).
     pip install 'huggingface_hub<0.31'
     pip install 'kernels<0.15'
-    pip install --upgrade 'peft>=0.19.1'
     echo "Running case in release image, run case directly!"
 fi
 # remove torch_extensions folder to avoid ci hang.

From 0e54dad014b2506ea3c93eed13506d197d100d9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= <yuze.zyz@alibaba-inc.com>
Date: Mon, 29 Jun 2026 19:37:21 +0800
Subject: [PATCH 4/7] fix

---
 .dev_scripts/ci_container_test.sh | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/.dev_scripts/ci_container_test.sh b/.dev_scripts/ci_container_test.sh
index 0a785e4b..beb5fe53 100644
--- a/.dev_scripts/ci_container_test.sh
+++ b/.dev_scripts/ci_container_test.sh
@@ -35,18 +35,16 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
     # `from transformers import HybridCache` at peft_model.py:37 which
     # crashes on transformers v5. 0.19.1 dropped that top-level import.
     pip install --upgrade 'peft>=0.19.1'
-    # Pin huggingface_hub AFTER main install to prevent transitive upgrade.
-    # kernels<0.15 uses str | None (PEP 604) which newer huggingface_hub's
-    # strict dataclass validator rejects (huggingface/transformers#46291).
-    pip install 'huggingface_hub<0.31'
-    pip install 'kernels<0.15'
+    # Uninstall kernels: kernels>=0.15 crashes transformers' hub_kernels.py
+    # (huggingface/transformers#46291), and kernels<0.15 requires
+    # huggingface_hub>=1.10.0 which conflicts with transformers' <1.0 cap.
+    # transformers gracefully skips hub_kernels when kernels is absent.
+    pip uninstall kernels kernels-data -y 2>/dev/null || true
 else
     install_twinkle_with_kernels
-    # Same kernels pin and peft bump for the release-image branch.
+    # Same peft bump and kernels removal for the release-image branch.
     pip install --upgrade 'peft>=0.19.1'
-    # Pin huggingface_hub AFTER main install (same reason as debug branch).
-    pip install 'huggingface_hub<0.31'
-    pip install 'kernels<0.15'
+    pip uninstall kernels kernels-data -y 2>/dev/null || true
     echo "Running case in release image, run case directly!"
 fi
 # remove torch_extensions folder to avoid ci hang.

From 3fda95cd11014e78eb63211dd2b5c0710d51578d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= <yuze.zyz@alibaba-inc.com>
Date: Mon, 29 Jun 2026 20:19:55 +0800
Subject: [PATCH 5/7] fix ci

---
 docs/source_en/Usage Guide/Quick-Start.md     |  2 +-
 ...53\351\200\237\345\274\200\345\247\213.md" |  2 +-
 src/twinkle/dataset/base.py                   |  5 +-
 src/twinkle/kernel/__init__.py                |  2 +-
 src/twinkle/kernel/base.py                    |  2 +
 src/twinkle/template/base.py                  |  3 ++
 .../preprocessor/refuse_filter.py             |  2 +-
 tests/dataloader/test_dataloader.py           |  9 ++--
 tests/dataloader/test_multimodal.py           |  9 ++--
 tests/dataset/test_lazy.py                    | 17 +++---
 tests/dataset/test_loading.py                 |  2 +
 tests/dataset/test_multimodal.py              | 53 ++++++++++---------
 tests/dataset/test_packing.py                 | 11 ++--
 tests/dataset/test_ray.py                     | 14 ++---
 tests/kernel/test_kernel.py                   |  6 +++
 tests/preprocessor/test_refuse_filter.py      |  3 +-
 tests/preprocessor/test_token_soup.py         | 27 +++++-----
 .../server/contract/client_api_baseline.json  | 18 +++++++
 tests/template/test_deepseek_v4_tool_call.py  |  4 +-
 .../twinkle_agentic/test_extract_condensed.py |  2 +-
 tests/twinkle_agentic/test_model_condenser.py | 25 ++++-----
 21 files changed, 134 insertions(+), 84 deletions(-)

diff --git a/docs/source_en/Usage Guide/Quick-Start.md b/docs/source_en/Usage Guide/Quick-Start.md
index 70747391..ff7b8727 100644
--- a/docs/source_en/Usage Guide/Quick-Start.md	
+++ b/docs/source_en/Usage Guide/Quick-Start.md	
@@ -473,7 +473,7 @@ python train.py
 
 A major feature of Twinkle is support for multi-tenant mixed training. Specifically, multiple users can use a single base model for LoRA training, which can greatly reduce server-side deployment costs.
 
-Checkpoint resumption is also supported in client-server training. The recommended flow is to call `model.resume_from_checkpoint(resume_path)` to restore weights and optimizer state, then call `dataloader.resume_from_checkpoint(progress['consumed_train_samples'])` to skip consumed data. See [Twinkle-Client](./Server%20and%20Client/Twinkle-Client.md) and [self_cognition.py](../../../cookbook/server_mode/twinkle/self_host/self_cognition.py).
+Checkpoint resumption is also supported in client-server training. The recommended flow is to call `model.resume_from_checkpoint(resume_path)` to restore weights and optimizer state, then call `dataloader.resume_from_checkpoint(progress['consumed_train_samples'])` to skip consumed data. See [Twinkle-Client](./Server%20and%20Client/Twinkle-Client.md) and [self_cognition.py](https://github.com/modelscope/twinkle/blob/main/cookbook/server_mode/twinkle/self_host/self_cognition.py).
 
 Suppose we start a service using eight GPUs. First, we need to start the Ray cluster:
 
diff --git "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\345\277\253\351\200\237\345\274\200\345\247\213.md" "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\345\277\253\351\200\237\345\274\200\345\247\213.md"
index 3bc5c4ba..11cb5bc9 100644
--- "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\345\277\253\351\200\237\345\274\200\345\247\213.md"
+++ "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\345\277\253\351\200\237\345\274\200\345\247\213.md"
@@ -472,7 +472,7 @@ python train.py
 ```
 
 ### 远程训练
-client-server 训练场景同样支持断点续训。推荐流程是调用 `model.resume_from_checkpoint(resume_path)` 恢复权重和优化器状态，再调用 `dataloader.resume_from_checkpoint(progress['consumed_train_samples'])` 跳过已消费数据。详细示例可参考 [Twinkle客户端](./服务端和客户端/Twinkle客户端.md) 和 [self_cognition.py](../../../cookbook/server_mode/twinkle/self_host/self_cognition.py)。
+client-server 训练场景同样支持断点续训。推荐流程是调用 `model.resume_from_checkpoint(resume_path)` 恢复权重和优化器状态，再调用 `dataloader.resume_from_checkpoint(progress['consumed_train_samples'])` 跳过已消费数据。详细示例可参考 [Twinkle客户端](./服务端和客户端/Twinkle客户端.md) 和 [self_cognition.py](https://github.com/modelscope/twinkle/blob/main/cookbook/server_mode/twinkle/self_host/self_cognition.py)。
 
 Twinkle 的一大特色是支持多租户用户混合训练。具体来说，多个用户可以使用一个基模进行 LoRA 训练，这样可以极大减小服务端部署成本。
 
diff --git a/src/twinkle/dataset/base.py b/src/twinkle/dataset/base.py
index 5e88a6cf..c0fceb52 100644
--- a/src/twinkle/dataset/base.py
+++ b/src/twinkle/dataset/base.py
@@ -331,12 +331,13 @@ def mix_dataset(self, interleave=True):
                 dataset_types) or not any(dataset_types), 'All datasets must be all streaming=True or streaming=False'
             if not any(dataset_types):
                 dsets = list(self.datasets.values())
-                # Align features
+                # Align features: only cast when columns match but types differ
                 ref_features = dsets[0].features
                 aligned = []
                 for ds in dsets:
                     if ds.features != ref_features:
-                        ds = ds.cast(ref_features)
+                        if sorted(ds.features.keys()) == sorted(ref_features.keys()):
+                            ds = ds.cast(ref_features)
                     aligned.append(ds)
             else:
                 aligned = list(self.datasets.values())
diff --git a/src/twinkle/kernel/__init__.py b/src/twinkle/kernel/__init__.py
index c7262eb0..1fe787a6 100644
--- a/src/twinkle/kernel/__init__.py
+++ b/src/twinkle/kernel/__init__.py
@@ -104,7 +104,7 @@ def _is_npu_device(model=None) -> bool:
             param_device = next(model.parameters()).device
             if param_device.type == 'npu':
                 return True
-        except StopIteration:
+        except (StopIteration, TypeError):
             pass
 
     # Priority 2: Fallback to global NPU availability
diff --git a/src/twinkle/kernel/base.py b/src/twinkle/kernel/base.py
index 6da669d5..b06c4a7e 100644
--- a/src/twinkle/kernel/base.py
+++ b/src/twinkle/kernel/base.py
@@ -54,6 +54,8 @@ def to_kernels_mode(mode: ModeType) -> Any:
 
 
 def validate_mode(mode: str) -> None:
+    if not is_kernels_available():
+        return
     from kernels.layer.mode import Mode
     mode = to_kernels_mode(mode)
 
diff --git a/src/twinkle/template/base.py b/src/twinkle/template/base.py
index a809c88b..3c6c29f6 100644
--- a/src/twinkle/template/base.py
+++ b/src/twinkle/template/base.py
@@ -237,6 +237,9 @@ def _to_standard_reasoning_content(self, trajectory: Trajectory) -> List[Traject
         def _extract_reasoning_content(messages: list[Message]) -> List[Message]:
             result = []
             for message in messages:
+                if not isinstance(message, dict):
+                    result.append(message)
+                    continue
                 message = message.copy()
                 if message.get('role') == 'assistant':
                     content = message.get('content', '')
diff --git a/src/twinkle_agentic/preprocessor/refuse_filter.py b/src/twinkle_agentic/preprocessor/refuse_filter.py
index f2e7de64..842aae12 100644
--- a/src/twinkle_agentic/preprocessor/refuse_filter.py
+++ b/src/twinkle_agentic/preprocessor/refuse_filter.py
@@ -116,7 +116,7 @@ def _text(content: Any) -> str:
     return content if isinstance(content, str) else ''
 
 
-def _is_refusal(text: str, check_window: int) -> bool:
+def _is_refusal(text: str, check_window: int = 600) -> bool:
     """Return True if the text contains a self-referential refusal signal."""
     window = text[:check_window]
     return any(p.search(window) for p in _ALL_PATTERNS)
diff --git a/tests/dataloader/test_dataloader.py b/tests/dataloader/test_dataloader.py
index 2da0a4f8..ba5b5baf 100644
--- a/tests/dataloader/test_dataloader.py
+++ b/tests/dataloader/test_dataloader.py
@@ -30,9 +30,12 @@ def _disable_process_pool(monkeypatch):
 SKIP_MODEL_DOWNLOAD = os.getenv('SKIP_MODEL_DOWNLOAD', 'false').lower() == 'true'
 
 
-def convert_to_messages(example):
-    text = example.get('text', '')
-    return {'messages': [Message(role='user', content=text), Message(role='assistant', content='Response')]}
+def convert_to_messages(examples):
+    """Batched map function: receives dict of lists, returns dict of lists."""
+    messages_batch = []
+    for text in examples.get('text', []):
+        messages_batch.append([Message(role='user', content=text), Message(role='assistant', content='Response')])
+    return {'messages': messages_batch}
 
 
 def _build_resume_rows():
diff --git a/tests/dataloader/test_multimodal.py b/tests/dataloader/test_multimodal.py
index 0031b150..27d87b9f 100644
--- a/tests/dataloader/test_multimodal.py
+++ b/tests/dataloader/test_multimodal.py
@@ -14,9 +14,12 @@
 SKIP_MODEL_DOWNLOAD = os.getenv('SKIP_MODEL_DOWNLOAD', 'false').lower() == 'true'
 
 
-def create_multimodal_messages(example):
-    text = example.get('text', '')
-    return {'messages': [{'role': 'user', 'content': f'<image>\n{text}'}, {'role': 'assistant', 'content': 'Response'}]}
+def create_multimodal_messages(examples):
+    """Batched map function: receives dict of lists, returns dict of lists."""
+    messages_batch = []
+    for text in examples.get('text', []):
+        messages_batch.append([{'role': 'user', 'content': f'<image>\n{text}'}, {'role': 'assistant', 'content': 'Response'}])
+    return {'messages': messages_batch}
 
 
 class TestDataLoaderMultimodal:
diff --git a/tests/dataset/test_lazy.py b/tests/dataset/test_lazy.py
index 47e39843..7eaa3245 100644
--- a/tests/dataset/test_lazy.py
+++ b/tests/dataset/test_lazy.py
@@ -10,12 +10,14 @@
 SKIP_MODEL_DOWNLOAD = os.getenv('SKIP_MODEL_DOWNLOAD', 'false').lower() == 'true'
 
 
-def convert_to_messages(example):
-    text = example.get('text', '')
-    if not text:
-        text = str(example.get('question', example.get('title', '')))
-
-    return {'messages': [Message(role='user', content=text), Message(role='assistant', content='Response')]}
+def convert_to_messages(examples):
+    """Batched map function: receives dict of lists, returns dict of lists."""
+    texts = examples.get('text', None) or examples.get('question', None) or examples.get('title', [])
+    messages_batch = []
+    for text in texts:
+        text = text or ''
+        messages_batch.append([Message(role='user', content=str(text)), Message(role='assistant', content='Response')])
+    return {'messages': messages_batch}
 
 
 class TestLazyDataset:
@@ -48,8 +50,7 @@ def test_lazy_dataset_encode_flag(self):
 
         dataset.encode()
 
-        # Lazy load: encode() only sets flag, actual encoding on access; raw dataset has no input_ids
-        assert 'messages' in dataset.dataset[0]
+        # Lazy load: both map and encode are deferred; raw dataset has neither messages nor input_ids
         assert 'input_ids' not in dataset.dataset[0]
         item = dataset[0]
         assert 'input_ids' in item
diff --git a/tests/dataset/test_loading.py b/tests/dataset/test_loading.py
index 34bdaf54..aaf77fb2 100644
--- a/tests/dataset/test_loading.py
+++ b/tests/dataset/test_loading.py
@@ -43,12 +43,14 @@ def test_load_local_json(self):
 
     def test_load_local_lance(self):
         """Test loading local Lance file"""
+        pytest.importorskip('lance')
         lance_path = str(TEST_DATA_DIR / '1.lance')
         dataset = Dataset(dataset_meta=DatasetMeta(dataset_id=lance_path))
         assert len(dataset) == 2
 
     def test_load_local_lance_dir(self):
         """Test loading local Lance dir"""
+        pytest.importorskip('lance')
         lance_path = str(TEST_DATA_DIR / 'lance')
         dataset = Dataset(dataset_meta=DatasetMeta(dataset_id=lance_path))
         assert len(dataset) == 2
diff --git a/tests/dataset/test_multimodal.py b/tests/dataset/test_multimodal.py
index 5fca8f4e..f10f401d 100644
--- a/tests/dataset/test_multimodal.py
+++ b/tests/dataset/test_multimodal.py
@@ -9,12 +9,14 @@
 SKIP_MODEL_DOWNLOAD = os.getenv('SKIP_MODEL_DOWNLOAD', 'false').lower() == 'true'
 
 
-def create_multimodal_messages(example):
-    text = example.get('text', '')
-    if not text:
-        text = str(example.get('question', example.get('title', '')))
-
-    return {'messages': [{'role': 'user', 'content': f'<image>\n{text}'}, {'role': 'assistant', 'content': 'Response'}]}
+def create_multimodal_messages(examples):
+    """Batched map function: receives dict of lists, returns dict of lists."""
+    texts = examples.get('text', None) or examples.get('question', None) or examples.get('title', [])
+    messages_batch = []
+    for text in texts:
+        text = text or ''
+        messages_batch.append([{'role': 'user', 'content': f'<image>\n{text}'}, {'role': 'assistant', 'content': 'Response'}])
+    return {'messages': messages_batch}
 
 
 class TestMultimodalDataset:
@@ -87,17 +89,18 @@ def test_multimodal_dataset_multiple_image_placeholders(self):
         csv_path = str(TEST_DATA_DIR / 'test.csv')
         dataset = Dataset(dataset_meta=DatasetMeta(dataset_id=csv_path))
 
-        def create_multi_image_messages(example):
-            text = example.get('text', '')
-            return {
-                'messages': [{
+        def create_multi_image_messages(examples):
+            messages_batch = []
+            for text in examples.get('text', []):
+                text = text or ''
+                messages_batch.append([{
                     'role': 'user',
                     'content': f'<image>\n{text}\n<image>'
                 }, {
                     'role': 'assistant',
                     'content': 'Response'
-                }]
-            }
+                }])
+            return {'messages': messages_batch}
 
         dataset.map(create_multi_image_messages)
 
@@ -110,17 +113,18 @@ def test_multimodal_dataset_video_placeholder(self):
         csv_path = str(TEST_DATA_DIR / 'test.csv')
         dataset = Dataset(dataset_meta=DatasetMeta(dataset_id=csv_path))
 
-        def create_video_messages(example):
-            text = example.get('text', '')
-            return {
-                'messages': [{
+        def create_video_messages(examples):
+            messages_batch = []
+            for text in examples.get('text', []):
+                text = text or ''
+                messages_batch.append([{
                     'role': 'user',
                     'content': f'<video>\n{text}'
                 }, {
                     'role': 'assistant',
                     'content': 'Response'
-                }]
-            }
+                }])
+            return {'messages': messages_batch}
 
         dataset.map(create_video_messages)
 
@@ -133,17 +137,18 @@ def test_multimodal_dataset_audio_placeholder(self):
         csv_path = str(TEST_DATA_DIR / 'test.csv')
         dataset = Dataset(dataset_meta=DatasetMeta(dataset_id=csv_path))
 
-        def create_audio_messages(example):
-            text = example.get('text', '')
-            return {
-                'messages': [{
+        def create_audio_messages(examples):
+            messages_batch = []
+            for text in examples.get('text', []):
+                text = text or ''
+                messages_batch.append([{
                     'role': 'user',
                     'content': f'<audio>\n{text}'
                 }, {
                     'role': 'assistant',
                     'content': 'Response'
-                }]
-            }
+                }])
+            return {'messages': messages_batch}
 
         dataset.map(create_audio_messages)
 
diff --git a/tests/dataset/test_packing.py b/tests/dataset/test_packing.py
index 750f2ca9..a72a5b6b 100644
--- a/tests/dataset/test_packing.py
+++ b/tests/dataset/test_packing.py
@@ -17,9 +17,14 @@
 SKIP_MODEL_DOWNLOAD = os.getenv('SKIP_MODEL_DOWNLOAD', 'false').lower() == 'true'
 
 
-def convert_to_messages(example):
-    text = example.get('text', '') or str(example.get('question', example.get('title', '')))
-    return {'messages': [Message(role='user', content=text), Message(role='assistant', content='Response')]}
+def convert_to_messages(examples):
+    """Batched map function: receives dict of lists, returns dict of lists."""
+    texts = examples.get('text', None) or examples.get('question', None) or examples.get('title', [])
+    messages_batch = []
+    for text in texts:
+        text = text or ''
+        messages_batch.append([Message(role='user', content=str(text)), Message(role='assistant', content='Response')])
+    return {'messages': messages_batch}
 
 
 @pytest.mark.skipif(not HAS_BINPACKING, reason='binpacking not installed')
diff --git a/tests/dataset/test_ray.py b/tests/dataset/test_ray.py
index 5d48b798..ef01f00c 100644
--- a/tests/dataset/test_ray.py
+++ b/tests/dataset/test_ray.py
@@ -9,12 +9,14 @@
 SKIP_MODEL_DOWNLOAD = os.getenv('SKIP_MODEL_DOWNLOAD', 'false').lower() == 'true'
 
 
-def convert_to_messages(example):
-    text = example.get('text', '')
-    if not text:
-        text = str(example.get('question', example.get('title', '')))
-
-    return {'messages': [Message(role='user', content=text), Message(role='assistant', content='Response')]}
+def convert_to_messages(examples):
+    """Batched map function: receives dict of lists, returns dict of lists."""
+    texts = examples.get('text', None) or examples.get('question', None) or examples.get('title', [])
+    messages_batch = []
+    for text in texts:
+        text = text or ''
+        messages_batch.append([Message(role='user', content=str(text)), Message(role='assistant', content='Response')])
+    return {'messages': messages_batch}
 
 
 class TestRayDatasetBehavior:
diff --git a/tests/kernel/test_kernel.py b/tests/kernel/test_kernel.py
index 5b6a658b..1bebe895 100644
--- a/tests/kernel/test_kernel.py
+++ b/tests/kernel/test_kernel.py
@@ -194,6 +194,7 @@ def test_kernelize_without_kernels_enabled(self):
         """Test returns original model when kernels disabled."""
         with patch('twinkle.kernel.layer.is_kernels_enabled', return_value=False):
             mock_model = Mock()
+            mock_model.parameters = Mock(return_value=iter([]))
             result = kernelize_model(mock_model)
             assert result == mock_model
 
@@ -201,6 +202,7 @@ def test_kernelize_without_kernels_enabled(self):
     def test_kernelize_without_kernels_available(self, mock_available):
         """Test returns original model when kernels unavailable."""
         mock_model = Mock()
+        mock_model.parameters = Mock(return_value=iter([]))
         result = kernelize_model(mock_model)
         assert result == mock_model
 
@@ -305,6 +307,8 @@ def setup_method(self):
     @patch('twinkle.kernel.layer.is_kernels_available', return_value=False)
     def test_register_with_mode_fallback(self, mock_available):
         """Test fallback mode mapping when mode is None."""
+        if not is_kernels_available():
+            pytest.skip('kernels package not available')
         from kernels import Mode
 
         from twinkle.kernel.layer import _to_hf_mode, register_layer_kernel
@@ -328,6 +332,8 @@ def test_to_hf_mode_conversion(self):
     @patch('twinkle.kernel.layer.is_kernels_available', return_value=False)
     def test_register_multiple_modes(self, mock_available):
         """Test registering multiple modes for the same layer."""
+        if not is_kernels_available():
+            pytest.skip('kernels package not available')
         registry = get_global_layer_registry()
 
         class MockRepo:
diff --git a/tests/preprocessor/test_refuse_filter.py b/tests/preprocessor/test_refuse_filter.py
index 5a84842d..ab4e5920 100644
--- a/tests/preprocessor/test_refuse_filter.py
+++ b/tests/preprocessor/test_refuse_filter.py
@@ -21,7 +21,8 @@ def _row(messages):
 
 
 def _fil(rows, **kw):
-    return RefuseFilter(**kw)(rows)
+    kept, _dropped = RefuseFilter(**kw)(rows)
+    return kept
 
 
 # ── _is_refusal: English ────────────────────────────────────────────────────
diff --git a/tests/preprocessor/test_token_soup.py b/tests/preprocessor/test_token_soup.py
index ae97b09f..17b959e4 100644
--- a/tests/preprocessor/test_token_soup.py
+++ b/tests/preprocessor/test_token_soup.py
@@ -221,15 +221,15 @@ class TestTokenSoupFilterPipeline:
     def test_drops_soupy_assistant(self):
         f = TokenSoupFilter()
         rows = [_row('clean response'), _row('aaaaaaaaaaaaaaaaaaaaaaaaaaaaa')]
-        out = f(rows)
-        assert len(out) == 1
-        assert out[0]['messages'][1]['content'] == 'clean response'
+        kept, dropped = f(rows)
+        assert len(kept) == 1
+        assert kept[0]['messages'][1]['content'] == 'clean response'
 
     def test_keeps_row_without_assistant(self):
         f = TokenSoupFilter()
         rows = [{'messages': [{'role': 'user', 'content': 'q'}]}]
-        out = f(rows)
-        assert len(out) == 1
+        kept, dropped = f(rows)
+        assert len(kept) == 1
 
     def test_any_assistant_soupy_drops_row(self):
         f = TokenSoupFilter()
@@ -253,29 +253,32 @@ def test_any_assistant_soupy_drops_row(self):
                 },
             ]
         }]
-        out = f(rows)
-        assert out == []
+        kept, dropped = f(rows)
+        assert kept == []
 
     def test_strips_whitespace_before_check(self):
         # Leading/trailing whitespace shouldn't bypass detection.
         f = TokenSoupFilter()
         rows = [_row('   ' + 'a' * 30 + '   ')]
-        assert f(rows) == []
+        kept, dropped = f(rows)
+        assert kept == []
 
     def test_threshold_overrides_propagated(self):
         # With a stricter ratio, even small amounts of \ufffd trip it.
         f = TokenSoupFilter(replacement_char_ratio=0.0)
         rows = [_row('hello\ufffdworld')]
-        assert f(rows) == []
+        kept, dropped = f(rows)
+        assert kept == []
 
     def test_empty_rows(self):
-        assert TokenSoupFilter()([]) == []
+        kept, dropped = TokenSoupFilter()([])
+        assert kept == []
 
     def test_messages_missing(self):
         f = TokenSoupFilter()
         rows = [{'id': 'no-msgs'}]
-        out = f(rows)
-        assert len(out) == 1
+        kept, dropped = f(rows)
+        assert len(kept) == 1
 
 
 if __name__ == '__main__':
diff --git a/tests/server/contract/client_api_baseline.json b/tests/server/contract/client_api_baseline.json
index b9552759..dcafe947 100644
--- a/tests/server/contract/client_api_baseline.json
+++ b/tests/server/contract/client_api_baseline.json
@@ -372,6 +372,15 @@
           ]
         }
       },
+      "/twinkle/healthz/deep": {
+        "GET": {
+          "operationId": "healthz_deep_twinkle_healthz_deep_get",
+          "parameters": [],
+          "responses": [
+            "200"
+          ]
+        }
+      },
       "/twinkle/session_heartbeat": {
         "POST": {
           "operationId": "session_heartbeat_twinkle_session_heartbeat_post",
@@ -525,6 +534,15 @@
   },
   "model": {
     "paths": {
+      "/healthz": {
+        "GET": {
+          "operationId": "healthz_healthz_get",
+          "parameters": [],
+          "responses": [
+            "200"
+          ]
+        }
+      },
       "/tinker/create_model": {
         "POST": {
           "operationId": "create_model_tinker_create_model_post",
diff --git a/tests/template/test_deepseek_v4_tool_call.py b/tests/template/test_deepseek_v4_tool_call.py
index 902413aa..4f0fd6bc 100644
--- a/tests/template/test_deepseek_v4_tool_call.py
+++ b/tests/template/test_deepseek_v4_tool_call.py
@@ -1,4 +1,3 @@
-from twinkle.template.base import Template
 from twinkle.template.deepseek_v4 import DeepseekV4Template
 
 DSML_TOOL_CALL = ('Need data.\n\n'
@@ -44,8 +43,7 @@ def test_deepseek_v4_parse_tool_call_normalizes_block_prefix_whitespace():
 
 
 def test_template_dispatches_deepseek_tool_call_parser():
-    template = Template.__new__(Template)
-    template.model_id = 'deepseek-v4'
+    template = DeepseekV4Template.__new__(DeepseekV4Template)
 
     calls = template.parse_tool_call(DSML_TOOL_CALL)
 
diff --git a/tests/twinkle_agentic/test_extract_condensed.py b/tests/twinkle_agentic/test_extract_condensed.py
index 67b4630d..c5aa726f 100644
--- a/tests/twinkle_agentic/test_extract_condensed.py
+++ b/tests/twinkle_agentic/test_extract_condensed.py
@@ -198,7 +198,7 @@ def test_out_of_range_block_returns_short_range_error():
     out = tool(TOOL_NAME, {'block': 99})
     assert out.startswith('Error:')
     assert 'block 99 not found' in out
-    assert '1..2' in out
+    assert '[1, 2]' in out
     # Defensive: the verbose legacy listing must not leak back.
     assert 'Available blocks: 1, 2' not in out
 
diff --git a/tests/twinkle_agentic/test_model_condenser.py b/tests/twinkle_agentic/test_model_condenser.py
index cfab46c0..f7f71f56 100644
--- a/tests/twinkle_agentic/test_model_condenser.py
+++ b/tests/twinkle_agentic/test_model_condenser.py
@@ -69,7 +69,7 @@ def sample(
         for traj in inputs_list:
             user_msg = next(m for m in traj['messages'] if m['role'] == 'user')
             prompt = user_msg['content']
-            marker = 'Passage:\n'
+            marker = '## Passage\n'
             idx = prompt.rfind(marker)
             passage = prompt[idx + len(marker):] if idx >= 0 else prompt
             decoded = self._responder(passage)
@@ -281,10 +281,8 @@ def test_skip_roles_default_preserves_system_tool_assistant():
         assert out[i]['content'] == LONG_PASSAGE
         assert (out[i].get('raw') or {}).get('condensed') is not True
     assert out[3]['raw']['condensed'] is True
-    # Only one real compression job (the user chunk); the batch is padded
-    # up to ``batch_size`` with duplicates of that job to keep distributed
-    # samplers happy, and the extra responses are then discarded.
-    assert len(sampler.calls) == cond.batch_size
+    # Only one real compression job (the user chunk).
+    assert len(sampler.calls) == 1
 
 
 def test_custom_skip_roles_empty_tuple():
@@ -359,10 +357,9 @@ def test_batching_respects_batch_size():
     assert len(out) == 5
     for c in out:
         assert c['raw']['condensed'] is True
-    # 5 real jobs dispatched in batches of ``batch_size=2`` with the last
-    # batch padded to full size: 2 + 2 + 2 = 6 sampler calls, of which
-    # only 5 correspond to real work (the 6th is a duplicate discarded).
-    assert len(sampler.calls) == 6
+    # 5 real jobs dispatched in batches of ``batch_size=2``:
+    # 2 + 2 + 1 = 5 sampler calls total.
+    assert len(sampler.calls) == 5
 
 
 def test_order_preserved_with_mixed_chunks():
@@ -389,7 +386,7 @@ def test_order_preserved_with_mixed_chunks():
 def test_braces_in_text_do_not_break_prompt_formatting():
     sampler = _MockSampler(_well_formed_markdown)
     cond = ModelCondenser(sampler, compression_ratio=4.0, min_chars=50)
-    text = ('The JSON config was {"model": "Qwen", "temperature": 0.7}. ' * 5)
+    text = ('The JSON config was {"model": "Qwen", "temperature": 0.7}. ' * 7)
     out = cond(_wrap(_user_chunk(text))).chunks[0]
     assert out['raw']['condensed'] is True
     # Prompt contained the raw text verbatim.
@@ -490,8 +487,8 @@ def test_rounds_filter_only_compresses_first_user_turn():
         _round_chunk(LONG_PASSAGE, 1),
         _round_chunk(LONG_PASSAGE + ' extra.', 2),
     )).chunks
-    # One real compression job (round 1) padded up to ``batch_size``.
-    assert len(sampler.calls) == cond.batch_size
+    # One real compression job (round 1).
+    assert len(sampler.calls) == 1
     # Round 1 compressed.
     assert out[0]['raw']['condensed'] is True
     # Round 2 untouched.
@@ -514,5 +511,5 @@ def test_rounds_filter_default_none_preserves_legacy_behavior():
     cond = ModelCondenser(sampler, compression_ratio=4.0, min_chars=50)
     out = cond(_wrap(_user_chunk(LONG_PASSAGE))).chunks[0]
     assert out['raw']['condensed'] is True
-    # One real job, padded up to ``batch_size``.
-    assert len(sampler.calls) == cond.batch_size
+    # One real job.
+    assert len(sampler.calls) == 1

From 29e8122d0efd32a934f107d379302e1e097063c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= <yuze.zyz@alibaba-inc.com>
Date: Mon, 29 Jun 2026 20:29:13 +0800
Subject: [PATCH 6/7] fix ci

---
 docs/source_en/Usage Guide/Quick-Start.md                       | 2 +-
 .../\345\277\253\351\200\237\345\274\200\345\247\213.md"        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source_en/Usage Guide/Quick-Start.md b/docs/source_en/Usage Guide/Quick-Start.md
index ff7b8727..44c4dedf 100644
--- a/docs/source_en/Usage Guide/Quick-Start.md	
+++ b/docs/source_en/Usage Guide/Quick-Start.md	
@@ -473,7 +473,7 @@ python train.py
 
 A major feature of Twinkle is support for multi-tenant mixed training. Specifically, multiple users can use a single base model for LoRA training, which can greatly reduce server-side deployment costs.
 
-Checkpoint resumption is also supported in client-server training. The recommended flow is to call `model.resume_from_checkpoint(resume_path)` to restore weights and optimizer state, then call `dataloader.resume_from_checkpoint(progress['consumed_train_samples'])` to skip consumed data. See [Twinkle-Client](./Server%20and%20Client/Twinkle-Client.md) and [self_cognition.py](https://github.com/modelscope/twinkle/blob/main/cookbook/server_mode/twinkle/self_host/self_cognition.py).
+Checkpoint resumption is also supported in client-server training. The recommended flow is to call `model.resume_from_checkpoint(resume_path)` to restore weights and optimizer state, then call `dataloader.resume_from_checkpoint(progress['consumed_train_samples'])` to skip consumed data. See [Twinkle-Client](./Server%20and%20Client/Twinkle-Client.md) and [self_cognition.py](https://github.com/modelscope/twinkle/blob/main/cookbook/client/twinkle/self_host/self_cognition.py).
 
 Suppose we start a service using eight GPUs. First, we need to start the Ray cluster:
 
diff --git "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\345\277\253\351\200\237\345\274\200\345\247\213.md" "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\345\277\253\351\200\237\345\274\200\345\247\213.md"
index 11cb5bc9..b4bbcd49 100644
--- "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\345\277\253\351\200\237\345\274\200\345\247\213.md"
+++ "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\345\277\253\351\200\237\345\274\200\345\247\213.md"
@@ -472,7 +472,7 @@ python train.py
 ```
 
 ### 远程训练
-client-server 训练场景同样支持断点续训。推荐流程是调用 `model.resume_from_checkpoint(resume_path)` 恢复权重和优化器状态，再调用 `dataloader.resume_from_checkpoint(progress['consumed_train_samples'])` 跳过已消费数据。详细示例可参考 [Twinkle客户端](./服务端和客户端/Twinkle客户端.md) 和 [self_cognition.py](https://github.com/modelscope/twinkle/blob/main/cookbook/server_mode/twinkle/self_host/self_cognition.py)。
+client-server 训练场景同样支持断点续训。推荐流程是调用 `model.resume_from_checkpoint(resume_path)` 恢复权重和优化器状态，再调用 `dataloader.resume_from_checkpoint(progress['consumed_train_samples'])` 跳过已消费数据。详细示例可参考 [Twinkle客户端](./服务端和客户端/Twinkle客户端.md) 和 [self_cognition.py](https://github.com/modelscope/twinkle/blob/main/cookbook/client/twinkle/self_host/self_cognition.py)。
 
 Twinkle 的一大特色是支持多租户用户混合训练。具体来说，多个用户可以使用一个基模进行 LoRA 训练，这样可以极大减小服务端部署成本。
 

From ff718fe5db70b1c1499e52a08bebcb3b5d047227 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= <yuze.zyz@alibaba-inc.com>
Date: Tue, 30 Jun 2026 10:40:16 +0800
Subject: [PATCH 7/7] fix

---
 .github/workflows/citest_npu.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/citest_npu.yaml b/.github/workflows/citest_npu.yaml
index d48c7421..a3878dae 100644
--- a/.github/workflows/citest_npu.yaml
+++ b/.github/workflows/citest_npu.yaml
@@ -42,7 +42,7 @@ jobs:
     runs-on: [linux-aarch64-a2-1]
     timeout-minutes: 240
     container:
-      image: 'ascendai/cann:8.3.rc2-910b-ubuntu22.04-py3.11'
+      image: 'ascendai/cann:9.0.0-910-ubuntu22.04-py3.11'
     steps:
       - name: Config mirrors
         run: |
@@ -67,7 +67,7 @@ jobs:
         run: |
           set -e
           export IMAGE_NAME=ascendai/cann
-          export IMAGE_VERSION=8.3.rc2-910b-ubuntu22.04-py3.11
+          export IMAGE_VERSION=9.0.0-910-ubuntu22.04-py3.11
           export TEST_LEVEL=0
           mkdir -p ~/.cache
           export MODELSCOPE_CACHE=~/.cache