Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions .dev_scripts/ci_container_test.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
install_twinkle_with_kernels() {
pip install ".[kernels,test,tinker]" -i https://mirrors.aliyun.com/pypi/simple/ || pip install ".[kernels,test,tinker]"
pip install ".[test,client,server]" -i https://mirrors.aliyun.com/pypi/simple/ || pip install ".[test,client,server]"
}

if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
Expand All @@ -26,9 +26,6 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
pip uninstall autoawq -y
pip uninstall lmdeploy -y
pip uninstall tensorflow -y
# Pin kernels<0.15 to avoid transformers' hub_kernels.py LayerRepository
# crash (huggingface/transformers#46291).
pip install 'kernels<0.15'
pip install ray==2.48
pip install optimum

Expand All @@ -38,11 +35,16 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
# `from transformers import HybridCache` at peft_model.py:37 which
# crashes on transformers v5. 0.19.1 dropped that top-level import.
pip install --upgrade 'peft>=0.19.1'
# Uninstall kernels: kernels>=0.15 crashes transformers' hub_kernels.py
# (huggingface/transformers#46291), and kernels<0.15 requires
# huggingface_hub>=1.10.0 which conflicts with transformers' <1.0 cap.
# transformers gracefully skips hub_kernels when kernels is absent.
pip uninstall kernels kernels-data -y 2>/dev/null || true
else
install_twinkle_with_kernels
# Same kernels pin and peft bump for the release-image branch.
pip install 'kernels<0.15'
# Same peft bump and kernels removal for the release-image branch.
pip install --upgrade 'peft>=0.19.1'
pip uninstall kernels kernels-data -y 2>/dev/null || true
echo "Running case in release image, run case directly!"
fi
# remove torch_extensions folder to avoid ci hang.
Expand Down
2 changes: 0 additions & 2 deletions .github/workflows/citest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ on:
- "requirements/**"
- "docs/**"
- "tools/**"
- ".dev_scripts/**"
- "README.md"
- "README_*.md"
- "NOTICE"
Expand All @@ -25,7 +24,6 @@ on:
- "requirements/**"
- "docs/**"
- "tools/**"
- ".dev_scripts/**"
- "README.md"
- "README_*.md"
- "NOTICE"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/citest_npu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
runs-on: [linux-aarch64-a2-1]
timeout-minutes: 240
container:
image: 'ascendai/cann:8.3.rc2-910b-ubuntu22.04-py3.11'
image: 'ascendai/cann:9.0.0-910-ubuntu22.04-py3.11'
steps:
- name: Config mirrors
run: |
Expand All @@ -67,7 +67,7 @@ jobs:
run: |
set -e
export IMAGE_NAME=ascendai/cann
export IMAGE_VERSION=8.3.rc2-910b-ubuntu22.04-py3.11
export IMAGE_VERSION=9.0.0-910-ubuntu22.04-py3.11
export TEST_LEVEL=0
mkdir -p ~/.cache
export MODELSCOPE_CACHE=~/.cache
Expand Down
2 changes: 1 addition & 1 deletion docs/source_en/Usage Guide/Quick-Start.md
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ python train.py

A major feature of Twinkle is support for multi-tenant mixed training. Specifically, multiple users can use a single base model for LoRA training, which can greatly reduce server-side deployment costs.

Checkpoint resumption is also supported in client-server training. The recommended flow is to call `model.resume_from_checkpoint(resume_path)` to restore weights and optimizer state, then call `dataloader.resume_from_checkpoint(progress['consumed_train_samples'])` to skip consumed data. See [Twinkle-Client](./Server%20and%20Client/Twinkle-Client.md) and [self_cognition.py](../../../cookbook/server_mode/twinkle/self_host/self_cognition.py).
Checkpoint resumption is also supported in client-server training. The recommended flow is to call `model.resume_from_checkpoint(resume_path)` to restore weights and optimizer state, then call `dataloader.resume_from_checkpoint(progress['consumed_train_samples'])` to skip consumed data. See [Twinkle-Client](./Server%20and%20Client/Twinkle-Client.md) and [self_cognition.py](https://github.com/modelscope/twinkle/blob/main/cookbook/client/twinkle/self_host/self_cognition.py).

Suppose we start a service using eight GPUs. First, we need to start the Ray cluster:

Expand Down
2 changes: 1 addition & 1 deletion docs/source_zh/使用指引/快速开始.md
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ python train.py
```

### 远程训练
client-server 训练场景同样支持断点续训。推荐流程是调用 `model.resume_from_checkpoint(resume_path)` 恢复权重和优化器状态,再调用 `dataloader.resume_from_checkpoint(progress['consumed_train_samples'])` 跳过已消费数据。详细示例可参考 [Twinkle客户端](./服务端和客户端/Twinkle客户端.md) 和 [self_cognition.py](../../../cookbook/server_mode/twinkle/self_host/self_cognition.py)。
client-server 训练场景同样支持断点续训。推荐流程是调用 `model.resume_from_checkpoint(resume_path)` 恢复权重和优化器状态,再调用 `dataloader.resume_from_checkpoint(progress['consumed_train_samples'])` 跳过已消费数据。详细示例可参考 [Twinkle客户端](./服务端和客户端/Twinkle客户端.md) 和 [self_cognition.py](https://github.com/modelscope/twinkle/blob/main/cookbook/client/twinkle/self_host/self_cognition.py)。

Twinkle 的一大特色是支持多租户用户混合训练。具体来说,多个用户可以使用一个基模进行 LoRA 训练,这样可以极大减小服务端部署成本。

Expand Down
5 changes: 3 additions & 2 deletions src/twinkle/dataset/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,12 +331,13 @@ def mix_dataset(self, interleave=True):
dataset_types) or not any(dataset_types), 'All datasets must be all streaming=True or streaming=False'
if not any(dataset_types):
dsets = list(self.datasets.values())
# Align features
# Align features: only cast when columns match but types differ
ref_features = dsets[0].features
aligned = []
for ds in dsets:
if ds.features != ref_features:
ds = ds.cast(ref_features)
if sorted(ds.features.keys()) == sorted(ref_features.keys()):
ds = ds.cast(ref_features)
aligned.append(ds)
else:
aligned = list(self.datasets.values())
Expand Down
2 changes: 1 addition & 1 deletion src/twinkle/kernel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def _is_npu_device(model=None) -> bool:
param_device = next(model.parameters()).device
if param_device.type == 'npu':
return True
except StopIteration:
except (StopIteration, TypeError):
pass

# Priority 2: Fallback to global NPU availability
Expand Down
2 changes: 2 additions & 0 deletions src/twinkle/kernel/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ def to_kernels_mode(mode: ModeType) -> Any:


def validate_mode(mode: str) -> None:
if not is_kernels_available():
return
from kernels.layer.mode import Mode
mode = to_kernels_mode(mode)

Expand Down
3 changes: 3 additions & 0 deletions src/twinkle/template/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,9 @@ def _to_standard_reasoning_content(self, trajectory: Trajectory) -> List[Traject
def _extract_reasoning_content(messages: list[Message]) -> List[Message]:
result = []
for message in messages:
if not isinstance(message, dict):
result.append(message)
continue
message = message.copy()
if message.get('role') == 'assistant':
content = message.get('content', '')
Expand Down
2 changes: 1 addition & 1 deletion src/twinkle_agentic/preprocessor/refuse_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def _text(content: Any) -> str:
return content if isinstance(content, str) else ''


def _is_refusal(text: str, check_window: int) -> bool:
def _is_refusal(text: str, check_window: int = 600) -> bool:
"""Return True if the text contains a self-referential refusal signal."""
window = text[:check_window]
return any(p.search(window) for p in _ALL_PATTERNS)
Expand Down
9 changes: 6 additions & 3 deletions tests/dataloader/test_dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,12 @@ def _disable_process_pool(monkeypatch):
SKIP_MODEL_DOWNLOAD = os.getenv('SKIP_MODEL_DOWNLOAD', 'false').lower() == 'true'


def convert_to_messages(example):
text = example.get('text', '')
return {'messages': [Message(role='user', content=text), Message(role='assistant', content='Response')]}
def convert_to_messages(examples):
"""Batched map function: receives dict of lists, returns dict of lists."""
messages_batch = []
for text in examples.get('text', []):
messages_batch.append([Message(role='user', content=text), Message(role='assistant', content='Response')])
return {'messages': messages_batch}


def _build_resume_rows():
Expand Down
9 changes: 6 additions & 3 deletions tests/dataloader/test_multimodal.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@
SKIP_MODEL_DOWNLOAD = os.getenv('SKIP_MODEL_DOWNLOAD', 'false').lower() == 'true'


def create_multimodal_messages(example):
text = example.get('text', '')
return {'messages': [{'role': 'user', 'content': f'<image>\n{text}'}, {'role': 'assistant', 'content': 'Response'}]}
def create_multimodal_messages(examples):
"""Batched map function: receives dict of lists, returns dict of lists."""
messages_batch = []
for text in examples.get('text', []):
messages_batch.append([{'role': 'user', 'content': f'<image>\n{text}'}, {'role': 'assistant', 'content': 'Response'}])
return {'messages': messages_batch}


class TestDataLoaderMultimodal:
Expand Down
17 changes: 9 additions & 8 deletions tests/dataset/test_lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@
SKIP_MODEL_DOWNLOAD = os.getenv('SKIP_MODEL_DOWNLOAD', 'false').lower() == 'true'


def convert_to_messages(example):
text = example.get('text', '')
if not text:
text = str(example.get('question', example.get('title', '')))

return {'messages': [Message(role='user', content=text), Message(role='assistant', content='Response')]}
def convert_to_messages(examples):
"""Batched map function: receives dict of lists, returns dict of lists."""
texts = examples.get('text', None) or examples.get('question', None) or examples.get('title', [])
messages_batch = []
for text in texts:
text = text or ''
messages_batch.append([Message(role='user', content=str(text)), Message(role='assistant', content='Response')])
return {'messages': messages_batch}


class TestLazyDataset:
Expand Down Expand Up @@ -48,8 +50,7 @@ def test_lazy_dataset_encode_flag(self):

dataset.encode()

# Lazy load: encode() only sets flag, actual encoding on access; raw dataset has no input_ids
assert 'messages' in dataset.dataset[0]
# Lazy load: both map and encode are deferred; raw dataset has neither messages nor input_ids
assert 'input_ids' not in dataset.dataset[0]
item = dataset[0]
assert 'input_ids' in item
Expand Down
2 changes: 2 additions & 0 deletions tests/dataset/test_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,14 @@ def test_load_local_json(self):

def test_load_local_lance(self):
"""Test loading local Lance file"""
pytest.importorskip('lance')
lance_path = str(TEST_DATA_DIR / '1.lance')
dataset = Dataset(dataset_meta=DatasetMeta(dataset_id=lance_path))
assert len(dataset) == 2

def test_load_local_lance_dir(self):
"""Test loading local Lance dir"""
pytest.importorskip('lance')
lance_path = str(TEST_DATA_DIR / 'lance')
dataset = Dataset(dataset_meta=DatasetMeta(dataset_id=lance_path))
assert len(dataset) == 2
Expand Down
53 changes: 29 additions & 24 deletions tests/dataset/test_multimodal.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@
SKIP_MODEL_DOWNLOAD = os.getenv('SKIP_MODEL_DOWNLOAD', 'false').lower() == 'true'


def create_multimodal_messages(example):
text = example.get('text', '')
if not text:
text = str(example.get('question', example.get('title', '')))

return {'messages': [{'role': 'user', 'content': f'<image>\n{text}'}, {'role': 'assistant', 'content': 'Response'}]}
def create_multimodal_messages(examples):
"""Batched map function: receives dict of lists, returns dict of lists."""
texts = examples.get('text', None) or examples.get('question', None) or examples.get('title', [])
messages_batch = []
for text in texts:
text = text or ''
messages_batch.append([{'role': 'user', 'content': f'<image>\n{text}'}, {'role': 'assistant', 'content': 'Response'}])
return {'messages': messages_batch}


class TestMultimodalDataset:
Expand Down Expand Up @@ -87,17 +89,18 @@ def test_multimodal_dataset_multiple_image_placeholders(self):
csv_path = str(TEST_DATA_DIR / 'test.csv')
dataset = Dataset(dataset_meta=DatasetMeta(dataset_id=csv_path))

def create_multi_image_messages(example):
text = example.get('text', '')
return {
'messages': [{
def create_multi_image_messages(examples):
messages_batch = []
for text in examples.get('text', []):
text = text or ''
messages_batch.append([{
'role': 'user',
'content': f'<image>\n{text}\n<image>'
}, {
'role': 'assistant',
'content': 'Response'
}]
}
}])
return {'messages': messages_batch}

dataset.map(create_multi_image_messages)

Expand All @@ -110,17 +113,18 @@ def test_multimodal_dataset_video_placeholder(self):
csv_path = str(TEST_DATA_DIR / 'test.csv')
dataset = Dataset(dataset_meta=DatasetMeta(dataset_id=csv_path))

def create_video_messages(example):
text = example.get('text', '')
return {
'messages': [{
def create_video_messages(examples):
messages_batch = []
for text in examples.get('text', []):
text = text or ''
messages_batch.append([{
'role': 'user',
'content': f'<video>\n{text}'
}, {
'role': 'assistant',
'content': 'Response'
}]
}
}])
return {'messages': messages_batch}

dataset.map(create_video_messages)

Expand All @@ -133,17 +137,18 @@ def test_multimodal_dataset_audio_placeholder(self):
csv_path = str(TEST_DATA_DIR / 'test.csv')
dataset = Dataset(dataset_meta=DatasetMeta(dataset_id=csv_path))

def create_audio_messages(example):
text = example.get('text', '')
return {
'messages': [{
def create_audio_messages(examples):
messages_batch = []
for text in examples.get('text', []):
text = text or ''
messages_batch.append([{
'role': 'user',
'content': f'<audio>\n{text}'
}, {
'role': 'assistant',
'content': 'Response'
}]
}
}])
return {'messages': messages_batch}

dataset.map(create_audio_messages)

Expand Down
11 changes: 8 additions & 3 deletions tests/dataset/test_packing.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,14 @@
SKIP_MODEL_DOWNLOAD = os.getenv('SKIP_MODEL_DOWNLOAD', 'false').lower() == 'true'


def convert_to_messages(example):
text = example.get('text', '') or str(example.get('question', example.get('title', '')))
return {'messages': [Message(role='user', content=text), Message(role='assistant', content='Response')]}
def convert_to_messages(examples):
"""Batched map function: receives dict of lists, returns dict of lists."""
texts = examples.get('text', None) or examples.get('question', None) or examples.get('title', [])
messages_batch = []
for text in texts:
text = text or ''
messages_batch.append([Message(role='user', content=str(text)), Message(role='assistant', content='Response')])
return {'messages': messages_batch}


@pytest.mark.skipif(not HAS_BINPACKING, reason='binpacking not installed')
Expand Down
14 changes: 8 additions & 6 deletions tests/dataset/test_ray.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@
SKIP_MODEL_DOWNLOAD = os.getenv('SKIP_MODEL_DOWNLOAD', 'false').lower() == 'true'


def convert_to_messages(example):
text = example.get('text', '')
if not text:
text = str(example.get('question', example.get('title', '')))

return {'messages': [Message(role='user', content=text), Message(role='assistant', content='Response')]}
def convert_to_messages(examples):
"""Batched map function: receives dict of lists, returns dict of lists."""
texts = examples.get('text', None) or examples.get('question', None) or examples.get('title', [])
messages_batch = []
for text in texts:
text = text or ''
messages_batch.append([Message(role='user', content=str(text)), Message(role='assistant', content='Response')])
return {'messages': messages_batch}


class TestRayDatasetBehavior:
Expand Down
6 changes: 6 additions & 0 deletions tests/kernel/test_kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,13 +194,15 @@ def test_kernelize_without_kernels_enabled(self):
"""Test returns original model when kernels disabled."""
with patch('twinkle.kernel.layer.is_kernels_enabled', return_value=False):
mock_model = Mock()
mock_model.parameters = Mock(return_value=iter([]))
result = kernelize_model(mock_model)
assert result == mock_model

@patch('twinkle.kernel.layer.is_kernels_available', return_value=False)
def test_kernelize_without_kernels_available(self, mock_available):
"""Test returns original model when kernels unavailable."""
mock_model = Mock()
mock_model.parameters = Mock(return_value=iter([]))
result = kernelize_model(mock_model)
assert result == mock_model

Expand Down Expand Up @@ -305,6 +307,8 @@ def setup_method(self):
@patch('twinkle.kernel.layer.is_kernels_available', return_value=False)
def test_register_with_mode_fallback(self, mock_available):
"""Test fallback mode mapping when mode is None."""
if not is_kernels_available():
pytest.skip('kernels package not available')
from kernels import Mode

from twinkle.kernel.layer import _to_hf_mode, register_layer_kernel
Expand All @@ -328,6 +332,8 @@ def test_to_hf_mode_conversion(self):
@patch('twinkle.kernel.layer.is_kernels_available', return_value=False)
def test_register_multiple_modes(self, mock_available):
"""Test registering multiple modes for the same layer."""
if not is_kernels_available():
pytest.skip('kernels package not available')
registry = get_global_layer_registry()

class MockRepo:
Expand Down
3 changes: 2 additions & 1 deletion tests/preprocessor/test_refuse_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def _row(messages):


def _fil(rows, **kw):
return RefuseFilter(**kw)(rows)
kept, _dropped = RefuseFilter(**kw)(rows)
return kept


# ── _is_refusal: English ────────────────────────────────────────────────────
Expand Down
Loading
Loading