Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
63a1c2b
feat(kernel): add HubRef dataclass and hub() factory
kevssim Jun 26, 2026
7049f6f
feat(kernel): add _infer_device helper
kevssim Jun 26, 2026
1547d8c
feat(kernel): add _resolve_value with device-conditional dispatch
kevssim Jun 26, 2026
1be64aa
feat(kernel): add _replace_class and _replace_attr helpers
kevssim Jun 26, 2026
c72883b
feat(kernel): add _load_hub_ref with lazy kernels import
kevssim Jun 26, 2026
d4318b1
feat(kernel): add kernelize() dispatcher
kevssim Jun 26, 2026
77165c9
feat(kernel): add npu_impls/rms_norm module
kevssim Jun 26, 2026
ac47045
feat(kernel): add npu_impls/rotary module
kevssim Jun 26, 2026
f0d0a23
feat(kernel): add npu_impls/swiglu module
kevssim Jun 26, 2026
1e9902e
feat(kernel): add npu_impls/attention module
kevssim Jun 26, 2026
4fc02c1
feat(kernel): add npu_impls/moe module
kevssim Jun 26, 2026
a5421f9
feat(kernel): add npu_impls/fla module
kevssim Jun 26, 2026
87e8477
feat(kernel): add npu_builtin() bundle and class-attr replacement
kevssim Jun 26, 2026
39a9225
refactor(kernel): expose only kernelize, hub, npu_builtin
kevssim Jun 26, 2026
f4c491f
refactor(kernel): remove legacy registry/function/layer/base/monkey_p…
kevssim Jun 26, 2026
3fb7071
refactor(cookbook): migrate to new twinkle.kernel API
kevssim Jun 26, 2026
109cf24
docs(kernel): rewrite Chinese doc for new mapping API
kevssim Jun 26, 2026
c7babac
docs(kernel): rewrite English doc for new mapping API
kevssim Jun 26, 2026
997697f
fix(kernel): gate SDPA install on NPU host and FLA flag on MindSpeed …
kevssim Jun 26, 2026
a742827
wip
kevssim Jun 26, 2026
dc7cb93
wip
kevssim Jun 26, 2026
398dd37
wip
kevssim Jun 26, 2026
126efc3
wip
kevssim Jun 29, 2026
a20da5a
Revert "wip"
kevssim Jun 29, 2026
ff67fc1
lint
kevssim Jun 29, 2026
9603c62
Merge remote-tracking branch 'origin/main' into refactor/kernel-mappi…
kevssim Jun 30, 2026
b06cfe5
delete
kevssim Jun 30, 2026
1b5f8c9
wip
kevssim Jun 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cookbook/transformers/ep_fsdp2_lora_qwen3_5_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from twinkle.model import TransformersModel
from twinkle.preprocessor import SelfCognitionProcessor
from twinkle.utils.framework import Torch
from twinkle.kernel import kernelize_model
from twinkle.kernel import kernelize, npu_builtin

logger = get_logger()
args = CLI.from_args()
Expand Down Expand Up @@ -95,7 +95,7 @@ def train():
)
# npu patch
if Torch.is_npu_available():
model = kernelize_model(model, mode='train', device='npu')
model = kernelize(model, npu_builtin(model))
lora_cfg = _build_lora_config(ENABLE_EP)
model.add_adapter_to_model(args.lora.adapter_name, lora_cfg,
gradient_accumulation_steps=args.training.gradient_accumulation_steps)
Expand Down
4 changes: 2 additions & 2 deletions cookbook/transformers/fsdp2.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from twinkle.model import TransformersModel
from twinkle.preprocessor import SelfCognitionProcessor
from twinkle.utils.framework import Torch
from twinkle.kernel import kernelize_model
from twinkle.kernel import kernelize, npu_builtin

logger = get_logger()
args = CLI.from_args()
Expand Down Expand Up @@ -59,7 +59,7 @@ def train():
model.model._no_split_modules = {'Qwen3_5DecoderLayer'}
# npu patch
if Torch.is_npu_available():
model = kernelize_model(model, mode='train', device='npu')
model = kernelize(model, npu_builtin(model))

lora_config = LoraConfig(**args.get_lora_args())
model.add_adapter_to_model(
Expand Down
4 changes: 2 additions & 2 deletions cookbook/transformers/sp_fsdp_dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from twinkle.model import TransformersModel
from twinkle.preprocessor import SelfCognitionProcessor
from twinkle.utils.framework import Torch
from twinkle.kernel import kernelize_model
from twinkle.kernel import kernelize, npu_builtin

logger = get_logger()
args = CLI.from_args()
Expand Down Expand Up @@ -68,7 +68,7 @@ def train():
)
# npu patch
if Torch.is_npu_available():
model = kernelize_model(model, mode='train', device='npu')
model = kernelize(model, npu_builtin(model))
lora_config = LoraConfig(**args.get_lora_args())
model.add_adapter_to_model(args.lora.adapter_name, lora_config,
gradient_accumulation_steps=args.training.gradient_accumulation_steps)
Expand Down
Loading