diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml
index 95ca9c901..1ed198a78 100644
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -52,7 +52,7 @@ jobs:
       build: ${{ steps.check_build_trigger.outputs.build }}
     steps:
     - name: Checkout source code
-      uses: actions/checkout@v6
+      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
       with:
         ref: ${{ github.event.pull_request.head.sha }}
     - id: check_build_trigger
@@ -71,10 +71,10 @@ jobs:
 
     steps:
     - name: Checkout
-      uses: actions/checkout@v6
+      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
 
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v6
+      uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38  # v5.4.0
       with:
         python-version: ${{ matrix.python-version }}
 
@@ -82,31 +82,73 @@ jobs:
       run: |
         pip install --upgrade build pip twine
 
-    - name: Build source distribution and wheels
-      run: python -m build
+    - name: Build source distribution
+      run: python -m build --sdist   # was: python -m build
 
     - name: Check distributions
       run: twine check dist/*
 
     - name: Store distributions
-      uses: actions/upload-artifact@v7
+      uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1  # v4.6.1
       with:
+        name: dist-sdist             # explicit name for downstream retrieval
         path: dist
 
+  build_wheels:
+    name: Build binary wheels (${{ matrix.os }})
+    needs: [check_build_trigger]
+    if: needs.check_build_trigger.outputs.build
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest    # → manylinux_2_17_x86_64
+          - windows-latest   # → win_amd64
+          - macos-13         # → macOS x86_64 wheels (Intel runner)
+          - macos-14         # → macOS arm64 wheels (Apple Silicon runner)
+
+    steps:
+    - name: Checkout
+      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
+
+    - name: Build wheels
+      uses: pypa/cibuildwheel@fa04202e88ea28b84d5d4d20696ee8dfc0119436  # v2.23.0
+      # All config is read from [tool.cibuildwheel] in pyproject.toml:
+      #   build/skip selectors, test command, per-platform archs
+
+    - name: Validate wheels
+      run: |
+        pip install twine
+        twine check ./wheelhouse/*.whl
+
+    - name: Upload wheel artifacts
+      uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1  # v4.6.1
+      with:
+        name: cibw-wheels-${{ matrix.os }}
+        path: ./wheelhouse/*.whl
+
   publish_pypi:
     name: Publish to PyPI
     runs-on: ubuntu-latest
-    needs: [build]
+    needs: [build, build_wheels]       # was: needs: [build]
     if: github.event_name == 'release' && github.event.action == 'published'
     steps:
-    - name: Retrieve distributions
-      uses: actions/download-artifact@v7
+    - name: Retrieve sdist
+      uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806  # v4.1.9
       with:
-        name: artifact
+        name: dist-sdist               # matches renamed artifact
         path: dist
+
+    - name: Retrieve binary wheels
+      uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806  # v4.1.9
+      with:
+        pattern: cibw-wheels-*         # globs all 4 matrix artifacts
+        path: dist
+        merge-multiple: true           # flatten: cibw-wheels-os1/a.whl → dist/a.whl
+
     - name: Publish to PyPI
-      uses: pypa/gh-action-pypi-publish@release/v1
-      if: github.event_name == 'release' && github.event.action == 'published'
+      uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e  # release/v1
       with:
         skip-existing: true
         user: __token__
diff --git a/.gitignore b/.gitignore
index d502765ce..b80ad2f10 100644
--- a/.gitignore
+++ b/.gitignore
@@ -116,6 +116,10 @@ dmypy.json
 
 # Cython debug symbols
 cython_debug/
+
+# Cython-generated C source files (anywhere in the package tree)
+pythainlp/**/*.c
+
 notebooks/iso_11940-dev.ipynb
 
 # vscode devcontainer
diff --git a/pyproject.toml b/pyproject.toml
index 308fda4ae..2f88d1486 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 [build-system]
-requires = ["hatchling"]
+requires = ["hatchling", "hatch-cython>=0.5.0", "cython>=3.0"]
 build-backend = "hatchling.build"
 
 [project]
@@ -233,6 +233,8 @@ noauto-onnx = [
 # Cython-based dependencies - for tests.noauto_cython
 noauto-cython = [
     "phunspell>=0.1.6",
+    "hatch-cython>=0.5.0",
+    "cython>=3.0",
 ]
 
 # Network-dependent tests - for tests.noauto_network
@@ -311,6 +313,22 @@ include = [
     "README.md",
 ]
 
+[tool.hatch.build.hooks.cython]
+dependencies = ["cython>=3.0"]
+optional = true
+
+[tool.hatch.build.hooks.cython.options]
+# Compile only .pyx files in pythainlp/_ext — do NOT compile .py files.
+# Without compile_py=false, hatch-cython would compile every .py file in
+# the package into a Cython extension, which is not what we want.
+compile_py = false
+
+# hatch-cython internally invokes setuptools' build_ext. Restrict package
+# discovery to pythainlp only so setuptools doesn't error on the flat layout
+# (multiple top-level directories: build_tools, fuzz, notebooks, pythainlp).
+[tool.setuptools.packages.find]
+include = ["pythainlp*"]
+
 [tool.bumpversion]
 current_version = "5.3.4"
 commit = true
@@ -497,6 +515,10 @@ module = [
 ]
 ignore_missing_imports = true
 
+[[tool.mypy.overrides]]
+module = ["pythainlp._ext.*"]
+ignore_missing_imports = true
+
 [tool.pylint.main]
 disable = [
     "import-error",
@@ -507,3 +529,29 @@ disable = [
     "too-many-branches",
     "too-many-statements",
 ]
+
+# ---------------------------------------------------------------------------
+# cibuildwheel — binary wheel build matrix
+# Docs: https://cibuildwheel.readthedocs.io/en/stable/options/
+# ---------------------------------------------------------------------------
+[tool.cibuildwheel]
+# CPython 3.9–3.13 (stable; matches requires-python = ">=3.9")
+build = "cp39-* cp310-* cp311-* cp312-* cp313-*"
+skip  = "pp* *-musllinux_*"           # PyPy and Alpine excluded (complex toolchain, deferred)
+
+[tool.cibuildwheel.test]
+# After wheel install, verify _thai_fast loaded as a compiled .so/.pyd
+# (not a pure-Python fallback).  No test deps required.
+# Note: pythainlp/_ext/_thai_fast has NO .py fallback — ImportError here
+# means compilation failed silently, which also fails this step explicitly.
+command = "python -c \"import pythainlp._ext._thai_fast as m; assert m.__file__.endswith(('.so', '.pyd')), 'NOT compiled: ' + m.__file__; print('CIBW OK:', m.__file__)\""
+
+[tool.cibuildwheel.linux]
+manylinux-x86_64-image = "manylinux2014"  # glibc >= 2.17 (RHEL 7+ / Ubuntu 18.04+)
+archs = "x86_64"                          # linux aarch64 deferred — QEMU adds ~20 min/version on GitHub runners
+
+[tool.cibuildwheel.macos]
+archs = "auto"                            # macos-13 runner = Intel (auto → x86_64); macos-14 runner = ARM (auto → arm64)
+
+[tool.cibuildwheel.windows]
+archs = "AMD64"
diff --git a/pythainlp/_ext/__init__.py b/pythainlp/_ext/__init__.py
new file mode 100644
index 000000000..838267c6f
--- /dev/null
+++ b/pythainlp/_ext/__init__.py
@@ -0,0 +1,9 @@
+# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
+# SPDX-FileType: SOURCE
+# SPDX-License-Identifier: Apache-2.0
+"""Optional Cython-compiled extensions for performance-critical functions.
+
+These extensions are built at install time when a C compiler and Cython are
+available. If unavailable (e.g., PyPy, no compiler), the pure Python
+implementations in pythainlp.util are used as fallback.
+"""
diff --git a/pythainlp/_ext/_normalize_fast.pyi b/pythainlp/_ext/_normalize_fast.pyi
new file mode 100644
index 000000000..d91915366
--- /dev/null
+++ b/pythainlp/_ext/_normalize_fast.pyi
@@ -0,0 +1,7 @@
+# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
+# SPDX-FileType: SOURCE
+# SPDX-License-Identifier: Apache-2.0
+"""Type stubs for pythainlp._ext._normalize_fast Cython extension."""
+
+def remove_tonemark(text: str) -> str: ...
+def remove_dup_spaces(text: str) -> str: ...
diff --git a/pythainlp/_ext/_normalize_fast.pyx b/pythainlp/_ext/_normalize_fast.pyx
new file mode 100644
index 000000000..89ba9d7a7
--- /dev/null
+++ b/pythainlp/_ext/_normalize_fast.pyx
@@ -0,0 +1,116 @@
+# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
+# SPDX-FileType: SOURCE
+# SPDX-License-Identifier: Apache-2.0
+# NOTE: compiler directives are only honoured when they precede all code,
+# and a module docstring counts as code -- keep this above the docstring.
+# cython: language_level=3, boundscheck=False, wraparound=False
+"""Cython text normalization functions.
+
+Provides Cython implementations of remove_tonemark and remove_dup_spaces
+using C-level typed memory views and byte filtering.
+
+These functions are API-compatible with their equivalents in
+pythainlp.util.normalize and can be used as drop-in replacements
+when explicitly imported; they are not loaded automatically.
+"""
+
+import re as _re
+
+from pythainlp import thai_tonemarks as _tonemarks_str
+
+# Frozenset of tone mark characters for O(1) membership test.
+# Must contain single-char strings (not ints): when Cython converts a
+# Py_UCS4 value via the `in` operator it produces chr(c), not an integer.
+cdef frozenset _TONE_SET = frozenset(_tonemarks_str)
+
+# Use the same regex pattern as normalize.py to keep newline behaviour
+# identical (collapses sequences of spaces+newlines into a single newline)
+_RE_REMOVE_NEWLINES = _re.compile(r"[ \n]*\n[ \n]*")
+
+
+cpdef str remove_tonemark(object text):
+    """Remove Thai tone marks from text using UTF-8 byte-level filtering.
+
+    Thai tone marks occupy the Unicode range U+0E48-U+0E4B, which encodes
+    in UTF-8 as the three-byte sequence 0xE0 0xB9 {0x88-0x8B}.  Filtering
+    at the byte level using typed memory views avoids per-character Python
+    object creation; pythainlp.util.normalize still prefers str.replace().
+
+    :param text: input text (str or str-like object)
+    :type text: str
+    :return: text with all Thai tone marks removed
+    :rtype: str
+    """
+    cdef str _text = str(text)
+    if not _text:
+        return _text
+
+    # Fast path: bail out early if none of the four tone marks are present
+    cdef Py_UCS4 c
+    cdef bint found = False
+    for c in _text:
+        if c in _TONE_SET:
+            found = True
+            break
+    if not found:
+        return _text
+
+    # Encode once to UTF-8 bytes; use memoryview for C-level access.
+    # IMPORTANT: the byte pattern below is hard-coded for the four Thai tone
+    # marks U+0E48–U+0E4B (encoding: 0xE0 0xB9 {0x88–0x8B}).  If
+    # pythainlp.thai_tonemarks is ever extended beyond those four codepoints
+    # this filter will silently miss any additions; update the scan range
+    # in the while-loop accordingly.
+    cdef bytes src_bytes = _text.encode("utf-8")
+    cdef const unsigned char[:] src = src_bytes
+    cdef Py_ssize_t n = len(src)
+
+    # Pre-allocate output buffer (same size as input; result is always smaller)
+    cdef bytearray dst_arr = bytearray(n)
+    cdef unsigned char[:] dst = dst_arr
+    cdef Py_ssize_t i = 0
+    cdef Py_ssize_t j = 0
+    cdef unsigned char b0
+
+    while i < n:
+        b0 = src[i]
+        # All Thai tone marks share first two bytes 0xE0 0xB9
+        if b0 == 0xE0 and i + 2 < n and src[i + 1] == 0xB9:
+            if 0x88 <= src[i + 2] <= 0x8B:
+                i += 3  # skip tone-mark sequence
+                continue
+        dst[j] = b0
+        j += 1
+        i += 1
+
+    return bytes(dst_arr[:j]).decode("utf-8")
+
+
+cpdef str remove_dup_spaces(object text):
+    """Remove duplicate ASCII spaces and collapse newlines; strip result.
+
+    Behaviorally identical to pythainlp.util.normalize.remove_dup_spaces:
+    - Only ASCII space (0x20) runs are collapsed (not tabs or other whitespace)
+    - Newline normalisation is delegated to the same compiled regex
+
+    :param text: input text (str or str-like object)
+    :type text: str
+    :return: text without duplicate spaces, with newlines normalised and
+             leading/trailing whitespace stripped
+    :rtype: str
+    """
+    cdef str _text = str(text)
+    cdef list out = []
+    cdef Py_UCS4 c
+    cdef bint prev_space = False
+    for c in _text:
+        if c == 32:  # ASCII space 0x20
+            if not prev_space:
+                out.append(" ")
+            prev_space = True
+        else:
+            out.append(chr(c))
+            prev_space = False
+    result = "".join(out)
+    result = _RE_REMOVE_NEWLINES.sub("\n", result)
+    return result.strip()
diff --git a/pythainlp/_ext/_thai_fast.pyi b/pythainlp/_ext/_thai_fast.pyi
new file mode 100644
index 000000000..186feb2e8
--- /dev/null
+++ b/pythainlp/_ext/_thai_fast.pyi
@@ -0,0 +1,11 @@
+# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
+# SPDX-FileType: SOURCE
+# SPDX-License-Identifier: Apache-2.0
+"""Type stubs for pythainlp._ext._thai_fast Cython extension."""
+
+def is_thai_char(ch: str) -> bool: ...
+def is_thai(text: str, ignore_chars: str = ...) -> bool: ...
+def count_thai(
+    text: str,
+    ignore_chars: str = ...,  # defaults to whitespace + digits + punctuation
+) -> float: ...
diff --git a/pythainlp/_ext/_thai_fast.pyx b/pythainlp/_ext/_thai_fast.pyx
new file mode 100644
index 000000000..8186f22c2
--- /dev/null
+++ b/pythainlp/_ext/_thai_fast.pyx
@@ -0,0 +1,105 @@
+# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
+# SPDX-FileType: SOURCE
+# SPDX-License-Identifier: Apache-2.0
+# NOTE: compiler directives are only honoured when they precede all code,
+# and a module docstring counts as code -- keep this above the docstring.
+# cython: language_level=3, boundscheck=False, wraparound=False
+"""Cython-optimized Thai character classification functions.
+
+Provides faster implementations of is_thai_char, is_thai, and count_thai
+by eliminating Python dispatch overhead and using C-level type declarations
+for the inner character iteration loops.
+
+These functions are API-compatible with their equivalents in
+pythainlp.util.thai and are loaded as transparent replacements when the
+Cython extension is available.
+"""
+
+import string as _string
+
+cdef unsigned int _TH_FIRST = 0x0E00  # U+0E00: first Thai character
+cdef unsigned int _TH_LAST  = 0x0E7F  # U+0E7F: last Thai character
+
+
+cpdef bint is_thai_char(object ch):
+    """Return True if ch is a single Thai Unicode character.
+
+    :param ch: input character (str or str-like object; must be exactly one character)
+    :type ch: str
+    :return: True if ch is a Thai character, otherwise False.
+    :rtype: bool
+
+    .. note::
+        Unlike the pure-Python implementation (which raises ``TypeError``
+        for empty or multi-character strings via ``ord()``), this
+        implementation returns ``False`` for any input whose length is
+        not exactly 1.
+    """
+    cdef str _ch = str(ch)
+    if len(_ch) != 1:
+        return False
+    cdef Py_UCS4 c = _ch[0]
+    return _TH_FIRST <= c <= _TH_LAST
+
+
+cpdef bint is_thai(object text, object ignore_chars="."):
+    """Return True if every non-ignored character in text is Thai.
+
+    :param text: input text (str or str-like object)
+    :type text: str
+    :param ignore_chars: characters to ignore during validation;
+        ``None`` is treated the same as ``""`` (no characters ignored)
+    :type ignore_chars: str or None
+    :return: True if text consists only of Thai and ignored characters
+    :rtype: bool
+    """
+    cdef str _text = str(text)
+    # Mirror the Python version: treat None/empty as "ignore nothing"
+    if not ignore_chars:
+        ignore_chars = ""
+    cdef str _ic = ignore_chars
+    cdef Py_UCS4 c
+    for c in _text:
+        if c not in _ic and not (_TH_FIRST <= c <= _TH_LAST):
+            return False
+    return True
+
+
+# Match the default ignore_chars used by the Python count_thai implementation
+_DEFAULT_IGNORE_CHARS: str = (
+    _string.whitespace + _string.digits + _string.punctuation
+)
+
+
+cpdef double count_thai(object text, str ignore_chars=_DEFAULT_IGNORE_CHARS):
+    """Return proportion of Thai characters in text (0.0–100.0).
+
+    :param text: input text (str or str-like object); non-str values (including None) return 0.0
+        to match the behaviour of the pure-Python implementation
+    :type text: str
+    :param ignore_chars: characters to exclude from the denominator,
+        defaults to whitespace, digits, and punctuation marks
+    :type ignore_chars: str
+    :return: percentage of Thai characters in the non-ignored portion
+    :rtype: float
+    """
+    # Matches Python version: non-str or falsy input → 0.0
+    if not text or not isinstance(text, str):
+        return 0.0
+    cdef str _text = text
+    # Normalise: treat empty string as no ignore chars (matches Python version)
+    if not ignore_chars:
+        ignore_chars = ""
+    cdef Py_UCS4 c
+    cdef Py_ssize_t num_thai = 0
+    cdef Py_ssize_t num_ignore = 0
+    cdef Py_ssize_t total = len(_text)
+    for c in _text:
+        if c in ignore_chars:
+            num_ignore += 1
+        elif _TH_FIRST <= c <= _TH_LAST:
+            num_thai += 1
+    cdef Py_ssize_t denom = total - num_ignore
+    if denom == 0:
+        return 0.0
+    return (num_thai / denom) * 100.0
diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index 02138420f..f65adcb43 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -145,8 +145,7 @@ def remove_tonemark(text: str) -> str:
         'สองพันหนึงรอยสีสิบเจ็ดลานสีแสนแปดหมืนสามพันหกรอยสีสิบเจ็ด'
     """
     for ch in tonemarks:
-        while ch in text:
-            text = text.replace(ch, "")
+        text = text.replace(ch, "")
     return text
 
 
@@ -386,3 +385,14 @@ def maiyamok(sent: Union[str, list[str]]) -> list[str]:
         "5.2",
     )
     return expand_maiyamok(sent)
+
+
+# Keep references to the pure-Python implementations before the Cython
+# override below so they remain importable for benchmarking and testing.
+_py_remove_tonemark = remove_tonemark
+_py_remove_dup_spaces = remove_dup_spaces
+
+# Note: Cython overrides for remove_tonemark and remove_dup_spaces are NOT
+# loaded here — Python's str.replace() bulk C operations outperform the
+# Cython encode→byte-filter→decode approach.  The Cython implementations
+# remain in pythainlp._ext._normalize_fast for reference and testing.
diff --git a/pythainlp/util/thai.py b/pythainlp/util/thai.py
index 77a198168..4696aa0be 100644
--- a/pythainlp/util/thai.py
+++ b/pythainlp/util/thai.py
@@ -408,3 +408,31 @@ def analyze_thai_text(text: str) -> dict[str, int]:
             results[char] += 1
 
     return dict(results)
+
+
+# Keep references to the pure-Python implementations before the Cython
+# override below so they remain importable for benchmarking and testing.
+_py_is_thai_char = is_thai_char
+_py_is_thai = is_thai
+_py_count_thai = count_thai
+
+# Load Cython-compiled fast implementations when available.
+# Falls back silently to the Python implementations above on PyPy,
+# systems without a C compiler, or when hatch-cython was not used at build time.
+try:
+    from pythainlp._ext._thai_fast import count_thai as _fast_count_thai
+    from pythainlp._ext._thai_fast import is_thai as _fast_is_thai
+    from pythainlp._ext._thai_fast import is_thai_char as _fast_is_thai_char
+except ImportError:
+    pass
+else:
+    count_thai = _fast_count_thai
+    is_thai = _fast_is_thai
+
+    def _is_thai_char_fast(ch: str) -> bool:
+        # ord(ch) raises the same TypeError as the pure-Python implementation
+        # for empty strings or strings of length != 1, preserving behavior.
+        _ = ord(ch)
+        return _fast_is_thai_char(ch)
+
+    is_thai_char = _is_thai_char_fast
diff --git a/scripts/bench_full_evidence.py b/scripts/bench_full_evidence.py
new file mode 100644
index 000000000..15296490a
--- /dev/null
+++ b/scripts/bench_full_evidence.py
@@ -0,0 +1,319 @@
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
+# SPDX-License-Identifier: Apache-2.0
+"""
+Comprehensive benchmark + cProfile evidence for Phase 1 Cython extensions.
+
+Generates:
+  1. Environment details
+  2. Multi-scale comparison (small / medium / large)
+  3. cProfile hotspot analysis (before / after)
+  4. Dataset description
+
+Usage:
+    PYTHONPATH=. python3 scripts/bench_full_evidence.py
+"""
+
+import cProfile
+import io
+import platform
+import pstats
+import sys
+import timeit
+from collections.abc import Callable
+from typing import Optional
+
+
+# ---------------------------------------------------------------------------
+# 1. Environment
+# ---------------------------------------------------------------------------
+def print_env() -> None:
+    print("=" * 72)
+    print("ENVIRONMENT")
+    print("=" * 72)
+    print(f"  OS            : {platform.system()} {platform.release()}")
+    print(f"  Architecture  : {platform.machine()}")
+    print(f"  CPU           : {_get_cpu_model()}")
+    print(f"  Python        : {sys.version}")
+    print(f"  pythainlp     : {_get_pythainlp_version()}")
+    cython_ver = _get_cython_status()
+    print(f"  Cython ext    : {cython_ver}")
+    print()
+
+
+def _get_cpu_model() -> str:
+    try:
+        with open("/proc/cpuinfo") as f:
+            for line in f:
+                if line.startswith("model name"):
+                    return line.split(":", 1)[1].strip()
+    except OSError:
+        return platform.processor() or "unknown"
+    return platform.processor() or "unknown"
+
+
+def _get_pythainlp_version() -> str:
+    try:
+        import pythainlp
+
+        return pythainlp.__version__
+    except Exception:
+        return "unknown"
+
+
+def _get_cython_status() -> str:
+    try:
+        from pythainlp._ext import _thai_fast, _normalize_fast  # noqa: F401  # pyright: ignore[reportUnusedImport]
+
+        return "loaded (compiled)"
+    except ImportError:
+        return "NOT available (pure Python mode)"
+
+
+# ---------------------------------------------------------------------------
+# 2. Dataset
+# ---------------------------------------------------------------------------
+# Thai Wikipedia-style sample text (real Thai prose)
+_SAMPLE_SHORT = "สวัสดีครับ"  # 10 chars
+_SAMPLE_MEDIUM = "ภาษาไทยเป็นภาษาที่มีวรรณยุกต์ ทำให้การออกเสียงมีความซับซ้อน" * 5  # ~310 chars
+_SAMPLE_LONG = (
+    "ประเทศไทยมีชื่อเรียกอย่างเป็นทางการว่า ราชอาณาจักรไทย "
+    "เป็นรัฐที่ตั้งอยู่ในภูมิภาคเอเชียตะวันออกเฉียงใต้ "
+    "มีพรมแดนทางทิศตะวันออกติดลาวและกัมพูชา ทิศใต้ติดอ่าวไทยและมาเลเซีย "
+    "ทิศตะวันตกติดทะเลอันดามันและพม่า ทิศเหนือติดพม่าและลาว "
+    "โดยมีแม่น้ำโขงกั้นเป็นบางช่วง "
+) * 50  # ~6,000+ chars
+
+_SAMPLE_HUGE = _SAMPLE_LONG * 10  # ~60,000+ chars
+
+_TONE_SHORT = "คำว่า ต้น ไม้ แล้ว ก็ น้ำ"  # ~25 chars with tonemarks
+_TONE_LONG = (
+    "น้ำตกเจ็ดสาวน้อย เป็นน้ำตกที่สวยงามมาก ตั้งอยู่ในอุทยานแห่งชาติ "
+    "เขื่อนศรีนครินทร์ จังหวัดกาญจนบุรี ล้อมรอบด้วยป่าดิบชื้น "
+    "ต้นไม้ใหญ่ น้ำตกไหลจากหน้าผาสูง สร้างความชุ่มเย็นให้กับบริเวณรอบข้าง "
+) * 40  # ~6,000+ chars
+
+
+def print_dataset() -> None:
+    print("=" * 72)
+    print("DATASET")
+    print("=" * 72)
+    print("  Real Thai prose, constructed from Thai Wikipedia-style text.")
+    print(f"  Short  : {len(_SAMPLE_SHORT):>8,} chars  (single greeting)")
+    print(f"  Medium : {len(_SAMPLE_MEDIUM):>8,} chars  (paragraph)")
+    print(f"  Long   : {len(_SAMPLE_LONG):>8,} chars  (article)")
+    print(f"  Huge   : {len(_SAMPLE_HUGE):>8,} chars  (corpus batch)")
+    print(f"  Tone-S : {len(_TONE_SHORT):>8,} chars  (short with tonemarks)")
+    print(f"  Tone-L : {len(_TONE_LONG):>8,} chars  (long with tonemarks)")
+    print()
+
+
+# ---------------------------------------------------------------------------
+# 3. Benchmark helpers
+# ---------------------------------------------------------------------------
+def bench(
+    label: str,
+    func_py: Callable[..., object],
+    func_cy: Optional[Callable[..., object]],
+    args: tuple,
+    number: int = 50_000,
+) -> dict:
+    """Benchmark a single function, return result dict."""
+    # Python
+    timer_py = timeit.Timer(lambda: func_py(*args))
+    times_py = timer_py.repeat(repeat=5, number=number)
+    best_py = min(times_py)
+
+    # Cython
+    if func_cy is not None:
+        timer_cy = timeit.Timer(lambda: func_cy(*args))
+        times_cy = timer_cy.repeat(repeat=5, number=number)
+        best_cy = min(times_cy)
+        speedup = best_py / best_cy
+    else:
+        best_cy = None
+        speedup = None
+
+    return {
+        "label": label,
+        "py_time": best_py,
+        "cy_time": best_cy,
+        "speedup": speedup,
+        "number": number,
+    }
+
+
+def print_table(title: str, rows: list[dict]) -> None:
+    print(f"\n{'─' * 72}")
+    print(f"  {title}")
+    print(f"{'─' * 72}")
+    print(
+        f"  {'Function':<35} {'Python':>10} {'Cython':>10} {'Speedup':>10}"
+    )
+    print(f"  {'─' * 67}")
+    for row in rows:
+        cy_str = (
+            f"{row['cy_time']:.4f}s" if row["cy_time"] is not None else "N/A"
+        )
+        sp_str = (
+            f"{row['speedup']:.1f}x" if row["speedup"] is not None else "—"
+        )
+        print(
+            f"  {row['label']:<35} {row['py_time']:>9.4f}s {cy_str:>10} {sp_str:>10}"
+        )
+    print()
+
+
+# ---------------------------------------------------------------------------
+# 4. cProfile analysis
+# ---------------------------------------------------------------------------
+def profile_function(
+    func: Callable[..., object], args: tuple, repeat: int = 100_000
+) -> str:
+    """Profile repeated func(*args) calls; return the top-15 cumulative report."""
+    pr = cProfile.Profile()
+    pr.enable()
+    for _ in range(repeat):
+        func(*args)
+    pr.disable()
+
+    stream = io.StringIO()
+    ps = pstats.Stats(pr, stream=stream)
+    ps.sort_stats("cumulative")
+    ps.print_stats(15)
+    return stream.getvalue()
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+def main() -> None:
+    print_env()
+    print_dataset()
+
+    # Import Python baselines
+    from pythainlp.util.thai import (
+        _py_count_thai,
+        _py_is_thai,
+        _py_is_thai_char,
+    )
+    from pythainlp.util.normalize import _py_remove_tonemark
+
+    # Import Cython (may be None)
+    try:
+        from pythainlp._ext._thai_fast import (
+            count_thai as cy_count_thai,
+            is_thai as cy_is_thai,
+            is_thai_char as cy_is_thai_char,
+        )
+        from pythainlp._ext._normalize_fast import (
+            remove_tonemark as cy_remove_tonemark,
+        )
+
+        have_ext = True
+    except ImportError:
+        cy_is_thai_char = None
+        cy_is_thai = None
+        cy_count_thai = None
+        cy_remove_tonemark = None
+        have_ext = False
+
+    if not have_ext:
+        print("⚠  Cython extensions NOT available. Showing Python-only.\n")
+
+    # ── Multi-Scale: is_thai_char ──────────────────────────────────────
+    rows_itc = []
+    rows_itc.append(
+        bench(
+            "is_thai_char (1M calls)",
+            _py_is_thai_char,
+            cy_is_thai_char,
+            ("ก",),
+            number=1_000_000,
+        )
+    )
+    print_table("is_thai_char — Single Character Check", rows_itc)
+
+    # ── Multi-Scale: is_thai ───────────────────────────────────────────
+    rows_it = []
+    for label, text, n in [
+        ("is_thai (short, 10 ch)", _SAMPLE_SHORT, 500_000),
+        ("is_thai (medium, ~310 ch)", _SAMPLE_MEDIUM, 100_000),
+        ("is_thai (long, ~6K ch)", _SAMPLE_LONG, 10_000),
+        ("is_thai (huge, ~60K ch)", _SAMPLE_HUGE, 1_000),
+    ]:
+        rows_it.append(bench(label, _py_is_thai, cy_is_thai, (text,), n))
+    print_table("is_thai — Small-Scale vs Big-Scale", rows_it)
+
+    # ── Multi-Scale: count_thai ────────────────────────────────────────
+    rows_ct = []
+    for label, text, n in [
+        ("count_thai (short, 10 ch)", _SAMPLE_SHORT, 500_000),
+        ("count_thai (medium, ~310 ch)", _SAMPLE_MEDIUM, 50_000),
+        ("count_thai (long, ~6K ch)", _SAMPLE_LONG, 5_000),
+        ("count_thai (huge, ~60K ch)", _SAMPLE_HUGE, 500),
+    ]:
+        rows_ct.append(bench(label, _py_count_thai, cy_count_thai, (text,), n))
+    print_table("count_thai — Small-Scale vs Big-Scale", rows_ct)
+
+    # ── Multi-Scale: remove_tonemark ───────────────────────────────────
+    rows_rt = []
+    for label, text, n in [
+        ("remove_tonemark (short, ~25 ch)", _TONE_SHORT, 500_000),
+        ("remove_tonemark (long, ~6K ch)", _TONE_LONG, 5_000),
+    ]:
+        rows_rt.append(
+            bench(label, _py_remove_tonemark, cy_remove_tonemark, (text,), n)
+        )
+    print_table("remove_tonemark — Small-Scale vs Big-Scale", rows_rt)
+
+    # ── cProfile Hotspot Analysis ──────────────────────────────────────
+    print("=" * 72)
+    print("cPROFILE HOTSPOT ANALYSIS")
+    print("=" * 72)
+    print(
+        "  Profiling count_thai on long text (~6K chars) × 100K calls"
+    )
+    print("  to show where time is spent before/after Cython.\n")
+
+    print("── BEFORE (Pure Python count_thai) ──")
+    profile_out = profile_function(
+        _py_count_thai,
+        (_SAMPLE_LONG,),
+        repeat=100_000,
+    )
+    print(profile_out)
+
+    if cy_count_thai is not None:
+        print("── AFTER (Cython count_thai) ──")
+        profile_out = profile_function(
+            cy_count_thai,
+            (_SAMPLE_LONG,),
+            repeat=100_000,
+        )
+        print(profile_out)
+
+    print("── BEFORE (Pure Python remove_tonemark) ──")
+    profile_out = profile_function(
+        _py_remove_tonemark,
+        (_TONE_LONG,),
+        repeat=50_000,
+    )
+    print(profile_out)
+
+    if cy_remove_tonemark is not None:
+        print("── AFTER (Cython remove_tonemark) ──")
+        profile_out = profile_function(
+            cy_remove_tonemark,
+            (_TONE_LONG,),
+            repeat=50_000,
+        )
+        print(profile_out)
+
+    print("=" * 72)
+    print("BENCHMARK COMPLETE")
+    print("=" * 72)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/_noauto_loader.py b/tests/_noauto_loader.py
new file mode 100644
index 000000000..880b58f44
--- /dev/null
+++ b/tests/_noauto_loader.py
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
+# SPDX-FileType: SOURCE
+# SPDX-License-Identifier: Apache-2.0
+"""Shared loader factory for noauto test suites."""
+
+from collections.abc import Callable
+from unittest import TestLoader, TestSuite
+
+
+def make_load_tests(
+    test_packages: list[str],
+) -> Callable[[TestLoader, TestSuite, str], TestSuite]:
+    """Return a load_tests function bound to *test_packages*.
+
+    Each noauto ``__init__.py`` calls this factory so the
+    unittest load-test protocol is implemented in one place.
+    See: https://docs.python.org/3/library/unittest.html#id1
+    """
+
+    def load_tests(
+        loader: TestLoader, standard_tests: TestSuite, pattern: str
+    ) -> TestSuite:
+        suite = TestSuite()
+        for name in test_packages:
+            suite.addTests(loader.loadTestsFromName(name))
+        return suite
+
+    return load_tests
diff --git a/tests/core/__init__.py b/tests/core/__init__.py
index b4d67630f..be67ac153 100644
--- a/tests/core/__init__.py
+++ b/tests/core/__init__.py
@@ -25,6 +25,7 @@
     "tests.core.test_tools",
     "tests.core.test_transliterate",
     "tests.core.test_util",
+    "tests.core.test_util_cython",
 ]
 
 
diff --git a/tests/core/test_util_cython.py b/tests/core/test_util_cython.py
new file mode 100644
index 000000000..ca73e712d
--- /dev/null
+++ b/tests/core/test_util_cython.py
@@ -0,0 +1,64 @@
+# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
+# SPDX-FileType: SOURCE
+# SPDX-License-Identifier: Apache-2.0
+
+"""Coverage tests for the Cython fallback paths in pythainlp.util.thai.
+
+Kept separate from test_util.py to isolate sys.modules/reload side-effects.
+"""
+
+import importlib
+import unittest
+from unittest.mock import patch
+
+
+class TestThaiUtilPurePython(unittest.TestCase):
+    """Exercise the ``_py_*`` references so their bodies stay covered."""
+
+    def test_pure_python_is_thai_char(self):
+        from pythainlp.util.thai import _py_is_thai_char as is_char
+
+        for thai_ch in ("ก", "๕"):
+            self.assertTrue(is_char(thai_ch))
+        for other_ch in ("A", " "):
+            self.assertFalse(is_char(other_ch))
+        # An empty string is rejected outright rather than reported False.
+        self.assertRaises(TypeError, is_char, "")
+
+    def test_pure_python_is_thai(self):
+        from pythainlp.util.thai import _py_is_thai as is_thai
+
+        self.assertTrue(is_thai("กาลเวลา"))
+        self.assertFalse(is_thai("กาล-เวลา"))
+        self.assertTrue(is_thai("กาล-เวลา", ignore_chars="-"))
+        self.assertTrue(is_thai(""))
+
+    def test_pure_python_count_thai(self):
+        from pythainlp.util.thai import _py_count_thai as count
+
+        for text, percent in (("ไทย", 100.0), ("Python", 0.0)):
+            self.assertEqual(count(text), percent)
+        # With nothing ignored, "1" counts as non-Thai: 1 of 2 chars = 50%.
+        self.assertAlmostEqual(count("ก1", ignore_chars=""), 50.0)
+
+
+class TestThaiUtilImportFallback(unittest.TestCase):
+    """Cover the ``except ImportError: pass`` branch in thai.py.
+
+    A ``None`` entry in ``sys.modules`` makes importing ``_thai_fast`` raise
+    ImportError, so reloading thai.py runs the fallback; a second reload
+    outside the patch then re-executes thai.py against the real sys.modules.
+    """
+
+    def test_cython_import_error_fallback(self):
+        import pythainlp.util.thai as thai_mod
+
+        try:
+            with patch.dict(
+                "sys.modules", {"pythainlp._ext._thai_fast": None}
+            ):
+                importlib.reload(thai_mod)
+                self.assertTrue(thai_mod.is_thai_char("ก"))
+                self.assertEqual(thai_mod.count_thai("ไทย"), 100.0)
+        finally:
+            # Reload with the patch undone, even when an assertion above fails
+            importlib.reload(thai_mod)
diff --git a/tests/noauto_cython/__init__.py b/tests/noauto_cython/__init__.py
index 92e348e7d..7e2068d8a 100644
--- a/tests/noauto_cython/__init__.py
+++ b/tests/noauto_cython/__init__.py
@@ -15,26 +15,14 @@
 workflows with appropriate build environments.
 """
 
-from unittest import TestLoader, TestSuite
+from tests._noauto_loader import make_load_tests
 
-# Names of module to be tested
 test_packages: list[str] = [
     "tests.noauto_cython.testn_spell_cython",
+    "tests.noauto_cython.testn_fast_functions",
 ]
 
-
-def load_tests(
-    loader: TestLoader, standard_tests: TestSuite, pattern: str
-) -> TestSuite:
-    """Load test protocol
-    See: https://docs.python.org/3/library/unittest.html#id1
-    """
-    suite = TestSuite()
-    for test_package in test_packages:
-        tests = loader.loadTestsFromName(test_package)
-        suite.addTests(tests)
-    return suite
-
+load_tests = make_load_tests(test_packages)
 
 if __name__ == "__main__":
     from unittest import main
diff --git a/tests/noauto_cython/testn_fast_functions.py b/tests/noauto_cython/testn_fast_functions.py
new file mode 100644
index 000000000..b94678b93
--- /dev/null
+++ b/tests/noauto_cython/testn_fast_functions.py
@@ -0,0 +1,211 @@
+# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
+# SPDX-FileType: SOURCE
+# SPDX-License-Identifier: Apache-2.0
+"""Correctness and performance tests for Cython-compiled fast functions.
+
+These tests verify that the Cython implementations in pythainlp._ext produce
+identical output to the pure Python implementations they replace.
+
+Tests are skipped automatically when the Cython extensions are not built
+(e.g., on PyPy or systems without a C compiler).
+"""
+
+import unittest
+
+try:
+    from pythainlp._ext._normalize_fast import (
+        remove_dup_spaces as fast_remove_dup_spaces,
+    )
+    from pythainlp._ext._normalize_fast import (
+        remove_tonemark as fast_remove_tonemark,
+    )
+    from pythainlp._ext._thai_fast import (
+        count_thai as fast_count_thai,
+    )
+    from pythainlp._ext._thai_fast import (
+        is_thai as fast_is_thai,
+    )
+    from pythainlp._ext._thai_fast import (
+        is_thai_char as fast_is_thai_char,
+    )
+
+    HAVE_EXT = True
+except ImportError:
+    HAVE_EXT = False
+
+
+class FastThaiCharCorrectnessTest(unittest.TestCase):
+    """Verify Cython _thai_fast functions match Python implementations."""
+
+    def setUp(self) -> None:
+        if not HAVE_EXT:
+            self.skipTest(
+                "pythainlp._ext Cython extensions not built; skipping"
+            )
+
+    def test_is_thai_char_thai(self) -> None:
+        for ch in ["ก", "ข", "ค", "๑", "฿", "ๆ", "ๅ"]:
+            with self.subTest(ch=ch):
+                self.assertTrue(fast_is_thai_char(ch))
+
+    def test_is_thai_char_non_thai(self) -> None:
+        for ch in ["a", "Z", "0", "9", " ", "あ", "中", "€"]:
+            with self.subTest(ch=ch):
+                self.assertFalse(fast_is_thai_char(ch))
+
+    def test_is_thai_char_boundary(self) -> None:
+        # First and last code points in the Thai Unicode block
+        self.assertTrue(fast_is_thai_char(chr(0x0E00)))
+        self.assertTrue(fast_is_thai_char(chr(0x0E7F)))
+        # Just outside the Thai block
+        self.assertFalse(fast_is_thai_char(chr(0x0DFF)))
+        self.assertFalse(fast_is_thai_char(chr(0x0E80)))
+
+    def test_is_thai_char_empty(self) -> None:
+        self.assertFalse(fast_is_thai_char(""))
+
+    def test_is_thai_char_matches_python(self) -> None:
+        # Use the pure-Python reference saved before the Cython override runs.
+        # Empty string is excluded: Python's ord("") raises TypeError while
+        # Cython returns False — this known difference is covered separately
+        # in test_is_thai_char_empty.
+        from pythainlp.util.thai import _py_is_thai_char as py_is_thai_char
+
+        test_chars = [
+            "ก",
+            "ข",
+            "ค",
+            "a",
+            "1",
+            " ",
+            chr(0x0E00),
+            chr(0x0E7F),
+            chr(0x0DFF),
+            chr(0x0E80),
+            "あ",
+        ]
+        for ch in test_chars:
+            with self.subTest(ch=ch):
+                self.assertEqual(
+                    fast_is_thai_char(ch),
+                    py_is_thai_char(ch),
+                    f"Mismatch for {ch!r}",
+                )
+
+    def test_is_thai_matches_python(self) -> None:
+        from pythainlp.util.thai import _py_is_thai as py_is_thai
+
+        test_cases = [
+            ("ทดสอบ", "."),
+            ("ทดสอบ1", "."),
+            ("hello", "."),
+            ("ทดสอบ123", "123"),
+            ("", "."),
+            ("ก.", "."),
+        ]
+        for text, ignore in test_cases:
+            with self.subTest(text=text, ignore=ignore):
+                self.assertEqual(
+                    fast_is_thai(text, ignore),
+                    py_is_thai(text, ignore),
+                    f"Mismatch for {text!r}, ignore={ignore!r}",
+                )
+
+    def test_count_thai_matches_python(self) -> None:
+        from pythainlp.util.thai import _py_count_thai as py_count_thai
+
+        test_cases = [
+            ("ไทยเอ็นแอลพี 3.0", ""),
+            ("PyThaiNLP 3.0", ""),
+            ("ใช้งาน PyThaiNLP 3.0", ""),
+            ("", ""),
+            ("กขค", ""),
+            ("กขค 123", " 0123456789"),
+        ]
+        for text, ignore in test_cases:
+            with self.subTest(text=text, ignore=ignore):
+                self.assertAlmostEqual(
+                    fast_count_thai(text, ignore),
+                    py_count_thai(text, ignore),
+                    places=6,
+                    msg=f"Mismatch for {text!r}",
+                )
+
+
+class FastNormalizeCorrectnessTest(unittest.TestCase):
+    """Verify Cython _normalize_fast functions match Python implementations."""
+
+    def setUp(self) -> None:
+        if not HAVE_EXT:
+            self.skipTest(
+                "pythainlp._ext Cython extensions not built; skipping"
+            )
+
+    def test_remove_tonemark_matches_python(self) -> None:
+        from pythainlp.util.normalize import (
+            _py_remove_tonemark as py_remove_tonemark,
+        )
+
+        test_cases = [
+            "จิ้น",
+            "เก๋า",
+            "สองพันหนึ่งร้อยสี่สิบเจ็ดล้านสี่แสนแปดหมื่นสามพันหกร้อยสี่สิบเจ็ด",
+            "",
+            "no tonemarks here ก ข ค",
+            "ก่ก้ก๊ก๋",
+            "mixed Thai and English text กับ tone marks ่้๊๋",
+        ]
+        for text in test_cases:
+            with self.subTest(text=text):
+                self.assertEqual(
+                    fast_remove_tonemark(text),
+                    py_remove_tonemark(text),
+                    f"Mismatch for {text!r}",
+                )
+
+    def test_remove_tonemark_removes_all_four(self) -> None:
+        # Every mark in thai_tonemarks must be removed; report each one
+        from pythainlp import thai_tonemarks
+
+        for mark in thai_tonemarks:
+            with self.subTest(mark=mark):
+                text = f"ก{mark}า"
+                self.assertNotIn(
+                    mark,
+                    fast_remove_tonemark(text),
+                    f"Tone mark U+{ord(mark):04X} was not removed",
+                )
+
+    def test_remove_dup_spaces_matches_python(self) -> None:
+        from pythainlp.util.normalize import (
+            remove_dup_spaces as py_remove_dup_spaces,
+        )
+
+        test_cases = [
+            "ก    ข    ค",
+            "  ab  c d  ",
+            "normal spaces",
+            "",
+            "   leading",
+            "trailing   ",
+            "a  b  c",
+        ]
+        for text in test_cases:
+            with self.subTest(text=text):
+                self.assertEqual(
+                    fast_remove_dup_spaces(text),
+                    py_remove_dup_spaces(text),
+                    f"Mismatch for {text!r}",
+                )
+
+    def test_remove_dup_spaces_preserves_tabs(self) -> None:
+        # Tabs are NOT collapsed (only ASCII 0x20 spaces are)
+        from pythainlp.util.normalize import (
+            remove_dup_spaces as py_remove_dup_spaces,
+        )
+
+        text = "a\t\tb"
+        self.assertEqual(
+            fast_remove_dup_spaces(text), py_remove_dup_spaces(text)
+        )
+
diff --git a/tests/noauto_network/__init__.py b/tests/noauto_network/__init__.py
index 57b6322ca..570aea480 100644
--- a/tests/noauto_network/__init__.py
+++ b/tests/noauto_network/__init__.py
@@ -18,26 +18,13 @@
 with appropriate network access and caching.
 """
 
-from unittest import TestLoader, TestSuite
+from tests._noauto_loader import make_load_tests
 
-# Names of module to be tested
 test_packages: list[str] = [
     "tests.noauto_network.testn_spell_network",
 ]
 
-
-def load_tests(
-    loader: TestLoader, standard_tests: TestSuite, pattern: str
-) -> TestSuite:
-    """Load test protocol
-    See: https://docs.python.org/3/library/unittest.html#id1
-    """
-    suite = TestSuite()
-    for test_package in test_packages:
-        tests = loader.loadTestsFromName(test_package)
-        suite.addTests(tests)
-    return suite
-
+load_tests = make_load_tests(test_packages)
 
 if __name__ == "__main__":
     from unittest import main
diff --git a/tests/noauto_onnx/__init__.py b/tests/noauto_onnx/__init__.py
index 0bc3325a5..5e17fb142 100644
--- a/tests/noauto_onnx/__init__.py
+++ b/tests/noauto_onnx/__init__.py
@@ -17,9 +17,8 @@
 workflows dedicated to ONNX Runtime-based features.
 """
 
-from unittest import TestLoader, TestSuite
+from tests._noauto_loader import make_load_tests
 
-# Names of module to be tested
 test_packages: list[str] = [
     "tests.noauto_onnx.testn_spell_onnx",
     "tests.noauto_onnx.testn_tag_onnx",
@@ -27,19 +26,7 @@
     "tests.noauto_onnx.testn_transliterate_onnx",
 ]
 
-
-def load_tests(
-    loader: TestLoader, standard_tests: TestSuite, pattern: str
-) -> TestSuite:
-    """Load test protocol
-    See: https://docs.python.org/3/library/unittest.html#id1
-    """
-    suite = TestSuite()
-    for test_package in test_packages:
-        tests = loader.loadTestsFromName(test_package)
-        suite.addTests(tests)
-    return suite
-
+load_tests = make_load_tests(test_packages)
 
 if __name__ == "__main__":
     from unittest import main
diff --git a/tests/noauto_tensorflow/__init__.py b/tests/noauto_tensorflow/__init__.py
index dd71f2b28..f05f6cc5c 100644
--- a/tests/noauto_tensorflow/__init__.py
+++ b/tests/noauto_tensorflow/__init__.py
@@ -17,26 +17,13 @@
 workflows dedicated to TensorFlow-based features.
 """
 
-from unittest import TestLoader, TestSuite
+from tests._noauto_loader import make_load_tests
 
-# Names of module to be tested
 test_packages: list[str] = [
     "tests.noauto_tensorflow.testn_tokenize_tensorflow",
 ]
 
-
-def load_tests(
-    loader: TestLoader, standard_tests: TestSuite, pattern: str
-) -> TestSuite:
-    """Load test protocol
-    See: https://docs.python.org/3/library/unittest.html#id1
-    """
-    suite = TestSuite()
-    for test_package in test_packages:
-        tests = loader.loadTestsFromName(test_package)
-        suite.addTests(tests)
-    return suite
-
+load_tests = make_load_tests(test_packages)
 
 if __name__ == "__main__":
     from unittest import main
diff --git a/tests/noauto_torch/__init__.py b/tests/noauto_torch/__init__.py
index 1b97e04f6..2a337528b 100644
--- a/tests/noauto_torch/__init__.py
+++ b/tests/noauto_torch/__init__.py
@@ -19,9 +19,8 @@
 workflows dedicated to PyTorch-based features.
 """
 
-from unittest import TestLoader, TestSuite
+from tests._noauto_loader import make_load_tests
 
-# Names of module to be tested
 test_packages: list[str] = [
     "tests.noauto_torch.testn_augment_torch",
     "tests.noauto_torch.testn_lm_torch",
@@ -33,19 +32,7 @@
     "tests.noauto_torch.testn_transliterate_torch",
 ]
 
-
-def load_tests(
-    loader: TestLoader, standard_tests: TestSuite, pattern: str
-) -> TestSuite:
-    """Load test protocol
-    See: https://docs.python.org/3/library/unittest.html#id1
-    """
-    suite = TestSuite()
-    for test_package in test_packages:
-        tests = loader.loadTestsFromName(test_package)
-        suite.addTests(tests)
-    return suite
-
+load_tests = make_load_tests(test_packages)
 
 if __name__ == "__main__":
     from unittest import main