From aa17eb96fd7c22c5a41eabfbd285f7370f911010 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Mon, 9 Mar 2026 13:12:10 +0100
Subject: [PATCH 01/42] Add lazy spaCy CLI loading and static launcher

---
 MANIFEST.in                      |   1 +
 setup.cfg                        |   2 +-
 setup.py                         |   2 +-
 spacy/cli/__init__.py            | 110 +++++++++++++++++++---------
 spacy/cli/_dispatch.py           | 105 +++++++++++++++++++++++++++
 spacy/cli/_util.py               |  18 ++---
 spacy/tests/test_cli_app.py      |   4 ++
 spacy/tests/test_cli_launcher.py | 102 ++++++++++++++++++++++++++
 spacy_cli/__init__.py            |   1 +
 spacy_cli/build_manifest.py      |  99 ++++++++++++++++++++++++++
 spacy_cli/cli_manifest.json      | 118 +++++++++++++++++++++++++++++++
 spacy_cli/main.py                |  69 ++++++++++++++++++
 spacy_cli/static.py              |  24 +++++++
 13 files changed, 610 insertions(+), 45 deletions(-)
 create mode 100644 spacy/cli/_dispatch.py
 create mode 100644 spacy/tests/test_cli_launcher.py
 create mode 100644 spacy_cli/__init__.py
 create mode 100644 spacy_cli/build_manifest.py
 create mode 100644 spacy_cli/cli_manifest.json
 create mode 100644 spacy_cli/main.py
 create mode 100644 spacy_cli/static.py

diff --git a/MANIFEST.in b/MANIFEST.in
index 1caf758464f..36465ea94a0 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,5 @@
 recursive-include spacy *.pyi *.pyx *.pxd *.txt *.cfg *.jinja *.toml *.hh
+recursive-include spacy_cli *.json
 include LICENSE
 include README.md
 include pyproject.toml
diff --git a/setup.cfg b/setup.cfg
index c4928af9224..a0cfac9749a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -68,7 +68,7 @@ install_requires =
 
 [options.entry_points]
 console_scripts =
-    spacy = spacy.cli:setup_cli
+    spacy = spacy_cli.main:main
 
 [options.extras_require]
 lookups =
diff --git a/setup.py b/setup.py
index 33178662df4..4243f8731f3 100755
--- a/setup.py
+++ b/setup.py
@@ -213,7 +213,7 @@ def setup_package():
         version=about["__version__"],
         ext_modules=ext_modules,
         cmdclass={"build_ext": build_ext_subclass},
-        package_data={"": ["*.pyx", "*.pxd", "*.pxi"]},
+        package_data={"": ["*.pyx", "*.pxd", "*.pxi"], "spacy_cli": ["*.json"]},
     )
 
 
diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py
index 3095778fe22..dcfb4b8a92e 100644
--- a/spacy/cli/__init__.py
+++ b/spacy/cli/__init__.py
@@ -1,40 +1,82 @@
+import sys
+from importlib import import_module
+from typing import Iterable
+
+from typer.main import get_command
 from wasabi import msg
 
-# Needed for testing
-from . import download as download_module  # noqa: F401
-from ._util import app, setup_cli  # noqa: F401
-from .apply import apply  # noqa: F401
-from .assemble import assemble_cli  # noqa: F401
-
-# These are the actual functions, NOT the wrapped CLI commands. The CLI commands
-# are registered automatically and won't have to be imported here.
-from .benchmark_speed import benchmark_speed_cli  # noqa: F401
-from .convert import convert  # noqa: F401
-from .debug_config import debug_config  # noqa: F401
-from .debug_data import debug_data  # noqa: F401
-from .debug_diff import debug_diff  # noqa: F401
-from .debug_model import debug_model  # noqa: F401
-from .download import download  # noqa: F401
-from .evaluate import evaluate  # noqa: F401
-from .find_function import find_function  # noqa: F401
-from .find_threshold import find_threshold  # noqa: F401
-from .info import info  # noqa: F401
-from .init_config import fill_config, init_config  # noqa: F401
-from .init_pipeline import init_pipeline_cli  # noqa: F401
-from .package import package  # noqa: F401
-from .pretrain import pretrain  # noqa: F401
-from .profile import profile  # noqa: F401
-from .project.assets import project_assets  # type: ignore[attr-defined]  # noqa: F401
-from .project.clone import project_clone  # type: ignore[attr-defined]  # noqa: F401
-from .project.document import (  # type: ignore[attr-defined]  # noqa: F401
-    project_document,
+from ..util import registry
+from ._dispatch import (
+    GROUP_MODULES,
+    PUBLIC_ATTRS,
+    SUBCOMMAND_MODULES,
+    TOP_LEVEL_MODULES,
 )
-from .project.dvc import project_update_dvc  # type: ignore[attr-defined]  # noqa: F401
-from .project.pull import project_pull  # type: ignore[attr-defined]  # noqa: F401
-from .project.push import project_push  # type: ignore[attr-defined]  # noqa: F401
-from .project.run import project_run  # type: ignore[attr-defined]  # noqa: F401
-from .train import train_cli  # type: ignore[attr-defined]  # noqa: F401
-from .validate import validate  # type: ignore[attr-defined]  # noqa: F401
+from ._dispatch import iter_builtin_modules
+from ._util import COMMAND, add_project_cli, app
+
+HELP_OPTIONS = {"--help", "-h"}
+ROOT_OPTIONS = HELP_OPTIONS | {"--install-completion", "--show-completion"}
+
+__all__ = [
+    # "app" is a PUBLIC_ATTRS key, so the unpacking below already exports it once.
+    "load_all_commands",
+    "load_for_argv",
+    "setup_cli",
+    *sorted(PUBLIC_ATTRS),
+]
+
+
+def _import_modules(module_names: Iterable[str]) -> None:
+    """Import the given modules, then re-point lazy exports at their callables."""
+    for module_name in module_names:
+        import_module(module_name)
+    # Importing ``spacy.cli.download`` binds that *module* as ``spacy.cli.download``,
+    # hiding the same-named function and bypassing ``__getattr__``; rebind it here.
+    for name, (source, attr) in PUBLIC_ATTRS.items():
+        if attr is not None and source in sys.modules:
+            globals()[name] = getattr(sys.modules[source], attr)
+
+
+def load_all_commands() -> None:
+    """Import every built-in command module and mount the ``project`` CLI."""
+    _import_modules(iter_builtin_modules())
+    add_project_cli()
+
+
+def load_for_argv(argv: Iterable[str]) -> None:
+    """Import only the modules ``argv`` needs, or everything if no command is named."""
+    args = list(argv)
+    if not args or args[0] in ROOT_OPTIONS or args[0].startswith("-"):
+        load_all_commands()
+    elif args[0] == "project":
+        add_project_cli()
+    elif args[0] in GROUP_MODULES:
+        _import_modules(SUBCOMMAND_MODULES.get(tuple(args[:2]), GROUP_MODULES[args[0]]))
+    else:
+        _import_modules(TOP_LEVEL_MODULES.get(args[0], ()))
+
+
+def setup_cli() -> None:
+    """Load the commands ``sys.argv`` asks for, then run the Typer app."""
+    # Reading the registry imports entry-point CLI integrations before dispatch.
+    registry.cli.get_all()
+    load_for_argv(sys.argv[1:])
+    get_command(app)(prog_name=COMMAND)
+
+
+def __getattr__(name: str):
+    """Lazily resolve a PUBLIC_ATTRS name (PEP 562), caching it in globals()."""
+    target = PUBLIC_ATTRS.get(name)
+    if target is None:
+        raise AttributeError(f"module 'spacy.cli' has no attribute {name!r}")
+    module = import_module(target[0])
+    globals()[name] = module if target[1] is None else getattr(module, target[1])
+    return globals()[name]
+
+
+def __dir__():
+    return sorted({*globals(), *PUBLIC_ATTRS})  # also list not-yet-loaded lazy names
 
 
 @app.command("link", no_args_is_help=True, deprecated=True, hidden=True)
diff --git a/spacy/cli/_dispatch.py b/spacy/cli/_dispatch.py
new file mode 100644
index 00000000000..e1975dd7e1d
--- /dev/null
+++ b/spacy/cli/_dispatch.py
@@ -0,0 +1,105 @@
+from typing import Dict, Iterable, Optional, Tuple
+
+
+CommandPath = Tuple[str, ...]  # e.g. ("debug", "config")
+
+
+TOP_LEVEL_MODULES: Dict[str, Tuple[str, ...]] = {  # command -> modules to import
+    "apply": ("spacy.cli.apply",),
+    "assemble": ("spacy.cli.assemble",),
+    "convert": ("spacy.cli.convert",),
+    "debug-data": ("spacy.cli.debug_data",),
+    "download": ("spacy.cli.download",),
+    "evaluate": ("spacy.cli.evaluate",),
+    "find-function": ("spacy.cli.find_function",),
+    "find-threshold": ("spacy.cli.find_threshold",),
+    "info": ("spacy.cli.info",),
+    "package": ("spacy.cli.package",),
+    "pretrain": ("spacy.cli.pretrain",),
+    "profile": ("spacy.cli.profile",),
+    "train": ("spacy.cli.train",),
+    "validate": ("spacy.cli.validate",),
+}
+
+
+GROUP_MODULES: Dict[str, Tuple[str, ...]] = {  # group -> every module of that group
+    "benchmark": (
+        "spacy.cli.benchmark_speed",
+        "spacy.cli.evaluate",
+    ),
+    "debug": (
+        "spacy.cli.debug_config",
+        "spacy.cli.debug_data",
+        "spacy.cli.debug_diff",
+        "spacy.cli.debug_model",
+        "spacy.cli.profile",
+    ),
+    "init": (
+        "spacy.cli.init_config",
+        "spacy.cli.init_pipeline",
+    ),
+}
+
+
+SUBCOMMAND_MODULES: Dict[CommandPath, Tuple[str, ...]] = {  # (group, sub) -> modules
+    ("benchmark", "accuracy"): ("spacy.cli.evaluate",),
+    ("benchmark", "speed"): ("spacy.cli.benchmark_speed",),
+    ("debug", "config"): ("spacy.cli.debug_config",),
+    ("debug", "data"): ("spacy.cli.debug_data",),
+    ("debug", "diff-config"): ("spacy.cli.debug_diff",),
+    ("debug", "model"): ("spacy.cli.debug_model",),
+    ("debug", "profile"): ("spacy.cli.profile",),
+    ("init", "config"): ("spacy.cli.init_config",),
+    ("init", "fill-config"): ("spacy.cli.init_config",),
+    ("init", "labels"): ("spacy.cli.init_pipeline",),
+    ("init", "nlp"): ("spacy.cli.init_pipeline",),
+    ("init", "vectors"): ("spacy.cli.init_pipeline",),
+}
+
+
+PUBLIC_ATTRS: Dict[str, Tuple[str, Optional[str]]] = {  # name -> (module, attr)
+    "app": ("spacy.cli._util", "app"),
+    "apply": ("spacy.cli.apply", "apply"),
+    "assemble_cli": ("spacy.cli.assemble", "assemble_cli"),
+    "benchmark_speed_cli": ("spacy.cli.benchmark_speed", "benchmark_speed_cli"),
+    "convert": ("spacy.cli.convert", "convert"),
+    "debug_config": ("spacy.cli.debug_config", "debug_config"),
+    "debug_data": ("spacy.cli.debug_data", "debug_data"),
+    "debug_diff": ("spacy.cli.debug_diff", "debug_diff"),
+    "debug_model": ("spacy.cli.debug_model", "debug_model"),
+    "download": ("spacy.cli.download", "download"),
+    "download_module": ("spacy.cli.download", None),  # None -> the module itself
+    "evaluate": ("spacy.cli.evaluate", "evaluate"),
+    "fill_config": ("spacy.cli.init_config", "fill_config"),
+    "find_function": ("spacy.cli.find_function", "find_function"),
+    "find_threshold": ("spacy.cli.find_threshold", "find_threshold"),
+    "info": ("spacy.cli.info", "info"),
+    "init_config": ("spacy.cli.init_config", "init_config"),
+    "init_pipeline_cli": ("spacy.cli.init_pipeline", "init_pipeline_cli"),
+    "package": ("spacy.cli.package", "package"),
+    "pretrain": ("spacy.cli.pretrain", "pretrain"),
+    "profile": ("spacy.cli.profile", "profile"),
+    "project_assets": ("spacy.cli.project.assets", "project_assets"),
+    "project_clone": ("spacy.cli.project.clone", "project_clone"),
+    "project_document": ("spacy.cli.project.document", "project_document"),
+    "project_pull": ("spacy.cli.project.pull", "project_pull"),
+    "project_push": ("spacy.cli.project.push", "project_push"),
+    "project_run": ("spacy.cli.project.run", "project_run"),
+    "project_update_dvc": ("spacy.cli.project.dvc", "project_update_dvc"),
+    "train_cli": ("spacy.cli.train", "train_cli"),
+    "validate": ("spacy.cli.validate", "validate"),
+}
+
+
+def iter_builtin_modules() -> Iterable[str]:
+    """Yield every built-in CLI module path exactly once.
+
+    TOP_LEVEL_MODULES entries come first, then unseen GROUP_MODULES entries.
+    """
+    emitted = set()
+    for table in (TOP_LEVEL_MODULES, GROUP_MODULES):
+        for modules in table.values():
+            for path in modules:
+                if path not in emitted:
+                    emitted.add(path)
+                    yield path
diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py
index 309b6b1e79a..75b21dd3775 100644
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@@ -23,9 +23,7 @@
 from click.shell_completion import split_arg_string
 from thinc.api import Config, ConfigValidationError, require_gpu
 from thinc.util import gpu_is_available
-from typer.main import get_command
 from wasabi import Printer, msg
-from weasel import app as project_cli
 
 from .. import about
 from ..compat import Literal
@@ -72,19 +70,21 @@
 benchmark_cli = typer.Typer(name="benchmark", help=BENCHMARK_HELP, no_args_is_help=True)
 debug_cli = typer.Typer(name="debug", help=DEBUG_HELP, no_args_is_help=True)
 init_cli = typer.Typer(name="init", help=INIT_HELP, no_args_is_help=True)
+_PROJECT_CLI_ADDED = False
 
-app.add_typer(project_cli, name="project", help=PROJECT_HELP, no_args_is_help=True)
 app.add_typer(debug_cli)
 app.add_typer(benchmark_cli)
 app.add_typer(init_cli)
 
 
-def setup_cli() -> None:
-    # Make sure the entry-point for CLI runs, so that they get imported.
-    registry.cli.get_all()
-    # Ensure that the help messages always display the correct prompt
-    command = get_command(app)
-    command(prog_name=COMMAND)
+def add_project_cli() -> None:
+    """Mount weasel's ``project`` commands on ``app``; later calls are no-ops."""
+    global _PROJECT_CLI_ADDED
+    if _PROJECT_CLI_ADDED:
+        return
+    from weasel import app as project_cli  # deferred: weasel loads only on demand
+    app.add_typer(project_cli, name="project", help=PROJECT_HELP, no_args_is_help=True)
+    _PROJECT_CLI_ADDED = True
 
 
 def parse_config_overrides(
diff --git a/spacy/tests/test_cli_app.py b/spacy/tests/test_cli_app.py
index 1789d60ea4c..031de1fc25a 100644
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@@ -6,12 +6,16 @@
 import srsly
 from typer.testing import CliRunner
 
+from spacy.cli import load_all_commands
 from spacy.cli._util import app, get_git_version
 from spacy.tokens import Doc, DocBin, Span
 
 from .util import make_tempdir, normalize_whitespace
 
 
+load_all_commands()  # command modules are no longer imported with ``spacy.cli``
+
+
 def has_git():
     try:
         get_git_version()
diff --git a/spacy/tests/test_cli_launcher.py b/spacy/tests/test_cli_launcher.py
new file mode 100644
index 00000000000..392572fdeec
--- /dev/null
+++ b/spacy/tests/test_cli_launcher.py
@@ -0,0 +1,102 @@
+import importlib
+import subprocess
+import sys
+
+import pytest
+
+from spacy_cli.build_manifest import build_manifest
+from spacy_cli.static import load_manifest
+
+launcher_module = importlib.import_module("spacy_cli.main")
+
+
+def _run_python(code: str) -> str:
+    """Run ``code`` in a fresh interpreter and return its stripped stdout.
+
+    Raises ``subprocess.CalledProcessError`` if the child exits non-zero.
+    """
+    argv = [sys.executable, "-c", code]
+    proc = subprocess.run(argv, check=True, capture_output=True, text=True)
+    return proc.stdout.strip()
+
+
+def test_cli_package_import_is_lazy():
+    """Importing ``spacy.cli`` alone must not import ``train`` or weasel."""
+    # ``_run_python`` uses a fresh interpreter, so earlier imports cannot leak in.
+    checks = "print('spacy.cli.train' in sys.modules); print('weasel' in sys.modules)"
+    lines = _run_python(f"import sys; import spacy.cli; {checks}").splitlines()
+    assert lines == ["False", "False"]
+
+
+def test_load_for_argv_imports_only_requested_command():
+    """``load_for_argv`` for ``train`` imports that module but not weasel."""
+    setup = "import sys; from spacy.cli import load_for_argv; "
+    action = "load_for_argv(['train', '--help']); "
+    checks = "print('spacy.cli.train' in sys.modules); print('weasel' in sys.modules)"
+    lines = _run_python(setup + action + checks).splitlines()
+    assert lines == ["True", "False"]
+
+
+def test_load_for_argv_imports_project_on_demand():
+    """Dispatching to ``project`` is what triggers the weasel import."""
+    setup = "import sys; from spacy.cli import load_for_argv; "
+    action = "load_for_argv(['project', '--help']); print('weasel' in sys.modules)"
+    weasel_loaded = _run_python(setup + action)
+    assert weasel_loaded == "True"
+
+
+def test_manifest_is_current():
+    assert build_manifest() == load_manifest()  # fresh build vs. the stored manifest
+
+
+def test_launcher_root_help_uses_static(capsys, monkeypatch):
+    # Pin "no plugins": with plugin commands, root help falls back to the live CLI.
+    monkeypatch.setattr(launcher_module, "get_plugin_command_names", lambda: set())
+    monkeypatch.setattr(launcher_module, "_run_live", lambda: pytest.fail("ran live"))
+    with pytest.raises(SystemExit) as exc:
+        launcher_module.main(["--help"])
+    assert exc.value.code == 0
+    assert capsys.readouterr().out == load_manifest()["root_help"]
+
+
+def test_launcher_command_help_uses_static(capsys, monkeypatch):
+    # Stub plugin discovery so installed plugin packages cannot affect this test.
+    monkeypatch.setattr(launcher_module, "get_plugin_command_names", lambda: set())
+    monkeypatch.setattr(launcher_module, "_run_live", lambda: pytest.fail("ran live"))
+    with pytest.raises(SystemExit) as exc:
+        launcher_module.main(["train", "--help"])
+    assert exc.value.code == 0
+    assert capsys.readouterr().out == load_manifest()["command_help"]["train"]
+
+
+def test_launcher_unknown_command_uses_static_error(capsys, monkeypatch):
+    # Stub plugin discovery so installed plugin packages cannot affect this test.
+    monkeypatch.setattr(launcher_module, "get_plugin_command_names", lambda: set())
+    monkeypatch.setattr(launcher_module, "_run_live", lambda: pytest.fail("ran live"))
+    with pytest.raises(SystemExit) as exc:
+        launcher_module.main(["definitely-not-a-command"])
+    assert exc.value.code == 2
+    assert "No such command 'definitely-not-a-command'" in capsys.readouterr().out
+
+
+def test_launcher_non_help_command_falls_back_to_live(monkeypatch):
+    """A non-help invocation is handed to the live CLI exactly once.
+
+    ``_run_live`` is swapped for a recorder, so no real command is executed.
+    """
+    live_calls = []
+    monkeypatch.setattr(launcher_module, "_run_live", lambda: live_calls.append(True))
+    launcher_module.main(["train", "config.cfg"])
+    assert live_calls == [True]
+
+
+def test_launcher_root_help_falls_back_with_plugins(monkeypatch):
+    """Root ``--help`` is handed to the live CLI when plugin commands exist.
+
+    Plugin discovery is stubbed to report a single command named ``custom``.
+    """
+    live_calls = []
+    monkeypatch.setattr(launcher_module, "_run_live", lambda: live_calls.append(True))
+    monkeypatch.setattr(launcher_module, "get_plugin_command_names", lambda: {"custom"})
+    launcher_module.main(["--help"])
+    assert live_calls == [True]
diff --git a/spacy_cli/__init__.py b/spacy_cli/__init__.py
new file mode 100644
index 00000000000..a2cb1f66b78
--- /dev/null
+++ b/spacy_cli/__init__.py
@@ -0,0 +1 @@
+"""Lightweight launcher package for the spaCy console script."""
diff --git a/spacy_cli/build_manifest.py b/spacy_cli/build_manifest.py
new file mode 100644
index 00000000000..6e019bdcb95
--- /dev/null
+++ b/spacy_cli/build_manifest.py
@@ -0,0 +1,99 @@
+import json
+from pathlib import Path
+from typing import Dict, Iterable, List
+
+from typer.main import get_command
+from typer.testing import CliRunner
+
+from spacy.cli import load_all_commands
+from spacy.cli._util import COMMAND, app
+
+from .static import MANIFEST_FILE, UNKNOWN_COMMAND_TOKEN, UNKNOWN_SUBCOMMAND_TOKEN
+
+DEFAULT_ENV = {"COLUMNS": "100", "LINES": "40", "TERM": "xterm-256color"}
+
+
+def _invoke(runner: CliRunner, cli, args: Iterable[str]):
+    return runner.invoke(cli, [*args], prog_name=COMMAND, env=DEFAULT_ENV)
+
+
+def _get_help(runner: CliRunner, cli, args: Iterable[str]) -> str:
+    argv = list(args)  # materialise once: a one-shot iterator would be empty below
+    result = _invoke(runner, cli, [*argv, "--help"])
+    if result.exit_code != 0:
+        raise RuntimeError(f"Could not render help for: {' '.join(argv) or '<root>'}")
+    return result.stdout
+
+
+def _maybe_get_help(runner: CliRunner, cli, args: Iterable[str]):
+    """Return the ``--help`` text for ``args``, or None if rendering exits non-zero."""
+    # Unlike ``_get_help`` a failed render is not an error; the caller skips it.
+    outcome = _invoke(runner, cli, [*args, "--help"])
+    return outcome.stdout if outcome.exit_code == 0 else None
+
+
+def build_manifest() -> Dict[str, object]:
+    """Render help and error text for every registered command into one dict."""
+    load_all_commands()
+    cli = get_command(app)
+    runner = CliRunner()
+    known_top_level: List[str] = sorted(cli.commands.keys())
+    known_groups: Dict[str, List[str]] = {}
+    hidden_top_level: List[str] = []
+    hidden_group_commands: Dict[str, List[str]] = {}
+    group_help: Dict[str, str] = {}
+    command_help: Dict[str, str] = {}
+    unknown_subcommand: Dict[str, str] = {}
+
+    for name, command in cli.commands.items():
+        if getattr(command, "hidden", False):
+            hidden_top_level.append(name)
+        if not hasattr(command, "commands"):
+            command_help[name] = _get_help(runner, app, [name])
+            continue
+        children = command.commands
+        known_groups[name] = sorted(children)
+        hidden_group_commands[name] = sorted(
+            sub for sub, cmd in children.items() if getattr(cmd, "hidden", False)
+        )
+        group_help[name] = _get_help(runner, app, [name])
+        # Usage errors go to stderr, which newer click releases keep out of
+        # ``stdout``; ``output`` carries the message on every version.
+        probe = _invoke(runner, app, [name, UNKNOWN_SUBCOMMAND_TOKEN])
+        unknown_subcommand[name] = probe.output
+        for sub_name in known_groups[name]:
+            help_text = _maybe_get_help(runner, app, [name, sub_name])
+            if help_text is not None:
+                command_help[f"{name} {sub_name}"] = help_text
+
+    return {
+        "command": COMMAND,
+        "known_top_level": known_top_level,
+        "known_groups": known_groups,
+        "hidden_top_level": hidden_top_level,
+        "hidden_group_commands": hidden_group_commands,
+        "root_help": _get_help(runner, app, []),
+        "group_help": group_help,
+        "command_help": command_help,
+        "errors": {
+            "missing_command": _invoke(runner, app, []).output,
+            "unknown_command": _invoke(runner, app, [UNKNOWN_COMMAND_TOKEN]).output,
+            "unknown_subcommand": unknown_subcommand,
+        },
+    }
+
+
+def write_manifest(path: Path) -> Path:
+    """Write the freshly built manifest to ``path`` as UTF-8 JSON; return ``path``."""
+    text = json.dumps(build_manifest(), indent=2, ensure_ascii=False, sort_keys=True)
+    # ensure_ascii=False keeps box-drawing characters raw, so pin the encoding.
+    path.write_text(text + "\n", encoding="utf-8")
+    return path
+
+
+def main() -> None:
+    write_manifest(Path(__file__).with_name(MANIFEST_FILE))  # next to this module
+
+
+if __name__ == "__main__":
+    main()
diff --git a/spacy_cli/cli_manifest.json b/spacy_cli/cli_manifest.json
new file mode 100644
index 00000000000..361a10dca16
--- /dev/null
+++ b/spacy_cli/cli_manifest.json
@@ -0,0 +1,118 @@
+{
+  "command": "python -m spacy",
+  "command_help": {
+    "apply": "                                                                                                    \n Usage: python -m spacy apply [OPTIONS] MODEL DATA_PATH OUTPUT_FILE                                 \n                                                                                                    \n Apply a trained pipeline to documents to get predictions. Expects a loadable spaCy pipeline and    \n path to the data, which can be a directory or a file. The data files can be provided in multiple   \n formats:     1. .spacy files     2. .jsonl files with a specified \"field\" to read the text from.   \n 3. Files with any other extension are assumed to be containing        a single document. DOCS:     \n https://spacy.io/api/cli#apply                                                                     \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model            TEXT  Model name or path [default: None] [required]                        │\n│ *    data_path        PATH  Location of the documents to predict on. Can be a single file in     │\n│                             .spacy format or a .jsonl file. Files with other extensions are      │\n│                             treated as single plain text documents. If a directory is provided   │\n│                             it is traversed recursively to grab all files to be processed. The   │\n│                             files can be a mixture of .spacy, .jsonl and text files. If .jsonl   │\n│                             is provided the specified field is going to be grabbed (\"text\" by    │\n│                             default).                                                            
│\n│                             [default: None]                                                      │\n│                             [required]                                                           │\n│ *    output_file      FILE  Path to save the resulting .spacy file [default: None] [required]    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code        -c       PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│                                 [default: None]                                                  │\n│ --text-key    -tk      TEXT     Key containing text string for JSONL [default: text]             │\n│ --force       -F                Force overwriting the output file                                │\n│ --gpu-id      -g       INTEGER  GPU ID or -1 for CPU. [default: -1]                              │\n│ --batch-size  -b       INTEGER  Batch size. [default: 1]                                         │\n│ --n-process   -n       INTEGER  number of processors to use. [default: 1]                        │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "assemble": "                                                                                                    \n Usage: python -m spacy assemble [OPTIONS] CONFIG_PATH OUTPUT_PATH                                  \n                                                                                                    \n Assemble a spaCy pipeline from a config file. The config file includes all settings for            \n initializing the pipeline. To override settings in the config, e.g. settings that point to local   \n paths or that you want to experiment with, you can override them as command line options. The      \n --code argument lets you pass in a Python file that can be used to register custom functions that  \n are referenced in the config.                                                                      \n                                                                                                    \n DOCS: https://spacy.io/api/cli#assemble                                                            \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n│ *    output_path      PATH  Output directory to store assembled pipeline in [default: None]      │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH  Path to Python file with additional code (registered functions) to  │\n│                              be imported                                                         │\n│                              [default: 
None]                                                     │\n│ --verbose  -V,-VV            Display more information for debugging purposes                     │\n│ --help                       Show this message and exit.                                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "benchmark accuracy": "                                                                                                    \n Usage: python -m spacy benchmark accuracy [OPTIONS] MODEL DATA_PATH                                \n                                                                                                    \n Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in the binary   \n .spacy format. The --gold-preproc option sets up the evaluation examples with gold-standard        \n sentences and tokens for the predictions. Gold preprocessing helps the annotations align to the    \n tokenization, and may result in sequences of more consistent length. However, it may reduce        \n runtime accuracy due to train/test skew. To render a sample of dependency parses in a HTML file,   \n set as output directory as the displacy_path argument.                                             \n                                                                                                    \n DOCS: https://spacy.io/api/cli#benchmark-accuracy                                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [default: None] [required]                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [default: None]    │\n│                           [required]                                                             │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output          -o       FILE       Output JSON file for metrics [default: None]               │\n│ --code            -c       PATH 
      Path to Python file with additional code (registered       │\n│                                       functions) to be imported                                  │\n│                                       [default: None]                                            │\n│ --gpu-id          -g       INTEGER    GPU ID or -1 for CPU [default: -1]                         │\n│ --gold-preproc    -G                  Use gold preprocessing                                     │\n│ --displacy-path   -dp      DIRECTORY  Directory to output rendered parses as HTML                │\n│                                       [default: None]                                            │\n│ --displacy-limit  -dl      INTEGER    Limit of parses to render as HTML [default: 25]            │\n│ --per-component   -P                  Return scores per component, only applicable when an       │\n│                                       output JSON file is specified.                             │\n│ --spans-key       -sk      TEXT       Spans key to use when evaluating Doc.spans [default: sc]   │\n│ --help                                Show this message and exit.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "benchmark speed": "                                                                                                    \n Usage: python -m spacy benchmark speed [OPTIONS] MODEL DATA_PATH                                   \n                                                                                                    \n Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark data in the binary .spacy    \n format.                                                                                            \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [default: None] [required]                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [default: None]    │\n│                           [required]                                                             │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --batch-size  -b      INTEGER RANGE [x>=1]   Override the pipeline batch size [default: None]    │\n│ --no-shuffle                                 Do not shuffle benchmark data                       │\n│ --gpu-id      -g      INTEGER                GPU ID or -1 for CPU [default: -1]                  │\n│ --batches             INTEGER RANGE [x>=30]  Minimum number of batches to benchmark              │\n│                                              [default: 50]                                       │\n│ --warmup      -w      INTEGER RANGE [x>=0]   Number of iterations over the data for warmup       │\n│                                    
          [default: 3]                                        │\n│ --code        -c      PATH                   Path to Python file with additional code            │\n│                                              (registered functions) to be imported               │\n│                                              [default: None]                                     │\n│ --help                                       Show this message and exit.                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "convert": "                                                                                                    \n Usage: python -m spacy convert [OPTIONS] INPUT_PATH [OUTPUT_DIR]                                   \n                                                                                                    \n Convert files into json or DocBin format for training. The resulting .spacy file can be used with  \n the train command and other experiment management functions.                                       \n                                                                                                    \n If no output_dir is specified and the output format is JSON, the data is written to stdout, so you \n can pipe them forward to a JSON file: $ spacy convert some_file.conllu --file-type json >          \n some_file.json                                                                                     \n DOCS: https://spacy.io/api/cli#convert                                                             \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    input_path      TEXT          Input file or directory [default: None] [required]            │\n│      output_dir      [OUTPUT_DIR]  Output directory. '-' for stdout. 
[default: -]                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --file-type        -t       [json|spacy]  Type of data to produce [default: spacy]               │\n│ --n-sents          -n       INTEGER       Number of sentences per doc (0 to disable)             │\n│                                           [default: 1]                                           │\n│ --seg-sents        -s                     Segment sentences (for -c ner)                         │\n│ --model,--base     -b       TEXT          Trained spaCy pipeline for sentence segmentation to    │\n│                                           use as base (for --seg-sents)                          │\n│                                           [default: None]                                        │\n│ --morphology       -m                     Enable appending morphology to tags                    │\n│ --merge-subtokens  -T                     Merge CoNLL-U subtokens                                │\n│ --converter        -c       TEXT          Converter: ('conllubio', 'conllu', 'conll', 'ner',     │\n│                                           'iob', 'json')                                         │\n│                                           [default: auto]                                        │\n│ --ner-map          -nm      PATH          NER tag mapping (as JSON-encoded dict of entity types) │\n│                                           [default: None]                                        │\n│ --lang             -l       TEXT          Language (if tokenizer required) [default: None]       │\n│ --concatenate      -C                     Concatenate output to a single file                    │\n│ --help                                    Show this message and exit.                            
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug config": "                                                                                                    \n Usage: python -m spacy debug config [OPTIONS] CONFIG_PATH                                          \n                                                                                                    \n Debug a config file and show validation errors. The command will create all objects in the tree    \n and validate them. Note that some config validation errors are blocking and will prevent the rest  \n of the config from being resolved. This means that you may not see all validation errors at once   \n and some issues are only shown once previous errors have been fixed. Similar as with the 'train'   \n command, you can override settings from the config as command line options. For instance,          \n --training.batch_size 128 overrides the value of \"batch_size\" in the block \"[training]\".           \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-config                                                        \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c      PATH  Path to Python file with additional code (registered           │\n│                                   functions) to be imported                                      │\n│                                   [default: None]                                                │\n│ --show-functions    -F            
Show an overview of all registered functions used in the       │\n│                                   config and where they come from (modules, files etc.)          │\n│ --show-variables    -V            Show an overview of all variables referenced in the config and │\n│                                   their values. This will also reflect variables overwritten on  │\n│                                   the CLI.                                                       │\n│ --help                            Show this message and exit.                                    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug data": "                                                                                                    \n Usage: python -m spacy debug data [OPTIONS] CONFIG_PATH                                            \n                                                                                                    \n Analyze, debug and validate your training and development data. Outputs useful stats, and can help \n you find problems like invalid entity annotations, cyclic dependencies, low data labels and more.  \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-data                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│                                    [default: None]                                               │\n│ --ignore-warnings   -IW            Ignore warnings, only show stats and errors                   │\n│ --verbose           -V             Print additional information and explanations                 │\n│ --no-format         -NF            Don't pretty-print the results                                │\n│ --help                             Show this message and exit.                                   
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug diff-config": "                                                                                                    \n Usage: python -m spacy debug diff-config [OPTIONS] CONFIG_PATH                                     \n                                                                                                    \n Show a diff of a config file with respect to spaCy's defaults or another config file. If           \n additional settings were used in the creation of the config file, then you must supply these as    \n extra parameters to the command when comparing to the default settings. The generated diff can     \n also be used when posting to the discussion forum to provide more information for the maintainers. \n                                                                                                    \n The `optimize`, `gpu`, and `pretraining` options are only relevant when comparing against the      \n default configuration (or specifically when `compare_to` is None).                                 
\n DOCS: https://spacy.io/api/cli#debug-diff                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --compare-to                 PATH                   Path to a config file to diff against, or    │\n│                                                     `None` to compare against default settings   │\n│                                                     [default: None]                              │\n│ --optimize          -o       [efficiency|accuracy]  Whether the user config was optimized for    │\n│                                                     efficiency or accuracy. Only relevant when   │\n│                                                     comparing against the default config.        │\n│                                                     [default: efficiency]                        │\n│ --gpu               -G                              Whether the original config can run on a     │\n│                                                     GPU. Only relevant when comparing against    │\n│                                                     the default config.                          │\n│ --pretraining,--pt                                  Whether to compare on a config with          │\n│                                                     pretraining involved. Only relevant when     │\n│                                                     comparing against the default config.        
│\n│ --markdown          -md                             Generate Markdown for GitHub issues          │\n│ --help                                              Show this message and exit.                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug model": "                                                                                                    \n Usage: python -m spacy debug model [OPTIONS] CONFIG_PATH COMPONENT                                 \n                                                                                                    \n Analyze a Thinc model implementation. Includes checks for internal structure and activations       \n during training.                                                                                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-model                                                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n│ *    component        TEXT  Name of the pipeline component of which the model should be analysed │\n│                             [default: None]                                                      │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --layers       -l         TEXT     Comma-separated names of layer IDs to print                   │\n│ --dimensions   -DIM                Show dimensions                                               │\n│ --parameters   -PAR                Show parameters                                               │\n│ --gradients    -GRAD               Show gradients                                                │\n│ --attributes   -ATTR               
Show attributes                                               │\n│ --print-step0  -P0                 Print model before training                                   │\n│ --print-step1  -P1                 Print model after initialization                              │\n│ --print-step2  -P2                 Print model after training                                    │\n│ --print-step3  -P3                 Print final predictions                                       │\n│ --gpu-id       -g         INTEGER  GPU ID or -1 for CPU [default: -1]                            │\n│ --help                             Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug profile": "                                                                                                    \n Usage: python -m spacy debug profile [OPTIONS] MODEL [INPUTS]                                      \n                                                                                                    \n Profile which functions take the most time in a spaCy pipeline. Input should be formatted as one   \n JSON object per line with a key \"text\". It can either be provided as a JSONL file, or be read from \n sys.sytdin. If no input file is specified, the IMDB dataset is loaded via Thinc.                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-profile                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model       TEXT      Trained pipeline to load [default: None] [required]                   │\n│      inputs      [INPUTS]  Location of input file. '-' for stdin. [default: None]                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n-texts  -n      INTEGER  Maximum number of texts to use if available [default: 10000]         │\n│ --help                      Show this message and exit.                                          │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug-data": "                                                                                                    \n Usage: python -m spacy debug-data [OPTIONS] CONFIG_PATH                                            \n                                                                                                    \n Analyze, debug and validate your training and development data. Outputs useful stats, and can help \n you find problems like invalid entity annotations, cyclic dependencies, low data labels and more.  \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-data                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│                                    [default: None]                                               │\n│ --ignore-warnings   -IW            Ignore warnings, only show stats and errors                   │\n│ --verbose           -V             Print additional information and explanations                 │\n│ --no-format         -NF            Don't pretty-print the results                                │\n│ --help                             Show this message and exit.                                   
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "download": "                                                                                                    \n Usage: python -m spacy download [OPTIONS] MODEL                                                    \n                                                                                                    \n Download compatible trained pipeline from the default download path using pip. If --direct flag is \n set, the command expects the full package name with version. For direct downloads, the             \n compatibility check will be skipped. All additional arguments provided to this command will be     \n passed to `pip install` on package installation.                                                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#download AVAILABLE PACKAGES: https://spacy.io/models                \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model      TEXT  Name of pipeline package to download [default: None] [required]            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --direct  -d,-D            Force direct download of name + version                               │\n│ --sdist   -S               Download sdist (.tar.gz) archive instead of pre-built binary wheel    │\n│ --url     -U         TEXT  Download from given url [default: None]                               │\n│ --help                     Show this message and exit.                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "evaluate": "                                                                                                    \n Usage: python -m spacy evaluate [OPTIONS] MODEL DATA_PATH                                          \n                                                                                                    \n Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in the binary   \n .spacy format. The --gold-preproc option sets up the evaluation examples with gold-standard        \n sentences and tokens for the predictions. Gold preprocessing helps the annotations align to the    \n tokenization, and may result in sequences of more consistent length. However, it may reduce        \n runtime accuracy due to train/test skew. To render a sample of dependency parses in a HTML file,   \n set as output directory as the displacy_path argument.                                             \n                                                                                                    \n DOCS: https://spacy.io/api/cli#benchmark-accuracy                                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [default: None] [required]                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [default: None]    │\n│                           [required]                                                             │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output          -o       FILE       Output JSON file for metrics [default: None]               │\n│ --code            -c       PATH       
Path to Python file with additional code (registered       │\n│                                       functions) to be imported                                  │\n│                                       [default: None]                                            │\n│ --gpu-id          -g       INTEGER    GPU ID or -1 for CPU [default: -1]                         │\n│ --gold-preproc    -G                  Use gold preprocessing                                     │\n│ --displacy-path   -dp      DIRECTORY  Directory to output rendered parses as HTML                │\n│                                       [default: None]                                            │\n│ --displacy-limit  -dl      INTEGER    Limit of parses to render as HTML [default: 25]            │\n│ --per-component   -P                  Return scores per component, only applicable when an       │\n│                                       output JSON file is specified.                             │\n│ --spans-key       -sk      TEXT       Spans key to use when evaluating Doc.spans [default: sc]   │\n│ --help                                Show this message and exit.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "find-function": "                                                                                                    \n Usage: python -m spacy find-function [OPTIONS] FUNC_NAME                                           \n                                                                                                    \n Find the module, path and line number to the file the registered function is defined in, if        \n available.                                                                                         \n                                                                                                    \n func_name (str): Name of the registered function. registry_name (Optional[str]): Name of the       \n catalogue registry.                                                                                \n DOCS: https://spacy.io/api/cli#find-function                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    func_name      TEXT  Name of the registered function. [default: None] [required]            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --registry  -r      TEXT  Name of the catalogue registry. [default: None]                        │\n│ --help                    Show this message and exit.                                            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "find-threshold": "                                                                                                    \n Usage: python -m spacy find-threshold [OPTIONS] MODEL DATA_PATH PIPE_NAME                          \n                                       THRESHOLD_KEY SCORES_KEY                                     \n                                                                                                    \n Runs prediction trials for a trained model with varying thresholds to maximize the specified       \n metric. The search space for the threshold is traversed linearly from 0 to 1 in `n_trials` steps.  \n Results are displayed in a table on `stdout` (the corresponding API call to                        \n `spacy.cli.find_threshold.find_threshold()` returns all results).                                  \n                                                                                                    \n This is applicable only for components whose predictions are influenced by thresholds - e.g.       \n `textcat_multilabel` and `spancat`, but not `textcat`. Note that the full path to the              \n corresponding threshold attribute in the config has to be provided.                                
\n DOCS: https://spacy.io/api/cli#find-threshold                                                      \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model              TEXT  Model name or path [default: None] [required]                      │\n│ *    data_path          PATH  Location of binary evaluation data in .spacy format                │\n│                               [default: None]                                                    │\n│                               [required]                                                         │\n│ *    pipe_name          TEXT  Name of pipe to examine thresholds for [default: None] [required]  │\n│ *    threshold_key      TEXT  Key of threshold attribute in component's configuration            │\n│                               [default: None]                                                    │\n│                               [required]                                                         │\n│ *    scores_key         TEXT  Metric to optimize [default: None] [required]                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n_trials      -n          INTEGER  Number of trials to determine optimal thresholds            │\n│                                      [default: 11]                                               │\n│ --code          -c          PATH     Path to Python file with additional code (registered        │\n│                                      functions) to be imported                                   │\n│                                      [default: None]                                             │\n│ --gpu-id        -g          INTEGER  GPU ID or -1 for CPU 
[default: -1]                          │\n│ --gold-preproc  -G                   Use gold preprocessing                                      │\n│ --verbose       -V,-VV               Display more information for debugging purposes             │\n│ --help                               Show this message and exit.                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "info": "                                                                                                    \n Usage: python -m spacy info [OPTIONS] [MODEL]                                                      \n                                                                                                    \n Print info about spaCy installation. If a pipeline is specified as an argument, print its meta     \n information. Flag --markdown prints details in Markdown for easy copy-pasting to GitHub issues.    \n                                                                                                    \n Flag --url prints only the download URL of the most recent compatible version of the pipeline.     \n DOCS: https://spacy.io/api/cli#info                                                                \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   model      [MODEL]  Optional loadable spaCy pipeline [default: None]                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --markdown  -md              Generate Markdown for GitHub issues                                 │\n│ --silent    -s,-S            Don't print anything (just return)                                  │\n│ --exclude   -e         TEXT  Comma-separated keys to exclude from the print-out                  │\n│                              [default: labels]                                                   │\n│ --url       -u               Print the URL to download the most recent compatible version of the │\n│                              pipeline                                                            │\n│ --help                       Show this 
message and exit.                                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init config": "                                                                                                    \n Usage: python -m spacy init config [OPTIONS] OUTPUT_FILE                                           \n                                                                                                    \n Generate a starter config file for training. Based on your requirements specified via the CLI      \n arguments, this command generates a config with the optimal settings for your use case. This       \n includes the choice of architecture, pretrained weights and related hyperparameters.               \n                                                                                                    \n DOCS: https://spacy.io/api/cli#init-config                                                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    output_file      PATH  File to save the config to or - for stdout (will only output config  │\n│                             and no additional logging info)                                      │\n│                             [default: None]                                                      │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --lang         -l       TEXT                   Two-letter code of the language to use            │\n│                                                [default: en]                                     │\n│ --pipeline     -p       TEXT                   Comma-separated names of trainable pipeline       │\n│                                        
        components to include (without 'tok2vec' or       │\n│                                                'transformer')                                    │\n│                                                [default: tagger,parser,ner]                      │\n│ --optimize     -o       [efficiency|accuracy]  Whether to optimize for efficiency (faster        │\n│                                                inference, smaller model, lower memory            │\n│                                                consumption) or higher accuracy (potentially      │\n│                                                larger and slower model). This will impact the    │\n│                                                choice of architecture, pretrained weights and    │\n│                                                related hyperparameters.                          │\n│                                                [default: efficiency]                             │\n│ --gpu          -G                              Whether the model can run on GPU. This will       │\n│                                                impact the choice of architecture, pretrained     │\n│                                                weights and related hyperparameters.              │\n│ --pretraining  -pt                             Include config for pretraining (with 'spacy       │\n│                                                pretrain')                                        │\n│ --force        -F                              Force overwriting the output file                 │\n│ --help                                         Show this message and exit.                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init fill-config": "                                                                                                    \n Usage: python -m spacy init fill-config [OPTIONS] BASE_PATH [OUTPUT_FILE]                          \n                                                                                                    \n Fill partial config file with default values. Will add all missing settings from the default       \n config and will create all objects, check the registered functions for their default values and    \n update the base config. This command can be used with a config generated via the training          \n quickstart widget: https://spacy.io/usage/training#quickstart                                      \n                                                                                                    \n DOCS: https://spacy.io/api/cli#init-fill-config                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    base_path        FILE           Path to base config to fill [default: None] [required]      │\n│      output_file      [OUTPUT_FILE]  Path to output .cfg file (or - for stdout) [default: -]     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --pretraining       -pt            Include config for pretraining (with 'spacy pretrain')        │\n│ --diff              -D             Print a visual diff highlighting the changes                  │\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│                                   
 [default: None]                                               │\n│ --help                             Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init labels": "                                                                                                    \n Usage: python -m spacy init labels [OPTIONS] CONFIG_PATH OUTPUT_PATH                               \n                                                                                                    \n Generate JSON files for the labels in the data. This helps speed up the training process, since    \n spaCy won't have to preprocess the data to extract the labels.                                     \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n│ *    output_path      PATH  Output directory for the labels [default: None] [required]           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│                                 [default: None]                                                  │\n│ --verbose  -V,-VV               Display more information for debugging purposes                  │\n│ --gpu-id   -g          INTEGER  GPU ID or -1 for CPU [default: -1]                               │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init nlp": "                                                                                                    \n Usage: python -m spacy init nlp [OPTIONS] CONFIG_PATH OUTPUT_PATH                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n│ *    output_path      PATH  Output directory for the prepared data [default: None] [required]    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│                                 [default: None]                                                  │\n│ --verbose  -V,-VV               Display more information for debugging purposes                  │\n│ --gpu-id   -g          INTEGER  GPU ID or -1 for CPU [default: -1]                               │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init vectors": "                                                                                                    \n Usage: python -m spacy init vectors [OPTIONS] LANG VECTORS_LOC OUTPUT_DIR                          \n                                                                                                    \n Convert word vectors for use with spaCy. Will export an nlp object that you can use in the         \n [initialize] block of your config to initialize a model with vectors.                              \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    lang             TEXT  The language of the nlp object to create [default: None] [required]  │\n│ *    vectors_loc      PATH  Vectors file in Word2Vec format [default: None] [required]           │\n│ *    output_dir       PATH  Pipeline output directory [default: None] [required]                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --prune     -p          INTEGER  Optional number of vectors to prune to [default: -1]            │\n│ --truncate  -t          INTEGER  Optional number of vectors to truncate to when reading in       │\n│                                  vectors file                                                    │\n│                                  [default: 0]                                                    │\n│ --mode      -m          TEXT     Vectors mode: default or floret [default: default]              │\n│ --name      -n          TEXT     Optional name for the word vectors, e.g. 
en_core_web_lg.vectors │\n│                                  [default: None]                                                 │\n│ --verbose   -V,-VV               Display more information for debugging purposes                 │\n│ --attr      -a          TEXT     Optional token attribute to use for vectors, e.g. LOWER or NORM │\n│                                  [default: ORTH]                                                 │\n│ --help                           Show this message and exit.                                     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "link": "                                                                                                    \n Usage: python -m spacy link [OPTIONS] ARGS KWARGS                                                  \n                                                                                                    \n (deprecated)                                                                                       \n As of spaCy v3.0, symlinks like \"en\" are not supported anymore. You can load trained pipeline      \n packages using their full names or from a directory path.                                          \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    args        TEXT  [default: None] [required]                                                │\n│ *    kwargs      TEXT  [default: None] [required]                                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "package": "                                                                                                    \n Usage: python -m spacy package [OPTIONS] INPUT_DIR OUTPUT_DIR                                      \n                                                                                                    \n Generate an installable Python package for a pipeline. Includes binary data, meta and required     \n installation files. A new directory will be created in the specified output directory, and the     \n data will be copied over. If --create-meta is set and a meta.json already exists in the output     \n directory, the existing values will be used as the defaults in the command-line prompt. After      \n packaging, \"python -m build --sdist\" is run in the package directory, which will create a .tar.gz  \n archive that can be installed via \"pip install\".                                                   \n                                                                                                    \n If additional code files are provided (e.g. Python files containing custom registered functions    \n like pipeline components), they are copied into the package and imported in the __init__.py.       
\n DOCS: https://spacy.io/api/cli#package                                                             \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    input_dir       DIRECTORY  Directory with pipeline data [default: None] [required]          │\n│ *    output_dir      DIRECTORY  Output parent directory [default: None] [required]               │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code              -c                            TEXT  Comma-separated paths to Python file     │\n│                                                         with additional code (registered         │\n│                                                         functions) to be included in the package │\n│ --meta-path,--meta  -m                            FILE  Path to meta.json [default: None]        │\n│ --create-meta       -C                                  Create meta.json, even if one exists     │\n│ --name              -n                            TEXT  Package name to override meta            │\n│                                                         [default: None]                          │\n│ --version           -v                            TEXT  Package version to override meta         │\n│                                                         [default: None]                          │\n│ --build             -b                            TEXT  Comma-separated formats to build: sdist  │\n│                                                         and/or wheel, or none.                   
│\n│                                                         [default: sdist]                         │\n│ --force             -f,-F                               Force overwriting existing data in       │\n│                                                         output directory                         │\n│ --require-parent    -R,-R  --no-require-parent          Include the parent package (e.g. spacy)  │\n│                                                         in the requirements                      │\n│                                                         [default: require-parent]                │\n│ --help                                                  Show this message and exit.              │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "pretrain": "                                                                                                    \n Usage: python -m spacy pretrain [OPTIONS] CONFIG_PATH OUTPUT_DIR                                   \n                                                                                                    \n Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components, using an approximate       \n language-modelling objective. Two objective types are available, vector-based and character-based. \n                                                                                                    \n In the vector-based objective, we load word vectors that have been trained using a word2vec-style  \n distributional similarity algorithm, and train a component like a CNN, BiLSTM, etc to predict      \n vectors which match the pretrained ones. The weights are saved to a directory after each epoch.    \n You can then pass a path to one of these pretrained weights files to the 'spacy train' command.    \n This technique may be especially helpful if you have little labelled data. However, it's still     \n quite experimental, so your mileage may vary.                                                      \n To load the weights back in during 'spacy train', you need to ensure all settings are the same     \n between pretraining and training. Ideally, this is done by using the same config file for both     \n commands.                                                                                          
\n DOCS: https://spacy.io/api/cli#pretrain                                                            \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      FILE  Path to config file [default: None] [required]                       │\n│ *    output_dir       PATH  Directory to write weights to on each epoch [default: None]          │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code          -c       PATH     Path to Python file with additional code (registered           │\n│                                   functions) to be imported                                      │\n│                                   [default: None]                                                │\n│ --resume-path   -r       PATH     Path to pretrained weights from which to resume pretraining    │\n│                                   [default: None]                                                │\n│ --epoch-resume  -er      INTEGER  The epoch to resume counting from when using --resume-path.    │\n│                                   Prevents unintended overwriting of existing weight files.      │\n│                                   [default: None]                                                │\n│ --gpu-id        -g       INTEGER  GPU ID or -1 for CPU [default: -1]                             │\n│ --skip-last     -L                Skip saving model-last.bin                                     │\n│ --help                            Show this message and exit.                                    
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "profile": "                                                                                                    \n Usage: python -m spacy profile [OPTIONS] MODEL [INPUTS]                                            \n                                                                                                    \n Profile which functions take the most time in a spaCy pipeline. Input should be formatted as one   \n JSON object per line with a key \"text\". It can either be provided as a JSONL file, or be read from \n sys.stdin. If no input file is specified, the IMDB dataset is loaded via Thinc.                    \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-profile                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model       TEXT      Trained pipeline to load [default: None] [required]                   │\n│      inputs      [INPUTS]  Location of input file. '-' for stdin. [default: None]                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n-texts  -n      INTEGER  Maximum number of texts to use if available [default: 10000]         │\n│ --help                      Show this message and exit.                                          │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project assets": "                                                                                                    \n Usage: python -m spacy project assets [OPTIONS] [PROJECT_DIR]                                      \n                                                                                                    \n Fetch project assets like datasets and pretrained weights. Assets are defined in the \"assets\"      \n section of the project.yml. If a checksum is provided in the project.yml, the file is only         \n downloaded if no local file with the same checksum exists.                                         \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/tutorial/directory-and-assets.md          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   project_dir      [PROJECT_DIR]  Path to cloned project. Defaults to current working directory. │\n│                                   [default: (current working directory)]                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --sparse  -S        Use sparse checkout for assets provided via Git, to only check out and clone │\n│                     the files needed. Requires Git v2.22+.                                       │\n│ --extra   -e        Download all assets, including those marked as 'extra'.                      │\n│ --help              Show this message and exit.                                                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project clone": "                                                                                                    \n Usage: python -m spacy project clone [OPTIONS] NAME [DEST]                                         \n                                                                                                    \n Clone a project template from a repository. Calls into \"git\" and will only download the files from \n the given subdirectory. The GitHub repo defaults to the official Weasel template repo, but can be  \n customized (including using a private repo).                                                       \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#clipboard-clone                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    name      TEXT    The name of the template to clone [default: None] [required]              │\n│      dest      [DEST]  Where to clone the project. Defaults to current working directory         │\n│                        [default: None]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --repo    -r      TEXT  The repository to clone from                                             │\n│                         [default: https://github.com/explosion/projects]                         │\n│ --branch  -b      TEXT  The branch to clone from. 
If not provided, will attempt main, master     │\n│                         [default: None]                                                          │\n│ --sparse  -S            Use sparse Git checkout to only check out and clone the files needed.    │\n│                         Requires Git v2.22+.                                                     │\n│ --help                  Show this message and exit.                                              │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project document": "                                                                                                    \n Usage: python -m spacy project document [OPTIONS] [PROJECT_DIR]                                    \n                                                                                                    \n Auto-generate a README.md for a project. If the content is saved to a file, hidden markers are     \n added so you can add custom content before or after the auto-generated section and only the        \n auto-generated docs will be replaced when you re-run the command.                                  \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#closed_book-document               \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   project_dir      [PROJECT_DIR]  Path to cloned project. Defaults to current working directory. │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output    -o       PATH  Path to output Markdown file for output. Defaults to - for standard   │\n│                            output                                                                │\n│                            [default: -]                                                          │\n│ --no-emoji  -NE            Don't use emoji                                                       │\n│ --help                     Show this message and exit.                                           
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project dvc": "                                                                                                    \n Usage: python -m spacy project dvc [OPTIONS] [PROJECT_DIR] [WORKFLOW]                              \n                                                                                                    \n Auto-generate Data Version Control (DVC) config. A DVC project can only define one pipeline, so    \n you need to specify one workflow defined in the project.yml. If no workflow is specified, the      \n first defined workflow is used. The DVC config will only be updated if the project.yml changed.    \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#repeat-dvc                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   project_dir      [PROJECT_DIR]  Location of project directory. Defaults to current working     │\n│                                   directory.                                                     │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n│   workflow         [WORKFLOW]     Name of workflow defined in project.yml. Defaults to first     │\n│                                   workflow if not set.                                           
│\n│                                   [default: None]                                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --verbose  -V        Print more info                                                             │\n│ --quiet    -q        Print less info                                                             │\n│ --force    -F        Force update DVC config                                                     │\n│ --help               Show this message and exit.                                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project pull": "                                                                                                    \n Usage: python -m spacy project pull [OPTIONS] [REMOTE] [PROJECT_DIR]                               \n                                                                                                    \n Retrieve available precomputed outputs from a remote storage. You can alias remotes in your        \n project.yml by mapping them to storage paths. A storage can be anything that the smart_open        \n library can upload to, e.g. AWS, Google Cloud Storage, SSH, local directories etc.                 \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#arrow_down-push                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   remote           [REMOTE]       Name or path of remote storage [default: default]              │\n│   project_dir      [PROJECT_DIR]  Location of project directory. Defaults to current working     │\n│                                   directory.                                                     │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project push": "                                                                                                    \n Usage: python -m spacy project push [OPTIONS] [REMOTE] [PROJECT_DIR]                               \n                                                                                                    \n Persist outputs to a remote storage. You can alias remotes in your project.yml by mapping them to  \n storage paths. A storage can be anything that the smart_open library can upload to, e.g. AWS,      \n Google Cloud Storage, SSH, local directories etc.                                                  \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#arrow_up-push                      \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   remote           [REMOTE]       Name or path of remote storage [default: default]              │\n│   project_dir      [PROJECT_DIR]  Location of project directory. Defaults to current working     │\n│                                   directory.                                                     │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "train": "                                                                                                    \n Usage: python -m spacy train [OPTIONS] CONFIG_PATH                                                 \n                                                                                                    \n Train or update a spaCy pipeline. Requires data in spaCy's binary format. To convert data from     \n other formats, use the `spacy convert` command. The config file includes all settings and          \n hyperparameters used during training. To override settings in the config, e.g. settings that point \n to local paths or that you want to experiment with, you can override them as command line options. \n For instance, --training.batch_size 128 overrides the value of \"batch_size\" in the block           \n \"[training]\". The --code argument lets you pass in a Python file that's imported before training.  \n It can be used to register custom functions and architectures that can then be referenced in the   \n config.                                                                                            
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#train                                                               \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output,--output-path  -o          PATH     Output directory to store trained pipeline in       │\n│                                              [default: None]                                     │\n│ --code                  -c          PATH     Path to Python file with additional code            │\n│                                              (registered functions) to be imported               │\n│                                              [default: None]                                     │\n│ --verbose               -V,-VV               Display more information for debugging purposes     │\n│ --gpu-id                -g          INTEGER  GPU ID or -1 for CPU [default: -1]                  │\n│ --help                                       Show this message and exit.                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "validate": "                                                                                                    \n Usage: python -m spacy validate [OPTIONS]                                                          \n                                                                                                    \n Validate the currently installed pipeline packages and spaCy version. Checks if the installed      \n packages are compatible and shows upgrade instructions if available. Should be run after `pip      \n install -U spacy`.                                                                                 \n                                                                                                    \n DOCS: https://spacy.io/api/cli#validate                                                            \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
+  },
+  "errors": {
+    "missing_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ Missing command.                                                                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+    "unknown_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_COMMAND__'.                                                     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+    "unknown_subcommand": {
+      "benchmark": "                                                                                                    \n Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...                                       \n                                                                                                    \n Commands for benchmarking pipelines.                                                               \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ accuracy   Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in │\n│            the binary .spacy format. The --gold-preproc option sets up the evaluation examples   │\n│            with gold-standard sentences and tokens for the predictions. Gold preprocessing helps │\n│            the annotations align to the tokenization, and may result in sequences of more        │\n│            consistent length. However, it may reduce runtime accuracy due to train/test skew. To │\n│            render a sample of dependency parses in a HTML file, set as output directory as the   │\n│            displacy_path argument.                                                               │\n│ speed      Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark data in the     │\n│            binary .spacy format.                                                                 
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+      "debug": "                                                                                                    \n Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...                                           \n                                                                                                    \n Suite of helpful commands for debugging and profiling. Includes commands to check and validate     \n your config files, training and evaluation data, and custom model implementations.                 \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ data          Analyze, debug and validate your training and development data. Outputs useful     │\n│               stats, and can help you find problems like invalid entity annotations, cyclic      │\n│               dependencies, low data labels and more.                                            │\n│ profile       Profile which functions take the most time in a spaCy pipeline. Input should be    │\n│               formatted as one JSON object per line with a key \"text\". It can either be provided │\n│               as a JSONL file, or be read from sys.stdin. If no input file is specified, the     │\n│               IMDB dataset is loaded via Thinc.                                                  │\n│ config        Debug a config file and show validation errors. 
The command will create all        │\n│               objects in the tree and validate them. Note that some config validation errors are │\n│               blocking and will prevent the rest of the config from being resolved. This means   │\n│               that you may not see all validation errors at once and some issues are only shown  │\n│               once previous errors have been fixed. Similar as with the 'train' command, you can │\n│               override settings from the config as command line options. For instance,           │\n│               --training.batch_size 128 overrides the value of \"batch_size\" in the block         │\n│               \"[training]\".                                                                      │\n│ diff-config   Show a diff of a config file with respect to spaCy's defaults or another config    │\n│               file. If additional settings were used in the creation of the config file, then    │\n│               you must supply these as extra parameters to the command when comparing to the     │\n│               default settings. The generated diff can also be used when posting to the          │\n│               discussion forum to provide more information for the maintainers.                  │\n│ model         Analyze a Thinc model implementation. Includes checks for internal structure and   │\n│               activations during training.                                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+      "init": "                                                                                                    \n Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...                                            \n                                                                                                    \n Commands for initializing configs and pipeline packages.                                           \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ config        Generate a starter config file for training. Based on your requirements specified  │\n│               via the CLI arguments, this command generates a config with the optimal settings   │\n│               for your use case. This includes the choice of architecture, pretrained weights    │\n│               and related hyperparameters.                                                       │\n│ fill-config   Fill partial config file with default values. Will add all missing settings from   │\n│               the default config and will create all objects, check the registered functions for │\n│               their default values and update the base config. 
This command can be used with a   │\n│               config generated via the training quickstart widget:                               │\n│               https://spacy.io/usage/training#quickstart                                         │\n│ vectors       Convert word vectors for use with spaCy. Will export an nlp object that you can    │\n│               use in the [initialize] block of your config to initialize a model with vectors.   │\n│ labels        Generate JSON files for the labels in the data. This helps speed up the training   │\n│               process, since spaCy won't have to preprocess the data to extract the labels.      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+      "project": "                                                                                                    \n Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...                                         \n                                                                                                    \n Command-line interface for spaCy projects and templates. You'd typically start by cloning a        \n project template to a local directory and fetching its assets like datasets etc. See the project's \n project.yml for the available commands.                                                            \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ assets     Fetch project assets like datasets and pretrained weights. Assets are defined in the  │\n│            \"assets\" section of the project.yml. If a checksum is provided in the project.yml,    │\n│            the file is only downloaded if no local file with the same checksum exists.           │\n│ clone      Clone a project template from a repository. Calls into \"git\" and will only download   │\n│            the files from the given subdirectory. The GitHub repo defaults to the official       │\n│            Weasel template repo, but can be customized (including using a private repo).         │\n│ document   Auto-generate a README.md for a project. 
If the content is saved to a file, hidden    │\n│            markers are added so you can add custom content before or after the auto-generated    │\n│            section and only the auto-generated docs will be replaced when you re-run the         │\n│            command.                                                                              │\n│ dvc        Auto-generate Data Version Control (DVC) config. A DVC project can only define one    │\n│            pipeline, so you need to specify one workflow defined in the project.yml. If no       │\n│            workflow is specified, the first defined workflow is used. The DVC config will only   │\n│            be updated if the project.yml changed.                                                │\n│ run        Run a named command or workflow defined in the project.yml. If a workflow name is     │\n│            specified, all commands in the workflow are run, in order. If commands define         │\n│            dependencies and/or outputs, they will only be re-run if state has changed.           │\n│ pull       Retrieve available precomputed outputs from a remote storage. You can alias remotes   │\n│            in your project.yml by mapping them to storage paths. A storage can be anything that  │\n│            the smart_open library can upload to, e.g. AWS, Google Cloud Storage, SSH, local      │\n│            directories etc.                                                                      │\n│ push       Persist outputs to a remote storage. You can alias remotes in your project.yml by     │\n│            mapping them to storage paths. A storage can be anything that the smart_open library  │\n│            can upload to, e.g. AWS, Google Cloud Storage, SSH, local directories etc.            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
+    }
+  },
+  "group_help": {
+    "benchmark": "                                                                                                    \n Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...                                       \n                                                                                                    \n Commands for benchmarking pipelines.                                                               \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ accuracy   Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in │\n│            the binary .spacy format. The --gold-preproc option sets up the evaluation examples   │\n│            with gold-standard sentences and tokens for the predictions. Gold preprocessing helps │\n│            the annotations align to the tokenization, and may result in sequences of more        │\n│            consistent length. However, it may reduce runtime accuracy due to train/test skew. To │\n│            render a sample of dependency parses in a HTML file, set as output directory as the   │\n│            displacy_path argument.                                                               │\n│ speed      Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark data in the     │\n│            binary .spacy format.                                                                 
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug": "                                                                                                    \n Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...                                           \n                                                                                                    \n Suite of helpful commands for debugging and profiling. Includes commands to check and validate     \n your config files, training and evaluation data, and custom model implementations.                 \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ data          Analyze, debug and validate your training and development data. Outputs useful     │\n│               stats, and can help you find problems like invalid entity annotations, cyclic      │\n│               dependencies, low data labels and more.                                            │\n│ profile       Profile which functions take the most time in a spaCy pipeline. Input should be    │\n│               formatted as one JSON object per line with a key \"text\". It can either be provided │\n│               as a JSONL file, or be read from sys.stdin. If no input file is specified, the     │\n│               IMDB dataset is loaded via Thinc.                                                  │\n│ config        Debug a config file and show validation errors. 
The command will create all        │\n│               objects in the tree and validate them. Note that some config validation errors are │\n│               blocking and will prevent the rest of the config from being resolved. This means   │\n│               that you may not see all validation errors at once and some issues are only shown  │\n│               once previous errors have been fixed. Similar as with the 'train' command, you can │\n│               override settings from the config as command line options. For instance,           │\n│               --training.batch_size 128 overrides the value of \"batch_size\" in the block         │\n│               \"[training]\".                                                                      │\n│ diff-config   Show a diff of a config file with respect to spaCy's defaults or another config    │\n│               file. If additional settings were used in the creation of the config file, then    │\n│               you must supply these as extra parameters to the command when comparing to the     │\n│               default settings. The generated diff can also be used when posting to the          │\n│               discussion forum to provide more information for the maintainers.                  │\n│ model         Analyze a Thinc model implementation. Includes checks for internal structure and   │\n│               activations during training.                                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init": "                                                                                                    \n Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...                                            \n                                                                                                    \n Commands for initializing configs and pipeline packages.                                           \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ config        Generate a starter config file for training. Based on your requirements specified  │\n│               via the CLI arguments, this command generates a config with the optimal settings   │\n│               for your use case. This includes the choice of architecture, pretrained weights    │\n│               and related hyperparameters.                                                       │\n│ fill-config   Fill partial config file with default values. Will add all missing settings from   │\n│               the default config and will create all objects, check the registered functions for │\n│               their default values and update the base config. 
This command can be used with a   │\n│               config generated via the training quickstart widget:                               │\n│               https://spacy.io/usage/training#quickstart                                         │\n│ vectors       Convert word vectors for use with spaCy. Will export an nlp object that you can    │\n│               use in the [initialize] block of your config to initialize a model with vectors.   │\n│ labels        Generate JSON files for the labels in the data. This helps speed up the training   │\n│               process, since spaCy won't have to preprocess the data to extract the labels.      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project": "                                                                                                    \n Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...                                         \n                                                                                                    \n Command-line interface for spaCy projects and templates. You'd typically start by cloning a        \n project template to a local directory and fetching its assets like datasets etc. See the project's \n project.yml for the available commands.                                                            \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ assets     Fetch project assets like datasets and pretrained weights. Assets are defined in the  │\n│            \"assets\" section of the project.yml. If a checksum is provided in the project.yml,    │\n│            the file is only downloaded if no local file with the same checksum exists.           │\n│ clone      Clone a project template from a repository. Calls into \"git\" and will only download   │\n│            the files from the given subdirectory. The GitHub repo defaults to the official       │\n│            Weasel template repo, but can be customized (including using a private repo).         │\n│ document   Auto-generate a README.md for a project. 
If the content is saved to a file, hidden    │\n│            markers are added so you can add custom content before or after the auto-generated    │\n│            section and only the auto-generated docs will be replaced when you re-run the         │\n│            command.                                                                              │\n│ dvc        Auto-generate Data Version Control (DVC) config. A DVC project can only define one    │\n│            pipeline, so you need to specify one workflow defined in the project.yml. If no       │\n│            workflow is specified, the first defined workflow is used. The DVC config will only   │\n│            be updated if the project.yml changed.                                                │\n│ run        Run a named command or workflow defined in the project.yml. If a workflow name is     │\n│            specified, all commands in the workflow are run, in order. If commands define         │\n│            dependencies and/or outputs, they will only be re-run if state has changed.           │\n│ pull       Retrieve available precomputed outputs from a remote storage. You can alias remotes   │\n│            in your project.yml by mapping them to storage paths. A storage can be anything that  │\n│            the smart_open library can upload to, e.g. AWS, Google Cloud Storage, SSH, local      │\n│            directories etc.                                                                      │\n│ push       Persist outputs to a remote storage. You can alias remotes in your project.yml by     │\n│            mapping them to storage paths. A storage can be anything that the smart_open library  │\n│            can upload to, e.g. AWS, Google Cloud Storage, SSH, local directories etc.            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
+  },
+  "hidden_group_commands": {
+    "benchmark": [],
+    "debug": [],
+    "init": [
+      "nlp"
+    ],
+    "project": []
+  },
+  "hidden_top_level": [
+    "link",
+    "debug-data",
+    "profile"
+  ],
+  "known_groups": {
+    "benchmark": [
+      "accuracy",
+      "speed"
+    ],
+    "debug": [
+      "config",
+      "data",
+      "diff-config",
+      "model",
+      "profile"
+    ],
+    "init": [
+      "config",
+      "fill-config",
+      "labels",
+      "nlp",
+      "vectors"
+    ],
+    "project": [
+      "assets",
+      "clone",
+      "document",
+      "dvc",
+      "pull",
+      "push",
+      "run"
+    ]
+  },
+  "known_top_level": [
+    "apply",
+    "assemble",
+    "benchmark",
+    "convert",
+    "debug",
+    "debug-data",
+    "download",
+    "evaluate",
+    "find-function",
+    "find-threshold",
+    "info",
+    "init",
+    "link",
+    "package",
+    "pretrain",
+    "profile",
+    "project",
+    "train",
+    "validate"
+  ],
+  "root_help": "                                                                                                    \n Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...                                                 \n                                                                                                    \n spaCy Command-line Interface                                                                       \n                                                                                                    \n DOCS: https://spacy.io/api/cli                                                                     \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --install-completion          Install completion for the current shell.                          │\n│ --show-completion             Show completion for the current shell, to copy it or customize the │\n│                               installation.                                                      │\n│ --help                        Show this message and exit.                                        │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ download         Download compatible trained pipeline from the default download path using pip.  │\n│                  If --direct flag is set, the command expects the full package name with         │\n│                  version. For direct downloads, the compatibility check will be skipped. All     │\n│                  additional arguments provided to this command will be passed to `pip install`   │\n│                  on package installation.                                                        
│\n│ info             Print info about spaCy installation. If a pipeline is specified as an argument, │\n│                  print its meta information. Flag --markdown prints details in Markdown for easy │\n│                  copy-pasting to GitHub issues.                                                  │\n│ apply            Apply a trained pipeline to documents to get predictions. Expects a loadable    │\n│                  spaCy pipeline and path to the data, which can be a directory or a file. The    │\n│                  data files can be provided in multiple formats:     1. .spacy files     2.      │\n│                  .jsonl files with a specified \"field\" to read the text from.     3. Files with  │\n│                  any other extension are assumed to be containing        a single document.      │\n│                  DOCS: https://spacy.io/api/cli#apply                                            │\n│ assemble         Assemble a spaCy pipeline from a config file. The config file includes all      │\n│                  settings for initializing the pipeline. To override settings in the config,     │\n│                  e.g. settings that point to local paths or that you want to experiment with,    │\n│                  you can override them as command line options. The --code argument lets you     │\n│                  pass in a Python file that can be used to register custom functions that are    │\n│                  referenced in the config.                                                       │\n│ convert          Convert files into json or DocBin format for training. The resulting .spacy     │\n│                  file can be used with the train command and other experiment management         │\n│                  functions.                                                                      │\n│ evaluate         Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation   │\n│                  data in the binary .spacy format. 
The --gold-preproc option sets up the         │\n│                  evaluation examples with gold-standard sentences and tokens for the             │\n│                  predictions. Gold preprocessing helps the annotations align to the              │\n│                  tokenization, and may result in sequences of more consistent length. However,   │\n│                  it may reduce runtime accuracy due to train/test skew. To render a sample of    │\n│                  dependency parses in a HTML file, set as output directory as the displacy_path  │\n│                  argument.                                                                       │\n│ find-function    Find the module, path and line number to the file the registered function is    │\n│                  defined in, if available.                                                       │\n│ find-threshold   Runs prediction trials for a trained model with varying thresholds to maximize  │\n│                  the specified metric. The search space for the threshold is traversed linearly  │\n│                  from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`   │\n│                  (the corresponding API call to `spacy.cli.find_threshold.find_threshold()`      │\n│                  returns all results).                                                           │\n│ package          Generate an installable Python package for a pipeline. Includes binary data,    │\n│                  meta and required installation files. A new directory will be created in the    │\n│                  specified output directory, and the data will be copied over. If --create-meta  │\n│                  is set and a meta.json already exists in the output directory, the existing     │\n│                  values will be used as the defaults in the command-line prompt. 
After           │\n│                  packaging, \"python -m build --sdist\" is run in the package directory, which     │\n│                  will create a .tar.gz archive that can be installed via \"pip install\".          │\n│ pretrain         Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components, using   │\n│                  an approximate language-modelling objective. Two objective types are available, │\n│                  vector-based and character-based.                                               │\n│ train            Train or update a spaCy pipeline. Requires data in spaCy's binary format. To    │\n│                  convert data from other formats, use the `spacy convert` command. The config    │\n│                  file includes all settings and hyperparameters used during training. To         │\n│                  override settings in the config, e.g. settings that point to local paths or     │\n│                  that you want to experiment with, you can override them as command line         │\n│                  options. For instance, --training.batch_size 128 overrides the value of         │\n│                  \"batch_size\" in the block \"[training]\". The --code argument lets you pass in a  │\n│                  Python file that's imported before training. It can be used to register custom  │\n│                  functions and architectures that can then be referenced in the config.          │\n│ validate         Validate the currently installed pipeline packages and spaCy version. Checks if │\n│                  the installed packages are compatible and shows upgrade instructions if         │\n│                  available. Should be run after `pip install -U spacy`.                          │\n│ debug            Suite of helpful commands for debugging and profiling. 
Includes commands to     │\n│                  check and validate your config files, training and evaluation data, and custom  │\n│                  model implementations.                                                          │\n│ benchmark        Commands for benchmarking pipelines.                                            │\n│ init             Commands for initializing configs and pipeline packages.                        │\n│ project          Command-line interface for spaCy projects and templates. You'd typically start  │\n│                  by cloning a project template to a local directory and fetching its assets like │\n│                  datasets etc. See the project's project.yml for the available commands.         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
+}
diff --git a/spacy_cli/main.py b/spacy_cli/main.py
new file mode 100644
index 00000000000..cbe6e376c58
--- /dev/null
+++ b/spacy_cli/main.py
@@ -0,0 +1,114 @@
+import sys
+from typing import Iterable, Optional, Tuple
+
+from .static import HELP_OPTIONS, UNKNOWN_COMMAND_TOKEN, UNKNOWN_SUBCOMMAND_TOKEN
+from .static import get_plugin_command_names, load_manifest
+
+
+def _write_output(text: str) -> None:
+    """Write text to stdout, adding a trailing newline if it lacks one."""
+    sys.stdout.write(text)
+    if not text.endswith("\n"):
+        sys.stdout.write("\n")
+
+
+def _run_live() -> None:
+    """Import the full spaCy CLI and hand control over to it."""
+    # Imported here rather than at module level, so answering from the manifest
+    # does not import spacy.cli.
+    from spacy.cli import setup_cli
+
+    setup_cli()
+
+
+def _requests_help(args: Iterable[str]) -> bool:
+    """Check whether a help flag appears among the option arguments.
+
+    Scanning stops at a bare "--": everything after it is positional, so a
+    later "--help" is an ordinary argument rather than a help request.
+
+    args (Iterable[str]): The arguments following the command name.
+    RETURNS (bool): Whether a help flag was found before any "--".
+    """
+    for arg in args:
+        if arg == "--":
+            return False
+        if arg in HELP_OPTIONS:
+            return True
+    return False
+
+
+def _try_static(argv: Iterable[str]) -> Optional[Tuple[str, int]]:
+    """Try to answer an invocation from the pre-built manifest.
+
+    argv (Iterable[str]): The command-line arguments, without the program name.
+    RETURNS (Optional[Tuple[str, int]]): The text to print and the exit code,
+        or None if the invocation has to be handled by the live CLI.
+    """
+    args = list(argv)
+    manifest = load_manifest()
+    plugin_command_names = get_plugin_command_names()
+    known_groups = manifest["known_groups"]
+    known_top_level = set(manifest["known_top_level"])
+    if not args:
+        return manifest["errors"]["missing_command"], 2
+    first = args[0]
+    if first in HELP_OPTIONS:
+        # With plugin commands installed the static text is not used; the live
+        # CLI produces the help instead.
+        if plugin_command_names:
+            return None
+        return manifest["root_help"], 0
+    if first.startswith("-"):
+        # Any other top-level option is left to the live CLI.
+        return None
+    if first not in known_top_level:
+        if first in plugin_command_names:
+            return None
+        template = manifest["errors"]["unknown_command"]
+        return template.replace(UNKNOWN_COMMAND_TOKEN, first), 2
+    if first in known_groups:
+        if len(args) == 1 or args[1] in HELP_OPTIONS:
+            if plugin_command_names:
+                return None
+            return manifest["group_help"][first], 0
+        second = args[1]
+        if second.startswith("-"):
+            # Same rule as at the top level: an option is not a subcommand
+            # name, so it must not be reported as an unknown subcommand.
+            return None
+        if second not in known_groups[first]:
+            if plugin_command_names:
+                return None
+            template = manifest["errors"]["unknown_subcommand"][first]
+            return template.replace(UNKNOWN_SUBCOMMAND_TOKEN, second), 2
+        if _requests_help(args[2:]):
+            return manifest["command_help"][f"{first} {second}"], 0
+        return None
+    if _requests_help(args[1:]):
+        return manifest["command_help"][first], 0
+    return None
+
+
+def main(argv: Optional[Iterable[str]] = None) -> None:
+    """Entry point: answer from the manifest if possible, else run the live CLI.
+
+    argv (Optional[Iterable[str]]): Arguments without the program name.
+        Defaults to sys.argv[1:].
+    RAISES (SystemExit): With the static exit code, after a static answer
+        has been written to stdout.
+    """
+    args = sys.argv[1:] if argv is None else list(argv)
+    try:
+        static_result = _try_static(args)
+    except Exception:
+        # The static path is a best-effort shortcut: on any failure (e.g. a
+        # manifest entry that is missing) fall back to the live CLI.
+        return _run_live()
+    if static_result is None:
+        # NOTE(review): setup_cli() is called without `args`; confirm that a
+        # caller-supplied argv is honoured on this path.
+        return _run_live()
+    text, code = static_result
+    _write_output(text)
+    raise SystemExit(code)
diff --git a/spacy_cli/static.py b/spacy_cli/static.py
new file mode 100644
index 00000000000..51594ceef9a
--- /dev/null
+++ b/spacy_cli/static.py
@@ -0,0 +1,40 @@
+import json
+from functools import lru_cache
+from importlib.metadata import entry_points
+from importlib.resources import files
+from typing import Any, Dict, Set
+
+
+# NOTE(review): the help text in the manifest only lists --help; confirm that
+# the live CLI treats -h as a help flag as well.
+HELP_OPTIONS = {"--help", "-h"}
+PLUGIN_ENTRY_POINT_GROUP = "spacy_cli"
+MANIFEST_FILE = "cli_manifest.json"
+UNKNOWN_COMMAND_TOKEN = "__SPACY_UNKNOWN_COMMAND__"
+UNKNOWN_SUBCOMMAND_TOKEN = "__SPACY_UNKNOWN_SUBCOMMAND__"
+
+
+@lru_cache(maxsize=1)
+def load_manifest() -> Dict[str, Any]:
+    """Read and parse the JSON manifest shipped inside the spacy_cli package.
+
+    The parsed manifest is cached, so the file is read once per process.
+
+    RETURNS (Dict[str, Any]): The parsed manifest.
+    """
+    data = files("spacy_cli").joinpath(MANIFEST_FILE).read_text(encoding="utf8")
+    return json.loads(data)
+
+
+def get_plugin_command_names() -> Set[str]:
+    """Collect the names of all entry points in the "spacy_cli" group.
+
+    RETURNS (Set[str]): The entry point names; empty if none are installed.
+    """
+    try:
+        plugins = entry_points(group=PLUGIN_ENTRY_POINT_GROUP)
+    except TypeError:
+        # Python < 3.10: entry_points() takes no arguments and returns a
+        # mapping of group name to entry points.
+        plugins = entry_points().get(PLUGIN_ENTRY_POINT_GROUP, [])
+    return {entry_point.name for entry_point in plugins}

From c7d7a724f22e352547d3706dfdcc35e6487a0c5d Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Mon, 9 Mar 2026 14:32:31 +0100
Subject: [PATCH 02/42] Fix lazy load on modules where the function shadows

---
 spacy/cli/__init__.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py
index dcfb4b8a92e..ded45efe9f7 100644
--- a/spacy/cli/__init__.py
+++ b/spacy/cli/__init__.py
@@ -1,4 +1,5 @@
 import sys
+import types
 from importlib import import_module
 from typing import Iterable
 
@@ -79,6 +80,29 @@ def __dir__():
     return sorted(set(globals()) | set(PUBLIC_ATTRS))
 
 
+class _CLIModule(types.ModuleType):
+    """Module type that keeps names in PUBLIC_ATTRS bound to their attribute.
+
+    If a module object is assigned to a name listed in PUBLIC_ATTRS, the
+    attribute configured for that name is looked up on the module and stored
+    in its place.
+    """
+
+    def __setattr__(self, name, value):
+        # NOTE(review): presumably this intercepts the import system binding a
+        # submodule over the function of the same name; confirm.
+        if isinstance(value, types.ModuleType) and name in PUBLIC_ATTRS:
+            _, attr_name = PUBLIC_ATTRS[name]
+            if attr_name is not None:
+                super().__setattr__(name, getattr(value, attr_name))
+                return
+        super().__setattr__(name, value)
+
+
+# Swap the class of this module object so that the __setattr__ hook applies.
+sys.modules[__name__].__class__ = _CLIModule
+
+
 @app.command("link", no_args_is_help=True, deprecated=True, hidden=True)
 def link(*args, **kwargs):
     """As of spaCy v3.0, symlinks like "en" are not supported anymore. You can load trained

From 126deace20c1e9382917e675361b6448188cfcf5 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Mon, 9 Mar 2026 14:32:41 +0100
Subject: [PATCH 03/42] Update manifest

---
 spacy_cli/cli_manifest.json | 78 ++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

diff --git a/spacy_cli/cli_manifest.json b/spacy_cli/cli_manifest.json
index 361a10dca16..12d760416c5 100644
--- a/spacy_cli/cli_manifest.json
+++ b/spacy_cli/cli_manifest.json
@@ -1,55 +1,55 @@
 {
   "command": "python -m spacy",
   "command_help": {
-    "apply": "                                                                                                    \n Usage: python -m spacy apply [OPTIONS] MODEL DATA_PATH OUTPUT_FILE                                 \n                                                                                                    \n Apply a trained pipeline to documents to get predictions. Expects a loadable spaCy pipeline and    \n path to the data, which can be a directory or a file. The data files can be provided in multiple   \n formats:     1. .spacy files     2. .jsonl files with a specified \"field\" to read the text from.   \n 3. Files with any other extension are assumed to be containing        a single document. DOCS:     \n https://spacy.io/api/cli#apply                                                                     \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model            TEXT  Model name or path [default: None] [required]                        │\n│ *    data_path        PATH  Location of the documents to predict on. Can be a single file in     │\n│                             .spacy format or a .jsonl file. Files with other extensions are      │\n│                             treated as single plain text documents. If a directory is provided   │\n│                             it is traversed recursively to grab all files to be processed. The   │\n│                             files can be a mixture of .spacy, .jsonl and text files. If .jsonl   │\n│                             is provided the specified field is going to be grabbed (\"text\" by    │\n│                             default).                                                            
│\n│                             [default: None]                                                      │\n│                             [required]                                                           │\n│ *    output_file      FILE  Path to save the resulting .spacy file [default: None] [required]    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code        -c       PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│                                 [default: None]                                                  │\n│ --text-key    -tk      TEXT     Key containing text string for JSONL [default: text]             │\n│ --force       -F                Force overwriting the output file                                │\n│ --gpu-id      -g       INTEGER  GPU ID or -1 for CPU. [default: -1]                              │\n│ --batch-size  -b       INTEGER  Batch size. [default: 1]                                         │\n│ --n-process   -n       INTEGER  number of processors to use. [default: 1]                        │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "assemble": "                                                                                                    \n Usage: python -m spacy assemble [OPTIONS] CONFIG_PATH OUTPUT_PATH                                  \n                                                                                                    \n Assemble a spaCy pipeline from a config file. The config file includes all settings for            \n initializing the pipeline. To override settings in the config, e.g. settings that point to local   \n paths or that you want to experiment with, you can override them as command line options. The      \n --code argument lets you pass in a Python file that can be used to register custom functions that  \n are referenced in the config.                                                                      \n                                                                                                    \n DOCS: https://spacy.io/api/cli#assemble                                                            \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n│ *    output_path      PATH  Output directory to store assembled pipeline in [default: None]      │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH  Path to Python file with additional code (registered functions) to  │\n│                              be imported                                                         │\n│                              [default: 
None]                                                     │\n│ --verbose  -V,-VV            Display more information for debugging purposes                     │\n│ --help                       Show this message and exit.                                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "benchmark accuracy": "                                                                                                    \n Usage: python -m spacy benchmark accuracy [OPTIONS] MODEL DATA_PATH                                \n                                                                                                    \n Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in the binary   \n .spacy format. The --gold-preproc option sets up the evaluation examples with gold-standard        \n sentences and tokens for the predictions. Gold preprocessing helps the annotations align to the    \n tokenization, and may result in sequences of more consistent length. However, it may reduce        \n runtime accuracy due to train/test skew. To render a sample of dependency parses in a HTML file,   \n set as output directory as the displacy_path argument.                                             \n                                                                                                    \n DOCS: https://spacy.io/api/cli#benchmark-accuracy                                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [default: None] [required]                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [default: None]    │\n│                           [required]                                                             │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output          -o       FILE       Output JSON file for metrics [default: None]               │\n│ --code            -c       PATH 
      Path to Python file with additional code (registered       │\n│                                       functions) to be imported                                  │\n│                                       [default: None]                                            │\n│ --gpu-id          -g       INTEGER    GPU ID or -1 for CPU [default: -1]                         │\n│ --gold-preproc    -G                  Use gold preprocessing                                     │\n│ --displacy-path   -dp      DIRECTORY  Directory to output rendered parses as HTML                │\n│                                       [default: None]                                            │\n│ --displacy-limit  -dl      INTEGER    Limit of parses to render as HTML [default: 25]            │\n│ --per-component   -P                  Return scores per component, only applicable when an       │\n│                                       output JSON file is specified.                             │\n│ --spans-key       -sk      TEXT       Spans key to use when evaluating Doc.spans [default: sc]   │\n│ --help                                Show this message and exit.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "benchmark speed": "                                                                                                    \n Usage: python -m spacy benchmark speed [OPTIONS] MODEL DATA_PATH                                   \n                                                                                                    \n Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark data in the binary .spacy    \n format.                                                                                            \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [default: None] [required]                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [default: None]    │\n│                           [required]                                                             │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --batch-size  -b      INTEGER RANGE [x>=1]   Override the pipeline batch size [default: None]    │\n│ --no-shuffle                                 Do not shuffle benchmark data                       │\n│ --gpu-id      -g      INTEGER                GPU ID or -1 for CPU [default: -1]                  │\n│ --batches             INTEGER RANGE [x>=30]  Minimum number of batches to benchmark              │\n│                                              [default: 50]                                       │\n│ --warmup      -w      INTEGER RANGE [x>=0]   Number of iterations over the data for warmup       │\n│                                    
          [default: 3]                                        │\n│ --code        -c      PATH                   Path to Python file with additional code            │\n│                                              (registered functions) to be imported               │\n│                                              [default: None]                                     │\n│ --help                                       Show this message and exit.                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "convert": "                                                                                                    \n Usage: python -m spacy convert [OPTIONS] INPUT_PATH [OUTPUT_DIR]                                   \n                                                                                                    \n Convert files into json or DocBin format for training. The resulting .spacy file can be used with  \n the train command and other experiment management functions.                                       \n                                                                                                    \n If no output_dir is specified and the output format is JSON, the data is written to stdout, so you \n can pipe them forward to a JSON file: $ spacy convert some_file.conllu --file-type json >          \n some_file.json                                                                                     \n DOCS: https://spacy.io/api/cli#convert                                                             \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    input_path      TEXT          Input file or directory [default: None] [required]            │\n│      output_dir      [OUTPUT_DIR]  Output directory. '-' for stdout. 
[default: -]                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --file-type        -t       [json|spacy]  Type of data to produce [default: spacy]               │\n│ --n-sents          -n       INTEGER       Number of sentences per doc (0 to disable)             │\n│                                           [default: 1]                                           │\n│ --seg-sents        -s                     Segment sentences (for -c ner)                         │\n│ --model,--base     -b       TEXT          Trained spaCy pipeline for sentence segmentation to    │\n│                                           use as base (for --seg-sents)                          │\n│                                           [default: None]                                        │\n│ --morphology       -m                     Enable appending morphology to tags                    │\n│ --merge-subtokens  -T                     Merge CoNLL-U subtokens                                │\n│ --converter        -c       TEXT          Converter: ('conllubio', 'conllu', 'conll', 'ner',     │\n│                                           'iob', 'json')                                         │\n│                                           [default: auto]                                        │\n│ --ner-map          -nm      PATH          NER tag mapping (as JSON-encoded dict of entity types) │\n│                                           [default: None]                                        │\n│ --lang             -l       TEXT          Language (if tokenizer required) [default: None]       │\n│ --concatenate      -C                     Concatenate output to a single file                    │\n│ --help                                    Show this message and exit.                            
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug config": "                                                                                                    \n Usage: python -m spacy debug config [OPTIONS] CONFIG_PATH                                          \n                                                                                                    \n Debug a config file and show validation errors. The command will create all objects in the tree    \n and validate them. Note that some config validation errors are blocking and will prevent the rest  \n of the config from being resolved. This means that you may not see all validation errors at once   \n and some issues are only shown once previous errors have been fixed. Similar as with the 'train'   \n command, you can override settings from the config as command line options. For instance,          \n --training.batch_size 128 overrides the value of \"batch_size\" in the block \"[training]\".           \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-config                                                        \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c      PATH  Path to Python file with additional code (registered           │\n│                                   functions) to be imported                                      │\n│                                   [default: None]                                                │\n│ --show-functions    -F            
Show an overview of all registered functions used in the       │\n│                                   config and where they come from (modules, files etc.)          │\n│ --show-variables    -V            Show an overview of all variables referenced in the config and │\n│                                   their values. This will also reflect variables overwritten on  │\n│                                   the CLI.                                                       │\n│ --help                            Show this message and exit.                                    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug data": "                                                                                                    \n Usage: python -m spacy debug data [OPTIONS] CONFIG_PATH                                            \n                                                                                                    \n Analyze, debug and validate your training and development data. Outputs useful stats, and can help \n you find problems like invalid entity annotations, cyclic dependencies, low data labels and more.  \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-data                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│                                    [default: None]                                               │\n│ --ignore-warnings   -IW            Ignore warnings, only show stats and errors                   │\n│ --verbose           -V             Print additional information and explanations                 │\n│ --no-format         -NF            Don't pretty-print the results                                │\n│ --help                             Show this message and exit.                                   
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug diff-config": "                                                                                                    \n Usage: python -m spacy debug diff-config [OPTIONS] CONFIG_PATH                                     \n                                                                                                    \n Show a diff of a config file with respect to spaCy's defaults or another config file. If           \n additional settings were used in the creation of the config file, then you must supply these as    \n extra parameters to the command when comparing to the default settings. The generated diff can     \n also be used when posting to the discussion forum to provide more information for the maintainers. \n                                                                                                    \n The `optimize`, `gpu`, and `pretraining` options are only relevant when comparing against the      \n default configuration (or specifically when `compare_to` is None).                                 
\n DOCS: https://spacy.io/api/cli#debug-diff                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --compare-to                 PATH                   Path to a config file to diff against, or    │\n│                                                     `None` to compare against default settings   │\n│                                                     [default: None]                              │\n│ --optimize          -o       [efficiency|accuracy]  Whether the user config was optimized for    │\n│                                                     efficiency or accuracy. Only relevant when   │\n│                                                     comparing against the default config.        │\n│                                                     [default: efficiency]                        │\n│ --gpu               -G                              Whether the original config can run on a     │\n│                                                     GPU. Only relevant when comparing against    │\n│                                                     the default config.                          │\n│ --pretraining,--pt                                  Whether to compare on a config with          │\n│                                                     pretraining involved. Only relevant when     │\n│                                                     comparing against the default config.        
│\n│ --markdown          -md                             Generate Markdown for GitHub issues          │\n│ --help                                              Show this message and exit.                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug model": "                                                                                                    \n Usage: python -m spacy debug model [OPTIONS] CONFIG_PATH COMPONENT                                 \n                                                                                                    \n Analyze a Thinc model implementation. Includes checks for internal structure and activations       \n during training.                                                                                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-model                                                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n│ *    component        TEXT  Name of the pipeline component of which the model should be analysed │\n│                             [default: None]                                                      │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --layers       -l         TEXT     Comma-separated names of layer IDs to print                   │\n│ --dimensions   -DIM                Show dimensions                                               │\n│ --parameters   -PAR                Show parameters                                               │\n│ --gradients    -GRAD               Show gradients                                                │\n│ --attributes   -ATTR               
Show attributes                                               │\n│ --print-step0  -P0                 Print model before training                                   │\n│ --print-step1  -P1                 Print model after initialization                              │\n│ --print-step2  -P2                 Print model after training                                    │\n│ --print-step3  -P3                 Print final predictions                                       │\n│ --gpu-id       -g         INTEGER  GPU ID or -1 for CPU [default: -1]                            │\n│ --help                             Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug profile": "                                                                                                    \n Usage: python -m spacy debug profile [OPTIONS] MODEL [INPUTS]                                      \n                                                                                                    \n Profile which functions take the most time in a spaCy pipeline. Input should be formatted as one   \n JSON object per line with a key \"text\". It can either be provided as a JSONL file, or be read from \n sys.sytdin. If no input file is specified, the IMDB dataset is loaded via Thinc.                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-profile                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model       TEXT      Trained pipeline to load [default: None] [required]                   │\n│      inputs      [INPUTS]  Location of input file. '-' for stdin. [default: None]                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n-texts  -n      INTEGER  Maximum number of texts to use if available [default: 10000]         │\n│ --help                      Show this message and exit.                                          │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug-data": "                                                                                                    \n Usage: python -m spacy debug-data [OPTIONS] CONFIG_PATH                                            \n                                                                                                    \n Analyze, debug and validate your training and development data. Outputs useful stats, and can help \n you find problems like invalid entity annotations, cyclic dependencies, low data labels and more.  \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-data                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│                                    [default: None]                                               │\n│ --ignore-warnings   -IW            Ignore warnings, only show stats and errors                   │\n│ --verbose           -V             Print additional information and explanations                 │\n│ --no-format         -NF            Don't pretty-print the results                                │\n│ --help                             Show this message and exit.                                   
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "download": "                                                                                                    \n Usage: python -m spacy download [OPTIONS] MODEL                                                    \n                                                                                                    \n Download compatible trained pipeline from the default download path using pip. If --direct flag is \n set, the command expects the full package name with version. For direct downloads, the             \n compatibility check will be skipped. All additional arguments provided to this command will be     \n passed to `pip install` on package installation.                                                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#download AVAILABLE PACKAGES: https://spacy.io/models                \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model      TEXT  Name of pipeline package to download [default: None] [required]            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --direct  -d,-D            Force direct download of name + version                               │\n│ --sdist   -S               Download sdist (.tar.gz) archive instead of pre-built binary wheel    │\n│ --url     -U         TEXT  Download from given url [default: None]                               │\n│ --help                     Show this message and exit.                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "evaluate": "                                                                                                    \n Usage: python -m spacy evaluate [OPTIONS] MODEL DATA_PATH                                          \n                                                                                                    \n Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in the binary   \n .spacy format. The --gold-preproc option sets up the evaluation examples with gold-standard        \n sentences and tokens for the predictions. Gold preprocessing helps the annotations align to the    \n tokenization, and may result in sequences of more consistent length. However, it may reduce        \n runtime accuracy due to train/test skew. To render a sample of dependency parses in a HTML file,   \n set as output directory as the displacy_path argument.                                             \n                                                                                                    \n DOCS: https://spacy.io/api/cli#benchmark-accuracy                                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [default: None] [required]                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [default: None]    │\n│                           [required]                                                             │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output          -o       FILE       Output JSON file for metrics [default: None]               │\n│ --code            -c       PATH       
Path to Python file with additional code (registered       │\n│                                       functions) to be imported                                  │\n│                                       [default: None]                                            │\n│ --gpu-id          -g       INTEGER    GPU ID or -1 for CPU [default: -1]                         │\n│ --gold-preproc    -G                  Use gold preprocessing                                     │\n│ --displacy-path   -dp      DIRECTORY  Directory to output rendered parses as HTML                │\n│                                       [default: None]                                            │\n│ --displacy-limit  -dl      INTEGER    Limit of parses to render as HTML [default: 25]            │\n│ --per-component   -P                  Return scores per component, only applicable when an       │\n│                                       output JSON file is specified.                             │\n│ --spans-key       -sk      TEXT       Spans key to use when evaluating Doc.spans [default: sc]   │\n│ --help                                Show this message and exit.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "find-function": "                                                                                                    \n Usage: python -m spacy find-function [OPTIONS] FUNC_NAME                                           \n                                                                                                    \n Find the module, path and line number to the file the registered function is defined in, if        \n available.                                                                                         \n                                                                                                    \n func_name (str): Name of the registered function. registry_name (Optional[str]): Name of the       \n catalogue registry.                                                                                \n DOCS: https://spacy.io/api/cli#find-function                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    func_name      TEXT  Name of the registered function. [default: None] [required]            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --registry  -r      TEXT  Name of the catalogue registry. [default: None]                        │\n│ --help                    Show this message and exit.                                            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "find-threshold": "                                                                                                    \n Usage: python -m spacy find-threshold [OPTIONS] MODEL DATA_PATH PIPE_NAME                          \n                                       THRESHOLD_KEY SCORES_KEY                                     \n                                                                                                    \n Runs prediction trials for a trained model with varying thresholds to maximize the specified       \n metric. The search space for the threshold is traversed linearly from 0 to 1 in `n_trials` steps.  \n Results are displayed in a table on `stdout` (the corresponding API call to                        \n `spacy.cli.find_threshold.find_threshold()` returns all results).                                  \n                                                                                                    \n This is applicable only for components whose predictions are influenced by thresholds - e.g.       \n `textcat_multilabel` and `spancat`, but not `textcat`. Note that the full path to the              \n corresponding threshold attribute in the config has to be provided.                                
\n DOCS: https://spacy.io/api/cli#find-threshold                                                      \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model              TEXT  Model name or path [default: None] [required]                      │\n│ *    data_path          PATH  Location of binary evaluation data in .spacy format                │\n│                               [default: None]                                                    │\n│                               [required]                                                         │\n│ *    pipe_name          TEXT  Name of pipe to examine thresholds for [default: None] [required]  │\n│ *    threshold_key      TEXT  Key of threshold attribute in component's configuration            │\n│                               [default: None]                                                    │\n│                               [required]                                                         │\n│ *    scores_key         TEXT  Metric to optimize [default: None] [required]                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n_trials      -n          INTEGER  Number of trials to determine optimal thresholds            │\n│                                      [default: 11]                                               │\n│ --code          -c          PATH     Path to Python file with additional code (registered        │\n│                                      functions) to be imported                                   │\n│                                      [default: None]                                             │\n│ --gpu-id        -g          INTEGER  GPU ID or -1 for CPU 
[default: -1]                          │\n│ --gold-preproc  -G                   Use gold preprocessing                                      │\n│ --verbose       -V,-VV               Display more information for debugging purposes             │\n│ --help                               Show this message and exit.                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "info": "                                                                                                    \n Usage: python -m spacy info [OPTIONS] [MODEL]                                                      \n                                                                                                    \n Print info about spaCy installation. If a pipeline is specified as an argument, print its meta     \n information. Flag --markdown prints details in Markdown for easy copy-pasting to GitHub issues.    \n                                                                                                    \n Flag --url prints only the download URL of the most recent compatible version of the pipeline.     \n DOCS: https://spacy.io/api/cli#info                                                                \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   model      [MODEL]  Optional loadable spaCy pipeline [default: None]                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --markdown  -md              Generate Markdown for GitHub issues                                 │\n│ --silent    -s,-S            Don't print anything (just return)                                  │\n│ --exclude   -e         TEXT  Comma-separated keys to exclude from the print-out                  │\n│                              [default: labels]                                                   │\n│ --url       -u               Print the URL to download the most recent compatible version of the │\n│                              pipeline                                                            │\n│ --help                       Show this 
message and exit.                                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init config": "                                                                                                    \n Usage: python -m spacy init config [OPTIONS] OUTPUT_FILE                                           \n                                                                                                    \n Generate a starter config file for training. Based on your requirements specified via the CLI      \n arguments, this command generates a config with the optimal settings for your use case. This       \n includes the choice of architecture, pretrained weights and related hyperparameters.               \n                                                                                                    \n DOCS: https://spacy.io/api/cli#init-config                                                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    output_file      PATH  File to save the config to or - for stdout (will only output config  │\n│                             and no additional logging info)                                      │\n│                             [default: None]                                                      │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --lang         -l       TEXT                   Two-letter code of the language to use            │\n│                                                [default: en]                                     │\n│ --pipeline     -p       TEXT                   Comma-separated names of trainable pipeline       │\n│                                        
        components to include (without 'tok2vec' or       │\n│                                                'transformer')                                    │\n│                                                [default: tagger,parser,ner]                      │\n│ --optimize     -o       [efficiency|accuracy]  Whether to optimize for efficiency (faster        │\n│                                                inference, smaller model, lower memory            │\n│                                                consumption) or higher accuracy (potentially      │\n│                                                larger and slower model). This will impact the    │\n│                                                choice of architecture, pretrained weights and    │\n│                                                related hyperparameters.                          │\n│                                                [default: efficiency]                             │\n│ --gpu          -G                              Whether the model can run on GPU. This will       │\n│                                                impact the choice of architecture, pretrained     │\n│                                                weights and related hyperparameters.              │\n│ --pretraining  -pt                             Include config for pretraining (with 'spacy       │\n│                                                pretrain')                                        │\n│ --force        -F                              Force overwriting the output file                 │\n│ --help                                         Show this message and exit.                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init fill-config": "                                                                                                    \n Usage: python -m spacy init fill-config [OPTIONS] BASE_PATH [OUTPUT_FILE]                          \n                                                                                                    \n Fill partial config file with default values. Will add all missing settings from the default       \n config and will create all objects, check the registered functions for their default values and    \n update the base config. This command can be used with a config generated via the training          \n quickstart widget: https://spacy.io/usage/training#quickstart                                      \n                                                                                                    \n DOCS: https://spacy.io/api/cli#init-fill-config                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    base_path        FILE           Path to base config to fill [default: None] [required]      │\n│      output_file      [OUTPUT_FILE]  Path to output .cfg file (or - for stdout) [default: -]     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --pretraining       -pt            Include config for pretraining (with 'spacy pretrain')        │\n│ --diff              -D             Print a visual diff highlighting the changes                  │\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│                                   
 [default: None]                                               │\n│ --help                             Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init labels": "                                                                                                    \n Usage: python -m spacy init labels [OPTIONS] CONFIG_PATH OUTPUT_PATH                               \n                                                                                                    \n Generate JSON files for the labels in the data. This helps speed up the training process, since    \n spaCy won't have to preprocess the data to extract the labels.                                     \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n│ *    output_path      PATH  Output directory for the labels [default: None] [required]           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│                                 [default: None]                                                  │\n│ --verbose  -V,-VV               Display more information for debugging purposes                  │\n│ --gpu-id   -g          INTEGER  GPU ID or -1 for CPU [default: -1]                               │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init nlp": "                                                                                                    \n Usage: python -m spacy init nlp [OPTIONS] CONFIG_PATH OUTPUT_PATH                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n│ *    output_path      PATH  Output directory for the prepared data [default: None] [required]    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│                                 [default: None]                                                  │\n│ --verbose  -V,-VV               Display more information for debugging purposes                  │\n│ --gpu-id   -g          INTEGER  GPU ID or -1 for CPU [default: -1]                               │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init vectors": "                                                                                                    \n Usage: python -m spacy init vectors [OPTIONS] LANG VECTORS_LOC OUTPUT_DIR                          \n                                                                                                    \n Convert word vectors for use with spaCy. Will export an nlp object that you can use in the         \n [initialize] block of your config to initialize a model with vectors.                              \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    lang             TEXT  The language of the nlp object to create [default: None] [required]  │\n│ *    vectors_loc      PATH  Vectors file in Word2Vec format [default: None] [required]           │\n│ *    output_dir       PATH  Pipeline output directory [default: None] [required]                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --prune     -p          INTEGER  Optional number of vectors to prune to [default: -1]            │\n│ --truncate  -t          INTEGER  Optional number of vectors to truncate to when reading in       │\n│                                  vectors file                                                    │\n│                                  [default: 0]                                                    │\n│ --mode      -m          TEXT     Vectors mode: default or floret [default: default]              │\n│ --name      -n          TEXT     Optional name for the word vectors, e.g. 
en_core_web_lg.vectors │\n│                                  [default: None]                                                 │\n│ --verbose   -V,-VV               Display more information for debugging purposes                 │\n│ --attr      -a          TEXT     Optional token attribute to use for vectors, e.g. LOWER or NORM │\n│                                  [default: ORTH]                                                 │\n│ --help                           Show this message and exit.                                     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "link": "                                                                                                    \n Usage: python -m spacy link [OPTIONS] ARGS KWARGS                                                  \n                                                                                                    \n (deprecated)                                                                                       \n As of spaCy v3.0, symlinks like \"en\" are not supported anymore. You can load trained pipeline      \n packages using their full names or from a directory path.                                          \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    args        TEXT  [default: None] [required]                                                │\n│ *    kwargs      TEXT  [default: None] [required]                                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "package": "                                                                                                    \n Usage: python -m spacy package [OPTIONS] INPUT_DIR OUTPUT_DIR                                      \n                                                                                                    \n Generate an installable Python package for a pipeline. Includes binary data, meta and required     \n installation files. A new directory will be created in the specified output directory, and the     \n data will be copied over. If --create-meta is set and a meta.json already exists in the output     \n directory, the existing values will be used as the defaults in the command-line prompt. After      \n packaging, \"python -m build --sdist\" is run in the package directory, which will create a .tar.gz  \n archive that can be installed via \"pip install\".                                                   \n                                                                                                    \n If additional code files are provided (e.g. Python files containing custom registered functions    \n like pipeline components), they are copied into the package and imported in the __init__.py.       
\n DOCS: https://spacy.io/api/cli#package                                                             \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    input_dir       DIRECTORY  Directory with pipeline data [default: None] [required]          │\n│ *    output_dir      DIRECTORY  Output parent directory [default: None] [required]               │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code              -c                            TEXT  Comma-separated paths to Python file     │\n│                                                         with additional code (registered         │\n│                                                         functions) to be included in the package │\n│ --meta-path,--meta  -m                            FILE  Path to meta.json [default: None]        │\n│ --create-meta       -C                                  Create meta.json, even if one exists     │\n│ --name              -n                            TEXT  Package name to override meta            │\n│                                                         [default: None]                          │\n│ --version           -v                            TEXT  Package version to override meta         │\n│                                                         [default: None]                          │\n│ --build             -b                            TEXT  Comma-separated formats to build: sdist  │\n│                                                         and/or wheel, or none.                   
│\n│                                                         [default: sdist]                         │\n│ --force             -f,-F                               Force overwriting existing data in       │\n│                                                         output directory                         │\n│ --require-parent    -R,-R  --no-require-parent          Include the parent package (e.g. spacy)  │\n│                                                         in the requirements                      │\n│                                                         [default: require-parent]                │\n│ --help                                                  Show this message and exit.              │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "pretrain": "                                                                                                    \n Usage: python -m spacy pretrain [OPTIONS] CONFIG_PATH OUTPUT_DIR                                   \n                                                                                                    \n Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components, using an approximate       \n language-modelling objective. Two objective types are available, vector-based and character-based. \n                                                                                                    \n In the vector-based objective, we load word vectors that have been trained using a word2vec-style  \n distributional similarity algorithm, and train a component like a CNN, BiLSTM, etc to predict      \n vectors which match the pretrained ones. The weights are saved to a directory after each epoch.    \n You can then pass a path to one of these pretrained weights files to the 'spacy train' command.    \n This technique may be especially helpful if you have little labelled data. However, it's still     \n quite experimental, so your mileage may vary.                                                      \n To load the weights back in during 'spacy train', you need to ensure all settings are the same     \n between pretraining and training. Ideally, this is done by using the same config file for both     \n commands.                                                                                          
\n DOCS: https://spacy.io/api/cli#pretrain                                                            \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      FILE  Path to config file [default: None] [required]                       │\n│ *    output_dir       PATH  Directory to write weights to on each epoch [default: None]          │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code          -c       PATH     Path to Python file with additional code (registered           │\n│                                   functions) to be imported                                      │\n│                                   [default: None]                                                │\n│ --resume-path   -r       PATH     Path to pretrained weights from which to resume pretraining    │\n│                                   [default: None]                                                │\n│ --epoch-resume  -er      INTEGER  The epoch to resume counting from when using --resume-path.    │\n│                                   Prevents unintended overwriting of existing weight files.      │\n│                                   [default: None]                                                │\n│ --gpu-id        -g       INTEGER  GPU ID or -1 for CPU [default: -1]                             │\n│ --skip-last     -L                Skip saving model-last.bin                                     │\n│ --help                            Show this message and exit.                                    
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "profile": "                                                                                                    \n Usage: python -m spacy profile [OPTIONS] MODEL [INPUTS]                                            \n                                                                                                    \n Profile which functions take the most time in a spaCy pipeline. Input should be formatted as one   \n JSON object per line with a key \"text\". It can either be provided as a JSONL file, or be read from \n sys.sytdin. If no input file is specified, the IMDB dataset is loaded via Thinc.                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-profile                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model       TEXT      Trained pipeline to load [default: None] [required]                   │\n│      inputs      [INPUTS]  Location of input file. '-' for stdin. [default: None]                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n-texts  -n      INTEGER  Maximum number of texts to use if available [default: 10000]         │\n│ --help                      Show this message and exit.                                          │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "apply": "                                                                                                    \n Usage: python -m spacy apply [OPTIONS] MODEL DATA_PATH OUTPUT_FILE                                 \n                                                                                                    \n Apply a trained pipeline to documents to get predictions. Expects a loadable spaCy pipeline and    \n path to the data, which can be a directory or a file. The data files can be provided in multiple   \n formats:     1. .spacy files     2. .jsonl files with a specified \"field\" to read the text from.   \n 3. Files with any other extension are assumed to be containing        a single document. DOCS:     \n https://spacy.io/api/cli#apply                                                                     \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model            TEXT  Model name or path [required]                                        │\n│ *    data_path        PATH  Location of the documents to predict on. Can be a single file in     │\n│                             .spacy format or a .jsonl file. Files with other extensions are      │\n│                             treated as single plain text documents. If a directory is provided   │\n│                             it is traversed recursively to grab all files to be processed. The   │\n│                             files can be a mixture of .spacy, .jsonl and text files. If .jsonl   │\n│                             is provided the specified field is going to be grabbed (\"text\" by    │\n│                             default).                                                            
│\n│                             [required]                                                           │\n│ *    output_file      FILE  Path to save the resulting .spacy file [required]                    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code        -c       PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│ --text-key    -tk      TEXT     Key containing text string for JSONL [default: text]             │\n│ --force       -F                Force overwriting the output file                                │\n│ --gpu-id      -g       INTEGER  GPU ID or -1 for CPU. [default: -1]                              │\n│ --batch-size  -b       INTEGER  Batch size. [default: 1]                                         │\n│ --n-process   -n       INTEGER  number of processors to use. [default: 1]                        │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "assemble": "                                                                                                    \n Usage: python -m spacy assemble [OPTIONS] CONFIG_PATH OUTPUT_PATH                                  \n                                                                                                    \n Assemble a spaCy pipeline from a config file. The config file includes all settings for            \n initializing the pipeline. To override settings in the config, e.g. settings that point to local   \n paths or that you want to experiment with, you can override them as command line options. The      \n --code argument lets you pass in a Python file that can be used to register custom functions that  \n are referenced in the config.                                                                      \n                                                                                                    \n DOCS: https://spacy.io/api/cli#assemble                                                            \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n│ *    output_path      PATH  Output directory to store assembled pipeline in [required]           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH  Path to Python file with additional code (registered functions) to  │\n│                              be imported                                                         │\n│ --verbose  -V,-VV            Display more information for debugging purposes                     │\n│ --help                       Show this 
message and exit.                                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "benchmark accuracy": "                                                                                                    \n Usage: python -m spacy benchmark accuracy [OPTIONS] MODEL DATA_PATH                                \n                                                                                                    \n Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in the binary   \n .spacy format. The --gold-preproc option sets up the evaluation examples with gold-standard        \n sentences and tokens for the predictions. Gold preprocessing helps the annotations align to the    \n tokenization, and may result in sequences of more consistent length. However, it may reduce        \n runtime accuracy due to train/test skew. To render a sample of dependency parses in a HTML file,   \n set as output directory as the displacy_path argument.                                             \n                                                                                                    \n DOCS: https://spacy.io/api/cli#benchmark-accuracy                                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [required]                                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [required]         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output          -o       FILE       Output JSON file for metrics                               │\n│ --code            -c       PATH       Path to Python file with additional code (registered       │\n│                                 
      functions) to be imported                                  │\n│ --gpu-id          -g       INTEGER    GPU ID or -1 for CPU [default: -1]                         │\n│ --gold-preproc    -G                  Use gold preprocessing                                     │\n│ --displacy-path   -dp      DIRECTORY  Directory to output rendered parses as HTML                │\n│ --displacy-limit  -dl      INTEGER    Limit of parses to render as HTML [default: 25]            │\n│ --per-component   -P                  Return scores per component, only applicable when an       │\n│                                       output JSON file is specified.                             │\n│ --spans-key       -sk      TEXT       Spans key to use when evaluating Doc.spans [default: sc]   │\n│ --help                                Show this message and exit.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "benchmark speed": "                                                                                                    \n Usage: python -m spacy benchmark speed [OPTIONS] MODEL DATA_PATH                                   \n                                                                                                    \n Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark data in the binary .spacy    \n format.                                                                                            \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [required]                                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [required]         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --batch-size  -b      INTEGER RANGE [x>=1]   Override the pipeline batch size                    │\n│ --no-shuffle                                 Do not shuffle benchmark data                       │\n│ --gpu-id      -g      INTEGER                GPU ID or -1 for CPU [default: -1]                  │\n│ --batches             INTEGER RANGE [x>=30]  Minimum number of batches to benchmark              │\n│                                              [default: 50]                                       │\n│ --warmup      -w      INTEGER RANGE [x>=0]   Number of iterations over the data for warmup       │\n│                                              [default: 3]                                        │\n│ --code        -c      PATH                   Path to Python file with additional code            │\n│                                    
          (registered functions) to be imported               │\n│ --help                                       Show this message and exit.                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "convert": "                                                                                                    \n Usage: python -m spacy convert [OPTIONS] INPUT_PATH [OUTPUT_DIR]                                   \n                                                                                                    \n Convert files into json or DocBin format for training. The resulting .spacy file can be used with  \n the train command and other experiment management functions.                                       \n                                                                                                    \n If no output_dir is specified and the output format is JSON, the data                              \n is written to stdout, so you can pipe them forward to a JSON file:                                 \n $ spacy convert some_file.conllu --file-type json > some_file.json                                 \n                                                                                                    \n DOCS: https://spacy.io/api/cli#convert                                                             \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    input_path      TEXT          Input file or directory [required]                            │\n│      output_dir      [OUTPUT_DIR]  Output directory. '-' for stdout. 
[default: -]                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --file-type        -t       [json|spacy]  Type of data to produce [default: spacy]               │\n│ --n-sents          -n       INTEGER       Number of sentences per doc (0 to disable)             │\n│                                           [default: 1]                                           │\n│ --seg-sents        -s                     Segment sentences (for -c ner)                         │\n│ --model,--base     -b       TEXT          Trained spaCy pipeline for sentence segmentation to    │\n│                                           use as base (for --seg-sents)                          │\n│ --morphology       -m                     Enable appending morphology to tags                    │\n│ --merge-subtokens  -T                     Merge CoNLL-U subtokens                                │\n│ --converter        -c       TEXT          Converter: ('conllubio', 'conllu', 'conll', 'ner',     │\n│                                           'iob', 'json')                                         │\n│                                           [default: auto]                                        │\n│ --ner-map          -nm      PATH          NER tag mapping (as JSON-encoded dict of entity types) │\n│ --lang             -l       TEXT          Language (if tokenizer required)                       │\n│ --concatenate      -C                     Concatenate output to a single file                    │\n│ --help                                    Show this message and exit.                            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug config": "                                                                                                    \n Usage: python -m spacy debug config [OPTIONS] CONFIG_PATH                                          \n                                                                                                    \n Debug a config file and show validation errors. The command will create all objects in the tree    \n and validate them. Note that some config validation errors are blocking and will prevent the rest  \n of the config from being resolved. This means that you may not see all validation errors at once   \n and some issues are only shown once previous errors have been fixed. Similar as with the 'train'   \n command, you can override settings from the config as command line options. For instance,          \n --training.batch_size 128 overrides the value of \"batch_size\" in the block \"\".                     \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-config                                                        \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c      PATH  Path to Python file with additional code (registered           │\n│                                   functions) to be imported                                      │\n│ --show-functions    -F            Show an overview of all registered functions used in the       │\n│                                   
config and where they come from (modules, files etc.)          │\n│ --show-variables    -V            Show an overview of all variables referenced in the config and │\n│                                   their values. This will also reflect variables overwritten on  │\n│                                   the CLI.                                                       │\n│ --help                            Show this message and exit.                                    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug data": "                                                                                                    \n Usage: python -m spacy debug data [OPTIONS] CONFIG_PATH                                            \n                                                                                                    \n Analyze, debug and validate your training and development data. Outputs useful stats, and can help \n you find problems like invalid entity annotations, cyclic dependencies, low data labels and more.  \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-data                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│ --ignore-warnings   -IW            Ignore warnings, only show stats and errors                   │\n│ --verbose           -V             Print additional information and explanations                 │\n│ --no-format         -NF            Don't pretty-print the results                                │\n│ --help                             Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug diff-config": "                                                                                                    \n Usage: python -m spacy debug diff-config [OPTIONS] CONFIG_PATH                                     \n                                                                                                    \n Show a diff of a config file with respect to spaCy's defaults or another config file. If           \n additional settings were used in the creation of the config file, then you must supply these as    \n extra parameters to the command when comparing to the default settings. The generated diff can     \n also be used when posting to the discussion forum to provide more information for the maintainers. \n                                                                                                    \n The `optimize`, `gpu`, and `pretraining` options are only relevant when                            \n comparing against the default configuration (or specifically when `compare_to` is None).           
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-diff                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --compare-to                 PATH                   Path to a config file to diff against, or    │\n│                                                     `None` to compare against default settings   │\n│ --optimize          -o       [efficiency|accuracy]  Whether the user config was optimized for    │\n│                                                     efficiency or accuracy. Only relevant when   │\n│                                                     comparing against the default config.        │\n│                                                     [default: efficiency]                        │\n│ --gpu               -G                              Whether the original config can run on a     │\n│                                                     GPU. Only relevant when comparing against    │\n│                                                     the default config.                          │\n│ --pretraining,--pt                                  Whether to compare on a config with          │\n│                                                     pretraining involved. Only relevant when     │\n│                                                     comparing against the default config.        
│\n│ --markdown          -md                             Generate Markdown for GitHub issues          │\n│ --help                                              Show this message and exit.                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug model": "                                                                                                    \n Usage: python -m spacy debug model [OPTIONS] CONFIG_PATH COMPONENT                                 \n                                                                                                    \n Analyze a Thinc model implementation. Includes checks for internal structure and activations       \n during training.                                                                                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-model                                                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n│ *    component        TEXT  Name of the pipeline component of which the model should be analysed │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --layers       -l         TEXT     Comma-separated names of layer IDs to print                   │\n│ --dimensions   -DIM                Show dimensions                                               │\n│ --parameters   -PAR                Show parameters                                               │\n│ --gradients    -GRAD               Show gradients                                                │\n│ --attributes   -ATTR               Show attributes                                               │\n│ --print-step0  -P0                 
Print model before training                                   │\n│ --print-step1  -P1                 Print model after initialization                              │\n│ --print-step2  -P2                 Print model after training                                    │\n│ --print-step3  -P3                 Print final predictions                                       │\n│ --gpu-id       -g         INTEGER  GPU ID or -1 for CPU [default: -1]                            │\n│ --help                             Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug profile": "                                                                                                    \n Usage: python -m spacy debug profile [OPTIONS] MODEL [INPUTS]                                      \n                                                                                                    \n Profile which functions take the most time in a spaCy pipeline. Input should be formatted as one   \n JSON object per line with a key \"text\". It can either be provided as a JSONL file, or be read from \n sys.sytdin. If no input file is specified, the IMDB dataset is loaded via Thinc.                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-profile                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model       TEXT      Trained pipeline to load [required]                                   │\n│      inputs      [INPUTS]  Location of input file. '-' for stdin.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n-texts  -n      INTEGER  Maximum number of texts to use if available [default: 10000]         │\n│ --help                      Show this message and exit.                                          │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug-data": "                                                                                                    \n Usage: python -m spacy debug-data [OPTIONS] CONFIG_PATH                                            \n                                                                                                    \n Analyze, debug and validate your training and development data. Outputs useful stats, and can help \n you find problems like invalid entity annotations, cyclic dependencies, low data labels and more.  \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-data                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│ --ignore-warnings   -IW            Ignore warnings, only show stats and errors                   │\n│ --verbose           -V             Print additional information and explanations                 │\n│ --no-format         -NF            Don't pretty-print the results                                │\n│ --help                             Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "download": "                                                                                                    \n Usage: python -m spacy download [OPTIONS] MODEL                                                    \n                                                                                                    \n Download compatible trained pipeline from the default download path using pip. If --direct flag is \n set, the command expects the full package name with version. For direct downloads, the             \n compatibility check will be skipped. All additional arguments provided to this command will be     \n passed to `pip install` on package installation.                                                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#download                                                            \n AVAILABLE PACKAGES: https://spacy.io/models                                                        \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model      TEXT  Name of pipeline package to download [required]                            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --direct  -d,-D            Force direct download of name + version                               │\n│ --sdist   -S               Download sdist (.tar.gz) archive instead of pre-built binary wheel    │\n│ --url     -U         TEXT  Download from given url                                               │\n│ --help                     Show this message and exit.                                           
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "evaluate": "                                                                                                    \n Usage: python -m spacy evaluate [OPTIONS] MODEL DATA_PATH                                          \n                                                                                                    \n Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in the binary   \n .spacy format. The --gold-preproc option sets up the evaluation examples with gold-standard        \n sentences and tokens for the predictions. Gold preprocessing helps the annotations align to the    \n tokenization, and may result in sequences of more consistent length. However, it may reduce        \n runtime accuracy due to train/test skew. To render a sample of dependency parses in a HTML file,   \n set as output directory as the displacy_path argument.                                             \n                                                                                                    \n DOCS: https://spacy.io/api/cli#benchmark-accuracy                                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [required]                                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [required]         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output          -o       FILE       Output JSON file for metrics                               │\n│ --code            -c       PATH       Path to Python file with additional code (registered       │\n│                                       
functions) to be imported                                  │\n│ --gpu-id          -g       INTEGER    GPU ID or -1 for CPU [default: -1]                         │\n│ --gold-preproc    -G                  Use gold preprocessing                                     │\n│ --displacy-path   -dp      DIRECTORY  Directory to output rendered parses as HTML                │\n│ --displacy-limit  -dl      INTEGER    Limit of parses to render as HTML [default: 25]            │\n│ --per-component   -P                  Return scores per component, only applicable when an       │\n│                                       output JSON file is specified.                             │\n│ --spans-key       -sk      TEXT       Spans key to use when evaluating Doc.spans [default: sc]   │\n│ --help                                Show this message and exit.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "find-function": "                                                                                                    \n Usage: python -m spacy find-function [OPTIONS] FUNC_NAME                                           \n                                                                                                    \n Find the module, path and line number to the file the registered function is defined in, if        \n available.                                                                                         \n                                                                                                    \n func_name (str): Name of the registered function.                                                  \n registry_name (Optional): Name of the catalogue registry.                                          \n                                                                                                    \n DOCS: https://spacy.io/api/cli#find-function                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    func_name      TEXT  Name of the registered function. [required]                            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --registry  -r      TEXT  Name of the catalogue registry.                                        │\n│ --help                    Show this message and exit.                                            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "find-threshold": "                                                                                                    \n Usage: python -m spacy find-threshold [OPTIONS] MODEL DATA_PATH PIPE_NAME                          \n                                       THRESHOLD_KEY SCORES_KEY                                     \n                                                                                                    \n Runs prediction trials for a trained model with varying thresholds to maximize the specified       \n metric. The search space for the threshold is traversed linearly from 0 to 1 in `n_trials` steps.  \n Results are displayed in a table on `stdout` (the corresponding API call to                        \n `spacy.cli.find_threshold.find_threshold()` returns all results).                                  \n                                                                                                    \n This is applicable only for components whose predictions are influenced by                         \n thresholds - e.g. `textcat_multilabel` and `spancat`, but not `textcat`. Note                      \n that the full path to the corresponding threshold attribute in the config has to                   \n be provided.                                                                                       
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#find-threshold                                                      \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model              TEXT  Model name or path [required]                                      │\n│ *    data_path          PATH  Location of binary evaluation data in .spacy format [required]     │\n│ *    pipe_name          TEXT  Name of pipe to examine thresholds for [required]                  │\n│ *    threshold_key      TEXT  Key of threshold attribute in component's configuration [required] │\n│ *    scores_key         TEXT  Metric to optimize [required]                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n_trials      -n          INTEGER  Number of trials to determine optimal thresholds            │\n│                                      [default: 11]                                               │\n│ --code          -c          PATH     Path to Python file with additional code (registered        │\n│                                      functions) to be imported                                   │\n│ --gpu-id        -g          INTEGER  GPU ID or -1 for CPU [default: -1]                          │\n│ --gold-preproc  -G                   Use gold preprocessing                                      │\n│ --verbose       -V,-VV               Display more information for debugging purposes             │\n│ --help                               Show this message and exit.                                 
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "info": "                                                                                                    \n Usage: python -m spacy info [OPTIONS] [MODEL]                                                      \n                                                                                                    \n Print info about spaCy installation. If a pipeline is specified as an argument, print its meta     \n information. Flag --markdown prints details in Markdown for easy copy-pasting to GitHub issues.    \n                                                                                                    \n Flag --url prints only the download URL of the most recent compatible                              \n version of the pipeline.                                                                           \n                                                                                                    \n DOCS: https://spacy.io/api/cli#info                                                                \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   model      [MODEL]  Optional loadable spaCy pipeline                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --markdown  -md              Generate Markdown for GitHub issues                                 │\n│ --silent    -s,-S            Don't print anything (just return)                                  │\n│ --exclude   -e         TEXT  Comma-separated keys to exclude from the print-out                  │\n│                              [default: labels]                                                   │\n│ --url       -u               Print the URL to 
download the most recent compatible version of the │\n│                              pipeline                                                            │\n│ --help                       Show this message and exit.                                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init config": "                                                                                                    \n Usage: python -m spacy init config [OPTIONS] OUTPUT_FILE                                           \n                                                                                                    \n Generate a starter config file for training. Based on your requirements specified via the CLI      \n arguments, this command generates a config with the optimal settings for your use case. This       \n includes the choice of architecture, pretrained weights and related hyperparameters.               \n                                                                                                    \n DOCS: https://spacy.io/api/cli#init-config                                                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    output_file      PATH  File to save the config to or - for stdout (will only output config  │\n│                             and no additional logging info)                                      │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --lang         -l       TEXT                   Two-letter code of the language to use            │\n│                                                [default: en]                                     │\n│ --pipeline     -p       TEXT                   Comma-separated names of trainable pipeline       │\n│                                                components to include (without 'tok2vec' or       │\n│                                        
        'transformer')                                    │\n│                                                [default: tagger,parser,ner]                      │\n│ --optimize     -o       [efficiency|accuracy]  Whether to optimize for efficiency (faster        │\n│                                                inference, smaller model, lower memory            │\n│                                                consumption) or higher accuracy (potentially      │\n│                                                larger and slower model). This will impact the    │\n│                                                choice of architecture, pretrained weights and    │\n│                                                related hyperparameters.                          │\n│                                                [default: efficiency]                             │\n│ --gpu          -G                              Whether the model can run on GPU. This will       │\n│                                                impact the choice of architecture, pretrained     │\n│                                                weights and related hyperparameters.              │\n│ --pretraining  -pt                             Include config for pretraining (with 'spacy       │\n│                                                pretrain')                                        │\n│ --force        -F                              Force overwriting the output file                 │\n│ --help                                         Show this message and exit.                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init fill-config": "                                                                                                    \n Usage: python -m spacy init fill-config [OPTIONS] BASE_PATH [OUTPUT_FILE]                          \n                                                                                                    \n Fill partial config file with default values. Will add all missing settings from the default       \n config and will create all objects, check the registered functions for their default values and    \n update the base config. This command can be used with a config generated via the training          \n quickstart widget: https://spacy.io/usage/training#quickstart                                      \n                                                                                                    \n DOCS: https://spacy.io/api/cli#init-fill-config                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    base_path        FILE           Path to base config to fill [required]                      │\n│      output_file      [OUTPUT_FILE]  Path to output .cfg file (or - for stdout) [default: -]     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --pretraining       -pt            Include config for pretraining (with 'spacy pretrain')        │\n│ --diff              -D             Print a visual diff highlighting the changes                  │\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│ --help                            
 Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init labels": "                                                                                                    \n Usage: python -m spacy init labels [OPTIONS] CONFIG_PATH OUTPUT_PATH                               \n                                                                                                    \n Generate JSON files for the labels in the data. This helps speed up the training process, since    \n spaCy won't have to preprocess the data to extract the labels.                                     \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n│ *    output_path      PATH  Output directory for the labels [required]                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│ --verbose  -V,-VV               Display more information for debugging purposes                  │\n│ --gpu-id   -g          INTEGER  GPU ID or -1 for CPU [default: -1]                               │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init nlp": "                                                                                                    \n Usage: python -m spacy init nlp [OPTIONS] CONFIG_PATH OUTPUT_PATH                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n│ *    output_path      PATH  Output directory for the prepared data [required]                    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│ --verbose  -V,-VV               Display more information for debugging purposes                  │\n│ --gpu-id   -g          INTEGER  GPU ID or -1 for CPU [default: -1]                               │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init vectors": "                                                                                                    \n Usage: python -m spacy init vectors [OPTIONS] LANG VECTORS_LOC OUTPUT_DIR                          \n                                                                                                    \n Convert word vectors for use with spaCy. Will export an nlp object that you can use in the  block  \n of your config to initialize a model with vectors.                                                 \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    lang             TEXT  The language of the nlp object to create [required]                  │\n│ *    vectors_loc      PATH  Vectors file in Word2Vec format [required]                           │\n│ *    output_dir       PATH  Pipeline output directory [required]                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --prune     -p          INTEGER  Optional number of vectors to prune to [default: -1]            │\n│ --truncate  -t          INTEGER  Optional number of vectors to truncate to when reading in       │\n│                                  vectors file                                                    │\n│                                  [default: 0]                                                    │\n│ --mode      -m          TEXT     Vectors mode: default or floret [default: default]              │\n│ --name      -n          TEXT     Optional name for the word vectors, e.g. 
en_core_web_lg.vectors │\n│ --verbose   -V,-VV               Display more information for debugging purposes                 │\n│ --attr      -a          TEXT     Optional token attribute to use for vectors, e.g. LOWER or NORM │\n│                                  [default: ORTH]                                                 │\n│ --help                           Show this message and exit.                                     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "link": "                                                                                                    \n Usage: python -m spacy link [OPTIONS] ARGS KWARGS                                                  \n                                                                                                    \n (deprecated)                                                                                       \n As of spaCy v3.0, symlinks like \"en\" are not supported anymore. You can load trained pipeline      \n packages using their full names or from a directory path.                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    args        TEXT  [required]                                                                │\n│ *    kwargs      TEXT  [required]                                                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "package": "                                                                                                    \n Usage: python -m spacy package [OPTIONS] INPUT_DIR OUTPUT_DIR                                      \n                                                                                                    \n Generate an installable Python package for a pipeline. Includes binary data, meta and required     \n installation files. A new directory will be created in the specified output directory, and the     \n data will be copied over. If --create-meta is set and a meta.json already exists in the output     \n directory, the existing values will be used as the defaults in the command-line prompt. After      \n packaging, \"python -m build --sdist\" is run in the package directory, which will create a .tar.gz  \n archive that can be installed via \"pip install\".                                                   \n                                                                                                    \n If additional code files are provided (e.g. Python files containing custom                         \n registered functions like pipeline components), they are copied into the                           \n package and imported in the __init__.py.                                                           
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#package                                                             \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    input_dir       DIRECTORY  Directory with pipeline data [required]                          │\n│ *    output_dir      DIRECTORY  Output parent directory [required]                               │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code              -c                            TEXT  Comma-separated paths to Python file     │\n│                                                         with additional code (registered         │\n│                                                         functions) to be included in the package │\n│ --meta-path,--meta  -m                            FILE  Path to meta.json                        │\n│ --create-meta       -C                                  Create meta.json, even if one exists     │\n│ --name              -n                            TEXT  Package name to override meta            │\n│ --version           -v                            TEXT  Package version to override meta         │\n│ --build             -b                            TEXT  Comma-separated formats to build: sdist  │\n│                                                         and/or wheel, or none.                   
│\n│                                                         [default: sdist]                         │\n│ --force             -f,-F                               Force overwriting existing data in       │\n│                                                         output directory                         │\n│ --require-parent    -R,-R  --no-require-parent          Include the parent package (e.g. spacy)  │\n│                                                         in the requirements                      │\n│                                                         [default: require-parent]                │\n│ --help                                                  Show this message and exit.              │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "pretrain": "                                                                                                    \n Usage: python -m spacy pretrain [OPTIONS] CONFIG_PATH OUTPUT_DIR                                   \n                                                                                                    \n Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components, using an approximate       \n language-modelling objective. Two objective types are available, vector-based and character-based. \n                                                                                                    \n In the vector-based objective, we load word vectors that have been trained                         \n using a word2vec-style distributional similarity algorithm, and train a                            \n component like a CNN, BiLSTM, etc to predict vectors which match the                               \n pretrained ones. The weights are saved to a directory after each epoch. You                        \n can then pass a path to one of these pretrained weights files to the                               \n 'spacy train' command.                                                                             \n                                                                                                    \n This technique may be especially helpful if you have little labelled data.                         \n However, it's still quite experimental, so your mileage may vary.                                  \n                                                                                                    \n To load the weights back in during 'spacy train', you need to ensure                               \n all settings are the same between pretraining and training. Ideally,                               \n this is done by using the same config file for both commands.                                      
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#pretrain                                                            \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      FILE  Path to config file [required]                                       │\n│ *    output_dir       PATH  Directory to write weights to on each epoch [required]               │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code          -c       PATH     Path to Python file with additional code (registered           │\n│                                   functions) to be imported                                      │\n│ --resume-path   -r       PATH     Path to pretrained weights from which to resume pretraining    │\n│ --epoch-resume  -er      INTEGER  The epoch to resume counting from when using --resume-path.    │\n│                                   Prevents unintended overwriting of existing weight files.      │\n│ --gpu-id        -g       INTEGER  GPU ID or -1 for CPU [default: -1]                             │\n│ --skip-last     -L                Skip saving model-last.bin                                     │\n│ --help                            Show this message and exit.                                    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "profile": "                                                                                                    \n Usage: python -m spacy profile [OPTIONS] MODEL [INPUTS]                                            \n                                                                                                    \n Profile which functions take the most time in a spaCy pipeline. Input should be formatted as one   \n JSON object per line with a key \"text\". It can either be provided as a JSONL file, or be read from \n sys.sytdin. If no input file is specified, the IMDB dataset is loaded via Thinc.                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-profile                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model       TEXT      Trained pipeline to load [required]                                   │\n│      inputs      [INPUTS]  Location of input file. '-' for stdin.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n-texts  -n      INTEGER  Maximum number of texts to use if available [default: 10000]         │\n│ --help                      Show this message and exit.                                          │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
     "project assets": "                                                                                                    \n Usage: python -m spacy project assets [OPTIONS] [PROJECT_DIR]                                      \n                                                                                                    \n Fetch project assets like datasets and pretrained weights. Assets are defined in the \"assets\"      \n section of the project.yml. If a checksum is provided in the project.yml, the file is only         \n downloaded if no local file with the same checksum exists.                                         \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/tutorial/directory-and-assets.md          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   project_dir      [PROJECT_DIR]  Path to cloned project. Defaults to current working directory. │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --sparse  -S        Use sparse checkout for assets provided via Git, to only check out and clone │\n│                     the files needed. Requires Git v22.2+.                                       │\n│ --extra   -e        Download all assets, including those marked as 'extra'.                      │\n│ --help              Show this message and exit.                                                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "project clone": "                                                                                                    \n Usage: python -m spacy project clone [OPTIONS] NAME [DEST]                                         \n                                                                                                    \n Clone a project template from a repository. Calls into \"git\" and will only download the files from \n the given subdirectory. The GitHub repo defaults to the official Weasel template repo, but can be  \n customized (including using a private repo).                                                       \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#clipboard-clone                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    name      TEXT    The name of the template to clone [default: None] [required]              │\n│      dest      [DEST]  Where to clone the project. Defaults to current working directory         │\n│                        [default: None]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --repo    -r      TEXT  The repository to clone from                                             │\n│                         [default: https://github.com/explosion/projects]                         │\n│ --branch  -b      TEXT  The branch to clone from. 
If not provided, will attempt main, master     │\n│                         [default: None]                                                          │\n│ --sparse  -S            Use sparse Git checkout to only check out and clone the files needed.    │\n│                         Requires Git v22.2+.                                                     │\n│ --help                  Show this message and exit.                                              │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project clone": "                                                                                                    \n Usage: python -m spacy project clone [OPTIONS] NAME [DEST]                                         \n                                                                                                    \n Clone a project template from a repository. Calls into \"git\" and will only download the files from \n the given subdirectory. The GitHub repo defaults to the official Weasel template repo, but can be  \n customized (including using a private repo).                                                       \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#clipboard-clone                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    name      TEXT    The name of the template to clone [required]                              │\n│      dest      [DEST]  Where to clone the project. Defaults to current working directory         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --repo    -r      TEXT  The repository to clone from                                             │\n│                         [default: https://github.com/explosion/projects]                         │\n│ --branch  -b      TEXT  The branch to clone from. If not provided, will attempt main, master     │\n│ --sparse  -S            Use sparse Git checkout to only check out and clone the files needed.    │\n│                         Requires Git v22.2+.                                                     
│\n│ --help                  Show this message and exit.                                              │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
     "project document": "                                                                                                    \n Usage: python -m spacy project document [OPTIONS] [PROJECT_DIR]                                    \n                                                                                                    \n Auto-generate a README.md for a project. If the content is saved to a file, hidden markers are     \n added so you can add custom content before or after the auto-generated section and only the        \n auto-generated docs will be replaced when you re-run the command.                                  \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#closed_book-document               \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   project_dir      [PROJECT_DIR]  Path to cloned project. Defaults to current working directory. │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output    -o       PATH  Path to output Markdown file for output. Defaults to - for standard   │\n│                            output                                                                │\n│                            [default: -]                                                          │\n│ --no-emoji  -NE            Don't use emoji                                                       │\n│ --help                     Show this message and exit.                                           
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "project dvc": "                                                                                                    \n Usage: python -m spacy project dvc [OPTIONS] [PROJECT_DIR] [WORKFLOW]                              \n                                                                                                    \n Auto-generate Data Version Control (DVC) config. A DVC project can only define one pipeline, so    \n you need to specify one workflow defined in the project.yml. If no workflow is specified, the      \n first defined workflow is used. The DVC config will only be updated if the project.yml changed.    \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#repeat-dvc                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   project_dir      [PROJECT_DIR]  Location of project directory. Defaults to current working     │\n│                                   directory.                                                     │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n│   workflow         [WORKFLOW]     Name of workflow defined in project.yml. Defaults to first     │\n│                                   workflow if not set.                                           
│\n│                                   [default: None]                                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --verbose  -V        Print more info                                                             │\n│ --quiet    -q        Print less info                                                             │\n│ --force    -F        Force update DVC config                                                     │\n│ --help               Show this message and exit.                                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project dvc": "                                                                                                    \n Usage: python -m spacy project dvc [OPTIONS] [PROJECT_DIR] [WORKFLOW]                              \n                                                                                                    \n Auto-generate Data Version Control (DVC) config. A DVC project can only define one pipeline, so    \n you need to specify one workflow defined in the project.yml. If no workflow is specified, the      \n first defined workflow is used. The DVC config will only be updated if the project.yml changed.    \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#repeat-dvc                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   project_dir      [PROJECT_DIR]  Location of project directory. Defaults to current working     │\n│                                   directory.                                                     │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n│   workflow         [WORKFLOW]     Name of workflow defined in project.yml. Defaults to first     │\n│                                   workflow if not set.                                           
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --verbose  -V        Print more info                                                             │\n│ --quiet    -q        Print less info                                                             │\n│ --force    -F        Force update DVC config                                                     │\n│ --help               Show this message and exit.                                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
     "project pull": "                                                                                                    \n Usage: python -m spacy project pull [OPTIONS] [REMOTE] [PROJECT_DIR]                               \n                                                                                                    \n Retrieve available precomputed outputs from a remote storage. You can alias remotes in your        \n project.yml by mapping them to storage paths. A storage can be anything that the smart_open        \n library can upload to, e.g. AWS, Google Cloud Storage, SSH, local directories etc.                 \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#arrow_down-push                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   remote           [REMOTE]       Name or path of remote storage [default: default]              │\n│   project_dir      [PROJECT_DIR]  Location of project directory. Defaults to current working     │\n│                                   directory.                                                     │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
     "project push": "                                                                                                    \n Usage: python -m spacy project push [OPTIONS] [REMOTE] [PROJECT_DIR]                               \n                                                                                                    \n Persist outputs to a remote storage. You can alias remotes in your project.yml by mapping them to  \n storage paths. A storage can be anything that the smart_open library can upload to, e.g. AWS,      \n Google Cloud Storage, SSH, local directories etc.                                                  \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#arrow_up-push                      \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   remote           [REMOTE]       Name or path of remote storage [default: default]              │\n│   project_dir      [PROJECT_DIR]  Location of project directory. Defaults to current working     │\n│                                   directory.                                                     │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "train": "                                                                                                    \n Usage: python -m spacy train [OPTIONS] CONFIG_PATH                                                 \n                                                                                                    \n Train or update a spaCy pipeline. Requires data in spaCy's binary format. To convert data from     \n other formats, use the `spacy convert` command. The config file includes all settings and          \n hyperparameters used during training. To override settings in the config, e.g. settings that point \n to local paths or that you want to experiment with, you can override them as command line options. \n For instance, --training.batch_size 128 overrides the value of \"batch_size\" in the block           \n \"[training]\". The --code argument lets you pass in a Python file that's imported before training.  \n It can be used to register custom functions and architectures that can then be referenced in the   \n config.                                                                                            
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#train                                                               \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output,--output-path  -o          PATH     Output directory to store trained pipeline in       │\n│                                              [default: None]                                     │\n│ --code                  -c          PATH     Path to Python file with additional code            │\n│                                              (registered functions) to be imported               │\n│                                              [default: None]                                     │\n│ --verbose               -V,-VV               Display more information for debugging purposes     │\n│ --gpu-id                -g          INTEGER  GPU ID or -1 for CPU [default: -1]                  │\n│ --help                                       Show this message and exit.                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "train": "                                                                                                    \n Usage: python -m spacy train [OPTIONS] CONFIG_PATH                                                 \n                                                                                                    \n Train or update a spaCy pipeline. Requires data in spaCy's binary format. To convert data from     \n other formats, use the `spacy convert` command. The config file includes all settings and          \n hyperparameters used during training. To override settings in the config, e.g. settings that point \n to local paths or that you want to experiment with, you can override them as command line options. \n For instance, --training.batch_size 128 overrides the value of \"batch_size\" in the block \"\". The   \n --code argument lets you pass in a Python file that's imported before training. It can be used to  \n register custom functions and architectures that can then be referenced in the config.             
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#train                                                               \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output,--output-path  -o          PATH     Output directory to store trained pipeline in       │\n│ --code                  -c          PATH     Path to Python file with additional code            │\n│                                              (registered functions) to be imported               │\n│ --verbose               -V,-VV               Display more information for debugging purposes     │\n│ --gpu-id                -g          INTEGER  GPU ID or -1 for CPU [default: -1]                  │\n│ --help                                       Show this message and exit.                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
     "validate": "                                                                                                    \n Usage: python -m spacy validate [OPTIONS]                                                          \n                                                                                                    \n Validate the currently installed pipeline packages and spaCy version. Checks if the installed      \n packages are compatible and shows upgrade instructions if available. Should be run after `pip      \n install -U spacy`.                                                                                 \n                                                                                                    \n DOCS: https://spacy.io/api/cli#validate                                                            \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
   },
   "errors": {
-    "missing_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ Missing command.                                                                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-    "unknown_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_COMMAND__'.                                                     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+    "missing_command": "",
+    "unknown_command": "",
     "unknown_subcommand": {
-      "benchmark": "                                                                                                    \n Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...                                       \n                                                                                                    \n Commands for benchmarking pipelines.                                                               \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ accuracy   Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in │\n│            the binary .spacy format. The --gold-preproc option sets up the evaluation examples   │\n│            with gold-standard sentences and tokens for the predictions. Gold preprocessing helps │\n│            the annotations align to the tokenization, and may result in sequences of more        │\n│            consistent length. However, it may reduce runtime accuracy due to train/test skew. To │\n│            render a sample of dependency parses in a HTML file, set as output directory as the   │\n│            displacy_path argument.                                                               │\n│ speed      Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark data in the     │\n│            binary .spacy format.                                                                 
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-      "debug": "                                                                                                    \n Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...                                           \n                                                                                                    \n Suite of helpful commands for debugging and profiling. Includes commands to check and validate     \n your config files, training and evaluation data, and custom model implementations.                 \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ data          Analyze, debug and validate your training and development data. Outputs useful     │\n│               stats, and can help you find problems like invalid entity annotations, cyclic      │\n│               dependencies, low data labels and more.                                            │\n│ profile       Profile which functions take the most time in a spaCy pipeline. Input should be    │\n│               formatted as one JSON object per line with a key \"text\". It can either be provided │\n│               as a JSONL file, or be read from sys.sytdin. If no input file is specified, the    │\n│               IMDB dataset is loaded via Thinc.                                                  │\n│ config        Debug a config file and show validation errors. 
The command will create all        │\n│               objects in the tree and validate them. Note that some config validation errors are │\n│               blocking and will prevent the rest of the config from being resolved. This means   │\n│               that you may not see all validation errors at once and some issues are only shown  │\n│               once previous errors have been fixed. Similar as with the 'train' command, you can │\n│               override settings from the config as command line options. For instance,           │\n│               --training.batch_size 128 overrides the value of \"batch_size\" in the block         │\n│               \"[training]\".                                                                      │\n│ diff-config   Show a diff of a config file with respect to spaCy's defaults or another config    │\n│               file. If additional settings were used in the creation of the config file, then    │\n│               you must supply these as extra parameters to the command when comparing to the     │\n│               default settings. The generated diff can also be used when posting to the          │\n│               discussion forum to provide more information for the maintainers.                  │\n│ model         Analyze a Thinc model implementation. Includes checks for internal structure and   │\n│               activations during training.                                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-      "init": "                                                                                                    \n Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...                                            \n                                                                                                    \n Commands for initializing configs and pipeline packages.                                           \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ config        Generate a starter config file for training. Based on your requirements specified  │\n│               via the CLI arguments, this command generates a config with the optimal settings   │\n│               for your use case. This includes the choice of architecture, pretrained weights    │\n│               and related hyperparameters.                                                       │\n│ fill-config   Fill partial config file with default values. Will add all missing settings from   │\n│               the default config and will create all objects, check the registered functions for │\n│               their default values and update the base config. 
This command can be used with a   │\n│               config generated via the training quickstart widget:                               │\n│               https://spacy.io/usage/training#quickstart                                         │\n│ vectors       Convert word vectors for use with spaCy. Will export an nlp object that you can    │\n│               use in the [initialize] block of your config to initialize a model with vectors.   │\n│ labels        Generate JSON files for the labels in the data. This helps speed up the training   │\n│               process, since spaCy won't have to preprocess the data to extract the labels.      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-      "project": "                                                                                                    \n Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...                                         \n                                                                                                    \n Command-line interface for spaCy projects and templates. You'd typically start by cloning a        \n project template to a local directory and fetching its assets like datasets etc. See the project's \n project.yml for the available commands.                                                            \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ assets     Fetch project assets like datasets and pretrained weights. Assets are defined in the  │\n│            \"assets\" section of the project.yml. If a checksum is provided in the project.yml,    │\n│            the file is only downloaded if no local file with the same checksum exists.           │\n│ clone      Clone a project template from a repository. Calls into \"git\" and will only download   │\n│            the files from the given subdirectory. The GitHub repo defaults to the official       │\n│            Weasel template repo, but can be customized (including using a private repo).         │\n│ document   Auto-generate a README.md for a project. 
If the content is saved to a file, hidden    │\n│            markers are added so you can add custom content before or after the auto-generated    │\n│            section and only the auto-generated docs will be replaced when you re-run the         │\n│            command.                                                                              │\n│ dvc        Auto-generate Data Version Control (DVC) config. A DVC project can only define one    │\n│            pipeline, so you need to specify one workflow defined in the project.yml. If no       │\n│            workflow is specified, the first defined workflow is used. The DVC config will only   │\n│            be updated if the project.yml changed.                                                │\n│ run        Run a named command or workflow defined in the project.yml. If a workflow name is     │\n│            specified, all commands in the workflow are run, in order. If commands define         │\n│            dependencies and/or outputs, they will only be re-run if state has changed.           │\n│ pull       Retrieve available precomputed outputs from a remote storage. You can alias remotes   │\n│            in your project.yml by mapping them to storage paths. A storage can be anything that  │\n│            the smart_open library can upload to, e.g. AWS, Google Cloud Storage, SSH, local      │\n│            directories etc.                                                                      │\n│ push       Persist outputs to a remote storage. You can alias remotes in your project.yml by     │\n│            mapping them to storage paths. A storage can be anything that the smart_open library  │\n│            can upload to, e.g. AWS, Google Cloud Storage, SSH, local directories etc.            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
+      "benchmark": "",
+      "debug": "",
+      "init": "",
+      "project": ""
     }
   },
   "group_help": {
-    "benchmark": "                                                                                                    \n Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...                                       \n                                                                                                    \n Commands for benchmarking pipelines.                                                               \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ accuracy   Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in │\n│            the binary .spacy format. The --gold-preproc option sets up the evaluation examples   │\n│            with gold-standard sentences and tokens for the predictions. Gold preprocessing helps │\n│            the annotations align to the tokenization, and may result in sequences of more        │\n│            consistent length. However, it may reduce runtime accuracy due to train/test skew. To │\n│            render a sample of dependency parses in a HTML file, set as output directory as the   │\n│            displacy_path argument.                                                               │\n│ speed      Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark data in the     │\n│            binary .spacy format.                                                                 
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug": "                                                                                                    \n Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...                                           \n                                                                                                    \n Suite of helpful commands for debugging and profiling. Includes commands to check and validate     \n your config files, training and evaluation data, and custom model implementations.                 \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ data          Analyze, debug and validate your training and development data. Outputs useful     │\n│               stats, and can help you find problems like invalid entity annotations, cyclic      │\n│               dependencies, low data labels and more.                                            │\n│ profile       Profile which functions take the most time in a spaCy pipeline. Input should be    │\n│               formatted as one JSON object per line with a key \"text\". It can either be provided │\n│               as a JSONL file, or be read from sys.sytdin. If no input file is specified, the    │\n│               IMDB dataset is loaded via Thinc.                                                  │\n│ config        Debug a config file and show validation errors. 
The command will create all        │\n│               objects in the tree and validate them. Note that some config validation errors are │\n│               blocking and will prevent the rest of the config from being resolved. This means   │\n│               that you may not see all validation errors at once and some issues are only shown  │\n│               once previous errors have been fixed. Similar as with the 'train' command, you can │\n│               override settings from the config as command line options. For instance,           │\n│               --training.batch_size 128 overrides the value of \"batch_size\" in the block         │\n│               \"[training]\".                                                                      │\n│ diff-config   Show a diff of a config file with respect to spaCy's defaults or another config    │\n│               file. If additional settings were used in the creation of the config file, then    │\n│               you must supply these as extra parameters to the command when comparing to the     │\n│               default settings. The generated diff can also be used when posting to the          │\n│               discussion forum to provide more information for the maintainers.                  │\n│ model         Analyze a Thinc model implementation. Includes checks for internal structure and   │\n│               activations during training.                                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init": "                                                                                                    \n Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...                                            \n                                                                                                    \n Commands for initializing configs and pipeline packages.                                           \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ config        Generate a starter config file for training. Based on your requirements specified  │\n│               via the CLI arguments, this command generates a config with the optimal settings   │\n│               for your use case. This includes the choice of architecture, pretrained weights    │\n│               and related hyperparameters.                                                       │\n│ fill-config   Fill partial config file with default values. Will add all missing settings from   │\n│               the default config and will create all objects, check the registered functions for │\n│               their default values and update the base config. 
This command can be used with a   │\n│               config generated via the training quickstart widget:                               │\n│               https://spacy.io/usage/training#quickstart                                         │\n│ vectors       Convert word vectors for use with spaCy. Will export an nlp object that you can    │\n│               use in the [initialize] block of your config to initialize a model with vectors.   │\n│ labels        Generate JSON files for the labels in the data. This helps speed up the training   │\n│               process, since spaCy won't have to preprocess the data to extract the labels.      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "project": "                                                                                                    \n Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...                                         \n                                                                                                    \n Command-line interface for spaCy projects and templates. You'd typically start by cloning a        \n project template to a local directory and fetching its assets like datasets etc. See the project's \n project.yml for the available commands.                                                            \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ assets     Fetch project assets like datasets and pretrained weights. Assets are defined in the  │\n│            \"assets\" section of the project.yml. If a checksum is provided in the project.yml,    │\n│            the file is only downloaded if no local file with the same checksum exists.           │\n│ clone      Clone a project template from a repository. Calls into \"git\" and will only download   │\n│            the files from the given subdirectory. The GitHub repo defaults to the official       │\n│            Weasel template repo, but can be customized (including using a private repo).         │\n│ document   Auto-generate a README.md for a project. 
If the content is saved to a file, hidden    │\n│            markers are added so you can add custom content before or after the auto-generated    │\n│            section and only the auto-generated docs will be replaced when you re-run the         │\n│            command.                                                                              │\n│ dvc        Auto-generate Data Version Control (DVC) config. A DVC project can only define one    │\n│            pipeline, so you need to specify one workflow defined in the project.yml. If no       │\n│            workflow is specified, the first defined workflow is used. The DVC config will only   │\n│            be updated if the project.yml changed.                                                │\n│ run        Run a named command or workflow defined in the project.yml. If a workflow name is     │\n│            specified, all commands in the workflow are run, in order. If commands define         │\n│            dependencies and/or outputs, they will only be re-run if state has changed.           │\n│ pull       Retrieve available precomputed outputs from a remote storage. You can alias remotes   │\n│            in your project.yml by mapping them to storage paths. A storage can be anything that  │\n│            the smart_open library can upload to, e.g. AWS, Google Cloud Storage, SSH, local      │\n│            directories etc.                                                                      │\n│ push       Persist outputs to a remote storage. You can alias remotes in your project.yml by     │\n│            mapping them to storage paths. A storage can be anything that the smart_open library  │\n│            can upload to, e.g. AWS, Google Cloud Storage, SSH, local directories etc.            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
+    "benchmark": "                                                                                                    \n Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...                                       \n                                                                                                    \n Commands for benchmarking pipelines.                                                               \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ accuracy   Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation         │\n│            data in the binary .spacy format. The --gold-preproc option sets up the               │\n│            evaluation examples with gold-standard sentences and tokens for the                   │\n│            predictions. Gold preprocessing helps the annotations align to the                    │\n│            tokenization, and may result in sequences of more consistent length. However,         │\n│            it may reduce runtime accuracy due to train/test skew. To render a sample of          │\n│            dependency parses in a HTML file, set as output directory as the                      │\n│            displacy_path argument.                                                               │\n│ speed      Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark                 │\n│            data in the binary .spacy format.                                                     
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug": "                                                                                                    \n Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...                                           \n                                                                                                    \n Suite of helpful commands for debugging and profiling. Includes commands to check and validate     \n your config files, training and evaluation data, and custom model implementations.                 \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ data          Analyze, debug and validate your training and development data. Outputs            │\n│               useful stats, and can help you find problems like invalid entity annotations,      │\n│               cyclic dependencies, low data labels and more.                                     │\n│ profile       Profile which functions take the most time in a spaCy pipeline.                    │\n│               Input should be formatted as one JSON object per line with a key \"text\".           │\n│               It can either be provided as a JSONL file, or be read from sys.sytdin.             │\n│               If no input file is specified, the IMDB dataset is loaded via Thinc.               │\n│ config        Debug a config file and show validation errors. The command will                   │\n│               create all objects in the tree and validate them. 
Note that some config            │\n│               validation errors are blocking and will prevent the rest of the config from        │\n│               being resolved. This means that you may not see all validation errors at           │\n│               once and some issues are only shown once previous errors have been fixed.          │\n│               Similar as with the 'train' command, you can override settings from the config     │\n│               as command line options. For instance, --training.batch_size 128 overrides         │\n│               the value of \"batch_size\" in the block \"[training]\".                               │\n│ diff-config   Show a diff of a config file with respect to spaCy's defaults or another config    │\n│               file. If                                                                           │\n│               additional settings were used in the creation of the config file, then you         │\n│               must supply these as extra parameters to the command when comparing to the default │\n│               settings. The generated diff                                                       │\n│               can also be used when posting to the discussion forum to provide more              │\n│               information for the maintainers.                                                   │\n│ model         Analyze a Thinc model implementation. Includes checks for internal structure       │\n│               and activations during training.                                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init": "                                                                                                    \n Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...                                            \n                                                                                                    \n Commands for initializing configs and pipeline packages.                                           \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ config        Generate a starter config file for training. Based on your requirements            │\n│               specified via the CLI arguments, this command generates a config with the          │\n│               optimal settings for your use case. This includes the choice of architecture,      │\n│               pretrained weights and related hyperparameters.                                    │\n│ fill-config   Fill partial config file with default values. Will add all missing settings        │\n│               from the default config and will create all objects, check the registered          │\n│               functions for their default values and update the base config. This command        │\n│               can be used with a config generated via the training quickstart widget:            │\n│               https://spacy.io/usage/training#quickstart                                         │\n│ vectors       Convert word vectors for use with spaCy. 
Will export an nlp object that            │\n│               you can use in the [initialize] block of your config to initialize                 │\n│               a model with vectors.                                                              │\n│ labels        Generate JSON files for the labels in the data. This helps speed up the            │\n│               training process, since spaCy won't have to preprocess the data to                 │\n│               extract the labels.                                                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project": "                                                                                                    \n Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...                                         \n                                                                                                    \n Command-line interface for spaCy projects and templates. You'd typically start by cloning a        \n project template to a local directory and fetching its assets like datasets etc. See the project's \n project.yml for the available commands.                                                            \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ assets     Fetch project assets like datasets and pretrained weights. Assets are                 │\n│            defined in the \"assets\" section of the project.yml. If a checksum is                  │\n│            provided in the project.yml, the file is only downloaded if no local file             │\n│            with the same checksum exists.                                                        │\n│ clone      Clone a project template from a repository. Calls into \"git\" and will                 │\n│            only download the files from the given subdirectory. The GitHub repo                  │\n│            defaults to the official Weasel template repo, but can be customized                  │\n│            (including using a private repo).                                                     
│\n│ document   Auto-generate a README.md for a project. If the content is saved to a file,           │\n│            hidden markers are added so you can add custom content before or after the            │\n│            auto-generated section and only the auto-generated docs will be replaced              │\n│            when you re-run the command.                                                          │\n│ dvc        Auto-generate Data Version Control (DVC) config. A DVC                                │\n│            project can only define one pipeline, so you need to specify one workflow             │\n│            defined in the project.yml. If no workflow is specified, the first defined            │\n│            workflow is used. The DVC config will only be updated if the project.yml              │\n│            changed.                                                                              │\n│ run        Run a named command or workflow defined in the project.yml. If a workflow             │\n│            name is specified, all commands in the workflow are run, in order. If                 │\n│            commands define dependencies and/or outputs, they will only be re-run if              │\n│            state has changed.                                                                    │\n│ pull       Retrieve available precomputed outputs from a remote storage.                         │\n│            You can alias remotes in your project.yml by mapping them to storage paths.           │\n│            A storage can be anything that the smart_open library can upload to, e.g.             │\n│            AWS, Google Cloud Storage, SSH, local directories etc.                                │\n│ push       Persist outputs to a remote storage. You can alias remotes in your                    │\n│            project.yml by mapping them to storage paths. A storage can be anything that          │\n│            the smart_open library can upload to, e.g. 
AWS, Google Cloud Storage, SSH,            │\n│            local directories etc.                                                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
   },
   "hidden_group_commands": {
     "benchmark": [],
@@ -114,5 +114,5 @@
     "train",
     "validate"
   ],
-  "root_help": "                                                                                                    \n Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...                                                 \n                                                                                                    \n spaCy Command-line Interface                                                                       \n                                                                                                    \n DOCS: https://spacy.io/api/cli                                                                     \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --install-completion          Install completion for the current shell.                          │\n│ --show-completion             Show completion for the current shell, to copy it or customize the │\n│                               installation.                                                      │\n│ --help                        Show this message and exit.                                        │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ download         Download compatible trained pipeline from the default download path using pip.  │\n│                  If --direct flag is set, the command expects the full package name with         │\n│                  version. For direct downloads, the compatibility check will be skipped. All     │\n│                  additional arguments provided to this command will be passed to `pip install`   │\n│                  on package installation.                                                        
│\n│ info             Print info about spaCy installation. If a pipeline is specified as an argument, │\n│                  print its meta information. Flag --markdown prints details in Markdown for easy │\n│                  copy-pasting to GitHub issues.                                                  │\n│ apply            Apply a trained pipeline to documents to get predictions. Expects a loadable    │\n│                  spaCy pipeline and path to the data, which can be a directory or a file. The    │\n│                  data files can be provided in multiple formats:     1. .spacy files     2.      │\n│                  .jsonl files with a specified \"field\" to read the text from.     3. Files with  │\n│                  any other extension are assumed to be containing        a single document.      │\n│                  DOCS: https://spacy.io/api/cli#apply                                            │\n│ assemble         Assemble a spaCy pipeline from a config file. The config file includes all      │\n│                  settings for initializing the pipeline. To override settings in the config,     │\n│                  e.g. settings that point to local paths or that you want to experiment with,    │\n│                  you can override them as command line options. The --code argument lets you     │\n│                  pass in a Python file that can be used to register custom functions that are    │\n│                  referenced in the config.                                                       │\n│ convert          Convert files into json or DocBin format for training. The resulting .spacy     │\n│                  file can be used with the train command and other experiment management         │\n│                  functions.                                                                      │\n│ evaluate         Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation   │\n│                  data in the binary .spacy format. 
The --gold-preproc option sets up the         │\n│                  evaluation examples with gold-standard sentences and tokens for the             │\n│                  predictions. Gold preprocessing helps the annotations align to the              │\n│                  tokenization, and may result in sequences of more consistent length. However,   │\n│                  it may reduce runtime accuracy due to train/test skew. To render a sample of    │\n│                  dependency parses in a HTML file, set as output directory as the displacy_path  │\n│                  argument.                                                                       │\n│ find-function    Find the module, path and line number to the file the registered function is    │\n│                  defined in, if available.                                                       │\n│ find-threshold   Runs prediction trials for a trained model with varying thresholds to maximize  │\n│                  the specified metric. The search space for the threshold is traversed linearly  │\n│                  from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`   │\n│                  (the corresponding API call to `spacy.cli.find_threshold.find_threshold()`      │\n│                  returns all results).                                                           │\n│ package          Generate an installable Python package for a pipeline. Includes binary data,    │\n│                  meta and required installation files. A new directory will be created in the    │\n│                  specified output directory, and the data will be copied over. If --create-meta  │\n│                  is set and a meta.json already exists in the output directory, the existing     │\n│                  values will be used as the defaults in the command-line prompt. 
After           │\n│                  packaging, \"python -m build --sdist\" is run in the package directory, which     │\n│                  will create a .tar.gz archive that can be installed via \"pip install\".          │\n│ pretrain         Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components, using   │\n│                  an approximate language-modelling objective. Two objective types are available, │\n│                  vector-based and character-based.                                               │\n│ train            Train or update a spaCy pipeline. Requires data in spaCy's binary format. To    │\n│                  convert data from other formats, use the `spacy convert` command. The config    │\n│                  file includes all settings and hyperparameters used during training. To         │\n│                  override settings in the config, e.g. settings that point to local paths or     │\n│                  that you want to experiment with, you can override them as command line         │\n│                  options. For instance, --training.batch_size 128 overrides the value of         │\n│                  \"batch_size\" in the block \"[training]\". The --code argument lets you pass in a  │\n│                  Python file that's imported before training. It can be used to register custom  │\n│                  functions and architectures that can then be referenced in the config.          │\n│ validate         Validate the currently installed pipeline packages and spaCy version. Checks if │\n│                  the installed packages are compatible and shows upgrade instructions if         │\n│                  available. Should be run after `pip install -U spacy`.                          │\n│ debug            Suite of helpful commands for debugging and profiling. 
Includes commands to     │\n│                  check and validate your config files, training and evaluation data, and custom  │\n│                  model implementations.                                                          │\n│ benchmark        Commands for benchmarking pipelines.                                            │\n│ init             Commands for initializing configs and pipeline packages.                        │\n│ project          Command-line interface for spaCy projects and templates. You'd typically start  │\n│                  by cloning a project template to a local directory and fetching its assets like │\n│                  datasets etc. See the project's project.yml for the available commands.         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
+  "root_help": "                                                                                                    \n Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...                                                 \n                                                                                                    \n spaCy Command-line Interface                                                                       \n                                                                                                    \n DOCS: https://spacy.io/api/cli                                                                     \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --install-completion          Install completion for the current shell.                          │\n│ --show-completion             Show completion for the current shell, to copy it or customize the │\n│                               installation.                                                      │\n│ --help                        Show this message and exit.                                        │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ download         Download compatible trained pipeline from the default download path using       │\n│                  pip. If --direct flag is set, the command expects the full package name with    │\n│                  version. For direct downloads, the compatibility check will be skipped. All     │\n│                  additional arguments provided to this command will be passed to `pip install`   │\n│                  on package installation.                                                        
│\n│ info             Print info about spaCy installation. If a pipeline is specified as an argument, │\n│                  print its meta information. Flag --markdown prints details in Markdown for easy │\n│                  copy-pasting to GitHub issues.                                                  │\n│ apply            Apply a trained pipeline to documents to get predictions.                       │\n│                  Expects a loadable spaCy pipeline and path to the data, which                   │\n│                  can be a directory or a file.                                                   │\n│                  The data files can be provided in multiple formats:                             │\n│                      1. .spacy files                                                             │\n│                      2. .jsonl files with a specified \"field\" to read the text from.             │\n│                      3. Files with any other extension are assumed to be containing              │\n│                         a single document.                                                       │\n│                  DOCS: https://spacy.io/api/cli#apply                                            │\n│ assemble         Assemble a spaCy pipeline from a config file. The config file includes          │\n│                  all settings for initializing the pipeline. To override settings in the         │\n│                  config, e.g. settings that point to local paths or that you want to             │\n│                  experiment with, you can override them as command line options. The             │\n│                  --code argument lets you pass in a Python file that can be used to              │\n│                  register custom functions that are referenced in the config.                    │\n│ convert          Convert files into json or DocBin format for training. 
The resulting .spacy     │\n│                  file can be used with the train command and other experiment management         │\n│                  functions.                                                                      │\n│ evaluate         Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation   │\n│                  data in the binary .spacy format. The --gold-preproc option sets up the         │\n│                  evaluation examples with gold-standard sentences and tokens for the             │\n│                  predictions. Gold preprocessing helps the annotations align to the              │\n│                  tokenization, and may result in sequences of more consistent length. However,   │\n│                  it may reduce runtime accuracy due to train/test skew. To render a sample of    │\n│                  dependency parses in a HTML file, set as output directory as the                │\n│                  displacy_path argument.                                                         │\n│ find-function    Find the module, path and line number to the file the registered                │\n│                  function is defined in, if available.                                           │\n│ find-threshold   Runs prediction trials for a trained model with varying thresholds to maximize  │\n│                  the specified metric. The search space for the threshold is traversed linearly  │\n│                  from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`   │\n│                  (the corresponding API call to `spacy.cli.find_threshold.find_threshold()`      │\n│                  returns all results).                                                           │\n│ package          Generate an installable Python package for a pipeline. Includes binary data,    │\n│                  meta and required installation files. 
A new directory will be created in the    │\n│                  specified output directory, and the data will be copied over. If                │\n│                  --create-meta is set and a meta.json already exists in the output directory,    │\n│                  the existing values will be used as the defaults in the command-line prompt.    │\n│                  After packaging, \"python -m build --sdist\" is run in the package directory,     │\n│                  which will create a .tar.gz archive that can be installed via \"pip install\".    │\n│ pretrain         Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components,         │\n│                  using an approximate language-modelling objective. Two objective types          │\n│                  are available, vector-based and character-based.                                │\n│ train            Train or update a spaCy pipeline. Requires data in spaCy's binary format. To    │\n│                  convert data from other formats, use the `spacy convert` command. The           │\n│                  config file includes all settings and hyperparameters used during training.     │\n│                  To override settings in the config, e.g. settings that point to local           │\n│                  paths or that you want to experiment with, you can override them as             │\n│                  command line options. For instance, --training.batch_size 128 overrides         │\n│                  the value of \"batch_size\" in the block \"\". The --code argument                  │\n│                  lets you pass in a Python file that's imported before training. It can be       │\n│                  used to register custom functions and architectures that can then be            │\n│                  referenced in the config.                                                       │\n│ validate         Validate the currently installed pipeline packages and spaCy version. 
Checks    │\n│                  if the installed packages are compatible and shows upgrade instructions if      │\n│                  available. Should be run after `pip install -U spacy`.                          │\n│ debug            Suite of helpful commands for debugging and profiling. Includes                 │\n│                  commands to check and validate your config files, training and evaluation data, │\n│                  and custom model implementations.                                               │\n│ benchmark        Commands for benchmarking pipelines.                                            │\n│ init             Commands for initializing configs and pipeline packages.                        │\n│ project          Command-line interface for spaCy projects and templates.                        │\n│                  You'd typically start by cloning a project template to a local directory and    │\n│                  fetching its assets like datasets etc. See the project's project.yml for the    │\n│                  available commands.                                                             │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
 }

From 5c559fc0175dc2eaaee097c2bb888f04b72bbe23 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Mon, 9 Mar 2026 16:12:26 +0100
Subject: [PATCH 04/42] Fix manifest

---
 spacy_cli/build_manifest.py |  6 +++---
 spacy_cli/cli_manifest.json | 12 ++++++------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/spacy_cli/build_manifest.py b/spacy_cli/build_manifest.py
index 6e019bdcb95..71982d82d77 100644
--- a/spacy_cli/build_manifest.py
+++ b/spacy_cli/build_manifest.py
@@ -58,7 +58,7 @@ def build_manifest() -> Dict[str, object]:
             group_help[name] = _get_help(runner, app, [name])
             unknown_subcommand[name] = _invoke(
                 runner, app, [name, UNKNOWN_SUBCOMMAND_TOKEN]
-            ).stdout
+            ).output
             for sub_name in subcommands:
                 help_text = _maybe_get_help(runner, app, [name, sub_name])
                 if help_text is not None:
@@ -76,8 +76,8 @@ def build_manifest() -> Dict[str, object]:
         "group_help": group_help,
         "command_help": command_help,
         "errors": {
-            "missing_command": _invoke(runner, app, []).stdout,
-            "unknown_command": _invoke(runner, app, [UNKNOWN_COMMAND_TOKEN]).stdout,
+            "missing_command": _invoke(runner, app, []).output,
+            "unknown_command": _invoke(runner, app, [UNKNOWN_COMMAND_TOKEN]).output,
             "unknown_subcommand": unknown_subcommand,
         },
     }
diff --git a/spacy_cli/cli_manifest.json b/spacy_cli/cli_manifest.json
index 12d760416c5..9aa7da9b973 100644
--- a/spacy_cli/cli_manifest.json
+++ b/spacy_cli/cli_manifest.json
@@ -36,13 +36,13 @@
     "validate": "                                                                                                    \n Usage: python -m spacy validate [OPTIONS]                                                          \n                                                                                                    \n Validate the currently installed pipeline packages and spaCy version. Checks if the installed      \n packages are compatible and shows upgrade instructions if available. Should be run after `pip      \n install -U spacy`.                                                                                 \n                                                                                                    \n DOCS: https://spacy.io/api/cli#validate                                                            \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
   },
   "errors": {
-    "missing_command": "",
-    "unknown_command": "",
+    "missing_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ Missing command.                                                                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+    "unknown_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_COMMAND__'.                                                     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
     "unknown_subcommand": {
-      "benchmark": "",
-      "debug": "",
-      "init": "",
-      "project": ""
+      "benchmark": "Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy benchmark --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.                                                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+      "debug": "Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy debug --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.                                                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+      "init": "Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy init --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.                                                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+      "project": "Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy project --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.                                                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n"
     }
   },
   "group_help": {

From cfa1d3a59abd8ebb859070f97d681120ac9d07a7 Mon Sep 17 00:00:00 2001
From: Kaushik Rajan <kaushi@alumni.ncsu.edu>
Date: Sun, 15 Mar 2026 15:59:09 +0530
Subject: [PATCH 05/42] fix: ensure memory_zone cleanup runs on exception
 (#13924) (#13932)

Wrap yield in try/finally in StringStore.memory_zone and
Vocab.memory_zone so transient state is always cleaned up,
even when an exception propagates through the context manager.
---
 spacy/strings.pyx                             | 12 ++++++----
 spacy/tests/vocab_vectors/test_memory_zone.py | 23 +++++++++++++++++++
 spacy/vocab.pyx                               |  8 ++++---
 3 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/spacy/strings.pyx b/spacy/strings.pyx
index 65e851cae4e..a65cdb6fc62 100644
--- a/spacy/strings.pyx
+++ b/spacy/strings.pyx
@@ -205,11 +205,13 @@ cdef class StringStore:
         if mem is None:
             mem = Pool()
         self.mem = mem
-        yield mem
-        for key in self._transient_keys:
-            map_clear(self._map.c_map, key)
-        self._transient_keys.clear()
-        self.mem = self._non_temp_mem
+        try:
+            yield mem
+        finally:
+            for key in self._transient_keys:
+                map_clear(self._map.c_map, key)
+            self._transient_keys.clear()
+            self.mem = self._non_temp_mem
 
     def add(self, string: str, allow_transient: Optional[bool] = None) -> int:
         """Add a string to the StringStore.
diff --git a/spacy/tests/vocab_vectors/test_memory_zone.py b/spacy/tests/vocab_vectors/test_memory_zone.py
index 910d2664eb4..f718afa2f6e 100644
--- a/spacy/tests/vocab_vectors/test_memory_zone.py
+++ b/spacy/tests/vocab_vectors/test_memory_zone.py
@@ -34,3 +34,26 @@ def test_memory_zone_redundant_insertion():
         _ = vocab["dog"]
     assert "dog" in vocab
     assert "horse" not in vocab
+
+
+def test_memory_zone_exception_cleanup():
+    """Test that if an exception occurs inside a memory zone, the vocab
+    is properly cleaned up and remains usable afterward."""
+    vocab = Vocab()
+    _ = vocab["dog"]
+    assert "dog" in vocab
+    try:
+        with vocab.memory_zone():
+            _ = vocab["horse"]
+            raise ValueError("simulated error")
+    except ValueError:
+        pass
+    # Vocab should not be stuck in memory zone state
+    assert not vocab.in_memory_zone
+    # Pre-existing words should still work
+    assert "dog" in vocab
+    # Transient word from failed zone should be cleaned up
+    assert "horse" not in vocab
+    # Vocab should be fully usable for new operations
+    lex = vocab["cat"]
+    assert lex.text == "cat"
diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index 11043c17ae7..4bf80c85d8e 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -150,9 +150,11 @@ cdef class Vocab:
             if hasattr(self._vectors, "memory_zone"):
                 contexts.append(stack.enter_context(self._vectors.memory_zone(mem)))
             self.mem = mem
-            yield mem
-        self._clear_transient_orths()
-        self.mem = self._non_temp_mem
+            try:
+                yield mem
+            finally:
+                self._clear_transient_orths()
+                self.mem = self._non_temp_mem
 
     def add_flag(self, flag_getter, int flag_id=-1):
         """Set a new boolean flag to words in the vocabulary.

From 4168448124be73654069e77e7d48b59847d0d137 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 20 Mar 2026 09:12:08 +0100
Subject: [PATCH 06/42] Update test_cli_launcher

---
 spacy/tests/test_cli_launcher.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/spacy/tests/test_cli_launcher.py b/spacy/tests/test_cli_launcher.py
index 392572fdeec..1fd6e3a6fc6 100644
--- a/spacy/tests/test_cli_launcher.py
+++ b/spacy/tests/test_cli_launcher.py
@@ -4,7 +4,6 @@
 
 import pytest
 
-from spacy_cli.build_manifest import build_manifest
 from spacy_cli.static import load_manifest
 
 launcher_module = importlib.import_module("spacy_cli.main")
@@ -46,7 +45,21 @@ def test_load_for_argv_imports_project_on_demand():
 
 
 def test_manifest_is_current():
-    assert build_manifest() == load_manifest()
+    # Run in a subprocess to avoid command registration order being affected
+    # by other test modules importing CLI submodules (which register commands
+    # as a side effect of import).
+    result = subprocess.run(
+        [
+            sys.executable,
+            "-c",
+            "from spacy_cli.build_manifest import build_manifest; "
+            "from spacy_cli.static import load_manifest; "
+            "assert build_manifest() == load_manifest()",
+        ],
+        capture_output=True,
+        text=True,
+    )
+    assert result.returncode == 0, result.stderr
 
 
 def test_launcher_root_help_uses_static(capsys, monkeypatch):

From 37b4a74fa72bfe607e5050b95c1c0b4897589b97 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Date: Fri, 20 Mar 2026 09:14:19 +0100
Subject: [PATCH 07/42] Switch dependency back from `typer-slim` to `typer`
 (#13922)

* change typer-slim dependency to typer

* set rich_markup_mode to None to preserve behaviour
---
 requirements.txt   | 2 +-
 setup.cfg          | 2 +-
 spacy/cli/_util.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 6e79ed526bd..0162c694286 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,7 +9,7 @@ murmurhash>=0.28.0,<1.1.0
 wasabi>=0.9.1,<1.2.0
 srsly>=2.4.3,<3.0.0
 catalogue>=2.0.6,<2.1.0
-typer-slim>=0.3.0,<1.0.0
+typer>=0.3.0,<1.0.0
 weasel>=0.4.2,<0.5.0
 # Third party dependencies
 numpy>=2.0.0,<3.0.0
diff --git a/setup.cfg b/setup.cfg
index c4928af9224..57a6d2f29db 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -55,7 +55,7 @@ install_requires =
     catalogue>=2.0.6,<2.1.0
     weasel>=0.4.2,<0.5.0
     # Third-party dependencies
-    typer-slim>=0.3.0,<1.0.0
+    typer>=0.3.0,<1.0.0
     tqdm>=4.38.0,<5.0.0
     numpy>=1.15.0; python_version < "3.9"
     numpy>=1.19.0; python_version >= "3.9"
diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py
index 309b6b1e79a..5057640a5b9 100644
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@@ -68,7 +68,7 @@
 Arg = typer.Argument
 Opt = typer.Option
 
-app = typer.Typer(name=NAME, help=HELP)
+app = typer.Typer(name=NAME, help=HELP, rich_markup_mode=None)
 benchmark_cli = typer.Typer(name="benchmark", help=BENCHMARK_HELP, no_args_is_help=True)
 debug_cli = typer.Typer(name="debug", help=DEBUG_HELP, no_args_is_help=True)
 init_cli = typer.Typer(name="init", help=INIT_HELP, no_args_is_help=True)

From 2f6142b43e9f7dcb2762429924b48a3ef78556f7 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 20 Mar 2026 09:17:28 +0100
Subject: [PATCH 08/42] Require weasel 1.0

---
 requirements.txt | 2 +-
 setup.cfg        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 0162c694286..c850cfb77c5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,7 +10,7 @@ wasabi>=0.9.1,<1.2.0
 srsly>=2.4.3,<3.0.0
 catalogue>=2.0.6,<2.1.0
 typer>=0.3.0,<1.0.0
-weasel>=0.4.2,<0.5.0
+weasel>=1.0.0,<1.1.0
 # Third party dependencies
 numpy>=2.0.0,<3.0.0
 requests>=2.13.0,<3.0.0
diff --git a/setup.cfg b/setup.cfg
index 57a6d2f29db..a3df44a6f2d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -53,7 +53,7 @@ install_requires =
     wasabi>=0.9.1,<1.2.0
     srsly>=2.4.3,<3.0.0
     catalogue>=2.0.6,<2.1.0
-    weasel>=0.4.2,<0.5.0
+    weasel>=1.0.0,<1.1.0
     # Third-party dependencies
     typer>=0.3.0,<1.0.0
     tqdm>=4.38.0,<5.0.0

From c6c78d6c330c5ff4efbf113f0eac416e5031d047 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 20 Mar 2026 09:20:32 +0100
Subject: [PATCH 09/42] Allow use of uv as a fallback to pip in spacy download

---
 spacy/cli/download.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/spacy/cli/download.py b/spacy/cli/download.py
index 8104fd2d285..120616753b8 100644
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@@ -1,3 +1,4 @@
+import shutil
 import sys
 from typing import Optional, Sequence
 from urllib.parse import urljoin
@@ -176,5 +177,19 @@ def download_model(
     if not download_url.startswith(about.__download_url__):
         raise ValueError(f"Download from {filename} rejected. Was it a relative path?")
     pip_args = list(user_pip_args) if user_pip_args is not None else []
-    cmd = [sys.executable, "-m", "pip", "install"] + pip_args + [download_url]
+    cmd = _get_pip_install_cmd() + pip_args + [download_url]
     run_command(cmd)
+
+
+def _get_pip_install_cmd() -> list:
+    """Return the argv prefix that installs into the running interpreter."""
+    from importlib.util import find_spec
+
+    # `-m pip` needs the pip module; a `pip` on PATH may be another env's.
+    if find_spec("pip") is not None:
+        return [sys.executable, "-m", "pip", "install"]
+    if shutil.which("uv"):
+        return ["uv", "pip", "install", "--python", sys.executable]
+    hint = "spaCy requires either pip or uv to download models. "
+    hint += "Please install one of them and try again."
+    msg.fail("No package installer found", hint, exits=1)

From ed20f79abe543ea4033be5b1d7950d2bb8fa5cf0 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 20 Mar 2026 09:43:20 +0100
Subject: [PATCH 10/42] Require confection

---
 requirements.txt | 1 +
 setup.cfg        | 1 +
 2 files changed, 2 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index c850cfb77c5..19b803325b8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -36,3 +36,4 @@ types-setuptools>=57.0.0
 black>=25.0.0
 cython-lint>=0.15.0
 isort>=5.0,<6.0
+confection>=1.0.0,<2.0.0
diff --git a/setup.cfg b/setup.cfg
index a3df44a6f2d..56dcc343aa0 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -54,6 +54,7 @@ install_requires =
     srsly>=2.4.3,<3.0.0
     catalogue>=2.0.6,<2.1.0
     weasel>=1.0.0,<1.1.0
+    confection>=1.0.0,<1.1.0
     # Third-party dependencies
     typer>=0.3.0,<1.0.0
     tqdm>=4.38.0,<5.0.0

From f22ff91476c1052f9b71218144e7e88841b5703a Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 20 Mar 2026 09:55:17 +0100
Subject: [PATCH 11/42] Fix vuln scan by not calling test file requirements
 requirements.txt

---
 setup.py                                 | 12 ++++++------
 spacy/tests/package/test_requirements.py |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/setup.py b/setup.py
index 33178662df4..6f28b7340d0 100755
--- a/setup.py
+++ b/setup.py
@@ -82,9 +82,9 @@
 }
 # Files to copy into the package that are otherwise not included
 COPY_FILES = {
-    ROOT / "setup.cfg": PACKAGE_ROOT / "tests" / "package",
-    ROOT / "pyproject.toml": PACKAGE_ROOT / "tests" / "package",
-    ROOT / "requirements.txt": PACKAGE_ROOT / "tests" / "package",
+    ROOT / "setup.cfg": PACKAGE_ROOT / "tests" / "package" / "test.cfg",
+    ROOT / "pyproject.toml": PACKAGE_ROOT / "tests" / "package" / "test.toml",
+    ROOT / "requirements.txt": PACKAGE_ROOT / "tests" / "package" / "test.txt",
 }
 
 
@@ -173,10 +173,10 @@ def setup_package():
         about = {}
         exec(f.read(), about)
 
-    for copy_file, target_dir in COPY_FILES.items():
+    for copy_file, target_file in COPY_FILES.items():
         if copy_file.exists():
-            shutil.copy(str(copy_file), str(target_dir))
-            print(f"Copied {copy_file} -> {target_dir}")
+            shutil.copyfile(str(copy_file), str(target_file))
+            print(f"Copied {copy_file} -> {target_file}")
 
     include_dirs = [
         numpy.get_include(),
diff --git a/spacy/tests/package/test_requirements.py b/spacy/tests/package/test_requirements.py
index ff07c5b454a..e7df02e9769 100644
--- a/spacy/tests/package/test_requirements.py
+++ b/spacy/tests/package/test_requirements.py
@@ -37,7 +37,7 @@ def test_build_dependencies():
     req_dict = {}
 
     root_dir = Path(__file__).parent
-    req_file = root_dir / "requirements.txt"
+    req_file = root_dir / "test.txt"
     with req_file.open() as f:
         lines = f.readlines()
         for line in lines:
@@ -48,7 +48,7 @@ def test_build_dependencies():
                     req_dict[lib] = v
     # check setup.cfg and compare to requirements.txt
     # also fails when there are missing or additional libs
-    setup_file = root_dir / "setup.cfg"
+    setup_file = root_dir / "test.cfg"
     with setup_file.open() as f:
         lines = f.readlines()
 
@@ -73,7 +73,7 @@ def test_build_dependencies():
 
     # check pyproject.toml and compare the versions of the libs to requirements.txt
     # does not fail when there are missing or additional libs
-    toml_file = root_dir / "pyproject.toml"
+    toml_file = root_dir / "test.toml"
     with toml_file.open() as f:
         lines = f.readlines()
     for line in lines:

From 21439ec09d181229aae51639aa3049a7a7b107fa Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 20 Mar 2026 11:10:26 +0100
Subject: [PATCH 12/42] Migrate from pydantic v1 to v2, require pydantic>=2.0.0

Replace all pydantic.v1 compat imports with direct pydantic v2 imports.
Migrate schemas to v2 API: ConfigDict instead of inner Config class,
field_validator instead of validator, RootModel instead of __root__,
model_dump() instead of dict(), model_validate() instead of parse_obj(),
Annotated[str, StringConstraints()] instead of ConstrainedStr,
min_length instead of min_items, populate_by_name instead of
allow_population_by_field_name.
---
 requirements.txt                              |   2 +-
 setup.cfg                                     |   2 +-
 spacy/cli/init_config.py                      |   2 +-
 spacy/matcher/phrasematcher.pyx               |   2 +-
 .../pipeline/_edit_tree_internals/schemas.py  |  32 ++--
 spacy/schemas.py                              | 155 +++++++-----------
 spacy/tests/pipeline/test_initialize.py       |   5 +-
 spacy/tests/pipeline/test_pipe_factories.py   |   5 +-
 spacy/tests/test_misc.py                      |   5 +-
 9 files changed, 84 insertions(+), 126 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 19b803325b8..e2a4a181813 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,7 +15,7 @@ weasel>=1.0.0,<1.1.0
 numpy>=2.0.0,<3.0.0
 requests>=2.13.0,<3.0.0
 tqdm>=4.38.0,<5.0.0
-pydantic>=1.7.4,!=1.8,!=1.8.1,<3.0.0
+pydantic>=2.0.0,<3.0.0
 jinja2
 # Official Python utilities
 setuptools
diff --git a/setup.cfg b/setup.cfg
index 56dcc343aa0..f44d661feec 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -61,7 +61,7 @@ install_requires =
     numpy>=1.15.0; python_version < "3.9"
     numpy>=1.19.0; python_version >= "3.9"
     requests>=2.13.0,<3.0.0
-    pydantic>=1.7.4,!=1.8,!=1.8.1,<3.0.0
+    pydantic>=2.0.0,<3.0.0
     jinja2
     # Official Python utilities
     setuptools
diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py
index a7fb2b5b81f..c2c26ca56ec 100644
--- a/spacy/cli/init_config.py
+++ b/spacy/cli/init_config.py
@@ -168,7 +168,7 @@ def init_config(
     # Filter out duplicates since tok2vec and transformer are added by template
     pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")]
     defaults = RECOMMENDATIONS["__default__"]
-    reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, defaults)).dict()
+    reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, defaults)).model_dump()
     variables = {
         "lang": lang,
         "components": pipeline,
diff --git a/spacy/matcher/phrasematcher.pyx b/spacy/matcher/phrasematcher.pyx
index ccc830e35c1..a71f85f6e63 100644
--- a/spacy/matcher/phrasematcher.pyx
+++ b/spacy/matcher/phrasematcher.pyx
@@ -57,7 +57,7 @@ cdef class PhraseMatcher:
                 attr = "ORTH"
             if attr == "IS_SENT_START":
                 attr = "SENT_START"
-            if attr.lower() not in TokenPattern().dict():
+            if attr.lower() not in TokenPattern().model_dump():
                 raise ValueError(Errors.E152.format(attr=attr))
             self.attr = IDS.get(attr)
 
diff --git a/spacy/pipeline/_edit_tree_internals/schemas.py b/spacy/pipeline/_edit_tree_internals/schemas.py
index 89f2861ceac..4afb6b3747c 100644
--- a/spacy/pipeline/_edit_tree_internals/schemas.py
+++ b/spacy/pipeline/_edit_tree_internals/schemas.py
@@ -1,34 +1,36 @@
 from collections import defaultdict
 from typing import Any, Dict, List, Union
 
-try:
-    from pydantic.v1 import BaseModel, Field, ValidationError
-    from pydantic.v1.types import StrictBool, StrictInt, StrictStr
-except ImportError:
-    from pydantic import BaseModel, Field, ValidationError  # type: ignore
-    from pydantic.types import StrictBool, StrictInt, StrictStr  # type: ignore
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    RootModel,
+    StrictBool,
+    StrictInt,
+    StrictStr,
+    ValidationError,
+)
 
 
 class MatchNodeSchema(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
     prefix_len: StrictInt = Field(..., title="Prefix length")
     suffix_len: StrictInt = Field(..., title="Suffix length")
     prefix_tree: StrictInt = Field(..., title="Prefix tree")
     suffix_tree: StrictInt = Field(..., title="Suffix tree")
 
-    class Config:
-        extra = "forbid"
-
 
 class SubstNodeSchema(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
     orig: Union[int, StrictStr] = Field(..., title="Original substring")
     subst: Union[int, StrictStr] = Field(..., title="Replacement substring")
 
-    class Config:
-        extra = "forbid"
-
 
-class EditTreeSchema(BaseModel):
-    __root__: Union[MatchNodeSchema, SubstNodeSchema]
+class EditTreeSchema(RootModel[Union[MatchNodeSchema, SubstNodeSchema]]):
+    pass
 
 
 def validate_edit_tree(obj: Dict[str, Any]) -> List[str]:
@@ -38,7 +40,7 @@ def validate_edit_tree(obj: Dict[str, Any]) -> List[str]:
     RETURNS (List[str]): A list of error messages, if available.
     """
     try:
-        EditTreeSchema.parse_obj(obj)
+        EditTreeSchema.model_validate(obj)
         return []
     except ValidationError as e:
         errors = e.errors()
diff --git a/spacy/schemas.py b/spacy/schemas.py
index fa987b90f19..b72037a0a8a 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -4,6 +4,7 @@
 from enum import Enum
 from typing import (
     TYPE_CHECKING,
+    Annotated,
     Any,
     Callable,
     Dict,
@@ -16,34 +17,20 @@
     Union,
 )
 
-try:
-    from pydantic.v1 import (
-        BaseModel,
-        ConstrainedStr,
-        Field,
-        StrictBool,
-        StrictFloat,
-        StrictInt,
-        StrictStr,
-        ValidationError,
-        create_model,
-        validator,
-    )
-    from pydantic.v1.main import ModelMetaclass
-except ImportError:
-    from pydantic import (  # type: ignore
-        BaseModel,
-        ConstrainedStr,
-        Field,
-        StrictBool,
-        StrictFloat,
-        StrictInt,
-        StrictStr,
-        ValidationError,
-        create_model,
-        validator,
-    )
-    from pydantic.main import ModelMetaclass  # type: ignore
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    RootModel,
+    StrictBool,
+    StrictFloat,
+    StrictInt,
+    StrictStr,
+    StringConstraints,
+    ValidationError,
+    create_model,
+    field_validator,
+)
 from thinc.api import ConfigValidationError, Model, Optimizer
 from thinc.config import Promise
 
@@ -89,14 +76,8 @@ def validate(schema: Type[BaseModel], obj: Dict[str, Any]) -> List[str]:
 # Initialization
 
 
-class ArgSchemaConfig:
-    extra = "forbid"
-    arbitrary_types_allowed = True
-
-
-class ArgSchemaConfigExtra:
-    extra = "forbid"
-    arbitrary_types_allowed = True
+_ARG_SCHEMA_CONFIG = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
+_ARG_SCHEMA_CONFIG_EXTRA = ConfigDict(extra="allow", arbitrary_types_allowed=True)
 
 
 def get_arg_model(
@@ -105,7 +86,7 @@ def get_arg_model(
     exclude: Iterable[str] = tuple(),
     name: str = "ArgModel",
     strict: bool = True,
-) -> ModelMetaclass:
+) -> Type[BaseModel]:
     """Generate a pydantic model for function arguments.
 
     func (Callable): The function to generate the schema for.
@@ -113,15 +94,15 @@ def get_arg_model(
     name (str): Name of created model class.
     strict (bool): Don't allow extra arguments if no variable keyword arguments
         are allowed on the function.
-    RETURNS (ModelMetaclass): A pydantic model.
+    RETURNS (Type[BaseModel]): A pydantic model.
     """
-    sig_args = {}
+    sig_args: Dict[str, Any] = {}
     try:
         sig = inspect.signature(func)
     except ValueError:
         # Typically happens if the method is part of a Cython module without
         # binding=True. Here we just use an empty model that allows everything.
-        return create_model(name, __config__=ArgSchemaConfigExtra)  # type: ignore[arg-type, return-value]
+        return create_model(name, __config__=_ARG_SCHEMA_CONFIG_EXTRA)  # type: ignore[call-overload]
     has_variable = False
     for param in sig.parameters.values():
         if param.name in exclude:
@@ -141,8 +122,8 @@ def get_arg_model(
         default = param.default if param.default != param.empty else default_empty
         sig_args[param.name] = (annotation, default)
     is_strict = strict and not has_variable
-    sig_args["__config__"] = ArgSchemaConfig if is_strict else ArgSchemaConfigExtra  # type: ignore[assignment]
-    return create_model(name, **sig_args)  # type: ignore[call-overload, arg-type, return-value]
+    config = _ARG_SCHEMA_CONFIG if is_strict else _ARG_SCHEMA_CONFIG_EXTRA
+    return create_model(name, __config__=config, **sig_args)  # type: ignore[call-overload]
 
 
 def validate_init_settings(
@@ -167,7 +148,7 @@ def validate_init_settings(
     """
     schema = get_arg_model(func, exclude=exclude, name="InitArgModel")
     try:
-        return schema(**settings).dict()
+        return schema(**settings).model_dump()
     except ValidationError as e:
         block = "initialize" if not section else f"initialize.{section}"
         title = f"Error validating initialization settings in [{block}]"
@@ -193,6 +174,8 @@ def validate_token_pattern(obj: list) -> List[str]:
 
 
 class TokenPatternString(BaseModel):
+    model_config = ConfigDict(extra="forbid", populate_by_name=True)
+
     REGEX: Optional[Union[StrictStr, "TokenPatternString"]] = Field(None, alias="regex")
     IN: Optional[List[StrictStr]] = Field(None, alias="in")
     NOT_IN: Optional[List[StrictStr]] = Field(None, alias="not_in")
@@ -228,18 +211,17 @@ class TokenPatternString(BaseModel):
         None, alias="fuzzy9"
     )
 
-    class Config:
-        extra = "forbid"
-        allow_population_by_field_name = True  # allow alias and field name
-
-    @validator("*", pre=True, each_item=True, allow_reuse=True)
-    def raise_for_none(cls, v):
+    @field_validator("*", mode="before")
+    @classmethod
+    def raise_for_none(cls, v: Any) -> Any:
         if v is None:
             raise ValueError("None / null is not allowed")
         return v
 
 
 class TokenPatternNumber(BaseModel):
+    model_config = ConfigDict(extra="forbid", populate_by_name=True)
+
     REGEX: Optional[StrictStr] = Field(None, alias="regex")
     IN: Optional[List[StrictInt]] = Field(None, alias="in")
     NOT_IN: Optional[List[StrictInt]] = Field(None, alias="not_in")
@@ -253,26 +235,24 @@ class TokenPatternNumber(BaseModel):
     GT: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias=">")
     LT: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="<")
 
-    class Config:
-        extra = "forbid"
-        allow_population_by_field_name = True  # allow alias and field name
-
-    @validator("*", pre=True, each_item=True, allow_reuse=True)
-    def raise_for_none(cls, v):
+    @field_validator("*", mode="before")
+    @classmethod
+    def raise_for_none(cls, v: Any) -> Any:
         if v is None:
             raise ValueError("None / null is not allowed")
         return v
 
 
 class TokenPatternOperatorSimple(str, Enum):
-    plus: StrictStr = StrictStr("+")
-    star: StrictStr = StrictStr("*")
-    question: StrictStr = StrictStr("?")
-    exclamation: StrictStr = StrictStr("!")
+    plus = "+"
+    star = "*"
+    question = "?"
+    exclamation = "!"
 
 
-class TokenPatternOperatorMinMax(ConstrainedStr):
-    regex = re.compile(r"^({\d+}|{\d+,\d*}|{\d*,\d+})$")
+TokenPatternOperatorMinMax = Annotated[
+    str, StringConstraints(pattern=r"^({\d+}|{\d+,\d*}|{\d*,\d+})$")
+]
 
 
 TokenPatternOperator = Union[TokenPatternOperatorSimple, TokenPatternOperatorMinMax]
@@ -285,6 +265,12 @@ class TokenPatternOperatorMinMax(ConstrainedStr):
 
 
 class TokenPattern(BaseModel):
+    model_config = ConfigDict(
+        extra="forbid",
+        populate_by_name=True,
+        alias_generator=lambda value: value.upper(),
+    )
+
     orth: Optional[StringValue] = None
     text: Optional[StringValue] = None
     lower: Optional[StringValue] = None
@@ -323,23 +309,18 @@ class TokenPattern(BaseModel):
     op: Optional[TokenPatternOperator] = None
     underscore: Optional[Dict[StrictStr, UnderscoreValue]] = Field(None, alias="_")
 
-    class Config:
-        extra = "forbid"
-        allow_population_by_field_name = True
-        alias_generator = lambda value: value.upper()
-
-    @validator("*", pre=True, allow_reuse=True)
-    def raise_for_none(cls, v):
+    @field_validator("*", mode="before")
+    @classmethod
+    def raise_for_none(cls, v: Any) -> Any:
         if v is None:
             raise ValueError("None / null is not allowed")
         return v
 
 
 class TokenPatternSchema(BaseModel):
-    pattern: List[TokenPattern] = Field(..., min_items=1)
+    model_config = ConfigDict(extra="forbid")
 
-    class Config:
-        extra = "forbid"
+    pattern: List[TokenPattern] = Field(..., min_length=1)
 
 
 # Model meta
@@ -376,6 +357,7 @@ class ModelMetaSchema(BaseModel):
 
 
 class ConfigSchemaTraining(BaseModel):
+    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
     # fmt: off
     dev_corpus: StrictStr = Field(..., title="Path in the config to the dev data")
     train_corpus: StrictStr = Field(..., title="Path in the config to the training data")
@@ -397,12 +379,9 @@ class ConfigSchemaTraining(BaseModel):
     before_update: Optional[Callable[["Language", Dict[str, Any]], None]] = Field(..., title="Optional callback that is invoked at the start of each training step")
     # fmt: on
 
-    class Config:
-        extra = "forbid"
-        arbitrary_types_allowed = True
-
 
 class ConfigSchemaNlp(BaseModel):
+    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
     # fmt: off
     lang: StrictStr = Field(..., title="The base language to use")
     pipeline: List[StrictStr] = Field(..., title="The pipeline component names in order")
@@ -415,17 +394,13 @@ class ConfigSchemaNlp(BaseModel):
     vectors: Callable = Field(..., title="Vectors implementation")
     # fmt: on
 
-    class Config:
-        extra = "forbid"
-        arbitrary_types_allowed = True
-
 
 class ConfigSchemaPretrainEmpty(BaseModel):
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra="forbid")
 
 
 class ConfigSchemaPretrain(BaseModel):
+    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
     # fmt: off
     max_epochs: StrictInt = Field(..., title="Maximum number of epochs to train for")
     dropout: StrictFloat = Field(..., title="Dropout rate")
@@ -439,29 +414,23 @@ class ConfigSchemaPretrain(BaseModel):
     objective: Callable[["Vocab", Model], Model] = Field(..., title="A function that creates the pretraining objective.")
     # fmt: on
 
-    class Config:
-        extra = "forbid"
-        arbitrary_types_allowed = True
-
 
 class ConfigSchemaInit(BaseModel):
+    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
     # fmt: off
     vocab_data: Optional[StrictStr] = Field(..., title="Path to JSON-formatted vocabulary file")
     lookups: Optional[Lookups] = Field(..., title="Vocabulary lookups, e.g. lexeme normalization")
     vectors: Optional[StrictStr] = Field(..., title="Path to vectors")
     init_tok2vec: Optional[StrictStr] = Field(..., title="Path to pretrained tok2vec weights")
-    tokenizer: Dict[StrictStr, Any] = Field(..., help="Arguments to be passed into Tokenizer.initialize")
-    components: Dict[StrictStr, Dict[StrictStr, Any]] = Field(..., help="Arguments for TrainablePipe.initialize methods of pipeline components, keyed by component")
+    tokenizer: Dict[StrictStr, Any] = Field(..., description="Arguments to be passed into Tokenizer.initialize")
+    components: Dict[StrictStr, Dict[StrictStr, Any]] = Field(..., description="Arguments for TrainablePipe.initialize methods of pipeline components, keyed by component")
     before_init: Optional[Callable[["Language"], "Language"]] = Field(..., title="Optional callback to modify nlp object before initialization")
     after_init: Optional[Callable[["Language"], "Language"]] = Field(..., title="Optional callback to modify nlp object after initialization")
     # fmt: on
 
-    class Config:
-        extra = "forbid"
-        arbitrary_types_allowed = True
-
 
 class ConfigSchema(BaseModel):
+    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
     training: ConfigSchemaTraining
     nlp: ConfigSchemaNlp
     pretraining: Union[ConfigSchemaPretrain, ConfigSchemaPretrainEmpty] = {}  # type: ignore[assignment]
@@ -469,10 +438,6 @@ class ConfigSchema(BaseModel):
     corpora: Dict[str, Reader]
     initialize: ConfigSchemaInit
 
-    class Config:
-        extra = "allow"
-        arbitrary_types_allowed = True
-
 
 CONFIG_SCHEMAS = {
     "nlp": ConfigSchemaNlp,
diff --git a/spacy/tests/pipeline/test_initialize.py b/spacy/tests/pipeline/test_initialize.py
index 9854b391e60..546068800fa 100644
--- a/spacy/tests/pipeline/test_initialize.py
+++ b/spacy/tests/pipeline/test_initialize.py
@@ -1,9 +1,6 @@
 import pytest
 
-try:
-    from pydantic.v1 import StrictBool
-except ImportError:
-    from pydantic import StrictBool  # type: ignore
+from pydantic import StrictBool
 
 from thinc.api import ConfigValidationError
 
diff --git a/spacy/tests/pipeline/test_pipe_factories.py b/spacy/tests/pipeline/test_pipe_factories.py
index b355379bfd0..f8f9ec4b10d 100644
--- a/spacy/tests/pipeline/test_pipe_factories.py
+++ b/spacy/tests/pipeline/test_pipe_factories.py
@@ -1,9 +1,6 @@
 import pytest
 
-try:
-    from pydantic.v1 import StrictInt, StrictStr
-except ImportError:
-    from pydantic import StrictInt, StrictStr  # type: ignore
+from pydantic import StrictInt, StrictStr
 
 from thinc.api import ConfigValidationError, Linear, Model
 
diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index d2a41ff0fed..2c26952891d 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -4,10 +4,7 @@
 
 import pytest
 
-try:
-    from pydantic.v1 import ValidationError
-except ImportError:
-    from pydantic import ValidationError  # type: ignore
+from pydantic import ValidationError
 
 from thinc.api import (
     Config,

From d41afc2966037c15a136c8f070b2ef6aefaafc88 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 20 Mar 2026 11:13:21 +0100
Subject: [PATCH 13/42] isort

---
 spacy/tests/pipeline/test_initialize.py     | 2 --
 spacy/tests/pipeline/test_pipe_factories.py | 2 --
 spacy/tests/test_misc.py                    | 2 --
 3 files changed, 6 deletions(-)

diff --git a/spacy/tests/pipeline/test_initialize.py b/spacy/tests/pipeline/test_initialize.py
index 546068800fa..6dd4114f1cd 100644
--- a/spacy/tests/pipeline/test_initialize.py
+++ b/spacy/tests/pipeline/test_initialize.py
@@ -1,7 +1,5 @@
 import pytest
-
 from pydantic import StrictBool
-
 from thinc.api import ConfigValidationError
 
 from spacy.lang.en import English
diff --git a/spacy/tests/pipeline/test_pipe_factories.py b/spacy/tests/pipeline/test_pipe_factories.py
index f8f9ec4b10d..a8a6c7d136a 100644
--- a/spacy/tests/pipeline/test_pipe_factories.py
+++ b/spacy/tests/pipeline/test_pipe_factories.py
@@ -1,7 +1,5 @@
 import pytest
-
 from pydantic import StrictInt, StrictStr
-
 from thinc.api import ConfigValidationError, Linear, Model
 
 import spacy
diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index 2c26952891d..309c57b0926 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -3,9 +3,7 @@
 from pathlib import Path
 
 import pytest
-
 from pydantic import ValidationError
-
 from thinc.api import (
     Config,
     ConfigValidationError,

From 2afc3fd41c93af48f1f4d366cda301fb067b87c9 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 20 Mar 2026 11:34:12 +0100
Subject: [PATCH 14/42] Escape braces in TokenPatternOperatorMinMax regex for
 Rust regex engine

---
 spacy/schemas.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/schemas.py b/spacy/schemas.py
index b72037a0a8a..079e3aa853a 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -251,7 +251,7 @@ class TokenPatternOperatorSimple(str, Enum):
 
 
 TokenPatternOperatorMinMax = Annotated[
-    str, StringConstraints(pattern=r"^({\d+}|{\d+,\d*}|{\d*,\d+})$")
+    str, StringConstraints(pattern=r"^(\{\d+\}|\{\d+,\d*\}|\{\d*,\d+\})$")
 ]
 
 

From 60a19cb800e84553298170e56f72cb6695360671 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 20 Mar 2026 14:02:03 +0100
Subject: [PATCH 15/42] Revert to confection <1 and allow pydantic v1

---
 requirements.txt | 4 ++--
 setup.cfg        | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index e2a4a181813..6e3fbdf71d0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,7 +15,7 @@ weasel>=1.0.0,<1.1.0
 numpy>=2.0.0,<3.0.0
 requests>=2.13.0,<3.0.0
 tqdm>=4.38.0,<5.0.0
-pydantic>=2.0.0,<3.0.0
+pydantic>=1.7.4,!=1.8,!=1.8.1,<3.0.0
 jinja2
 # Official Python utilities
 setuptools
@@ -36,4 +36,4 @@ types-setuptools>=57.0.0
 black>=25.0.0
 cython-lint>=0.15.0
 isort>=5.0,<6.0
-confection>=1.0.0,<2.0.0
+confection>=0.0.4,<1.0.0
diff --git a/setup.cfg b/setup.cfg
index f44d661feec..cfc6209c706 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -54,14 +54,14 @@ install_requires =
     srsly>=2.4.3,<3.0.0
     catalogue>=2.0.6,<2.1.0
     weasel>=1.0.0,<1.1.0
-    confection>=1.0.0,<1.1.0
+    confection>=0.0.4,<1.0.0
     # Third-party dependencies
     typer>=0.3.0,<1.0.0
     tqdm>=4.38.0,<5.0.0
     numpy>=1.15.0; python_version < "3.9"
     numpy>=1.19.0; python_version >= "3.9"
     requests>=2.13.0,<3.0.0
-    pydantic>=2.0.0,<3.0.0
+    pydantic>=1.7.4,!=1.8,!=1.8.1,<3.0.0
     jinja2
     # Official Python utilities
     setuptools

From 4f19800b4e972755cbb2f597c20fdf42acecb60c Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 20 Mar 2026 14:04:12 +0100
Subject: [PATCH 16/42] Revert to weasel <0.5

---
 requirements.txt | 2 +-
 setup.cfg        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 6e3fbdf71d0..895bc3f7c07 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,7 +10,7 @@ wasabi>=0.9.1,<1.2.0
 srsly>=2.4.3,<3.0.0
 catalogue>=2.0.6,<2.1.0
 typer>=0.3.0,<1.0.0
-weasel>=1.0.0,<1.1.0
+weasel>=0.4.2,<0.5.0
 # Third party dependencies
 numpy>=2.0.0,<3.0.0
 requests>=2.13.0,<3.0.0
diff --git a/setup.cfg b/setup.cfg
index cfc6209c706..585c2694fe2 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -53,7 +53,7 @@ install_requires =
     wasabi>=0.9.1,<1.2.0
     srsly>=2.4.3,<3.0.0
     catalogue>=2.0.6,<2.1.0
-    weasel>=1.0.0,<1.1.0
+    weasel>=0.4.2,<0.5.0
     confection>=0.0.4,<1.0.0
     # Third-party dependencies
     typer>=0.3.0,<1.0.0

From 3154ede82a1bc1e1a0198233b3adcb2b9c242b5d Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 20 Mar 2026 14:09:37 +0100
Subject: [PATCH 17/42] Revert pydantic v2 migration, restore v1 compat imports

---
 spacy/cli/init_config.py                      |   2 +-
 spacy/matcher/phrasematcher.pyx               |   2 +-
 .../pipeline/_edit_tree_internals/schemas.py  |  32 ++--
 spacy/schemas.py                              | 155 +++++++++++-------
 spacy/tests/pipeline/test_initialize.py       |   7 +-
 spacy/tests/pipeline/test_pipe_factories.py   |   7 +-
 spacy/tests/test_misc.py                      |   7 +-
 7 files changed, 130 insertions(+), 82 deletions(-)

diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py
index c2c26ca56ec..a7fb2b5b81f 100644
--- a/spacy/cli/init_config.py
+++ b/spacy/cli/init_config.py
@@ -168,7 +168,7 @@ def init_config(
     # Filter out duplicates since tok2vec and transformer are added by template
     pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")]
     defaults = RECOMMENDATIONS["__default__"]
-    reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, defaults)).model_dump()
+    reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, defaults)).dict()
     variables = {
         "lang": lang,
         "components": pipeline,
diff --git a/spacy/matcher/phrasematcher.pyx b/spacy/matcher/phrasematcher.pyx
index a71f85f6e63..ccc830e35c1 100644
--- a/spacy/matcher/phrasematcher.pyx
+++ b/spacy/matcher/phrasematcher.pyx
@@ -57,7 +57,7 @@ cdef class PhraseMatcher:
                 attr = "ORTH"
             if attr == "IS_SENT_START":
                 attr = "SENT_START"
-            if attr.lower() not in TokenPattern().model_dump():
+            if attr.lower() not in TokenPattern().dict():
                 raise ValueError(Errors.E152.format(attr=attr))
             self.attr = IDS.get(attr)
 
diff --git a/spacy/pipeline/_edit_tree_internals/schemas.py b/spacy/pipeline/_edit_tree_internals/schemas.py
index 4afb6b3747c..89f2861ceac 100644
--- a/spacy/pipeline/_edit_tree_internals/schemas.py
+++ b/spacy/pipeline/_edit_tree_internals/schemas.py
@@ -1,36 +1,34 @@
 from collections import defaultdict
 from typing import Any, Dict, List, Union
 
-from pydantic import (
-    BaseModel,
-    ConfigDict,
-    Field,
-    RootModel,
-    StrictBool,
-    StrictInt,
-    StrictStr,
-    ValidationError,
-)
+try:
+    from pydantic.v1 import BaseModel, Field, ValidationError
+    from pydantic.v1.types import StrictBool, StrictInt, StrictStr
+except ImportError:
+    from pydantic import BaseModel, Field, ValidationError  # type: ignore
+    from pydantic.types import StrictBool, StrictInt, StrictStr  # type: ignore
 
 
 class MatchNodeSchema(BaseModel):
-    model_config = ConfigDict(extra="forbid")
-
     prefix_len: StrictInt = Field(..., title="Prefix length")
     suffix_len: StrictInt = Field(..., title="Suffix length")
     prefix_tree: StrictInt = Field(..., title="Prefix tree")
     suffix_tree: StrictInt = Field(..., title="Suffix tree")
 
+    class Config:
+        extra = "forbid"
 
-class SubstNodeSchema(BaseModel):
-    model_config = ConfigDict(extra="forbid")
 
+class SubstNodeSchema(BaseModel):
     orig: Union[int, StrictStr] = Field(..., title="Original substring")
     subst: Union[int, StrictStr] = Field(..., title="Replacement substring")
 
+    class Config:
+        extra = "forbid"
+
 
-class EditTreeSchema(RootModel[Union[MatchNodeSchema, SubstNodeSchema]]):
-    pass
+class EditTreeSchema(BaseModel):
+    __root__: Union[MatchNodeSchema, SubstNodeSchema]
 
 
 def validate_edit_tree(obj: Dict[str, Any]) -> List[str]:
@@ -40,7 +38,7 @@ def validate_edit_tree(obj: Dict[str, Any]) -> List[str]:
     RETURNS (List[str]): A list of error messages, if available.
     """
     try:
-        EditTreeSchema.model_validate(obj)
+        EditTreeSchema.parse_obj(obj)
         return []
     except ValidationError as e:
         errors = e.errors()
diff --git a/spacy/schemas.py b/spacy/schemas.py
index 079e3aa853a..fa987b90f19 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -4,7 +4,6 @@
 from enum import Enum
 from typing import (
     TYPE_CHECKING,
-    Annotated,
     Any,
     Callable,
     Dict,
@@ -17,20 +16,34 @@
     Union,
 )
 
-from pydantic import (
-    BaseModel,
-    ConfigDict,
-    Field,
-    RootModel,
-    StrictBool,
-    StrictFloat,
-    StrictInt,
-    StrictStr,
-    StringConstraints,
-    ValidationError,
-    create_model,
-    field_validator,
-)
+try:
+    from pydantic.v1 import (
+        BaseModel,
+        ConstrainedStr,
+        Field,
+        StrictBool,
+        StrictFloat,
+        StrictInt,
+        StrictStr,
+        ValidationError,
+        create_model,
+        validator,
+    )
+    from pydantic.v1.main import ModelMetaclass
+except ImportError:
+    from pydantic import (  # type: ignore
+        BaseModel,
+        ConstrainedStr,
+        Field,
+        StrictBool,
+        StrictFloat,
+        StrictInt,
+        StrictStr,
+        ValidationError,
+        create_model,
+        validator,
+    )
+    from pydantic.main import ModelMetaclass  # type: ignore
 from thinc.api import ConfigValidationError, Model, Optimizer
 from thinc.config import Promise
 
@@ -76,8 +89,14 @@ def validate(schema: Type[BaseModel], obj: Dict[str, Any]) -> List[str]:
 # Initialization
 
 
-_ARG_SCHEMA_CONFIG = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
-_ARG_SCHEMA_CONFIG_EXTRA = ConfigDict(extra="allow", arbitrary_types_allowed=True)
+class ArgSchemaConfig:
+    extra = "forbid"
+    arbitrary_types_allowed = True
+
+
+class ArgSchemaConfigExtra:
+    extra = "forbid"
+    arbitrary_types_allowed = True
 
 
 def get_arg_model(
@@ -86,7 +105,7 @@ def get_arg_model(
     exclude: Iterable[str] = tuple(),
     name: str = "ArgModel",
     strict: bool = True,
-) -> Type[BaseModel]:
+) -> ModelMetaclass:
     """Generate a pydantic model for function arguments.
 
     func (Callable): The function to generate the schema for.
@@ -94,15 +113,15 @@ def get_arg_model(
     name (str): Name of created model class.
     strict (bool): Don't allow extra arguments if no variable keyword arguments
         are allowed on the function.
-    RETURNS (Type[BaseModel]): A pydantic model.
+    RETURNS (ModelMetaclass): A pydantic model.
     """
-    sig_args: Dict[str, Any] = {}
+    sig_args = {}
     try:
         sig = inspect.signature(func)
     except ValueError:
         # Typically happens if the method is part of a Cython module without
         # binding=True. Here we just use an empty model that allows everything.
-        return create_model(name, __config__=_ARG_SCHEMA_CONFIG_EXTRA)  # type: ignore[call-overload]
+        return create_model(name, __config__=ArgSchemaConfigExtra)  # type: ignore[arg-type, return-value]
     has_variable = False
     for param in sig.parameters.values():
         if param.name in exclude:
@@ -122,8 +141,8 @@ def get_arg_model(
         default = param.default if param.default != param.empty else default_empty
         sig_args[param.name] = (annotation, default)
     is_strict = strict and not has_variable
-    config = _ARG_SCHEMA_CONFIG if is_strict else _ARG_SCHEMA_CONFIG_EXTRA
-    return create_model(name, __config__=config, **sig_args)  # type: ignore[call-overload]
+    sig_args["__config__"] = ArgSchemaConfig if is_strict else ArgSchemaConfigExtra  # type: ignore[assignment]
+    return create_model(name, **sig_args)  # type: ignore[call-overload, arg-type, return-value]
 
 
 def validate_init_settings(
@@ -148,7 +167,7 @@ def validate_init_settings(
     """
     schema = get_arg_model(func, exclude=exclude, name="InitArgModel")
     try:
-        return schema(**settings).model_dump()
+        return schema(**settings).dict()
     except ValidationError as e:
         block = "initialize" if not section else f"initialize.{section}"
         title = f"Error validating initialization settings in [{block}]"
@@ -174,8 +193,6 @@ def validate_token_pattern(obj: list) -> List[str]:
 
 
 class TokenPatternString(BaseModel):
-    model_config = ConfigDict(extra="forbid", populate_by_name=True)
-
     REGEX: Optional[Union[StrictStr, "TokenPatternString"]] = Field(None, alias="regex")
     IN: Optional[List[StrictStr]] = Field(None, alias="in")
     NOT_IN: Optional[List[StrictStr]] = Field(None, alias="not_in")
@@ -211,17 +228,18 @@ class TokenPatternString(BaseModel):
         None, alias="fuzzy9"
     )
 
-    @field_validator("*", mode="before")
-    @classmethod
-    def raise_for_none(cls, v: Any) -> Any:
+    class Config:
+        extra = "forbid"
+        allow_population_by_field_name = True  # allow alias and field name
+
+    @validator("*", pre=True, each_item=True, allow_reuse=True)
+    def raise_for_none(cls, v):
         if v is None:
             raise ValueError("None / null is not allowed")
         return v
 
 
 class TokenPatternNumber(BaseModel):
-    model_config = ConfigDict(extra="forbid", populate_by_name=True)
-
     REGEX: Optional[StrictStr] = Field(None, alias="regex")
     IN: Optional[List[StrictInt]] = Field(None, alias="in")
     NOT_IN: Optional[List[StrictInt]] = Field(None, alias="not_in")
@@ -235,24 +253,26 @@ class TokenPatternNumber(BaseModel):
     GT: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias=">")
     LT: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="<")
 
-    @field_validator("*", mode="before")
-    @classmethod
-    def raise_for_none(cls, v: Any) -> Any:
+    class Config:
+        extra = "forbid"
+        allow_population_by_field_name = True  # allow alias and field name
+
+    @validator("*", pre=True, each_item=True, allow_reuse=True)
+    def raise_for_none(cls, v):
         if v is None:
             raise ValueError("None / null is not allowed")
         return v
 
 
 class TokenPatternOperatorSimple(str, Enum):
-    plus = "+"
-    star = "*"
-    question = "?"
-    exclamation = "!"
+    plus: StrictStr = StrictStr("+")
+    star: StrictStr = StrictStr("*")
+    question: StrictStr = StrictStr("?")
+    exclamation: StrictStr = StrictStr("!")
 
 
-TokenPatternOperatorMinMax = Annotated[
-    str, StringConstraints(pattern=r"^(\{\d+\}|\{\d+,\d*\}|\{\d*,\d+\})$")
-]
+class TokenPatternOperatorMinMax(ConstrainedStr):
+    regex = re.compile(r"^({\d+}|{\d+,\d*}|{\d*,\d+})$")
 
 
 TokenPatternOperator = Union[TokenPatternOperatorSimple, TokenPatternOperatorMinMax]
@@ -265,12 +285,6 @@ class TokenPatternOperatorSimple(str, Enum):
 
 
 class TokenPattern(BaseModel):
-    model_config = ConfigDict(
-        extra="forbid",
-        populate_by_name=True,
-        alias_generator=lambda value: value.upper(),
-    )
-
     orth: Optional[StringValue] = None
     text: Optional[StringValue] = None
     lower: Optional[StringValue] = None
@@ -309,18 +323,23 @@ class TokenPattern(BaseModel):
     op: Optional[TokenPatternOperator] = None
     underscore: Optional[Dict[StrictStr, UnderscoreValue]] = Field(None, alias="_")
 
-    @field_validator("*", mode="before")
-    @classmethod
-    def raise_for_none(cls, v: Any) -> Any:
+    class Config:
+        extra = "forbid"
+        allow_population_by_field_name = True
+        alias_generator = lambda value: value.upper()
+
+    @validator("*", pre=True, allow_reuse=True)
+    def raise_for_none(cls, v):
         if v is None:
             raise ValueError("None / null is not allowed")
         return v
 
 
 class TokenPatternSchema(BaseModel):
-    model_config = ConfigDict(extra="forbid")
+    pattern: List[TokenPattern] = Field(..., min_items=1)
 
-    pattern: List[TokenPattern] = Field(..., min_length=1)
+    class Config:
+        extra = "forbid"
 
 
 # Model meta
@@ -357,7 +376,6 @@ class ModelMetaSchema(BaseModel):
 
 
 class ConfigSchemaTraining(BaseModel):
-    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
     # fmt: off
     dev_corpus: StrictStr = Field(..., title="Path in the config to the dev data")
     train_corpus: StrictStr = Field(..., title="Path in the config to the training data")
@@ -379,9 +397,12 @@ class ConfigSchemaTraining(BaseModel):
     before_update: Optional[Callable[["Language", Dict[str, Any]], None]] = Field(..., title="Optional callback that is invoked at the start of each training step")
     # fmt: on
 
+    class Config:
+        extra = "forbid"
+        arbitrary_types_allowed = True
+
 
 class ConfigSchemaNlp(BaseModel):
-    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
     # fmt: off
     lang: StrictStr = Field(..., title="The base language to use")
     pipeline: List[StrictStr] = Field(..., title="The pipeline component names in order")
@@ -394,13 +415,17 @@ class ConfigSchemaNlp(BaseModel):
     vectors: Callable = Field(..., title="Vectors implementation")
     # fmt: on
 
+    class Config:
+        extra = "forbid"
+        arbitrary_types_allowed = True
+
 
 class ConfigSchemaPretrainEmpty(BaseModel):
-    model_config = ConfigDict(extra="forbid")
+    class Config:
+        extra = "forbid"
 
 
 class ConfigSchemaPretrain(BaseModel):
-    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
     # fmt: off
     max_epochs: StrictInt = Field(..., title="Maximum number of epochs to train for")
     dropout: StrictFloat = Field(..., title="Dropout rate")
@@ -414,23 +439,29 @@ class ConfigSchemaPretrain(BaseModel):
     objective: Callable[["Vocab", Model], Model] = Field(..., title="A function that creates the pretraining objective.")
     # fmt: on
 
+    class Config:
+        extra = "forbid"
+        arbitrary_types_allowed = True
+
 
 class ConfigSchemaInit(BaseModel):
-    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
     # fmt: off
     vocab_data: Optional[StrictStr] = Field(..., title="Path to JSON-formatted vocabulary file")
     lookups: Optional[Lookups] = Field(..., title="Vocabulary lookups, e.g. lexeme normalization")
     vectors: Optional[StrictStr] = Field(..., title="Path to vectors")
     init_tok2vec: Optional[StrictStr] = Field(..., title="Path to pretrained tok2vec weights")
-    tokenizer: Dict[StrictStr, Any] = Field(..., description="Arguments to be passed into Tokenizer.initialize")
-    components: Dict[StrictStr, Dict[StrictStr, Any]] = Field(..., description="Arguments for TrainablePipe.initialize methods of pipeline components, keyed by component")
+    tokenizer: Dict[StrictStr, Any] = Field(..., help="Arguments to be passed into Tokenizer.initialize")
+    components: Dict[StrictStr, Dict[StrictStr, Any]] = Field(..., help="Arguments for TrainablePipe.initialize methods of pipeline components, keyed by component")
     before_init: Optional[Callable[["Language"], "Language"]] = Field(..., title="Optional callback to modify nlp object before initialization")
     after_init: Optional[Callable[["Language"], "Language"]] = Field(..., title="Optional callback to modify nlp object after initialization")
     # fmt: on
 
+    class Config:
+        extra = "forbid"
+        arbitrary_types_allowed = True
+
 
 class ConfigSchema(BaseModel):
-    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
     training: ConfigSchemaTraining
     nlp: ConfigSchemaNlp
     pretraining: Union[ConfigSchemaPretrain, ConfigSchemaPretrainEmpty] = {}  # type: ignore[assignment]
@@ -438,6 +469,10 @@ class ConfigSchema(BaseModel):
     corpora: Dict[str, Reader]
     initialize: ConfigSchemaInit
 
+    class Config:
+        extra = "allow"
+        arbitrary_types_allowed = True
+
 
 CONFIG_SCHEMAS = {
     "nlp": ConfigSchemaNlp,
diff --git a/spacy/tests/pipeline/test_initialize.py b/spacy/tests/pipeline/test_initialize.py
index 6dd4114f1cd..9854b391e60 100644
--- a/spacy/tests/pipeline/test_initialize.py
+++ b/spacy/tests/pipeline/test_initialize.py
@@ -1,5 +1,10 @@
 import pytest
-from pydantic import StrictBool
+
+try:
+    from pydantic.v1 import StrictBool
+except ImportError:
+    from pydantic import StrictBool  # type: ignore
+
 from thinc.api import ConfigValidationError
 
 from spacy.lang.en import English
diff --git a/spacy/tests/pipeline/test_pipe_factories.py b/spacy/tests/pipeline/test_pipe_factories.py
index a8a6c7d136a..b355379bfd0 100644
--- a/spacy/tests/pipeline/test_pipe_factories.py
+++ b/spacy/tests/pipeline/test_pipe_factories.py
@@ -1,5 +1,10 @@
 import pytest
-from pydantic import StrictInt, StrictStr
+
+try:
+    from pydantic.v1 import StrictInt, StrictStr
+except ImportError:
+    from pydantic import StrictInt, StrictStr  # type: ignore
+
 from thinc.api import ConfigValidationError, Linear, Model
 
 import spacy
diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index 309c57b0926..d2a41ff0fed 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -3,7 +3,12 @@
 from pathlib import Path
 
 import pytest
-from pydantic import ValidationError
+
+try:
+    from pydantic.v1 import ValidationError
+except ImportError:
+    from pydantic import ValidationError  # type: ignore
+
 from thinc.api import (
     Config,
     ConfigValidationError,

From d5f67dc92daeabe906158311ef483072ce10f1e0 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 20 Mar 2026 14:31:59 +0100
Subject: [PATCH 18/42] Update spaCy pydantic imports from v1 compat to v2
 native API

---
 .../pipeline/_edit_tree_internals/schemas.py  |  19 +--
 spacy/schemas.py                              | 110 ++++++------------
 spacy/tests/pipeline/test_initialize.py       |   5 +-
 spacy/tests/pipeline/test_pipe_factories.py   |   5 +-
 spacy/tests/test_misc.py                      |   5 +-
 5 files changed, 45 insertions(+), 99 deletions(-)

diff --git a/spacy/pipeline/_edit_tree_internals/schemas.py b/spacy/pipeline/_edit_tree_internals/schemas.py
index 89f2861ceac..d20945dc5f1 100644
--- a/spacy/pipeline/_edit_tree_internals/schemas.py
+++ b/spacy/pipeline/_edit_tree_internals/schemas.py
@@ -1,12 +1,7 @@
 from collections import defaultdict
 from typing import Any, Dict, List, Union
 
-try:
-    from pydantic.v1 import BaseModel, Field, ValidationError
-    from pydantic.v1.types import StrictBool, StrictInt, StrictStr
-except ImportError:
-    from pydantic import BaseModel, Field, ValidationError  # type: ignore
-    from pydantic.types import StrictBool, StrictInt, StrictStr  # type: ignore
+from pydantic import BaseModel, ConfigDict, Field, RootModel, StrictBool, StrictInt, StrictStr, ValidationError
 
 
 class MatchNodeSchema(BaseModel):
@@ -15,20 +10,18 @@ class MatchNodeSchema(BaseModel):
     prefix_tree: StrictInt = Field(..., title="Prefix tree")
     suffix_tree: StrictInt = Field(..., title="Suffix tree")
 
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra="forbid")
 
 
 class SubstNodeSchema(BaseModel):
     orig: Union[int, StrictStr] = Field(..., title="Original substring")
     subst: Union[int, StrictStr] = Field(..., title="Replacement substring")
 
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra="forbid")
 
 
-class EditTreeSchema(BaseModel):
-    __root__: Union[MatchNodeSchema, SubstNodeSchema]
+class EditTreeSchema(RootModel[Union[MatchNodeSchema, SubstNodeSchema]]):
+    pass
 
 
 def validate_edit_tree(obj: Dict[str, Any]) -> List[str]:
@@ -38,7 +31,7 @@ def validate_edit_tree(obj: Dict[str, Any]) -> List[str]:
     RETURNS (List[str]): A list of error messages, if available.
     """
     try:
-        EditTreeSchema.parse_obj(obj)
+        EditTreeSchema.model_validate(obj)
         return []
     except ValidationError as e:
         errors = e.errors()
diff --git a/spacy/schemas.py b/spacy/schemas.py
index fa987b90f19..c0ecee314ee 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -1,5 +1,4 @@
 import inspect
-import re
 from collections import defaultdict
 from enum import Enum
 from typing import (
@@ -16,34 +15,19 @@
     Union,
 )
 
-try:
-    from pydantic.v1 import (
-        BaseModel,
-        ConstrainedStr,
-        Field,
-        StrictBool,
-        StrictFloat,
-        StrictInt,
-        StrictStr,
-        ValidationError,
-        create_model,
-        validator,
-    )
-    from pydantic.v1.main import ModelMetaclass
-except ImportError:
-    from pydantic import (  # type: ignore
-        BaseModel,
-        ConstrainedStr,
-        Field,
-        StrictBool,
-        StrictFloat,
-        StrictInt,
-        StrictStr,
-        ValidationError,
-        create_model,
-        validator,
-    )
-    from pydantic.main import ModelMetaclass  # type: ignore
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    StrictBool,
+    StrictFloat,
+    StrictInt,
+    StrictStr,
+    ValidationError,
+    constr,
+    create_model,
+    field_validator,
+)
 from thinc.api import ConfigValidationError, Model, Optimizer
 from thinc.config import Promise
 
@@ -89,14 +73,9 @@ def validate(schema: Type[BaseModel], obj: Dict[str, Any]) -> List[str]:
 # Initialization
 
 
-class ArgSchemaConfig:
-    extra = "forbid"
-    arbitrary_types_allowed = True
-
+ArgSchemaConfig = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
 
-class ArgSchemaConfigExtra:
-    extra = "forbid"
-    arbitrary_types_allowed = True
+ArgSchemaConfigExtra = ConfigDict(extra="allow", arbitrary_types_allowed=True)
 
 
 def get_arg_model(
@@ -105,7 +84,7 @@ def get_arg_model(
     exclude: Iterable[str] = tuple(),
     name: str = "ArgModel",
     strict: bool = True,
-) -> ModelMetaclass:
+) -> type[BaseModel]:
     """Generate a pydantic model for function arguments.
 
     func (Callable): The function to generate the schema for.
@@ -113,7 +92,7 @@ def get_arg_model(
     name (str): Name of created model class.
     strict (bool): Don't allow extra arguments if no variable keyword arguments
         are allowed on the function.
-    RETURNS (ModelMetaclass): A pydantic model.
+    RETURNS (type[BaseModel]): A pydantic model.
     """
     sig_args = {}
     try:
@@ -167,7 +146,7 @@ def validate_init_settings(
     """
     schema = get_arg_model(func, exclude=exclude, name="InitArgModel")
     try:
-        return schema(**settings).dict()
+        return schema.model_validate(settings).model_dump()
     except ValidationError as e:
         block = "initialize" if not section else f"initialize.{section}"
         title = f"Error validating initialization settings in [{block}]"
@@ -228,11 +207,10 @@ class TokenPatternString(BaseModel):
         None, alias="fuzzy9"
     )
 
-    class Config:
-        extra = "forbid"
-        allow_population_by_field_name = True  # allow alias and field name
+    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 
-    @validator("*", pre=True, each_item=True, allow_reuse=True)
+    @field_validator("*", mode="before")
+    @classmethod
     def raise_for_none(cls, v):
         if v is None:
             raise ValueError("None / null is not allowed")
@@ -253,11 +231,10 @@ class TokenPatternNumber(BaseModel):
     GT: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias=">")
     LT: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="<")
 
-    class Config:
-        extra = "forbid"
-        allow_population_by_field_name = True  # allow alias and field name
+    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 
-    @validator("*", pre=True, each_item=True, allow_reuse=True)
+    @field_validator("*", mode="before")
+    @classmethod
     def raise_for_none(cls, v):
         if v is None:
             raise ValueError("None / null is not allowed")
@@ -271,8 +248,7 @@ class TokenPatternOperatorSimple(str, Enum):
     exclamation: StrictStr = StrictStr("!")
 
 
-class TokenPatternOperatorMinMax(ConstrainedStr):
-    regex = re.compile(r"^({\d+}|{\d+,\d*}|{\d*,\d+})$")
+TokenPatternOperatorMinMax = constr(pattern=r"^(\{\d+\}|\{\d+,\d*\}|\{\d*,\d+\})$")
 
 
 TokenPatternOperator = Union[TokenPatternOperatorSimple, TokenPatternOperatorMinMax]
@@ -323,12 +299,10 @@ class TokenPattern(BaseModel):
     op: Optional[TokenPatternOperator] = None
     underscore: Optional[Dict[StrictStr, UnderscoreValue]] = Field(None, alias="_")
 
-    class Config:
-        extra = "forbid"
-        allow_population_by_field_name = True
-        alias_generator = lambda value: value.upper()
+    model_config = ConfigDict(extra="forbid", populate_by_name=True, alias_generator=lambda value: value.upper())
 
-    @validator("*", pre=True, allow_reuse=True)
+    @field_validator("*", mode="before")
+    @classmethod
     def raise_for_none(cls, v):
         if v is None:
             raise ValueError("None / null is not allowed")
@@ -336,10 +310,9 @@ def raise_for_none(cls, v):
 
 
 class TokenPatternSchema(BaseModel):
-    pattern: List[TokenPattern] = Field(..., min_items=1)
+    pattern: List[TokenPattern] = Field(..., min_length=1)
 
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra="forbid")
 
 
 # Model meta
@@ -397,9 +370,7 @@ class ConfigSchemaTraining(BaseModel):
     before_update: Optional[Callable[["Language", Dict[str, Any]], None]] = Field(..., title="Optional callback that is invoked at the start of each training step")
     # fmt: on
 
-    class Config:
-        extra = "forbid"
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
 
 
 class ConfigSchemaNlp(BaseModel):
@@ -415,14 +386,11 @@ class ConfigSchemaNlp(BaseModel):
     vectors: Callable = Field(..., title="Vectors implementation")
     # fmt: on
 
-    class Config:
-        extra = "forbid"
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
 
 
 class ConfigSchemaPretrainEmpty(BaseModel):
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra="forbid")
 
 
 class ConfigSchemaPretrain(BaseModel):
@@ -439,9 +407,7 @@ class ConfigSchemaPretrain(BaseModel):
     objective: Callable[["Vocab", Model], Model] = Field(..., title="A function that creates the pretraining objective.")
     # fmt: on
 
-    class Config:
-        extra = "forbid"
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
 
 
 class ConfigSchemaInit(BaseModel):
@@ -456,9 +422,7 @@ class ConfigSchemaInit(BaseModel):
     after_init: Optional[Callable[["Language"], "Language"]] = Field(..., title="Optional callback to modify nlp object after initialization")
     # fmt: on
 
-    class Config:
-        extra = "forbid"
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
 
 
 class ConfigSchema(BaseModel):
@@ -469,9 +433,7 @@ class ConfigSchema(BaseModel):
     corpora: Dict[str, Reader]
     initialize: ConfigSchemaInit
 
-    class Config:
-        extra = "allow"
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
 
 
 CONFIG_SCHEMAS = {
diff --git a/spacy/tests/pipeline/test_initialize.py b/spacy/tests/pipeline/test_initialize.py
index 9854b391e60..546068800fa 100644
--- a/spacy/tests/pipeline/test_initialize.py
+++ b/spacy/tests/pipeline/test_initialize.py
@@ -1,9 +1,6 @@
 import pytest
 
-try:
-    from pydantic.v1 import StrictBool
-except ImportError:
-    from pydantic import StrictBool  # type: ignore
+from pydantic import StrictBool
 
 from thinc.api import ConfigValidationError
 
diff --git a/spacy/tests/pipeline/test_pipe_factories.py b/spacy/tests/pipeline/test_pipe_factories.py
index b355379bfd0..f8f9ec4b10d 100644
--- a/spacy/tests/pipeline/test_pipe_factories.py
+++ b/spacy/tests/pipeline/test_pipe_factories.py
@@ -1,9 +1,6 @@
 import pytest
 
-try:
-    from pydantic.v1 import StrictInt, StrictStr
-except ImportError:
-    from pydantic import StrictInt, StrictStr  # type: ignore
+from pydantic import StrictInt, StrictStr
 
 from thinc.api import ConfigValidationError, Linear, Model
 
diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index d2a41ff0fed..2c26952891d 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -4,10 +4,7 @@
 
 import pytest
 
-try:
-    from pydantic.v1 import ValidationError
-except ImportError:
-    from pydantic import ValidationError  # type: ignore
+from pydantic import ValidationError
 
 from thinc.api import (
     Config,

From f835985f3f517d7a83dea6b71716a2b32870fdd1 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 20 Mar 2026 14:38:21 +0100
Subject: [PATCH 19/42] Increment version

---
 spacy/about.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/about.py b/spacy/about.py
index a93d91532b6..df33ff96bfe 100644
--- a/spacy/about.py
+++ b/spacy/about.py
@@ -1,5 +1,5 @@
 # fmt: off
 __title__ = "spacy"
-__version__ = "3.8.11"
+__version__ = "3.8.12"
 __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"

From b8bade1a570bbbc41e0d0ebf6ef5300c5d064b3d Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 20 Mar 2026 15:03:41 +0100
Subject: [PATCH 20/42] Update spaCy to pydantic v2 native API

- Replace pydantic.v1 compat imports with direct v2 imports
- Replace class Config with model_config = ConfigDict(...)
- Replace @validator with @field_validator
- Replace ConstrainedStr with constr()
- Replace min_items with min_length, allow_population_by_field_name
  with populate_by_name
- Add model_rebuild() calls in __init__.py for forward ref resolution
- Update test error type assertions for v2
---
 spacy/__init__.py                       | 21 +++++++++++++++++++++
 spacy/tests/pipeline/test_initialize.py |  4 ++--
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/spacy/__init__.py b/spacy/__init__.py
index 8bb8b49498e..eeab3773591 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -21,6 +21,27 @@
 from .util import logger, registry  # noqa: F401
 from .vocab import Vocab
 
+# Rebuild pydantic v2 schemas that use forward references to Language/Vocab
+from .schemas import (  # noqa: F401
+    ConfigSchema,
+    ConfigSchemaInit,
+    ConfigSchemaNlp,
+    ConfigSchemaPretrain,
+    ConfigSchemaTraining,
+)
+
+from .training import Example  # noqa: F401
+
+_rebuild_ns = {"Language": Language, "Vocab": Vocab, "Example": Example}
+for _schema in (
+    ConfigSchemaTraining,
+    ConfigSchemaNlp,
+    ConfigSchemaPretrain,
+    ConfigSchemaInit,
+    ConfigSchema,
+):
+    _schema.model_rebuild(_types_namespace=_rebuild_ns)
+
 if sys.maxunicode == 65535:
     raise SystemError(Errors.E130)
 
diff --git a/spacy/tests/pipeline/test_initialize.py b/spacy/tests/pipeline/test_initialize.py
index 546068800fa..acb1c6faa45 100644
--- a/spacy/tests/pipeline/test_initialize.py
+++ b/spacy/tests/pipeline/test_initialize.py
@@ -48,7 +48,7 @@ def initialize(
     errors = e.value.errors
     assert len(errors) == 1
     assert errors[0]["loc"] == ("custom1",)
-    assert errors[0]["type"] == "value_error.missing"
+    assert errors[0]["type"] == "missing"
     init_cfg = {
         "tokenizer": {"custom": 1},
         "components": {name: {"custom1": "x", "custom2": 1}},
@@ -60,7 +60,7 @@ def initialize(
     errors = e.value.errors
     assert len(errors) == 1
     assert errors[0]["loc"] == ("custom2",)
-    assert errors[0]["type"] == "value_error.strictbool"
+    assert errors[0]["type"] == "bool_type"
     init_cfg = {
         "tokenizer": {"custom": 1},
         "components": {name: {"custom1": "x"}},

From 501ccfd5fb19d708714dc7d90dd7c9a289661ef9 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 20 Mar 2026 20:50:56 +0100
Subject: [PATCH 21/42] Fix pydantic v2 pattern validation error counts and
 attributeruler type annotation

- Update expected error counts in test_pattern_validation.py for pydantic v2
  (v2 reports errors for all union members, increasing counts for OP and
  nested pattern validation)
- Fix AttributeRulerPatternType to include List[MatcherPatternType] in
  the union (v2 is strict about nested list-of-list-of-dict types that
  v1 accepted laxly)
---
 spacy/pipeline/attributeruler.py              |  2 +-
 .../tests/matcher/test_pattern_validation.py  | 34 +++++++++----------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/spacy/pipeline/attributeruler.py b/spacy/pipeline/attributeruler.py
index cc1e2e37a64..fd037480cb2 100644
--- a/spacy/pipeline/attributeruler.py
+++ b/spacy/pipeline/attributeruler.py
@@ -19,7 +19,7 @@
 from .pipe import Pipe
 
 MatcherPatternType = List[Dict[Union[int, str], Any]]
-AttributeRulerPatternType = Dict[str, Union[MatcherPatternType, Dict, int]]
+AttributeRulerPatternType = Dict[str, Union[List[MatcherPatternType], MatcherPatternType, Dict, int]]
 TagMapType = Dict[str, Dict[Union[int, str], Union[int, str]]]
 MorphRulesType = Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]
 
diff --git a/spacy/tests/matcher/test_pattern_validation.py b/spacy/tests/matcher/test_pattern_validation.py
index 45f9f4ee718..554522fe8a1 100644
--- a/spacy/tests/matcher/test_pattern_validation.py
+++ b/spacy/tests/matcher/test_pattern_validation.py
@@ -10,30 +10,30 @@
     # Bad patterns flagged in all cases
     ([{"XX": "foo"}], 1, 1),
     ([{"IS_ALPHA": {"==": True}}, {"LIKE_NUM": None}], 2, 1),
-    ([{"IS_PUNCT": True, "OP": "$"}], 1, 1),
+    ([{"IS_PUNCT": True, "OP": "$"}], 2, 1),  # v2: union reports 2 errors (enum + pattern)
     ([{"_": "foo"}], 1, 1),
     ('[{"TEXT": "foo"}, {"LOWER": "bar"}]', 1, 1),
     ([{"ENT_IOB": "foo"}], 1, 1),
     ([1, 2, 3], 3, 1),
-    ([{"TEXT": "foo", "OP": "{,}"}], 1, 1),
-    ([{"TEXT": "foo", "OP": "{,4}4"}], 1, 1),
-    ([{"TEXT": "foo", "OP": "{a,3}"}], 1, 1),
-    ([{"TEXT": "foo", "OP": "{a}"}], 1, 1),
-    ([{"TEXT": "foo", "OP": "{,a}"}], 1, 1),
-    ([{"TEXT": "foo", "OP": "{1,2,3}"}], 1, 1),
-    ([{"TEXT": "foo", "OP": "{1, 3}"}], 1, 1),
-    ([{"TEXT": "foo", "OP": "{-2}"}], 1, 1),
+    ([{"TEXT": "foo", "OP": "{,}"}], 2, 1),  # v2: union reports 2 errors
+    ([{"TEXT": "foo", "OP": "{,4}4"}], 2, 1),  # v2: union reports 2 errors
+    ([{"TEXT": "foo", "OP": "{a,3}"}], 2, 1),  # v2: union reports 2 errors
+    ([{"TEXT": "foo", "OP": "{a}"}], 2, 1),  # v2: union reports 2 errors
+    ([{"TEXT": "foo", "OP": "{,a}"}], 2, 1),  # v2: union reports 2 errors
+    ([{"TEXT": "foo", "OP": "{1,2,3}"}], 2, 1),  # v2: union reports 2 errors
+    ([{"TEXT": "foo", "OP": "{1, 3}"}], 2, 1),  # v2: union reports 2 errors
+    ([{"TEXT": "foo", "OP": "{-2}"}], 2, 1),  # v2: union reports 2 errors
     # Bad patterns flagged outside of Matcher
-    ([{"_": {"foo": "bar", "baz": {"IN": "foo"}}}], 2, 0),  # prev: (1, 0)
+    ([{"_": {"foo": "bar", "baz": {"IN": "foo"}}}], 7, 0),  # v2: more detailed union errors
     # Bad patterns not flagged with minimal checks
-    ([{"LENGTH": "2", "TEXT": 2}, {"LOWER": "test"}], 2, 0),
-    ([{"LENGTH": {"IN": [1, 2, "3"]}}, {"POS": {"IN": "VERB"}}], 4, 0),  # prev: (2, 0)
-    ([{"LENGTH": {"VALUE": 5}}], 2, 0),  # prev: (1, 0)
-    ([{"TEXT": {"VALUE": "foo"}}], 2, 0),  # prev: (1, 0)
+    ([{"LENGTH": "2", "TEXT": 2}, {"LOWER": "test"}], 5, 0),  # v2: more detailed union errors
+    ([{"LENGTH": {"IN": [1, 2, "3"]}}, {"POS": {"IN": "VERB"}}], 5, 0),  # v2: more detailed union errors
+    ([{"LENGTH": {"VALUE": 5}}], 3, 0),  # v2: more detailed union errors
+    ([{"TEXT": {"VALUE": "foo"}}], 2, 0),
     ([{"IS_DIGIT": -1}], 1, 0),
-    ([{"ORTH": -1}], 1, 0),
-    ([{"ENT_ID": -1}], 1, 0),
-    ([{"ENT_KB_ID": -1}], 1, 0),
+    ([{"ORTH": -1}], 2, 0),  # v2: union reports 2 errors
+    ([{"ENT_ID": -1}], 2, 0),  # v2: union reports 2 errors
+    ([{"ENT_KB_ID": -1}], 2, 0),  # v2: union reports 2 errors
     # Good patterns
     ([{"TEXT": "foo"}, {"LOWER": "bar"}], 0, 0),
     ([{"LEMMA": {"IN": ["love", "like"]}}, {"POS": "DET", "OP": "?"}], 0, 0),

From fd99ed312b792931d773d3334c92b13ca8c07961 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sat, 21 Mar 2026 08:36:11 +0100
Subject: [PATCH 22/42] Replace black, isort, and flake8 with ruff for linting
 and formatting

- requirements.txt: remove black, isort, flake8; add ruff
- pyproject.toml: replace [tool.isort] with [tool.ruff] config
- setup.cfg: remove [flake8] section (rules moved to pyproject.toml)
- .pre-commit-config.yaml: replace black/flake8 hooks with ruff/ruff-format
---
 .pre-commit-config.yaml | 16 +++++-----------
 pyproject.toml          |  8 ++++++--
 requirements.txt        |  4 +---
 setup.cfg               | 10 ----------
 4 files changed, 12 insertions(+), 26 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e2c5e98fd97..7d57c3a0c56 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,13 +1,7 @@
 repos:
--   repo: https://github.com/ambv/black
-    rev: 22.3.0
+-   repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.9.0
     hooks:
-    - id: black
-      language_version: python3.7
-      additional_dependencies: ['click==8.0.4']
--   repo: https://github.com/pycqa/flake8
-    rev: 5.0.4
-    hooks:
-    - id: flake8
-      args:
-        - "--config=setup.cfg"
+    - id: ruff
+      args: ['--fix']
+    - id: ruff-format
diff --git a/pyproject.toml b/pyproject.toml
index 64b71429e6e..be47f69a005 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,5 +62,9 @@ repair-wheel-command = "delocate-wheel --require-archs {delocate_archs} -w {dest
 [tool.cibuildwheel.pyodide]
 
 
-[tool.isort]
-profile = "black"
+[tool.ruff]
+line-length = 88
+
+[tool.ruff.lint]
+select = ["E", "F", "W", "C", "B", "B9"]
+ignore = ["E203", "E266", "E501", "E731", "E741", "F541"]
diff --git a/requirements.txt b/requirements.txt
index 895bc3f7c07..866128b31b6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -26,14 +26,12 @@ cython>=3.0,<4.0
 pytest>=5.2.0,!=7.1.0
 pytest-timeout>=1.3.0,<2.0.0
 mock>=2.0.0,<3.0.0
-flake8>=3.8.0,<6.0.0
 hypothesis>=3.27.0,<7.0.0
 mypy>=1.5.0,<1.6.0; platform_machine != "aarch64" and python_version >= "3.8"
 types-mock>=0.1.1
 types-setuptools>=57.0.0
 types-requests
 types-setuptools>=57.0.0
-black>=25.0.0
+ruff>=0.9.0
 cython-lint>=0.15.0
-isort>=5.0,<6.0
 confection>=0.0.4,<1.0.0
diff --git a/setup.cfg b/setup.cfg
index 585c2694fe2..1ef8e303125 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -131,16 +131,6 @@ universal = false
 [sdist]
 formats = gztar
 
-[flake8]
-ignore = E203, E266, E501, E731, W503, E741, F541
-max-line-length = 80
-select = B,C,E,F,W,T4,B9
-exclude =
-    .env,
-    .git,
-    __pycache__,
-    _tokenizer_exceptions_list.py,
-
 [tool:pytest]
 markers =
     slow: mark a test as slow

From adeb1620ec1082a0e4ebf0d052587236457bd9b0 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sat, 21 Mar 2026 08:36:53 +0100
Subject: [PATCH 23/42] Remove W503 from ruff ignore list (not a valid ruff
 rule)

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index be47f69a005..fe2f45eb80a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -67,7 +67,7 @@ line-length = 88
 
 [tool.ruff.lint]
 select = ["E", "F", "W", "C", "B", "B9"]
-ignore = ["E203", "E266", "E501", "E731", "W503", "E741", "F541"]
+ignore = ["E203", "E266", "E501", "E731", "E741", "F541"]
 
 [tool.ruff.lint.isort]
 profile = "black"

From a7f629bb917d0a440cf6f236bb06fd5f1e88acfb Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sat, 21 Mar 2026 08:37:22 +0100
Subject: [PATCH 24/42] Fix ruff isort config: replace unsupported profile with
 equivalent settings

---
 pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index fe2f45eb80a..9e6cab69da3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -70,4 +70,5 @@ select = ["E", "F", "W", "C", "B", "B9"]
 ignore = ["E203", "E266", "E501", "E731", "E741", "F541"]
 
 [tool.ruff.lint.isort]
-profile = "black"
+combine-as-imports = true
+split-on-trailing-comma = true

From 32c4b638ae0e042264fadc3ceb3ab67147c7b6f5 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sat, 21 Mar 2026 08:38:53 +0100
Subject: [PATCH 25/42] Format with ruff

---
 spacy/cli/apply.py                            |  15 +-
 spacy/cli/assemble.py                         |  23 +-
 spacy/cli/benchmark_speed.py                  |  28 ++-
 spacy/cli/convert.py                          |  52 ++++-
 spacy/cli/debug_config.py                     |  26 ++-
 spacy/cli/debug_data.py                       |  29 ++-
 spacy/cli/debug_diff.py                       |  36 +++-
 spacy/cli/debug_model.py                      |  18 +-
 spacy/cli/download.py                         |  13 +-
 spacy/cli/evaluate.py                         |  45 +++-
 spacy/cli/find_function.py                    |   4 +-
 spacy/cli/find_threshold.py                   |  38 +++-
 spacy/cli/info.py                             |  22 +-
 spacy/cli/init_config.py                      |  72 +++++--
 spacy/cli/init_pipeline.py                    |  83 ++++++--
 spacy/cli/package.py                          |  68 ++++--
 spacy/cli/pretrain.py                         |  29 ++-
 spacy/cli/profile.py                          |  11 +-
 spacy/cli/train.py                            |  29 ++-
 spacy/lang/af/stop_words.py                   |   6 +-
 spacy/lang/am/lex_attrs.py                    |   2 +-
 spacy/lang/am/stop_words.py                   |   6 +-
 spacy/lang/ar/lex_attrs.py                    |  12 +-
 spacy/lang/ar/stop_words.py                   |   6 +-
 spacy/lang/az/stop_words.py                   |   6 +-
 spacy/lang/bg/stop_words.py                   |   6 +-
 spacy/lang/bn/stop_words.py                   |   6 +-
 spacy/lang/bo/stop_words.py                   |   6 +-
 spacy/lang/ca/stop_words.py                   |   6 +-
 spacy/lang/cs/stop_words.py                   |   6 +-
 spacy/lang/da/stop_words.py                   |   6 +-
 spacy/lang/de/stop_words.py                   |   6 +-
 spacy/lang/dsb/stop_words.py                  |   6 +-
 spacy/lang/el/stop_words.py                   |   6 +-
 spacy/lang/el/tokenizer_exceptions.py         |   1 -
 spacy/lang/en/stop_words.py                   |   6 +-
 spacy/lang/es/lemmatizer.py                   |   5 +-
 spacy/lang/es/stop_words.py                   |   6 +-
 spacy/lang/et/stop_words.py                   |   6 +-
 spacy/lang/eu/stop_words.py                   |   6 +-
 spacy/lang/fa/lex_attrs.py                    |  12 +-
 spacy/lang/fa/stop_words.py                   |   6 +-
 spacy/lang/fi/stop_words.py                   |   6 +-
 spacy/lang/fr/lex_attrs.py                    |  12 +-
 spacy/lang/fr/stop_words.py                   |   6 +-
 spacy/lang/ga/stop_words.py                   |   6 +-
 spacy/lang/gd/stop_words.py                   |   6 +-
 spacy/lang/grc/stop_words.py                  |   6 +-
 spacy/lang/gu/stop_words.py                   |   6 +-
 spacy/lang/he/stop_words.py                   |   6 +-
 spacy/lang/hi/stop_words.py                   |   6 +-
 spacy/lang/hr/stop_words.py                   |   6 +-
 spacy/lang/hsb/stop_words.py                  |   6 +-
 spacy/lang/ht/lex_attrs.py                    |  12 +-
 spacy/lang/ht/stop_words.py                   |   6 +-
 spacy/lang/hu/stop_words.py                   |   6 +-
 spacy/lang/hy/stop_words.py                   |   6 +-
 spacy/lang/id/_tokenizer_exceptions_list.py   |   6 +-
 spacy/lang/id/stop_words.py                   |   6 +-
 spacy/lang/id/tokenizer_exceptions.py         |   4 +-
 spacy/lang/is/stop_words.py                   |   6 +-
 spacy/lang/it/stop_words.py                   |   6 +-
 spacy/lang/ja/stop_words.py                   |   6 +-
 spacy/lang/kmr/stop_words.py                  |   6 +-
 spacy/lang/kn/stop_words.py                   |   6 +-
 spacy/lang/ko/stop_words.py                   |   6 +-
 spacy/lang/ky/stop_words.py                   |   6 +-
 spacy/lang/la/stop_words.py                   |   6 +-
 spacy/lang/lb/lex_attrs.py                    |  12 +-
 spacy/lang/lb/stop_words.py                   |   6 +-
 spacy/lang/lg/stop_words.py                   |   6 +-
 spacy/lang/lij/stop_words.py                  |   6 +-
 spacy/lang/lv/stop_words.py                   |   6 +-
 spacy/lang/mk/stop_words.py                   |   6 +-
 spacy/lang/ml/stop_words.py                   |   6 +-
 spacy/lang/mr/stop_words.py                   |   6 +-
 spacy/lang/ms/_tokenizer_exceptions_list.py   |   6 +-
 spacy/lang/ms/examples.py                     |   2 +-
 spacy/lang/ms/stop_words.py                   |   6 +-
 spacy/lang/nb/stop_words.py                   |   6 +-
 spacy/lang/ne/stop_words.py                   |   6 +-
 spacy/lang/nl/lex_attrs.py                    |  12 +-
 spacy/lang/nl/stop_words.py                   |   6 +-
 spacy/lang/pl/stop_words.py                   |   6 +-
 spacy/lang/pt/stop_words.py                   |   6 +-
 spacy/lang/ro/lex_attrs.py                    |  12 +-
 spacy/lang/ro/stop_words.py                   |   6 +-
 spacy/lang/ru/lex_attrs.py                    |   8 +-
 spacy/lang/ru/stop_words.py                   |   6 +-
 spacy/lang/sa/stop_words.py                   |   6 +-
 spacy/lang/si/stop_words.py                   |   6 +-
 spacy/lang/sk/stop_words.py                   |   6 +-
 spacy/lang/sl/lex_attrs.py                    |  18 +-
 spacy/lang/sl/stop_words.py                   |   6 +-
 spacy/lang/sq/stop_words.py                   |   6 +-
 spacy/lang/sr/stop_words.py                   |   6 +-
 spacy/lang/sv/stop_words.py                   |   6 +-
 spacy/lang/ta/stop_words.py                   |   6 +-
 spacy/lang/te/stop_words.py                   |   6 +-
 spacy/lang/th/stop_words.py                   |   6 +-
 spacy/lang/ti/stop_words.py                   |   6 +-
 spacy/lang/tl/stop_words.py                   |   6 +-
 spacy/lang/tn/stop_words.py                   |   6 +-
 spacy/lang/tokenizer_exceptions.py            |  16 +-
 spacy/lang/tr/stop_words.py                   |   6 +-
 spacy/lang/tt/stop_words.py                   |   6 +-
 spacy/lang/uk/stop_words.py                   |   6 +-
 spacy/lang/ur/lex_attrs.py                    |   4 +-
 spacy/lang/ur/stop_words.py                   |   6 +-
 spacy/lang/vi/stop_words.py                   |   6 +-
 spacy/lang/zh/stop_words.py                   |   6 +-
 spacy/language.py                             |  10 +-
 spacy/matcher/dependencymatcher.pyi           |   6 +-
 spacy/matcher/matcher.pyi                     |   6 +-
 spacy/ml/models/entity_linker.py              |   6 +-
 spacy/ml/staticvectors.py                     |   2 +-
 .../pipeline/_edit_tree_internals/schemas.py  |  11 +-
 spacy/pipeline/attributeruler.py              |   7 +-
 spacy/schemas.py                              |   6 +-
 spacy/tests/lang/et/test_tokenizer.py         |   3 +-
 .../tests/matcher/test_pattern_validation.py  |  24 ++-
 spacy/tests/package/test_requirements.py      |   6 +-
 spacy/tests/pipeline/test_entity_linker.py    |  12 +-
 spacy/tests/pipeline/test_sentencizer.py      |  40 +++-
 spacy/tests/pipeline/test_tok2vec.py          |  58 ++++-
 spacy/tests/test_cli.py                       | 198 ++++++++++++++++--
 spacy/tests/test_factory_imports.py           |  12 +-
 spacy/tests/test_factory_registrations.py     |   6 +-
 spacy/tests/test_registry_population.py       |   6 +-
 spacy/tokens/doc.pyi                          |  10 +-
 spacy/tokens/span.pyi                         |   8 +-
 spacy/tokens/span_group.pyi                   |   2 +-
 spacy/tokens/token.pyi                        |   8 +-
 spacy/training/batchers.py                    |   8 +-
 .../training/converters/conll_ner_to_docs.py  |   3 +-
 spacy/training/converters/conllu_to_docs.py   |   2 +-
 spacy/training/loggers.py                     |   2 +-
 spacy/ty.py                                   |   4 +-
 spacy/util.py                                 |   4 +-
 139 files changed, 1279 insertions(+), 432 deletions(-)

diff --git a/spacy/cli/apply.py b/spacy/cli/apply.py
index ffd8105060a..7671026f488 100644
--- a/spacy/cli/apply.py
+++ b/spacy/cli/apply.py
@@ -22,7 +22,7 @@
 
 out_help = "Path to save the resulting .spacy file"
 code_help = (
-    "Path to Python file with additional " "code (registered functions) to be imported"
+    "Path to Python file with additional code (registered functions) to be imported"
 )
 gold_help = "Use gold preprocessing provided in the .spacy files"
 force_msg = (
@@ -72,11 +72,15 @@ def apply_cli(
     data_path: Path = Arg(..., help=path_help, exists=True),
     output_file: Path = Arg(..., help=out_help, dir_okay=False),
     code_path: Optional[Path] = Opt(None, "--code", "-c", help=code_help),
-    text_key: str = Opt("text", "--text-key", "-tk", help="Key containing text string for JSONL"),
-    force_overwrite: bool = Opt(False, "--force", "-F", help="Force overwriting the output file"),
+    text_key: str = Opt(
+        "text", "--text-key", "-tk", help="Key containing text string for JSONL"
+    ),
+    force_overwrite: bool = Opt(
+        False, "--force", "-F", help="Force overwriting the output file"
+    ),
     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU."),
     batch_size: int = Opt(1, "--batch-size", "-b", help="Batch size."),
-    n_process: int = Opt(1, "--n-process", "-n", help="number of processors to use.")
+    n_process: int = Opt(1, "--n-process", "-n", help="number of processors to use."),
 ):
     """
     Apply a trained pipeline to documents to get predictions.
@@ -114,8 +118,7 @@ def apply(
     if len(paths) == 0:
         docbin.to_disk(output_file)
         msg.warn(
-            "Did not find data to process,"
-            f" {data_path} seems to be an empty directory."
+            f"Did not find data to process, {data_path} seems to be an empty directory."
         )
         return
     nlp = load_model(model)
diff --git a/spacy/cli/assemble.py b/spacy/cli/assemble.py
index f74bbacb555..bc97a9d594f 100644
--- a/spacy/cli/assemble.py
+++ b/spacy/cli/assemble.py
@@ -24,10 +24,25 @@
 def assemble_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
-    output_path: Path = Arg(..., help="Output directory to store assembled pipeline in"),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, allow_dash=True
+    ),
+    output_path: Path = Arg(
+        ..., help="Output directory to store assembled pipeline in"
+    ),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
+    verbose: bool = Opt(
+        False,
+        "--verbose",
+        "-V",
+        "-VV",
+        help="Display more information for debugging purposes",
+    ),
     # fmt: on
 ):
     """
diff --git a/spacy/cli/benchmark_speed.py b/spacy/cli/benchmark_speed.py
index 4dd10049cda..052e7d43416 100644
--- a/spacy/cli/benchmark_speed.py
+++ b/spacy/cli/benchmark_speed.py
@@ -24,13 +24,29 @@ def benchmark_speed_cli(
     # fmt: off
     ctx: typer.Context,
     model: str = Arg(..., help="Model name or path"),
-    data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True),
-    batch_size: Optional[int] = Opt(None, "--batch-size", "-b", min=1, help="Override the pipeline batch size"),
+    data_path: Path = Arg(
+        ..., help="Location of binary evaluation data in .spacy format", exists=True
+    ),
+    batch_size: Optional[int] = Opt(
+        None, "--batch-size", "-b", min=1, help="Override the pipeline batch size"
+    ),
     no_shuffle: bool = Opt(False, "--no-shuffle", help="Do not shuffle benchmark data"),
     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
-    n_batches: int = Opt(50, "--batches", help="Minimum number of batches to benchmark", min=30,),
-    warmup_epochs: int = Opt(3, "--warmup", "-w", min=0, help="Number of iterations over the data for warmup"),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+    n_batches: int = Opt(
+        50,
+        "--batches",
+        help="Minimum number of batches to benchmark",
+        min=30,
+    ),
+    warmup_epochs: int = Opt(
+        3, "--warmup", "-w", min=0, help="Number of iterations over the data for warmup"
+    ),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
     # fmt: on
 ):
     """
@@ -151,7 +167,7 @@ def print_mean_with_ci(sample: numpy.ndarray):
     low = bootstrap_means[int(len(bootstrap_means) * 0.025)]
     high = bootstrap_means[int(len(bootstrap_means) * 0.975)]
 
-    print(f"Mean: {mean:.1f} words/s (95% CI: {low-mean:.1f} +{high-mean:.1f})")
+    print(f"Mean: {mean:.1f} words/s (95% CI: {low - mean:.1f} +{high - mean:.1f})")
 
 
 def print_outliers(sample: numpy.ndarray):
diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py
index a66a68133b3..140999207f3 100644
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@@ -48,17 +48,47 @@ class FileTypes(str, Enum):
 def convert_cli(
     # fmt: off
     input_path: str = Arg(..., help="Input file or directory", exists=True),
-    output_dir: Path = Arg("-", help="Output directory. '-' for stdout.", allow_dash=True, exists=True),
-    file_type: FileTypes = Opt("spacy", "--file-type", "-t", help="Type of data to produce"),
-    n_sents: int = Opt(1, "--n-sents", "-n", help="Number of sentences per doc (0 to disable)"),
-    seg_sents: bool = Opt(False, "--seg-sents", "-s", help="Segment sentences (for -c ner)"),
-    model: Optional[str] = Opt(None, "--model", "--base", "-b", help="Trained spaCy pipeline for sentence segmentation to use as base (for --seg-sents)"),
-    morphology: bool = Opt(False, "--morphology", "-m", help="Enable appending morphology to tags"),
-    merge_subtokens: bool = Opt(False, "--merge-subtokens", "-T", help="Merge CoNLL-U subtokens"),
-    converter: str = Opt(AUTO, "--converter", "-c", help=f"Converter: {tuple(CONVERTERS.keys())}"),
-    ner_map: Optional[Path] = Opt(None, "--ner-map", "-nm", help="NER tag mapping (as JSON-encoded dict of entity types)", exists=True),
-    lang: Optional[str] = Opt(None, "--lang", "-l", help="Language (if tokenizer required)"),
-    concatenate: bool = Opt(None, "--concatenate", "-C", help="Concatenate output to a single file"),
+    output_dir: Path = Arg(
+        "-", help="Output directory. '-' for stdout.", allow_dash=True, exists=True
+    ),
+    file_type: FileTypes = Opt(
+        "spacy", "--file-type", "-t", help="Type of data to produce"
+    ),
+    n_sents: int = Opt(
+        1, "--n-sents", "-n", help="Number of sentences per doc (0 to disable)"
+    ),
+    seg_sents: bool = Opt(
+        False, "--seg-sents", "-s", help="Segment sentences (for -c ner)"
+    ),
+    model: Optional[str] = Opt(
+        None,
+        "--model",
+        "--base",
+        "-b",
+        help="Trained spaCy pipeline for sentence segmentation to use as base (for --seg-sents)",
+    ),
+    morphology: bool = Opt(
+        False, "--morphology", "-m", help="Enable appending morphology to tags"
+    ),
+    merge_subtokens: bool = Opt(
+        False, "--merge-subtokens", "-T", help="Merge CoNLL-U subtokens"
+    ),
+    converter: str = Opt(
+        AUTO, "--converter", "-c", help=f"Converter: {tuple(CONVERTERS.keys())}"
+    ),
+    ner_map: Optional[Path] = Opt(
+        None,
+        "--ner-map",
+        "-nm",
+        help="NER tag mapping (as JSON-encoded dict of entity types)",
+        exists=True,
+    ),
+    lang: Optional[str] = Opt(
+        None, "--lang", "-l", help="Language (if tokenizer required)"
+    ),
+    concatenate: bool = Opt(
+        None, "--concatenate", "-C", help="Concatenate output to a single file"
+    ),
     # fmt: on
 ):
     """
diff --git a/spacy/cli/debug_config.py b/spacy/cli/debug_config.py
index 0e5382cd956..f049d7fd149 100644
--- a/spacy/cli/debug_config.py
+++ b/spacy/cli/debug_config.py
@@ -26,10 +26,28 @@
 def debug_config_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
-    code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    show_funcs: bool = Opt(False, "--show-functions", "-F", help="Show an overview of all registered functions used in the config and where they come from (modules, files etc.)"),
-    show_vars: bool = Opt(False, "--show-variables", "-V", help="Show an overview of all variables referenced in the config and their values. This will also reflect variables overwritten on the CLI.")
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, allow_dash=True
+    ),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code-path",
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
+    show_funcs: bool = Opt(
+        False,
+        "--show-functions",
+        "-F",
+        help="Show an overview of all registered functions used in the config and where they come from (modules, files etc.)",
+    ),
+    show_vars: bool = Opt(
+        False,
+        "--show-variables",
+        "-V",
+        help="Show an overview of all variables referenced in the config and their values. This will also reflect variables overwritten on the CLI.",
+    ),
     # fmt: on
 ):
     """Debug a config file and show validation errors. The command will
diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index 1c9c0e0ea3a..df52250cf90 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -71,11 +71,28 @@
 def debug_data_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
-    code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    ignore_warnings: bool = Opt(False, "--ignore-warnings", "-IW", help="Ignore warnings, only show stats and errors"),
-    verbose: bool = Opt(False, "--verbose", "-V", help="Print additional information and explanations"),
-    no_format: bool = Opt(False, "--no-format", "-NF", help="Don't pretty-print the results"),
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, allow_dash=True
+    ),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code-path",
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
+    ignore_warnings: bool = Opt(
+        False,
+        "--ignore-warnings",
+        "-IW",
+        help="Ignore warnings, only show stats and errors",
+    ),
+    verbose: bool = Opt(
+        False, "--verbose", "-V", help="Print additional information and explanations"
+    ),
+    no_format: bool = Opt(
+        False, "--no-format", "-NF", help="Don't pretty-print the results"
+    ),
     # fmt: on
 ):
     """
@@ -708,7 +725,7 @@ def debug_data(
         if len(dev_not_train) != 0:
             pct = len(dev_not_train) / len(trees_dev)
             msg.info(
-                f"{len(dev_not_train)} lemmatizer trees ({pct*100:.1f}% of dev trees)"
+                f"{len(dev_not_train)} lemmatizer trees ({pct * 100:.1f}% of dev trees)"
                 " were found exclusively in the dev data."
             )
         else:
diff --git a/spacy/cli/debug_diff.py b/spacy/cli/debug_diff.py
index c53b0acab50..08c31b32df3 100644
--- a/spacy/cli/debug_diff.py
+++ b/spacy/cli/debug_diff.py
@@ -17,12 +17,36 @@
 def debug_diff_cli(
     # fmt: off
     ctx: typer.Context,
-    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
-    compare_to: Optional[Path] = Opt(None, help="Path to a config file to diff against, or `None` to compare against default settings", exists=True, allow_dash=True),
-    optimize: Optimizations = Opt(Optimizations.efficiency.value, "--optimize", "-o", help="Whether the user config was optimized for efficiency or accuracy. Only relevant when comparing against the default config."),
-    gpu: bool = Opt(False, "--gpu", "-G", help="Whether the original config can run on a GPU. Only relevant when comparing against the default config."),
-    pretraining: bool = Opt(False, "--pretraining", "--pt", help="Whether to compare on a config with pretraining involved. Only relevant when comparing against the default config."),
-    markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues")
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, allow_dash=True
+    ),
+    compare_to: Optional[Path] = Opt(
+        None,
+        help="Path to a config file to diff against, or `None` to compare against default settings",
+        exists=True,
+        allow_dash=True,
+    ),
+    optimize: Optimizations = Opt(
+        Optimizations.efficiency.value,
+        "--optimize",
+        "-o",
+        help="Whether the user config was optimized for efficiency or accuracy. Only relevant when comparing against the default config.",
+    ),
+    gpu: bool = Opt(
+        False,
+        "--gpu",
+        "-G",
+        help="Whether the original config can run on a GPU. Only relevant when comparing against the default config.",
+    ),
+    pretraining: bool = Opt(
+        False,
+        "--pretraining",
+        "--pt",
+        help="Whether to compare on a config with pretraining involved. Only relevant when comparing against the default config.",
+    ),
+    markdown: bool = Opt(
+        False, "--markdown", "-md", help="Generate Markdown for GitHub issues"
+    ),
     # fmt: on
 ):
     """Show a diff of a config file with respect to spaCy's defaults or another config file. If
diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index 3c667e42a2b..ec7ffe099d7 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -36,18 +36,26 @@
 def debug_model_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
-    component: str = Arg(..., help="Name of the pipeline component of which the model should be analysed"),
-    layers: str = Opt("", "--layers", "-l", help="Comma-separated names of layer IDs to print"),
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, allow_dash=True
+    ),
+    component: str = Arg(
+        ..., help="Name of the pipeline component of which the model should be analysed"
+    ),
+    layers: str = Opt(
+        "", "--layers", "-l", help="Comma-separated names of layer IDs to print"
+    ),
     dimensions: bool = Opt(False, "--dimensions", "-DIM", help="Show dimensions"),
     parameters: bool = Opt(False, "--parameters", "-PAR", help="Show parameters"),
     gradients: bool = Opt(False, "--gradients", "-GRAD", help="Show gradients"),
     attributes: bool = Opt(False, "--attributes", "-ATTR", help="Show attributes"),
     P0: bool = Opt(False, "--print-step0", "-P0", help="Print model before training"),
-    P1: bool = Opt(False, "--print-step1", "-P1", help="Print model after initialization"),
+    P1: bool = Opt(
+        False, "--print-step1", "-P1", help="Print model after initialization"
+    ),
     P2: bool = Opt(False, "--print-step2", "-P2", help="Print model after training"),
     P3: bool = Opt(False, "--print-step3", "-P3", help="Print final predictions"),
-    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
+    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
     # fmt: on
 ):
     """
diff --git a/spacy/cli/download.py b/spacy/cli/download.py
index 120616753b8..8a1110dcef3 100644
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@@ -28,9 +28,16 @@ def download_cli(
     # fmt: off
     ctx: typer.Context,
     model: str = Arg(..., help="Name of pipeline package to download"),
-    direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"),
-    sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel"),
-    url: str = Opt(None, "--url", "-U", help="Download from given url")
+    direct: bool = Opt(
+        False, "--direct", "-d", "-D", help="Force direct download of name + version"
+    ),
+    sdist: bool = Opt(
+        False,
+        "--sdist",
+        "-S",
+        help="Download sdist (.tar.gz) archive instead of pre-built binary wheel",
+    ),
+    url: str = Opt(None, "--url", "-U", help="Download from given url"),
     # fmt: on
 ):
     """
diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py
index 2276ca6b0d4..62131fe13ea 100644
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@@ -20,15 +20,42 @@
 def evaluate_cli(
     # fmt: off
     model: str = Arg(..., help="Model name or path"),
-    data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True),
-    output: Optional[Path] = Opt(None, "--output", "-o", help="Output JSON file for metrics", dir_okay=False),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+    data_path: Path = Arg(
+        ..., help="Location of binary evaluation data in .spacy format", exists=True
+    ),
+    output: Optional[Path] = Opt(
+        None, "--output", "-o", help="Output JSON file for metrics", dir_okay=False
+    ),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
-    gold_preproc: bool = Opt(False, "--gold-preproc", "-G", help="Use gold preprocessing"),
-    displacy_path: Optional[Path] = Opt(None, "--displacy-path", "-dp", help="Directory to output rendered parses as HTML", exists=True, file_okay=False),
-    displacy_limit: int = Opt(25, "--displacy-limit", "-dl", help="Limit of parses to render as HTML"),
-    per_component: bool = Opt(False, "--per-component", "-P", help="Return scores per component, only applicable when an output JSON file is specified."),
-    spans_key: str = Opt("sc", "--spans-key", "-sk", help="Spans key to use when evaluating Doc.spans"),
+    gold_preproc: bool = Opt(
+        False, "--gold-preproc", "-G", help="Use gold preprocessing"
+    ),
+    displacy_path: Optional[Path] = Opt(
+        None,
+        "--displacy-path",
+        "-dp",
+        help="Directory to output rendered parses as HTML",
+        exists=True,
+        file_okay=False,
+    ),
+    displacy_limit: int = Opt(
+        25, "--displacy-limit", "-dl", help="Limit of parses to render as HTML"
+    ),
+    per_component: bool = Opt(
+        False,
+        "--per-component",
+        "-P",
+        help="Return scores per component, only applicable when an output JSON file is specified.",
+    ),
+    spans_key: str = Opt(
+        "sc", "--spans-key", "-sk", help="Spans key to use when evaluating Doc.spans"
+    ),
     # fmt: on
 ):
     """
@@ -123,7 +150,7 @@ def evaluate(
                     if key == "speed":
                         results[metric] = f"{scores[key]:.0f}"
                     else:
-                        results[metric] = f"{scores[key]*100:.2f}"
+                        results[metric] = f"{scores[key] * 100:.2f}"
                 else:
                     results[metric] = "-"
                 data[re.sub(r"[\s/]", "_", key.lower())] = scores[key]
diff --git a/spacy/cli/find_function.py b/spacy/cli/find_function.py
index f99ce2adc9f..3b3b333337b 100644
--- a/spacy/cli/find_function.py
+++ b/spacy/cli/find_function.py
@@ -11,7 +11,9 @@
 def find_function_cli(
     # fmt: off
     func_name: str = Arg(..., help="Name of the registered function."),
-    registry_name: Optional[str] = Opt(None, "--registry", "-r", help="Name of the catalogue registry."),
+    registry_name: Optional[str] = Opt(
+        None, "--registry", "-r", help="Name of the catalogue registry."
+    ),
     # fmt: on
 ):
     """
diff --git a/spacy/cli/find_threshold.py b/spacy/cli/find_threshold.py
index ff7af32e6f6..d89b4c27d55 100644
--- a/spacy/cli/find_threshold.py
+++ b/spacy/cli/find_threshold.py
@@ -27,15 +27,39 @@
 def find_threshold_cli(
     # fmt: off
     model: str = Arg(..., help="Model name or path"),
-    data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True),
+    data_path: Path = Arg(
+        ..., help="Location of binary evaluation data in .spacy format", exists=True
+    ),
     pipe_name: str = Arg(..., help="Name of pipe to examine thresholds for"),
-    threshold_key: str = Arg(..., help="Key of threshold attribute in component's configuration"),
+    threshold_key: str = Arg(
+        ..., help="Key of threshold attribute in component's configuration"
+    ),
     scores_key: str = Arg(..., help="Metric to optimize"),
-    n_trials: int = Opt(_DEFAULTS["n_trials"], "--n_trials", "-n", help="Number of trials to determine optimal thresholds"),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    use_gpu: int = Opt(_DEFAULTS["use_gpu"], "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
-    gold_preproc: bool = Opt(_DEFAULTS["gold_preproc"], "--gold-preproc", "-G", help="Use gold preprocessing"),
-    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
+    n_trials: int = Opt(
+        _DEFAULTS["n_trials"],
+        "--n_trials",
+        "-n",
+        help="Number of trials to determine optimal thresholds",
+    ),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
+    use_gpu: int = Opt(
+        _DEFAULTS["use_gpu"], "--gpu-id", "-g", help="GPU ID or -1 for CPU"
+    ),
+    gold_preproc: bool = Opt(
+        _DEFAULTS["gold_preproc"], "--gold-preproc", "-G", help="Use gold preprocessing"
+    ),
+    verbose: bool = Opt(
+        False,
+        "--verbose",
+        "-V",
+        "-VV",
+        help="Display more information for debugging purposes",
+    ),
     # fmt: on
 ):
     """
diff --git a/spacy/cli/info.py b/spacy/cli/info.py
index 8bfc6b54f15..ed2394c564e 100644
--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@@ -16,10 +16,24 @@
 def info_cli(
     # fmt: off
     model: Optional[str] = Arg(None, help="Optional loadable spaCy pipeline"),
-    markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues"),
-    silent: bool = Opt(False, "--silent", "-s", "-S", help="Don't print anything (just return)"),
-    exclude: str = Opt("labels", "--exclude", "-e", help="Comma-separated keys to exclude from the print-out"),
-    url: bool = Opt(False, "--url", "-u", help="Print the URL to download the most recent compatible version of the pipeline"),
+    markdown: bool = Opt(
+        False, "--markdown", "-md", help="Generate Markdown for GitHub issues"
+    ),
+    silent: bool = Opt(
+        False, "--silent", "-s", "-S", help="Don't print anything (just return)"
+    ),
+    exclude: str = Opt(
+        "labels",
+        "--exclude",
+        "-e",
+        help="Comma-separated keys to exclude from the print-out",
+    ),
+    url: bool = Opt(
+        False,
+        "--url",
+        "-u",
+        help="Print the URL to download the most recent compatible version of the pipeline",
+    ),
     # fmt: on
 ):
     """
diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py
index a7fb2b5b81f..2cb39056d5e 100644
--- a/spacy/cli/init_config.py
+++ b/spacy/cli/init_config.py
@@ -49,13 +49,44 @@ class InitValues:
 @init_cli.command("config")
 def init_config_cli(
     # fmt: off
-    output_file: Path = Arg(..., help="File to save the config to or - for stdout (will only output config and no additional logging info)", allow_dash=True),
-    lang: str = Opt(InitValues.lang, "--lang", "-l", help="Two-letter code of the language to use"),
-    pipeline: str = Opt(",".join(InitValues.pipeline), "--pipeline", "-p", help="Comma-separated names of trainable pipeline components to include (without 'tok2vec' or 'transformer')"),
-    optimize: Optimizations = Opt(InitValues.optimize, "--optimize", "-o", help="Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters."),
-    gpu: bool = Opt(InitValues.gpu, "--gpu", "-G", help="Whether the model can run on GPU. This will impact the choice of architecture, pretrained weights and related hyperparameters."),
-    pretraining: bool = Opt(InitValues.pretraining, "--pretraining", "-pt", help="Include config for pretraining (with 'spacy pretrain')"),
-    force_overwrite: bool = Opt(InitValues.force_overwrite, "--force", "-F", help="Force overwriting the output file"),
+    output_file: Path = Arg(
+        ...,
+        help="File to save the config to or - for stdout (will only output config and no additional logging info)",
+        allow_dash=True,
+    ),
+    lang: str = Opt(
+        InitValues.lang, "--lang", "-l", help="Two-letter code of the language to use"
+    ),
+    pipeline: str = Opt(
+        ",".join(InitValues.pipeline),
+        "--pipeline",
+        "-p",
+        help="Comma-separated names of trainable pipeline components to include (without 'tok2vec' or 'transformer')",
+    ),
+    optimize: Optimizations = Opt(
+        InitValues.optimize,
+        "--optimize",
+        "-o",
+        help="Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters.",
+    ),
+    gpu: bool = Opt(
+        InitValues.gpu,
+        "--gpu",
+        "-G",
+        help="Whether the model can run on GPU. This will impact the choice of architecture, pretrained weights and related hyperparameters.",
+    ),
+    pretraining: bool = Opt(
+        InitValues.pretraining,
+        "--pretraining",
+        "-pt",
+        help="Include config for pretraining (with 'spacy pretrain')",
+    ),
+    force_overwrite: bool = Opt(
+        InitValues.force_overwrite,
+        "--force",
+        "-F",
+        help="Force overwriting the output file",
+    ),
     # fmt: on
 ):
     """
@@ -88,11 +119,28 @@ def init_config_cli(
 @init_cli.command("fill-config")
 def init_fill_config_cli(
     # fmt: off
-    base_path: Path = Arg(..., help="Path to base config to fill", exists=True, dir_okay=False),
-    output_file: Path = Arg("-", help="Path to output .cfg file (or - for stdout)", allow_dash=True),
-    pretraining: bool = Opt(False, "--pretraining", "-pt", help="Include config for pretraining (with 'spacy pretrain')"),
-    diff: bool = Opt(False, "--diff", "-D", help="Print a visual diff highlighting the changes"),
-    code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+    base_path: Path = Arg(
+        ..., help="Path to base config to fill", exists=True, dir_okay=False
+    ),
+    output_file: Path = Arg(
+        "-", help="Path to output .cfg file (or - for stdout)", allow_dash=True
+    ),
+    pretraining: bool = Opt(
+        False,
+        "--pretraining",
+        "-pt",
+        help="Include config for pretraining (with 'spacy pretrain')",
+    ),
+    diff: bool = Opt(
+        False, "--diff", "-D", help="Print a visual diff highlighting the changes"
+    ),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code-path",
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
     # fmt: on
 ):
     """
diff --git a/spacy/cli/init_pipeline.py b/spacy/cli/init_pipeline.py
index 21eea8edf2f..1c0ff526235 100644
--- a/spacy/cli/init_pipeline.py
+++ b/spacy/cli/init_pipeline.py
@@ -26,13 +26,42 @@ def init_vectors_cli(
     lang: str = Arg(..., help="The language of the nlp object to create"),
     vectors_loc: Path = Arg(..., help="Vectors file in Word2Vec format", exists=True),
     output_dir: Path = Arg(..., help="Pipeline output directory"),
-    prune: int = Opt(-1, "--prune", "-p", help="Optional number of vectors to prune to"),
-    truncate: int = Opt(0, "--truncate", "-t", help="Optional number of vectors to truncate to when reading in vectors file"),
+    prune: int = Opt(
+        -1, "--prune", "-p", help="Optional number of vectors to prune to"
+    ),
+    truncate: int = Opt(
+        0,
+        "--truncate",
+        "-t",
+        help="Optional number of vectors to truncate to when reading in vectors file",
+    ),
     mode: str = Opt("default", "--mode", "-m", help="Vectors mode: default or floret"),
-    name: Optional[str] = Opt(None, "--name", "-n", help="Optional name for the word vectors, e.g. en_core_web_lg.vectors"),
-    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
-    jsonl_loc: Optional[Path] = Opt(None, "--lexemes-jsonl", "-j", help="Location of JSONL-formatted attributes file", hidden=True),
-    attr: str = Opt("ORTH", "--attr", "-a", help="Optional token attribute to use for vectors, e.g. LOWER or NORM"),
+    name: Optional[str] = Opt(
+        None,
+        "--name",
+        "-n",
+        help="Optional name for the word vectors, e.g. en_core_web_lg.vectors",
+    ),
+    verbose: bool = Opt(
+        False,
+        "--verbose",
+        "-V",
+        "-VV",
+        help="Display more information for debugging purposes",
+    ),
+    jsonl_loc: Optional[Path] = Opt(
+        None,
+        "--lexemes-jsonl",
+        "-j",
+        help="Location of JSONL-formatted attributes file",
+        hidden=True,
+    ),
+    attr: str = Opt(
+        "ORTH",
+        "--attr",
+        "-a",
+        help="Optional token attribute to use for vectors, e.g. LOWER or NORM",
+    ),
     # fmt: on
 ):
     """Convert word vectors for use with spaCy. Will export an nlp object that
@@ -81,11 +110,24 @@ def update_lexemes(nlp: Language, jsonl_loc: Path) -> None:
 def init_pipeline_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, allow_dash=True
+    ),
     output_path: Path = Arg(..., help="Output directory for the prepared data"),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
-    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
+    verbose: bool = Opt(
+        False,
+        "--verbose",
+        "-V",
+        "-VV",
+        help="Display more information for debugging purposes",
+    ),
+    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
     # fmt: on
 ):
     if verbose:
@@ -108,11 +150,24 @@ def init_pipeline_cli(
 def init_labels_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, allow_dash=True
+    ),
     output_path: Path = Arg(..., help="Output directory for the labels"),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
-    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
+    verbose: bool = Opt(
+        False,
+        "--verbose",
+        "-V",
+        "-VV",
+        help="Display more information for debugging purposes",
+    ),
+    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
     # fmt: on
 ):
     """Generate JSON files for the labels in the data. This helps speed up the
diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index 67b1d318651..9291aae2827 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -21,16 +21,56 @@
 @app.command("package")
 def package_cli(
     # fmt: off
-    input_dir: Path = Arg(..., help="Directory with pipeline data", exists=True, file_okay=False),
-    output_dir: Path = Arg(..., help="Output parent directory", exists=True, file_okay=False),
-    code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python file with additional code (registered functions) to be included in the package"),
-    meta_path: Optional[Path] = Opt(None, "--meta-path", "--meta", "-m", help="Path to meta.json", exists=True, dir_okay=False),
-    create_meta: bool = Opt(False, "--create-meta", "-C", help="Create meta.json, even if one exists"),
-    name: Optional[str] = Opt(None, "--name", "-n", help="Package name to override meta"),
-    version: Optional[str] = Opt(None, "--version", "-v", help="Package version to override meta"),
-    build: str = Opt("sdist", "--build", "-b", help="Comma-separated formats to build: sdist and/or wheel, or none."),
-    force: bool = Opt(False, "--force", "-f", "-F", help="Force overwriting existing data in output directory"),
-    require_parent: bool = Opt(True, "--require-parent/--no-require-parent", "-R", "-R", help="Include the parent package (e.g. spacy) in the requirements"),
+    input_dir: Path = Arg(
+        ..., help="Directory with pipeline data", exists=True, file_okay=False
+    ),
+    output_dir: Path = Arg(
+        ..., help="Output parent directory", exists=True, file_okay=False
+    ),
+    code_paths: str = Opt(
+        "",
+        "--code",
+        "-c",
+        help="Comma-separated paths to Python file with additional code (registered functions) to be included in the package",
+    ),
+    meta_path: Optional[Path] = Opt(
+        None,
+        "--meta-path",
+        "--meta",
+        "-m",
+        help="Path to meta.json",
+        exists=True,
+        dir_okay=False,
+    ),
+    create_meta: bool = Opt(
+        False, "--create-meta", "-C", help="Create meta.json, even if one exists"
+    ),
+    name: Optional[str] = Opt(
+        None, "--name", "-n", help="Package name to override meta"
+    ),
+    version: Optional[str] = Opt(
+        None, "--version", "-v", help="Package version to override meta"
+    ),
+    build: str = Opt(
+        "sdist",
+        "--build",
+        "-b",
+        help="Comma-separated formats to build: sdist and/or wheel, or none.",
+    ),
+    force: bool = Opt(
+        False,
+        "--force",
+        "-f",
+        "-F",
+        help="Force overwriting existing data in output directory",
+    ),
+    require_parent: bool = Opt(
+        True,
+        "--require-parent/--no-require-parent",
+        "-R",
+        "-R",
+        help="Include the parent package (e.g. spacy) in the requirements",
+    ),
     # fmt: on
 ):
     """
@@ -410,7 +450,7 @@ def generate_readme(meta: Dict[str, Any]) -> str:
     pipeline = ", ".join([md.code(p) for p in meta.get("pipeline", [])])
     components = ", ".join([md.code(p) for p in meta.get("components", [])])
     vecs = meta.get("vectors", {})
-    vectors = f"{vecs.get('keys', 0)} keys, {vecs.get('vectors', 0)} unique vectors ({ vecs.get('width', 0)} dimensions)"
+    vectors = f"{vecs.get('keys', 0)} keys, {vecs.get('vectors', 0)} unique vectors ({vecs.get('width', 0)} dimensions)"
     author = meta.get("author") or "n/a"
     notes = meta.get("notes", "")
     license_name = meta.get("license")
@@ -469,7 +509,7 @@ def _format_accuracy(data: Dict[str, Any], exclude: List[str] = ["speed"]) -> st
     md = MarkdownRenderer()
     scalars = [(k, v) for k, v in data.items() if isinstance(v, (int, float))]
     scores = [
-        (md.code(acc.upper()), f"{score*100:.2f}")
+        (md.code(acc.upper()), f"{score * 100:.2f}")
         for acc, score in scalars
         if acc not in exclude
     ]
@@ -488,9 +528,7 @@ def _format_label_scheme(data: Dict[str, Any]) -> str:
         if not labels:
             continue
         col1 = md.bold(md.code(pipe))
-        col2 = ", ".join(
-            [md.code(str(label).replace("|", "\\|")) for label in labels]
-        )  # noqa: W605
+        col2 = ", ".join([md.code(str(label).replace("|", "\\|")) for label in labels])  # noqa: W605
         label_data.append((col1, col2))
         n_labels += len(labels)
         n_pipes += 1
diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index 446c40510df..daea861a952 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -25,13 +25,32 @@
 def pretrain_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    config_path: Path = Arg(..., help="Path to config file", exists=True, dir_okay=False, allow_dash=True),
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, dir_okay=False, allow_dash=True
+    ),
     output_dir: Path = Arg(..., help="Directory to write weights to on each epoch"),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    resume_path: Optional[Path] = Opt(None, "--resume-path", "-r", help="Path to pretrained weights from which to resume pretraining"),
-    epoch_resume: Optional[int] = Opt(None, "--epoch-resume", "-er", help="The epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files."),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
+    resume_path: Optional[Path] = Opt(
+        None,
+        "--resume-path",
+        "-r",
+        help="Path to pretrained weights from which to resume pretraining",
+    ),
+    epoch_resume: Optional[int] = Opt(
+        None,
+        "--epoch-resume",
+        "-er",
+        help="The epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files.",
+    ),
     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
-    skip_last: bool = Opt(False, "--skip-last", "-L", help="Skip saving model-last.bin"),
+    skip_last: bool = Opt(
+        False, "--skip-last", "-L", help="Skip saving model-last.bin"
+    ),
     # fmt: on
 ):
     """
diff --git a/spacy/cli/profile.py b/spacy/cli/profile.py
index e5b8f11939f..03f7127149e 100644
--- a/spacy/cli/profile.py
+++ b/spacy/cli/profile.py
@@ -21,8 +21,15 @@ def profile_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read current calling context
     model: str = Arg(..., help="Trained pipeline to load"),
-    inputs: Optional[Path] = Arg(None, help="Location of input file. '-' for stdin.", exists=True, allow_dash=True),
-    n_texts: int = Opt(10000, "--n-texts", "-n", help="Maximum number of texts to use if available"),
+    inputs: Optional[Path] = Arg(
+        None,
+        help="Location of input file. '-' for stdin.",
+        exists=True,
+        allow_dash=True,
+    ),
+    n_texts: int = Opt(
+        10000, "--n-texts", "-n", help="Maximum number of texts to use if available"
+    ),
     # fmt: on
 ):
     """
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index c72e13b2681..379268286ee 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -26,11 +26,30 @@
 def train_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
-    output_path: Optional[Path] = Opt(None, "--output", "--output-path", "-o", help="Output directory to store trained pipeline in"),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
-    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, allow_dash=True
+    ),
+    output_path: Optional[Path] = Opt(
+        None,
+        "--output",
+        "--output-path",
+        "-o",
+        help="Output directory to store trained pipeline in",
+    ),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
+    verbose: bool = Opt(
+        False,
+        "--verbose",
+        "-V",
+        "-VV",
+        help="Display more information for debugging purposes",
+    ),
+    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
     # fmt: on
 ):
     """
diff --git a/spacy/lang/af/stop_words.py b/spacy/lang/af/stop_words.py
index 337afb57f8c..4b5a04a5eca 100644
--- a/spacy/lang/af/stop_words.py
+++ b/spacy/lang/af/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/stopwords-iso/stopwords-af
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 'n
 aan
 af
@@ -52,4 +53,5 @@
 was
 wat
 ŉ
-""".split())
+""".split()
+)
diff --git a/spacy/lang/am/lex_attrs.py b/spacy/lang/am/lex_attrs.py
index 9e111b8d5eb..c7b2aab35bf 100644
--- a/spacy/lang/am/lex_attrs.py
+++ b/spacy/lang/am/lex_attrs.py
@@ -60,7 +60,8 @@
     "አስራ ስምንተኛ",
     "አስራ ዘጠነኛ",
     "ሃያኛ",
-    "ሰላሳኛ" "አርባኛ",
+    "ሰላሳኛ",
+    "አርባኛ",
     "አምሳኛ",
     "ስድሳኛ",
     "ሰባኛ",
diff --git a/spacy/lang/am/stop_words.py b/spacy/lang/am/stop_words.py
index 8a04c555f74..5487ada5aeb 100644
--- a/spacy/lang/am/stop_words.py
+++ b/spacy/lang/am/stop_words.py
@@ -1,7 +1,8 @@
 # Stop words by Teshome Kassie http://etd.aau.edu.et/bitstream/handle/123456789/3315/Teshome%20Kassie.pdf?sequence=1&isAllowed=y
 # Stop words by Tihitina Petros http://etd.aau.edu.et/bitstream/handle/123456789/3384/Tihitina%20Petros.pdf?sequence=1&isAllowed=y
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ግን አንቺ አንተ እናንተ ያንተ ያንቺ የናንተ ራስህን ራስሽን ራሳችሁን
 ሁሉ ኋላ በሰሞኑ አሉ በኋላ ሁኔታ በኩል አስታውቀዋል ሆነ በውስጥ
 አስታውሰዋል ሆኑ ባጣም እስካሁን ሆኖም በተለይ አሳሰበ ሁል በተመለከተ
@@ -28,4 +29,5 @@
 በዚህም መሆን ምንጊዜም እነዚህም በዚህና ያለ ስም
 ሲኖር ከዚህም መሆኑን በሁኔታው የማያንስ እነዚህኑ ማንም ከነዚሁ
 ያላቸውን እጅግ ሲሆኑ ለሆኑ ሊሆን  ለማናቸውም
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ar/lex_attrs.py b/spacy/lang/ar/lex_attrs.py
index 6e943d064ee..54ad7a8c363 100644
--- a/spacy/lang/ar/lex_attrs.py
+++ b/spacy/lang/ar/lex_attrs.py
@@ -1,6 +1,7 @@
 from ...attrs import LIKE_NUM
 
-_num_words = set("""
+_num_words = set(
+    """
 صفر
 واحد
 إثنان
@@ -50,9 +51,11 @@
 مليون
 مليار
 مليارات
-""".split())
+""".split()
+)
 
-_ordinal_words = set("""
+_ordinal_words = set(
+    """
 اول
 أول
 حاد
@@ -67,7 +70,8 @@
 ثامن
 تاسع
 عاشر
-""".split())
+""".split()
+)
 
 
 def like_num(text):
diff --git a/spacy/lang/ar/stop_words.py b/spacy/lang/ar/stop_words.py
index 65c8992cbd6..f4da54dda29 100644
--- a/spacy/lang/ar/stop_words.py
+++ b/spacy/lang/ar/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 من
 نحو
 لعل
@@ -385,4 +386,5 @@
 وإن
 ولو
 يا
-""".split())
+""".split()
+)
diff --git a/spacy/lang/az/stop_words.py b/spacy/lang/az/stop_words.py
index 8beffa998da..2114939ba11 100644
--- a/spacy/lang/az/stop_words.py
+++ b/spacy/lang/az/stop_words.py
@@ -1,5 +1,6 @@
 # Source: https://github.com/eliasdabbas/advertools/blob/master/advertools/stopwords.py
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 amma
 arasında
 artıq
@@ -140,4 +141,5 @@
 əlbəttə
 ən
 əslində
-""".split())
+""".split()
+)
diff --git a/spacy/lang/bg/stop_words.py b/spacy/lang/bg/stop_words.py
index 7d3e756054d..061850da594 100644
--- a/spacy/lang/bg/stop_words.py
+++ b/spacy/lang/bg/stop_words.py
@@ -4,7 +4,8 @@
     https://postvai.com/books/stop-dumi.pdf - Additions to the original list in order to improve it.
 """
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 а автентичен аз ако ала
 
 бе без беше би бивш бивша бившо бивши бил била били било благодаря близо бъдат
@@ -75,4 +76,5 @@
 юмрук
 
 я як
-""".split())
+""".split()
+)
diff --git a/spacy/lang/bn/stop_words.py b/spacy/lang/bn/stop_words.py
index 5aec18b7f5b..bf38e32545e 100644
--- a/spacy/lang/bn/stop_words.py
+++ b/spacy/lang/bn/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 অতএব অথচ অথবা অনুযায়ী অনেক অনেকে অনেকেই অন্তত  অবধি অবশ্য অর্থাৎ অন্য অনুযায়ী অর্ধভাগে
 আগামী আগে আগেই আছে আজ আদ্যভাগে আপনার আপনি আবার আমরা আমাকে আমাদের আমার  আমি আর আরও
 ইত্যাদি ইহা
@@ -37,4 +38,5 @@
 সাধারণ সামনে সঙ্গে সঙ্গেও সব সবার সমস্ত সম্প্রতি সময় সহ সহিত সাথে সুতরাং সে  সেই সেখান সেখানে  সেটা সেটাই সেটাও সেটি স্পষ্ট স্বয়ং
 হইতে হইবে হইয়া হওয়া হওয়ায় হওয়ার হচ্ছে হত হতে হতেই হন হবে হবেন হয় হয়তো হয়নি হয়ে হয়েই হয়েছিল হয়েছে হাজার
 হয়েছেন হল হলে হলেই হলেও হলো হিসাবে হিসেবে হৈলে হোক হয় হয়ে হয়েছে হৈতে হইয়া  হয়েছিল হয়েছেন হয়নি হয়েই হয়তো হওয়া হওয়ার হওয়ায়
-""".split())
+""".split()
+)
diff --git a/spacy/lang/bo/stop_words.py b/spacy/lang/bo/stop_words.py
index 158e148b00b..407242c849b 100644
--- a/spacy/lang/bo/stop_words.py
+++ b/spacy/lang/bo/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://zenodo.org/records/10148636
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 འི་
 །
 དུ་
@@ -193,4 +194,5 @@
 གིང་
 ཚ་
 ཀྱང
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ca/stop_words.py b/spacy/lang/ca/stop_words.py
index 90cce5de885..1a87b2f9dbe 100644
--- a/spacy/lang/ca/stop_words.py
+++ b/spacy/lang/ca/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a abans ací ah així això al aleshores algun alguna algunes alguns alhora allà allí allò
 als altra altre altres amb ambdues ambdós anar ans apa aquell aquella aquelles aquells
 aquest aquesta aquestes aquests aquí
@@ -47,4 +48,5 @@
 
 va vaig vam van vas veu vosaltres vostra vostre vostres
 
-""".split())
+""".split()
+)
diff --git a/spacy/lang/cs/stop_words.py b/spacy/lang/cs/stop_words.py
index 35db9fedc86..f61f424f6f4 100644
--- a/spacy/lang/cs/stop_words.py
+++ b/spacy/lang/cs/stop_words.py
@@ -1,7 +1,8 @@
 # Source: https://github.com/Alir3z4/stop-words
 # Source: https://github.com/stopwords-iso/stopwords-cs/blob/master/stopwords-cs.txt
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a
 aby
 ahoj
@@ -360,4 +361,5 @@
 zatímco
 ze
 že
-""".split())
+""".split()
+)
diff --git a/spacy/lang/da/stop_words.py b/spacy/lang/da/stop_words.py
index 0e71dfde739..05b2084dde3 100644
--- a/spacy/lang/da/stop_words.py
+++ b/spacy/lang/da/stop_words.py
@@ -1,6 +1,7 @@
 # Source: Handpicked by Jens Dahl Møllerhøj.
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 af aldrig alene alle allerede alligevel alt altid anden andet andre at
 
 bag begge blandt blev blive bliver burde bør
@@ -40,4 +41,5 @@
 var ved vi via vil ville vore vores vær være været
 
 øvrigt
-""".split())
+""".split()
+)
diff --git a/spacy/lang/de/stop_words.py b/spacy/lang/de/stop_words.py
index 5fbd7428757..f52687eb9b3 100644
--- a/spacy/lang/de/stop_words.py
+++ b/spacy/lang/de/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 á a ab aber ach acht achte achten achter achtes ag alle allein allem allen
 aller allerdings alles allgemeinen als also am an andere anderen anderem andern
 anders auch auf aus ausser außer ausserdem außerdem
@@ -73,4 +74,5 @@
 
 zehn zehnte zehnten zehnter zehntes zeit zu zuerst zugleich zum zunächst zur
 zurück zusammen zwanzig zwar zwei zweite zweiten zweiter zweites zwischen
-""".split())
+""".split()
+)
diff --git a/spacy/lang/dsb/stop_words.py b/spacy/lang/dsb/stop_words.py
index 90735a6236a..376e04aa6e5 100644
--- a/spacy/lang/dsb/stop_words.py
+++ b/spacy/lang/dsb/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a abo aby ako ale až
 
 daniž dokulaž
@@ -10,4 +11,5 @@
 pak pótom
 
 teke togodla
-""".split())
+""".split()
+)
diff --git a/spacy/lang/el/stop_words.py b/spacy/lang/el/stop_words.py
index b5c1c36c41f..7c436219fa9 100644
--- a/spacy/lang/el/stop_words.py
+++ b/spacy/lang/el/stop_words.py
@@ -1,6 +1,7 @@
 # Stop words
 # Link to greek stop words: https://www.translatum.gr/forum/index.php?topic=3550.0?topic=3550.0
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 αδιάκοπα αι ακόμα ακόμη ακριβώς άλλα αλλά αλλαχού άλλες άλλη άλλην
 άλλης αλλιώς αλλιώτικα άλλο άλλοι αλλοιώς αλλοιώτικα άλλον άλλος άλλοτε αλλού
 άλλους άλλων άμα άμεσα αμέσως αν ανά ανάμεσα αναμεταξύ άνευ αντί αντίπερα αντίς
@@ -82,4 +83,5 @@
 χωρίς χωριστά
 
 ω ως ωσάν ωσότου ώσπου ώστε ωστόσο ωχ
-""".split())
+""".split()
+)
diff --git a/spacy/lang/el/tokenizer_exceptions.py b/spacy/lang/el/tokenizer_exceptions.py
index 41317ba9770..d88d4837e2a 100644
--- a/spacy/lang/el/tokenizer_exceptions.py
+++ b/spacy/lang/el/tokenizer_exceptions.py
@@ -128,7 +128,6 @@
 _exc.update(_other_exc)
 
 for h in range(1, 12 + 1):
-
     for period in ["π.μ.", "πμ"]:
         _exc[f"{h}{period}"] = [
             {ORTH: f"{h}"},
diff --git a/spacy/lang/en/stop_words.py b/spacy/lang/en/stop_words.py
index cbce281b491..1ca5cbc1670 100644
--- a/spacy/lang/en/stop_words.py
+++ b/spacy/lang/en/stop_words.py
@@ -1,5 +1,6 @@
 # Stop words
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a about above across after afterwards again against all almost alone along
 already also although always am among amongst amount an and another any anyhow
 anyone anything anyway anywhere are around as at
@@ -61,7 +62,8 @@
 whither who whoever whole whom whose why will with within without would
 
 yet you your yours yourself yourselves
-""".split())
+""".split()
+)
 
 contractions = ["n't", "'d", "'ll", "'m", "'re", "'s", "'ve"]
 STOP_WORDS.update(contractions)
diff --git a/spacy/lang/es/lemmatizer.py b/spacy/lang/es/lemmatizer.py
index ee5d38e8466..05238a75b70 100644
--- a/spacy/lang/es/lemmatizer.py
+++ b/spacy/lang/es/lemmatizer.py
@@ -415,7 +415,10 @@ def lemmatize_verb_pron(
         else:
             rule = self.select_rule("verb", features)
             verb_lemma = self.lemmatize_verb(
-                verb, features - {"PronType=Prs"}, rule, index  # type: ignore[operator]
+                verb,
+                features - {"PronType=Prs"},  # type: ignore[operator]
+                rule,
+                index,
             )[0]
         pron_lemmas = []
         for pron in prons:
diff --git a/spacy/lang/es/stop_words.py b/spacy/lang/es/stop_words.py
index 5099359e843..6d28854810a 100644
--- a/spacy/lang/es/stop_words.py
+++ b/spacy/lang/es/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a acuerdo adelante ademas además afirmó agregó ahi ahora ahí al algo alguna
 algunas alguno algunos algún alli allí alrededor ambos ante anterior antes
 apenas aproximadamente aquel aquella aquellas aquello aquellos aqui aquél
@@ -75,4 +76,5 @@
 vosotras vosotros voy vuestra vuestras vuestro vuestros
 
 y ya yo
-""".split())
+""".split()
+)
diff --git a/spacy/lang/et/stop_words.py b/spacy/lang/et/stop_words.py
index 248bcb61f08..e1da1f14d5e 100644
--- a/spacy/lang/et/stop_words.py
+++ b/spacy/lang/et/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/stopwords-iso/stopwords-et
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 aga
 ei
 et
@@ -36,4 +37,5 @@
 ta
 te
 ära
-""".split())
+""".split()
+)
diff --git a/spacy/lang/eu/stop_words.py b/spacy/lang/eu/stop_words.py
index 4a6661e7d20..d213b5b81a5 100644
--- a/spacy/lang/eu/stop_words.py
+++ b/spacy/lang/eu/stop_words.py
@@ -1,7 +1,8 @@
 # Source: https://github.com/stopwords-iso/stopwords-eu
 # https://www.ranks.nl/stopwords/basque
 # https://www.mustgo.com/worldlanguages/basque/
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 al
 anitz
 arabera
@@ -100,4 +101,5 @@
 zuek
 zuen
 zuten
-""".split())
+""".split()
+)
diff --git a/spacy/lang/fa/lex_attrs.py b/spacy/lang/fa/lex_attrs.py
index 9b0ff546e0d..065e81bd6af 100644
--- a/spacy/lang/fa/lex_attrs.py
+++ b/spacy/lang/fa/lex_attrs.py
@@ -5,7 +5,8 @@
 YE_NUN = "ین"
 
 
-_num_words = set("""
+_num_words = set(
+    """
 صفر
 یک
 دو
@@ -62,12 +63,15 @@
 کوادریلیون
 کادریلیارد
 کوینتیلیون
-""".split())
+""".split()
+)
 
-_ordinal_words = set("""
+_ordinal_words = set(
+    """
 اول
 سوم
-سی‌ام""".split())
+سی‌ام""".split()
+)
 
 _ordinal_words.update({num + MIM for num in _num_words})
 _ordinal_words.update({num + ZWNJ_O_MIM for num in _num_words})
diff --git a/spacy/lang/fa/stop_words.py b/spacy/lang/fa/stop_words.py
index 93738c89263..f462f2e7a5d 100644
--- a/spacy/lang/fa/stop_words.py
+++ b/spacy/lang/fa/stop_words.py
@@ -1,5 +1,6 @@
 # Stop words from HAZM package
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 و
 در
 به
@@ -388,4 +389,5 @@
 لذا
 زاده
 گردد
-اینجا""".split())
+اینجا""".split()
+)
diff --git a/spacy/lang/fi/stop_words.py b/spacy/lang/fi/stop_words.py
index 742cacc2689..8e8dcfa565d 100644
--- a/spacy/lang/fi/stop_words.py
+++ b/spacy/lang/fi/stop_words.py
@@ -1,6 +1,7 @@
 # Source https://github.com/stopwords-iso/stopwords-fi/blob/master/stopwords-fi.txt
 # Reformatted with some minor corrections
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 aiemmin aika aikaa aikaan aikaisemmin aikaisin aikana aikoina aikoo aikovat
 aina ainakaan ainakin ainoa ainoat aiomme aion aiotte aivan ajan alas alemmas
 alkuisin alkuun alla alle aloitamme aloitan aloitat aloitatte aloitattivat
@@ -105,4 +106,5 @@
 ympäri
 
 älköön älä
-""".split())
+""".split()
+)
diff --git a/spacy/lang/fr/lex_attrs.py b/spacy/lang/fr/lex_attrs.py
index 8a9dfb82a8b..9cf508a07b9 100644
--- a/spacy/lang/fr/lex_attrs.py
+++ b/spacy/lang/fr/lex_attrs.py
@@ -1,20 +1,24 @@
 from ...attrs import LIKE_NUM
 
-_num_words = set("""
+_num_words = set(
+    """
 zero un une deux trois quatre cinq six sept huit neuf dix
 onze douze treize quatorze quinze seize dix-sept dix-huit dix-neuf
 vingt trente quarante cinquante soixante soixante-dix septante quatre-vingt huitante quatre-vingt-dix nonante
 cent mille mil million milliard billion quadrillion quintillion
 sextillion septillion octillion nonillion decillion
-""".split())
+""".split()
+)
 
-_ordinal_words = set("""
+_ordinal_words = set(
+    """
 premier première deuxième second seconde troisième quatrième cinquième sixième septième huitième neuvième dixième
 onzième douzième treizième quatorzième quinzième seizième dix-septième dix-huitième dix-neuvième
 vingtième trentième quarantième cinquantième soixantième soixante-dixième septantième quatre-vingtième huitantième quatre-vingt-dixième nonantième
 centième millième millionnième milliardième billionnième quadrillionnième quintillionnième
 sextillionnième septillionnième octillionnième nonillionnième decillionnième
-""".split())
+""".split()
+)
 
 
 def like_num(text):
diff --git a/spacy/lang/fr/stop_words.py b/spacy/lang/fr/stop_words.py
index 85ffe47baef..b32ee3d7173 100644
--- a/spacy/lang/fr/stop_words.py
+++ b/spacy/lang/fr/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a à â abord afin ah ai aie ainsi ait allaient allons
 alors anterieur anterieure anterieures antérieur antérieure antérieures
 apres après as assez attendu au
@@ -79,4 +80,5 @@
 
 y
 
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ga/stop_words.py b/spacy/lang/ga/stop_words.py
index e32ad6431f6..4ef052ca58a 100644
--- a/spacy/lang/ga/stop_words.py
+++ b/spacy/lang/ga/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a ach ag agus an aon ar arna as
 
 ba beirt bhúr
@@ -38,4 +39,5 @@
 í
 
 ó ón óna ónár
-""".split())
+""".split()
+)
diff --git a/spacy/lang/gd/stop_words.py b/spacy/lang/gd/stop_words.py
index 6f2c2856bec..d5132c35e31 100644
--- a/spacy/lang/gd/stop_words.py
+++ b/spacy/lang/gd/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 'ad
 'ar
 'd # iad
@@ -381,4 +382,5 @@
 ì
 ò
 ó
-""".split("\n"))
+""".split("\n")
+)
diff --git a/spacy/lang/grc/stop_words.py b/spacy/lang/grc/stop_words.py
index 51f5e9d9dac..cbb766a8ce1 100644
--- a/spacy/lang/grc/stop_words.py
+++ b/spacy/lang/grc/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 αὐτῷ αὐτοῦ αὐτῆς αὐτόν αὐτὸν αὐτῶν αὐτὸς αὐτὸ αὐτό αὐτός αὐτὴν αὐτοῖς αὐτοὺς αὔτ' αὐτὰ αὐτῇ αὐτὴ
 αὐτὼ αὑταὶ καὐτὸς αὐτά αὑτός αὐτοῖσι αὐτοῖσιν αὑτὸς αὐτήν αὐτοῖσί αὐτοί αὐτοὶ αὐτοῖο αὐτάων αὐτὰς
 αὐτέων αὐτώ αὐτάς αὐτούς αὐτή αὐταί αὐταὶ αὐτῇσιν τὠυτῷ τὠυτὸ ταὐτὰ ταύτῃ αὐτῇσι αὐτῇς αὐταῖς αὐτᾶς αὐτὰν ταὐτὸν
@@ -56,4 +57,5 @@
 
  ὣς ὡς ὥς ὧς ὥστ' ὥστε ὥσθ' ὤ ὢ 
  
- """.split())
+ """.split()
+)
diff --git a/spacy/lang/gu/stop_words.py b/spacy/lang/gu/stop_words.py
index 1d11a3ebd96..2c859681b05 100644
--- a/spacy/lang/gu/stop_words.py
+++ b/spacy/lang/gu/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 એમ
 આ
 એ
@@ -83,4 +84,5 @@
 દર
 એટલો
 પરંતુ
-""".split())
+""".split()
+)
diff --git a/spacy/lang/he/stop_words.py b/spacy/lang/he/stop_words.py
index ea486722475..23bb5176de9 100644
--- a/spacy/lang/he/stop_words.py
+++ b/spacy/lang/he/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 אני
 את
 אתה
@@ -217,4 +218,5 @@
 אחרות
 אשר
 או
-""".split())
+""".split()
+)
diff --git a/spacy/lang/hi/stop_words.py b/spacy/lang/hi/stop_words.py
index 9bc57bd3136..475b07da152 100644
--- a/spacy/lang/hi/stop_words.py
+++ b/spacy/lang/hi/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/taranjeet/hindi-tokenizer/blob/master/stopwords.txt, https://data.mendeley.com/datasets/bsr3frvvjc/1#file-a21d5092-99d7-45d8-b044-3ae9edd391c6
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 अंदर
 अत
 अदि
@@ -234,4 +235,5 @@
 होते
 होना
 होने
-""".split())
+""".split()
+)
diff --git a/spacy/lang/hr/stop_words.py b/spacy/lang/hr/stop_words.py
index 769ebe4db53..dd10f792d01 100644
--- a/spacy/lang/hr/stop_words.py
+++ b/spacy/lang/hr/stop_words.py
@@ -1,5 +1,6 @@
 # Source: https://github.com/stopwords-iso/stopwords-hr
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a
 ah
 aha
@@ -339,4 +340,5 @@
 željeo
 zimus
 zum
-""".split())
+""".split()
+)
diff --git a/spacy/lang/hsb/stop_words.py b/spacy/lang/hsb/stop_words.py
index 86021f555c1..e6fedaf4c92 100644
--- a/spacy/lang/hsb/stop_words.py
+++ b/spacy/lang/hsb/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a abo ale ani
 
 dokelž
@@ -14,4 +15,5 @@
 tež tohodla
 
 zo zoby
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ht/lex_attrs.py b/spacy/lang/ht/lex_attrs.py
index 27a535dd746..ab1a39a8234 100644
--- a/spacy/lang/ht/lex_attrs.py
+++ b/spacy/lang/ht/lex_attrs.py
@@ -1,20 +1,24 @@
 from ...attrs import LIKE_NUM, NORM
 
 # Cardinal numbers in Creole
-_num_words = set("""
+_num_words = set(
+    """
 zewo youn en de twa kat senk sis sèt uit nèf dis
 onz douz trèz katoz kenz sèz disèt dizwit diznèf
 vent trant karant sinkant swasant swasann-dis
 san mil milyon milya
-""".split())
+""".split()
+)
 
 # Ordinal numbers in Creole (some are French-influenced, some simplified)
-_ordinal_words = set("""
+_ordinal_words = set(
+    """
 premye dezyèm twazyèm katryèm senkyèm sizyèm sètvyèm uitvyèm nèvyèm dizyèm
 onzèm douzyèm trèzyèm katozyèm kenzèm sèzyèm disetyèm dizwityèm diznèvyèm
 ventyèm trantyèm karantyèm sinkantyèm swasantyèm
 swasann-disyèm santyèm milyèm milyonnyèm milyadyèm
-""".split())
+""".split()
+)
 
 NORM_MAP = {
     "'m": "mwen",
diff --git a/spacy/lang/ht/stop_words.py b/spacy/lang/ht/stop_words.py
index fd85c2a197f..50998e0e5ff 100644
--- a/spacy/lang/ht/stop_words.py
+++ b/spacy/lang/ht/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a ak an ankò ant apre ap atò avan avanlè
 byen bò byenke
 
@@ -38,7 +39,8 @@
 
 men mèsi oswa osinon
 
-""".split())
+""".split()
+)
 
 # Add common contractions, with and without apostrophe variants
 contractions = ["m'", "n'", "w'", "y'", "l'", "t'", "k'"]
diff --git a/spacy/lang/hu/stop_words.py b/spacy/lang/hu/stop_words.py
index 1841557073a..e39a26d35ae 100644
--- a/spacy/lang/hu/stop_words.py
+++ b/spacy/lang/hu/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a abban ahhoz ahogy ahol aki akik akkor akár alatt amely amelyek amelyekben
 amelyeket amelyet amelynek ami amikor amit amolyan amíg annak arra arról az
 azok azon azonban azt aztán azután azzal azért
@@ -57,4 +58,5 @@
 úgy új újabb újra
 
 ő őket
-""".split())
+""".split()
+)
diff --git a/spacy/lang/hy/stop_words.py b/spacy/lang/hy/stop_words.py
index 1bfd09a4b29..46d0f6b511c 100644
--- a/spacy/lang/hy/stop_words.py
+++ b/spacy/lang/hy/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 նա
 ողջը
 այստեղ
@@ -102,4 +103,5 @@
 այս
 մեջ
 թ
-""".split())
+""".split()
+)
diff --git a/spacy/lang/id/_tokenizer_exceptions_list.py b/spacy/lang/id/_tokenizer_exceptions_list.py
index 11220a61e5b..a0b35fa1a2b 100644
--- a/spacy/lang/id/_tokenizer_exceptions_list.py
+++ b/spacy/lang/id/_tokenizer_exceptions_list.py
@@ -1,4 +1,5 @@
-ID_BASE_EXCEPTIONS = set("""
+ID_BASE_EXCEPTIONS = set(
+    """
 aba-aba
 abah-abah
 abal-abal
@@ -3897,4 +3898,5 @@
 yo-yo
 zam-zam
 zig-zag
-""".split())
+""".split()
+)
diff --git a/spacy/lang/id/stop_words.py b/spacy/lang/id/stop_words.py
index fc85f83679a..b1bfaea796e 100644
--- a/spacy/lang/id/stop_words.py
+++ b/spacy/lang/id/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ada adalah adanya adapun agak agaknya agar akan akankah akhir akhiri akhirnya
 aku akulah amat amatlah anda andalah antar antara antaranya apa apaan apabila
 apakah apalagi apatah artinya asal asalkan atas atau ataukah ataupun awal
@@ -113,4 +114,5 @@
 waduh wah wahai waktu waktunya walau walaupun wong
 
 yaitu yakin yakni yang
-""".split())
+""".split()
+)
diff --git a/spacy/lang/id/tokenizer_exceptions.py b/spacy/lang/id/tokenizer_exceptions.py
index 8dea4e97fd1..8e206262c10 100644
--- a/spacy/lang/id/tokenizer_exceptions.py
+++ b/spacy/lang/id/tokenizer_exceptions.py
@@ -156,7 +156,7 @@
     "S.T.",
     "S.T.Han",
     "S.Th.",
-    "S.Th.I" "S.TI.",
+    "S.Th.IS.TI.",
     "S.T.P.",
     "S.TrK",
     "S.Tekp.",
@@ -210,7 +210,7 @@
     "hlm.",
     "i/o",
     "n.b.",
-    "p.p." "pjs.",
+    "p.p.pjs.",
     "s.d.",
     "tel.",
     "u.p.",
diff --git a/spacy/lang/is/stop_words.py b/spacy/lang/is/stop_words.py
index 79f84ee6000..917fb6df444 100644
--- a/spacy/lang/is/stop_words.py
+++ b/spacy/lang/is/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/Xangis/extra-stopwords
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 afhverju
 aftan
 aftur
@@ -153,4 +154,5 @@
 því
 þær
 ætti
-""".split())
+""".split()
+)
diff --git a/spacy/lang/it/stop_words.py b/spacy/lang/it/stop_words.py
index 2a37236a9b9..42adc7904c8 100644
--- a/spacy/lang/it/stop_words.py
+++ b/spacy/lang/it/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a abbastanza abbia abbiamo abbiano abbiate accidenti ad adesso affinche agl
 agli ahime ahimè ai al alcuna alcuni alcuno all alla alle allo allora altri
 altrimenti altro altrove altrui anche ancora anni anno ansa anticipo assai
@@ -78,4 +79,5 @@
 
 v' va vale vari varia varie vario verso vi via vicino visto vita voi volta volte
 vostra vostre vostri vostro
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ja/stop_words.py b/spacy/lang/ja/stop_words.py
index 661b5183594..98560d7e28b 100644
--- a/spacy/lang/ja/stop_words.py
+++ b/spacy/lang/ja/stop_words.py
@@ -2,7 +2,8 @@
 # filtering out everything that wasn't hiragana. ー (one) was also added.
 # Considered keeping some non-hiragana words but too many place names were
 # present.
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 あ あっ あまり あり ある あるいは あれ
 い いい いう いく いずれ いっ いつ いる いわ
 うち
@@ -43,4 +44,5 @@
 を
 ん
 一
-""".split())
+""".split()
+)
diff --git a/spacy/lang/kmr/stop_words.py b/spacy/lang/kmr/stop_words.py
index 93e6ea27f0c..aee33c2b748 100644
--- a/spacy/lang/kmr/stop_words.py
+++ b/spacy/lang/kmr/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 û
 li
 bi
@@ -39,4 +40,5 @@
 hemû
 kes
 tişt
-""".split())
+""".split()
+)
diff --git a/spacy/lang/kn/stop_words.py b/spacy/lang/kn/stop_words.py
index 528e5e3a8a8..dba9740af91 100644
--- a/spacy/lang/kn/stop_words.py
+++ b/spacy/lang/kn/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ಹಲವು
 ಮೂಲಕ
 ಹಾಗೂ
@@ -81,4 +82,5 @@
 ಎಂದು
 ನನ್ನ
 ಮೇಲೆ
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ko/stop_words.py b/spacy/lang/ko/stop_words.py
index d4cdbc7a112..3eba9fc8299 100644
--- a/spacy/lang/ko/stop_words.py
+++ b/spacy/lang/ko/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 이
 있
 하
@@ -62,4 +63,5 @@
 원
 잘
 놓
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ky/stop_words.py b/spacy/lang/ky/stop_words.py
index fb8e2c84b95..ea40bdfa222 100644
--- a/spacy/lang/ky/stop_words.py
+++ b/spacy/lang/ky/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ага адам айтты айтымында айтып ал алар
 алардын алган алуу алып анда андан аны
 анын ар
@@ -37,4 +38,5 @@
 үч үчүн
 
 өз
-""".split())
+""".split()
+)
diff --git a/spacy/lang/la/stop_words.py b/spacy/lang/la/stop_words.py
index 47abf7384f4..8b590bb67b3 100644
--- a/spacy/lang/la/stop_words.py
+++ b/spacy/lang/la/stop_words.py
@@ -1,6 +1,7 @@
 # Corrected Perseus list, cf. https://wiki.digitalclassicist.org/Stopwords_for_Greek_and_Latin
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ab ac ad adhuc aliqui aliquis an ante apud at atque aut autem 
 
 cum cur 
@@ -32,4 +33,5 @@
 ubi uel uero
 
 vel vero
-""".split())
+""".split()
+)
diff --git a/spacy/lang/lb/lex_attrs.py b/spacy/lang/lb/lex_attrs.py
index bbef72b9bb3..11923137418 100644
--- a/spacy/lang/lb/lex_attrs.py
+++ b/spacy/lang/lb/lex_attrs.py
@@ -1,18 +1,22 @@
 from ...attrs import LIKE_NUM
 
-_num_words = set("""
+_num_words = set(
+    """
 null eent zwee dräi véier fënnef sechs ziwen aacht néng zéng eelef zwielef dräizéng
 véierzéng foffzéng siechzéng siwwenzéng uechtzeng uechzeng nonnzéng nongzéng zwanzeg drësseg véierzeg foffzeg sechzeg siechzeg siwenzeg achtzeg achzeg uechtzeg uechzeg nonnzeg
 honnert dausend millioun milliard billioun billiard trillioun triliard
-""".split())
+""".split()
+)
 
-_ordinal_words = set("""
+_ordinal_words = set(
+    """
 éischten zweeten drëtten véierten fënneften sechsten siwenten aachten néngten zéngten eeleften
 zwieleften dräizéngten véierzéngten foffzéngten siechzéngten uechtzéngen uechzéngten nonnzéngten nongzéngten zwanzegsten
 drëssegsten véierzegsten foffzegsten siechzegsten siwenzegsten uechzegsten nonnzegsten
 honnertsten dausendsten milliounsten
 milliardsten billiounsten billiardsten trilliounsten trilliardsten
-""".split())
+""".split()
+)
 
 
 def like_num(text):
diff --git a/spacy/lang/lb/stop_words.py b/spacy/lang/lb/stop_words.py
index 386ce1222af..8f22ea6e694 100644
--- a/spacy/lang/lb/stop_words.py
+++ b/spacy/lang/lb/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a
 à
 äis
@@ -206,4 +207,5 @@
 zu
 zum
 zwar
-""".split())
+""".split()
+)
diff --git a/spacy/lang/lg/stop_words.py b/spacy/lang/lg/stop_words.py
index a9f99cbf40f..7bad59344fb 100644
--- a/spacy/lang/lg/stop_words.py
+++ b/spacy/lang/lg/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 abadde abalala abamu abangi abava ajja ali alina ani anti ateekeddwa atewamu
 atya awamu aweebwa ayinza ba baali babadde babalina bajja
 bajjanewankubade bali balina bandi bangi bano bateekeddwa baweebwa bayina bebombi beera bibye
@@ -14,4 +15,5 @@
 tetuteekeddwa tewali teyalina teyayina tolina tu tuyina tulina tuyina twafuna twetaaga wa wabula
 wabweru wadde waggulunnina wakati waliwobangi waliyo wandi wange wano wansi weebwa yabadde yaffe
 ye yenna yennyini yina yonna ziba zijja zonna
-""".split())
+""".split()
+)
diff --git a/spacy/lang/lij/stop_words.py b/spacy/lang/lij/stop_words.py
index 37eb163ffe7..1d6f09d27ca 100644
--- a/spacy/lang/lij/stop_words.py
+++ b/spacy/lang/lij/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a à â a-a a-e a-i a-o aiva aloa an ancheu ancon apreuvo ascì atra atre atri atro avanti avei
 
 bella belle belli bello ben
@@ -34,4 +35,5 @@
 un uña unn' unna
 
 za zu
-""".split())
+""".split()
+)
diff --git a/spacy/lang/lv/stop_words.py b/spacy/lang/lv/stop_words.py
index 4ed61996ac1..2685c243083 100644
--- a/spacy/lang/lv/stop_words.py
+++ b/spacy/lang/lv/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/stopwords-iso/stopwords-lv
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 aiz
 ap
 apakš
@@ -162,4 +163,5 @@
 zem
 ārpus
 šaipus
-""".split())
+""".split()
+)
diff --git a/spacy/lang/mk/stop_words.py b/spacy/lang/mk/stop_words.py
index 90a27179852..312a456c5db 100644
--- a/spacy/lang/mk/stop_words.py
+++ b/spacy/lang/mk/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 а
 абре
 aв
@@ -810,4 +811,5 @@
 џагара-магара
 џанам
 џив-џив
-    """.split())
+    """.split()
+)
diff --git a/spacy/lang/ml/stop_words.py b/spacy/lang/ml/stop_words.py
index 64b9acc1025..441e9358699 100644
--- a/spacy/lang/ml/stop_words.py
+++ b/spacy/lang/ml/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 അത്
 ഇത്
 ആയിരുന്നു
@@ -8,4 +9,5 @@
 അന്ന്
 ഇന്ന്
 ആണ്
-""".split())
+""".split()
+)
diff --git a/spacy/lang/mr/stop_words.py b/spacy/lang/mr/stop_words.py
index 3c9c6208916..9b0cee951ab 100644
--- a/spacy/lang/mr/stop_words.py
+++ b/spacy/lang/mr/stop_words.py
@@ -1,5 +1,6 @@
 # Source: https://github.com/stopwords-iso/stopwords-mr/blob/master/stopwords-mr.txt, https://github.com/6/stopwords-json/edit/master/dist/mr.json
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 न
 अतरी
 तो
@@ -187,4 +188,5 @@
 होता
 होती
 होते
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ms/_tokenizer_exceptions_list.py b/spacy/lang/ms/_tokenizer_exceptions_list.py
index e579e316ae9..fba1dd70f94 100644
--- a/spacy/lang/ms/_tokenizer_exceptions_list.py
+++ b/spacy/lang/ms/_tokenizer_exceptions_list.py
@@ -1,6 +1,7 @@
 # from https://prpm.dbp.gov.my/cari1?keyword=
 # dbp https://en.wikipedia.org/wiki/Dewan_Bahasa_dan_Pustaka
-MS_BASE_EXCEPTIONS = set("""
+MS_BASE_EXCEPTIONS = set(
+    """
 aba-aba
 abah-abah
 abar-abar
@@ -1938,4 +1939,5 @@
 water-cooled
 world-class
 yang-yang
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ms/examples.py b/spacy/lang/ms/examples.py
index 1af439d4a5b..236e0c0f660 100644
--- a/spacy/lang/ms/examples.py
+++ b/spacy/lang/ms/examples.py
@@ -10,7 +10,7 @@
     "Berapa banyak pelajar yang akan menghadiri majlis perpisahan sekolah?",
     "Pengeluaran makanan berasal dari beberapa lokasi termasuk Cameron Highlands, Johor Bahru, dan Kuching.",
     "Syarikat XYZ telah menghasilkan 20,000 unit produk baharu dalam setahun terakhir",
-    "Kuala Lumpur merupakan ibu negara Malaysia." "Kau berada di mana semalam?",
+    "Kuala Lumpur merupakan ibu negara Malaysia.Kau berada di mana semalam?",
     "Siapa yang akan memimpin projek itu?",
     "Siapa perdana menteri Malaysia sekarang?",
 ]
diff --git a/spacy/lang/ms/stop_words.py b/spacy/lang/ms/stop_words.py
index fc85f83679a..b1bfaea796e 100644
--- a/spacy/lang/ms/stop_words.py
+++ b/spacy/lang/ms/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ada adalah adanya adapun agak agaknya agar akan akankah akhir akhiri akhirnya
 aku akulah amat amatlah anda andalah antar antara antaranya apa apaan apabila
 apakah apalagi apatah artinya asal asalkan atas atau ataukah ataupun awal
@@ -113,4 +114,5 @@
 waduh wah wahai waktu waktunya walau walaupun wong
 
 yaitu yakin yakni yang
-""".split())
+""".split()
+)
diff --git a/spacy/lang/nb/stop_words.py b/spacy/lang/nb/stop_words.py
index bc1c54a4af3..d9ed414efdf 100644
--- a/spacy/lang/nb/stop_words.py
+++ b/spacy/lang/nb/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 alle allerede alt and andre annen annet at av
 
 bak bare bedre beste blant ble bli blir blitt bris by både
@@ -45,4 +46,5 @@
 å år
 
 ønsker
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ne/stop_words.py b/spacy/lang/ne/stop_words.py
index 95d7a375821..8470297b9f0 100644
--- a/spacy/lang/ne/stop_words.py
+++ b/spacy/lang/ne/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/sanjaalcorps/NepaliStopWords/blob/master/NepaliStopWords.txt
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 अक्सर
 अगाडि
 अगाडी
@@ -489,4 +490,5 @@
 होइन
 होकि
 होला
-""".split())
+""".split()
+)
diff --git a/spacy/lang/nl/lex_attrs.py b/spacy/lang/nl/lex_attrs.py
index 1b8602831ae..488224c2f20 100644
--- a/spacy/lang/nl/lex_attrs.py
+++ b/spacy/lang/nl/lex_attrs.py
@@ -1,17 +1,21 @@
 from ...attrs import LIKE_NUM
 
-_num_words = set("""
+_num_words = set(
+    """
 nul een één twee drie vier vijf zes zeven acht negen tien elf twaalf dertien
 veertien twintig dertig veertig vijftig zestig zeventig tachtig negentig honderd
 duizend miljoen miljard biljoen biljard triljoen triljard
-""".split())
+""".split()
+)
 
-_ordinal_words = set("""
+_ordinal_words = set(
+    """
 eerste tweede derde vierde vijfde zesde zevende achtste negende tiende elfde
 twaalfde dertiende veertiende twintigste dertigste veertigste vijftigste
 zestigste zeventigste tachtigste negentigste honderdste duizendste miljoenste
 miljardste biljoenste biljardste triljoenste triljardste
-""".split())
+""".split()
+)
 
 
 def like_num(text):
diff --git a/spacy/lang/nl/stop_words.py b/spacy/lang/nl/stop_words.py
index a88c2905199..cd4fdefdf58 100644
--- a/spacy/lang/nl/stop_words.py
+++ b/spacy/lang/nl/stop_words.py
@@ -13,7 +13,8 @@
 # should have a Dutch counterpart here.
 
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 aan af al alle alles allebei alleen allen als altijd ander anders andere anderen aangaande aangezien achter achterna
 afgelopen aldus alhoewel anderzijds
 
@@ -67,4 +68,5 @@
 
 zal ze zei zelf zich zij zijn zo zonder zou zeer zeker zekere zelfde zelfs zichzelf zijnde zijne zo’n zoals zodra zouden
  zoveel zowat zulk zulke zulks zullen zult
-""".split())
+""".split()
+)
diff --git a/spacy/lang/pl/stop_words.py b/spacy/lang/pl/stop_words.py
index 4418deedc0b..075aec39167 100644
--- a/spacy/lang/pl/stop_words.py
+++ b/spacy/lang/pl/stop_words.py
@@ -1,6 +1,7 @@
 # sources: https://github.com/bieli/stopwords/blob/master/polish.stopwords.txt and https://github.com/stopwords-iso/stopwords-pl
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a aby ach acz aczkolwiek aj albo ale alez
 ależ ani az aż
 
@@ -73,4 +74,5 @@
 z za zaden zadna zadne zadnych zapewne zawsze zaś
 ze zeby znow znowu znów zostal został
 
-żaden żadna żadne żadnych że żeby""".split())
+żaden żadna żadne żadnych że żeby""".split()
+)
diff --git a/spacy/lang/pt/stop_words.py b/spacy/lang/pt/stop_words.py
index 722aef80236..ce3c86ff570 100644
--- a/spacy/lang/pt/stop_words.py
+++ b/spacy/lang/pt/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a à às área acerca ademais adeus agora ainda algo algumas alguns ali além ambas ambos antes
 ao aos apenas apoia apoio apontar após aquela aquelas aquele aqueles aqui aquilo
 as assim através atrás até aí
@@ -61,4 +62,5 @@
 vossas vosso vossos vários vão vêm vós
 
 zero
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ro/lex_attrs.py b/spacy/lang/ro/lex_attrs.py
index a5880fc2fac..736aa911ac6 100644
--- a/spacy/lang/ro/lex_attrs.py
+++ b/spacy/lang/ro/lex_attrs.py
@@ -1,13 +1,16 @@
 from ...attrs import LIKE_NUM
 
-_num_words = set("""
+_num_words = set(
+    """
 zero unu doi două trei patru cinci șase șapte opt nouă zece
 unsprezece doisprezece douăsprezece treisprezece patrusprezece cincisprezece șaisprezece șaptesprezece optsprezece nouăsprezece
 douăzeci treizeci patruzeci cincizeci șaizeci șaptezeci optzeci nouăzeci
 sută mie milion miliard bilion trilion cvadrilion catralion cvintilion sextilion septilion enșpemii
-""".split())
+""".split()
+)
 
-_ordinal_words = set("""
+_ordinal_words = set(
+    """
 primul doilea treilea patrulea cincilea șaselea șaptelea optulea nouălea zecelea
 prima doua treia patra cincia șasea șaptea opta noua zecea
 unsprezecelea doisprezecelea treisprezecelea patrusprezecelea cincisprezecelea șaisprezecelea șaptesprezecelea optsprezecelea nouăsprezecelea
@@ -15,7 +18,8 @@
 douăzecilea treizecilea patruzecilea cincizecilea șaizecilea șaptezecilea optzecilea nouăzecilea sutălea
 douăzecea treizecea patruzecea cincizecea șaizecea șaptezecea optzecea nouăzecea suta
 miilea mielea mia milionulea milioana miliardulea miliardelea miliarda enșpemia
-""".split())
+""".split()
+)
 
 
 def like_num(text):
diff --git a/spacy/lang/ro/stop_words.py b/spacy/lang/ro/stop_words.py
index c7c0801f171..d68a81c4569 100644
--- a/spacy/lang/ro/stop_words.py
+++ b/spacy/lang/ro/stop_words.py
@@ -1,5 +1,6 @@
 # Source: https://github.com/stopwords-iso/stopwords-ro
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a
 abia
 acea
@@ -494,4 +495,5 @@
 știu
 ți
 ție
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ru/lex_attrs.py b/spacy/lang/ru/lex_attrs.py
index 63b1cead810..e0b35bdc07f 100644
--- a/spacy/lang/ru/lex_attrs.py
+++ b/spacy/lang/ru/lex_attrs.py
@@ -1,6 +1,8 @@
 from ...attrs import LIKE_NUM
 
-_num_words = list(set("""
+_num_words = list(
+    set(
+        """
 ноль ноля нолю нолём ноле нулевой нулевого нулевому нулевым нулевом нулевая нулевую нулевое нулевые нулевых нулевыми 
 
 четверть четверти четвертью четвертей четвертям четвертями четвертях 
@@ -201,7 +203,9 @@
 квинтиллиону квинтиллионов квинтлн 
 
 i ii iii iv v vi vii viii ix x xi xii xiii xiv xv xvi xvii xviii xix xx xxi xxii xxiii xxiv xxv xxvi xxvii xxvii xxix
-""".split()))
+""".split()
+    )
+)
 
 
 def like_num(text):
diff --git a/spacy/lang/ru/stop_words.py b/spacy/lang/ru/stop_words.py
index 3040adb52b1..d6ea6b42af9 100644
--- a/spacy/lang/ru/stop_words.py
+++ b/spacy/lang/ru/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 а авось ага агу аж ай али алло ау ах ая
 
 б будем будет будете будешь буду будут будучи будь будьте бы был была были было
@@ -106,4 +107,5 @@
 ю
 
 я явно явных яко якобы якоже
-""".split())
+""".split()
+)
diff --git a/spacy/lang/sa/stop_words.py b/spacy/lang/sa/stop_words.py
index eaf0ffaa2c9..30302a14dcb 100644
--- a/spacy/lang/sa/stop_words.py
+++ b/spacy/lang/sa/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://gist.github.com/Akhilesh28/fe8b8e180f64b72e64751bc31cb6d323
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 अहम्
 आवाम्
 वयम्
@@ -510,4 +511,5 @@
 ह
 हन्त
 हि
-""".split())
+""".split()
+)
diff --git a/spacy/lang/si/stop_words.py b/spacy/lang/si/stop_words.py
index acae5763b52..7d29bc1b4d8 100644
--- a/spacy/lang/si/stop_words.py
+++ b/spacy/lang/si/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 සහ
 සමග
 සමඟ
@@ -190,4 +191,5 @@
 ලෙස
 පරිදි
 එහෙත්
-""".split())
+""".split()
+)
diff --git a/spacy/lang/sk/stop_words.py b/spacy/lang/sk/stop_words.py
index 6ef4818c3a2..017e7beef39 100644
--- a/spacy/lang/sk/stop_words.py
+++ b/spacy/lang/sk/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/Ardevop-sk/stopwords-sk
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a
 aby
 aj
@@ -419,4 +420,5 @@
 ňou
 ňu
 že
-""".split())
+""".split()
+)
diff --git a/spacy/lang/sl/lex_attrs.py b/spacy/lang/sl/lex_attrs.py
index 6d6b40b4546..3c1493050a1 100644
--- a/spacy/lang/sl/lex_attrs.py
+++ b/spacy/lang/sl/lex_attrs.py
@@ -2,7 +2,8 @@
 
 from ...attrs import IS_CURRENCY, LIKE_NUM
 
-_num_words = set("""
+_num_words = set(
+    """
 	nula ničla nič ena dva tri štiri pet šest sedem osem
 	devet deset enajst dvanajst trinajst štirinajst petnajst
 	šestnajst sedemnajst osemnajst devetnajst dvajset trideset štirideset
@@ -17,9 +18,11 @@
 	šestnajstih šestnajstim šestnajstimi petnajstih petnajstim petnajstimi
 	sedemnajstih sedemnajstim sedemnajstimi osemnajstih osemnajstim osemnajstimi
 	devetnajstih devetnajstim devetnajstimi dvajsetih dvajsetim dvajsetimi  
-	""".split())
+	""".split()
+)
 
-_ordinal_words = set("""
+_ordinal_words = set(
+    """
 	prvi drugi tretji četrti peti šesti sedmi osmi
 	deveti deseti enajsti dvanajsti trinajsti štirinajsti
 	petnajsti šestnajsti sedemnajsti osemnajsti devetnajsti
@@ -89,9 +92,11 @@
 	osemnajstimi devetnajstimi dvajsetimi tridesetimi štiridesetimi petdesetimi šestdesetimi
 	sedemdesetimi osemdesetimi devetdesetimi stotimi tisočimi milijontimi bilijontimi
 	trilijontimi kvadrilijontimi neštetimi
-	""".split())
+	""".split()
+)
 
-_currency_words = set("""
+_currency_words = set(
+    """
 	evro evra evru evrom evrov evroma evrih evrom evre evri evr eur
 	cent centa centu cenom centov centoma centih centom cente centi
 	dolar dolarja dolarji dolarju dolarjem dolarjev dolarjema dolarjih dolarje usd
@@ -104,7 +109,8 @@
 	jen jena jeni jenu jenom jenov jenoma jenih jene
 	kuna kuni kune kuno kun kunama kunah kunam kunami
 	marka marki marke markama markah markami 
-	""".split())
+	""".split()
+)
 
 
 def like_num(text):
diff --git a/spacy/lang/sl/stop_words.py b/spacy/lang/sl/stop_words.py
index a81c00db269..8491efcb580 100644
--- a/spacy/lang/sl/stop_words.py
+++ b/spacy/lang/sl/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/stopwords-iso/stopwords-sl
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a ali 
 
 b bi bil bila bile bili bilo biti blizu bo bodo bojo bolj bom bomo 
@@ -79,4 +80,5 @@
 z za zadaj zadnji zakaj zaprta zaprti zaprto zdaj zelo zunaj
 
 ž že
-""".split())
+""".split()
+)
diff --git a/spacy/lang/sq/stop_words.py b/spacy/lang/sq/stop_words.py
index bf1c7a7039c..f2b1a4f4a7b 100644
--- a/spacy/lang/sq/stop_words.py
+++ b/spacy/lang/sq/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/andrixh/index-albanian
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a
 afert
 ai
@@ -224,4 +225,5 @@
 vjen
 yne
 zakonisht
-""".split())
+""".split()
+)
diff --git a/spacy/lang/sr/stop_words.py b/spacy/lang/sr/stop_words.py
index 758964a5853..5df5509d2c4 100644
--- a/spacy/lang/sr/stop_words.py
+++ b/spacy/lang/sr/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 а
 авај
 ако
@@ -388,4 +389,5 @@
 ћете
 ћеш
 ћу
-""".split())
+""".split()
+)
diff --git a/spacy/lang/sv/stop_words.py b/spacy/lang/sv/stop_words.py
index 08251bcff32..2422b2a9e5a 100644
--- a/spacy/lang/sv/stop_words.py
+++ b/spacy/lang/sv/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 aderton adertonde adjö aldrig alla allas allt alltid alltså än andra andras
 annan annat ännu artonde arton åtminstone att åtta åttio åttionde åttonde av
 även
@@ -61,4 +62,5 @@
 vad vänster vänstra var vår vara våra varför varifrån varit varken värre
 varsågod vart vårt vem vems verkligen vi vid vidare viktig viktigare viktigast
 viktigt vilka vilken vilket vill
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ta/stop_words.py b/spacy/lang/ta/stop_words.py
index d6ef21f3b0a..abbff949d79 100644
--- a/spacy/lang/ta/stop_words.py
+++ b/spacy/lang/ta/stop_words.py
@@ -1,6 +1,7 @@
 # Stop words
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ஒரு
 என்று
 மற்றும்
@@ -126,4 +127,5 @@
 வரையில்
 சற்று
 எனக்
-""".split())
+""".split()
+)
diff --git a/spacy/lang/te/stop_words.py b/spacy/lang/te/stop_words.py
index d2834260898..b18dab697da 100644
--- a/spacy/lang/te/stop_words.py
+++ b/spacy/lang/te/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/Xangis/extra-stopwords (MIT License)
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 అందరూ
 అందుబాటులో
 అడగండి
@@ -51,4 +52,5 @@
 వేరుగా
 వ్యతిరేకంగా
 సంబంధం
-""".split())
+""".split()
+)
diff --git a/spacy/lang/th/stop_words.py b/spacy/lang/th/stop_words.py
index 3dd6e56525b..2823281ce95 100644
--- a/spacy/lang/th/stop_words.py
+++ b/spacy/lang/th/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ทั้งนี้ ดัง ขอ รวม หลังจาก เป็น หลัง หรือ ๆ เกี่ยวกับ ซึ่งได้แก่ ด้วยเพราะ ด้วยว่า ด้วยเหตุเพราะ
 ด้วยเหตุว่า สุดๆ เสร็จแล้ว เช่น เข้า ถ้า ถูก ถึง ต่างๆ ใคร เปิดเผย ครา รือ ตาม ใน ได้แก่ ได้แต่
 ได้ที่ ตลอดถึง นอกจากว่า นอกนั้น จริง อย่างดี ส่วน เพียงเพื่อ เดียว จัด ทั้งที ทั้งคน ทั้งตัว ไกลๆ
@@ -70,4 +71,5 @@
 แห่งนี้ แห่งโน้น แห่งไหน แหละ ให้แก่ ใหญ่ ใหญ่โต อย่างมาก อย่างยิ่ง อย่างไรก็ อย่างไรก็ได้ อย่างไรเสีย
 อย่างละ อย่างหนึ่ง อย่างๆ อัน อันจะ อันได้แก่ อันที่ อันที่จริง อันที่จะ อันเนื่องมาจาก อันละ อันๆ อาจจะ
 อาจเป็น อาจเป็นด้วย อื่น อื่นๆ เอ็ง เอา ฯ ฯล ฯลฯ 555 กำ ขอโทษ เยี่ยม นี่คือ
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ti/stop_words.py b/spacy/lang/ti/stop_words.py
index e0aaf47d3fe..9bd7122007a 100644
--- a/spacy/lang/ti/stop_words.py
+++ b/spacy/lang/ti/stop_words.py
@@ -1,7 +1,8 @@
 # Stop words from Tigrinya Wordcount: https://github.com/fgaim/Tigrinya-WordCount/blob/main/ti_stop_words.txt
 
 # Stop words
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 'ምበር 'ሞ 'ቲ 'ታ 'ኳ 'ውን 'ዚ 'የ 'ዩ 'ያ 'ዮም 'ዮን
 ልዕሊ ሒዙ ሒዛ ሕጂ መበል መን መንጎ መጠን ማለት ምስ ምባል
 ምእንቲ ምኽንያቱ ምኽንያት ምዃኑ ምዃንና ምዃኖም
@@ -22,4 +23,5 @@
 ዝነበረ ዝነበረት ዝነበሩ ዝካየድ ዝኸውን ዝኽእል ዝኾነ ዝዀነ
 የለን ይቕረብ ይብል ይኸውን ይኹን ይኽእል ደኣ ድሕሪ ድማ
 ገለ ገሊጹ ገና ገይሩ ግና ግን ጥራይ
-""".split())
+""".split()
+)
diff --git a/spacy/lang/tl/stop_words.py b/spacy/lang/tl/stop_words.py
index a7bf541990a..2560cdaed6a 100644
--- a/spacy/lang/tl/stop_words.py
+++ b/spacy/lang/tl/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 akin
 aking
 ako
@@ -146,4 +147,5 @@
 tungkol
 una
 walang
-""".split())
+""".split()
+)
diff --git a/spacy/lang/tn/stop_words.py b/spacy/lang/tn/stop_words.py
index a63a455f754..f614771dd11 100644
--- a/spacy/lang/tn/stop_words.py
+++ b/spacy/lang/tn/stop_words.py
@@ -1,5 +1,6 @@
 # Stop words
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ke gareng ga selekanyo tlhwatlhwa yo mongwe se
 sengwe fa go le jalo gongwe ba na mo tikologong
 jaaka kwa morago nna gonne ka sa pele nako teng
@@ -15,4 +16,5 @@
 bonala e tshwanang bogolo tsenya tsweetswee karolo
 sepe tlhalosa dirwa robedi robongwe lesomenngwe gaisa
 tlhano lesometlhano botlalo lekgolo
-""".split())
+""".split()
+)
diff --git a/spacy/lang/tokenizer_exceptions.py b/spacy/lang/tokenizer_exceptions.py
index e80423e5150..b7d91d86f0d 100644
--- a/spacy/lang/tokenizer_exceptions.py
+++ b/spacy/lang/tokenizer_exceptions.py
@@ -34,11 +34,11 @@
     # host & domain names
     # mods: match is case-sensitive, so include [A-Z]
     r"(?:"  # noqa: E131
-      r"(?:"  # noqa: E131
-        r"[A-Za-z0-9\u00a1-\uffff]"  # noqa: E131
-        r"[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
-      r")?"
-      r"[A-Za-z0-9\u00a1-\uffff]\."
+    r"(?:"  # noqa: E131
+    r"[A-Za-z0-9\u00a1-\uffff]"  # noqa: E131
+    r"[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
+    r")?"
+    r"[A-Za-z0-9\u00a1-\uffff]\."
     r")+"
     # TLD identifier
     # mods: use ALPHA_LOWER instead of a wider range so that this doesn't match
@@ -111,7 +111,8 @@
     BASE_EXCEPTIONS[orth] = [{ORTH: orth}]
 
 
-emoticons = set(r"""
+emoticons = set(
+    r"""
 :)
 :-)
 :))
@@ -242,7 +243,8 @@
 ¯\(ツ)/¯
 (╯°□°）╯︵┻━┻
 ><(((*>
-""".split())
+""".split()
+)
 
 
 for orth in emoticons:
diff --git a/spacy/lang/tr/stop_words.py b/spacy/lang/tr/stop_words.py
index 5323cf32d9c..85dcff6a53a 100644
--- a/spacy/lang/tr/stop_words.py
+++ b/spacy/lang/tr/stop_words.py
@@ -1,5 +1,6 @@
 # Source: https://github.com/stopwords-iso/stopwords-tr
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 acaba
 acep
 adamakıllı
@@ -552,4 +553,5 @@
 zaten
 zati
 zira
-""".split())
+""".split()
+)
diff --git a/spacy/lang/tt/stop_words.py b/spacy/lang/tt/stop_words.py
index 8f146d9150a..44169b757e5 100644
--- a/spacy/lang/tt/stop_words.py
+++ b/spacy/lang/tt/stop_words.py
@@ -1,6 +1,7 @@
 # Tatar stopwords are from https://github.com/aliiae/stopwords-tt
 
-STOP_WORDS = set("""алай алайса алар аларга аларда алардан аларны аларның аларча
+STOP_WORDS = set(
+    """алай алайса алар аларга аларда алардан аларны аларның аларча
 алары аларын аларынга аларында аларыннан аларының алтмыш алтмышынчы алтмышынчыга
 алтмышынчыда алтмышынчыдан алтмышынчылар алтмышынчыларга алтмышынчыларда
 алтмышынчылардан алтмышынчыларны алтмышынчыларның алтмышынчыны алтмышынчының
@@ -168,4 +169,5 @@
 
 өстәп өч өчен өченче өченчегә өченчедә өченчедән өченчеләр өченчеләргә
 өченчеләрдә өченчеләрдән өченчеләрне өченчеләрнең өченчене өченченең өчләп
-өчәрләп""".split())
+өчәрләп""".split()
+)
diff --git a/spacy/lang/uk/stop_words.py b/spacy/lang/uk/stop_words.py
index 517c300070a..b11d7a044a3 100644
--- a/spacy/lang/uk/stop_words.py
+++ b/spacy/lang/uk/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""а
+STOP_WORDS = set(
+    """а
 або
 адже
 аж
@@ -464,4 +465,5 @@
 якій
 якого
 якої
-якщо""".split())
+якщо""".split()
+)
diff --git a/spacy/lang/ur/lex_attrs.py b/spacy/lang/ur/lex_attrs.py
index 916a47bfd19..e590ed3e303 100644
--- a/spacy/lang/ur/lex_attrs.py
+++ b/spacy/lang/ur/lex_attrs.py
@@ -5,8 +5,7 @@
 # https://en.wikibooks.org/wiki/Urdu/Vocabulary/Numbers
 # https://www.urdu-english.com/lessons/beginner/numbers
 
-_num_words = (
-    """ایک دو تین چار پانچ چھ سات آٹھ نو دس گیارہ بارہ تیرہ چودہ پندرہ سولہ سترہ
+_num_words = """ایک دو تین چار پانچ چھ سات آٹھ نو دس گیارہ بارہ تیرہ چودہ پندرہ سولہ سترہ
  اٹهارا انیس بیس اکیس بائیس تئیس چوبیس پچیس چھببیس
 ستایس اٹھائس انتيس تیس اکتیس بتیس تینتیس چونتیس پینتیس
  چھتیس سینتیس ارتیس انتالیس چالیس اکتالیس بیالیس تیتالیس
@@ -18,7 +17,6 @@
  سٹیاسی اٹھیاسی نواسی نوے اکانوے بانوے ترانوے
 چورانوے پچانوے چھیانوے ستانوے اٹھانوے ننانوے سو
 """.split()
-)
 
 # source https://www.google.com/intl/ur/inputtools/try/
 
diff --git a/spacy/lang/ur/stop_words.py b/spacy/lang/ur/stop_words.py
index 00f0dd2d6b4..abfa3649713 100644
--- a/spacy/lang/ur/stop_words.py
+++ b/spacy/lang/ur/stop_words.py
@@ -1,5 +1,6 @@
 # Source: collected from different resource on internet
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ثھی
 خو
 گی
@@ -508,4 +509,5 @@
 ہورہی
 ثبعث
 ضت
-""".split())
+""".split()
+)
diff --git a/spacy/lang/vi/stop_words.py b/spacy/lang/vi/stop_words.py
index 9163e10938e..3481701d5ea 100644
--- a/spacy/lang/vi/stop_words.py
+++ b/spacy/lang/vi/stop_words.py
@@ -1,5 +1,6 @@
 # Source: https://github.com/stopwords/vietnamese-stopwords
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a_lô
 a_ha
 ai
@@ -1942,4 +1943,5 @@
 ừ_ào
 ừ_ừ
 ử
-""".split("\n"))
+""".split("\n")
+)
diff --git a/spacy/lang/zh/stop_words.py b/spacy/lang/zh/stop_words.py
index d54fe689504..42ae4a1de04 100644
--- a/spacy/lang/zh/stop_words.py
+++ b/spacy/lang/zh/stop_words.py
@@ -1,6 +1,7 @@
 # stop words as whitespace-separated list
 # Chinese stop words,maybe not enough
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 !
 "
 #
@@ -1894,4 +1895,5 @@
 ～±
 ～＋
 ￥
-""".split())
+""".split()
+)
diff --git a/spacy/language.py b/spacy/language.py
index dcf436c65fe..ea9ba3cae8b 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1589,9 +1589,7 @@ def pipe(  # noqa: F811
         if batch_size is None:
             batch_size = self.batch_size
 
-        pipes = (
-            []
-        )  # contains functools.partial objects to easily create multiprocess worker.
+        pipes = []  # contains functools.partial objects to easily create multiprocess worker.
         for name, proc in self.pipeline:
             if name in disable:
                 continue
@@ -1626,7 +1624,11 @@ def _has_gpu_model(self, disable: Iterable[str]):
             if name in disable or not is_trainable:
                 continue
 
-            if hasattr(proc, "model") and hasattr(proc.model, "ops") and isinstance(proc.model.ops, CupyOps):  # type: ignore
+            if (
+                hasattr(proc, "model")
+                and hasattr(proc.model, "ops")
+                and isinstance(proc.model.ops, CupyOps)
+            ):  # type: ignore
                 return True
 
         return False
diff --git a/spacy/matcher/dependencymatcher.pyi b/spacy/matcher/dependencymatcher.pyi
index d84a30a58b0..3d744dfce4b 100644
--- a/spacy/matcher/dependencymatcher.pyi
+++ b/spacy/matcher/dependencymatcher.pyi
@@ -48,10 +48,12 @@ class DependencyMatcher:
         *,
         on_match: Optional[
             Callable[[DependencyMatcher, Doc, int, List[Tuple[int, List[int]]]], Any]
-        ] = ...
+        ] = ...,
     ) -> None: ...
     def has_key(self, key: Union[str, int]) -> bool: ...
-    def get(self, key: Union[str, int], default: Optional[Any] = ...) -> Tuple[
+    def get(
+        self, key: Union[str, int], default: Optional[Any] = ...
+    ) -> Tuple[
         Optional[
             Callable[[DependencyMatcher, Doc, int, List[Tuple[int, List[int]]]], Any]
         ],
diff --git a/spacy/matcher/matcher.pyi b/spacy/matcher/matcher.pyi
index c33b534cbd2..e474d250d22 100644
--- a/spacy/matcher/matcher.pyi
+++ b/spacy/matcher/matcher.pyi
@@ -33,7 +33,7 @@ class Matcher:
         on_match: Optional[
             Callable[[Matcher, Doc, int, List[Tuple[Any, ...]]], Any]
         ] = ...,
-        greedy: Optional[str] = ...
+        greedy: Optional[str] = ...,
     ) -> None: ...
     def remove(self, key: str) -> None: ...
     def has_key(self, key: Union[str, int]) -> bool: ...
@@ -56,7 +56,7 @@ class Matcher:
         *,
         as_spans: Literal[False] = ...,
         allow_missing: bool = ...,
-        with_alignments: bool = ...
+        with_alignments: bool = ...,
     ) -> List[Tuple[int, int, int]]: ...
     @overload
     def __call__(
@@ -65,6 +65,6 @@ class Matcher:
         *,
         as_spans: Literal[True],
         allow_missing: bool = ...,
-        with_alignments: bool = ...
+        with_alignments: bool = ...,
     ) -> List[Span]: ...
     def _normalize_key(self, key: Any) -> Any: ...
diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py
index 8b12720db20..752d1c4433c 100644
--- a/spacy/ml/models/entity_linker.py
+++ b/spacy/ml/models/entity_linker.py
@@ -122,7 +122,7 @@ def create_candidates() -> Callable[[KnowledgeBase, Span], Iterable[Candidate]]:
     return get_candidates
 
 
-def create_candidates_batch() -> (
-    Callable[[KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]]
-):
+def create_candidates_batch() -> Callable[
+    [KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]
+]:
     return get_candidates_batch
diff --git a/spacy/ml/staticvectors.py b/spacy/ml/staticvectors.py
index 122ef379544..bc69e53ab24 100644
--- a/spacy/ml/staticvectors.py
+++ b/spacy/ml/staticvectors.py
@@ -19,7 +19,7 @@ def StaticVectors(
     *,
     dropout: Optional[float] = None,
     init_W: Callable = glorot_uniform_init,
-    key_attr: str = "ORTH"
+    key_attr: str = "ORTH",
 ) -> Model[List[Doc], Ragged]:
     """Embed Doc objects with their vocab's vectors table, applying a learned
     linear projection to control the dimensionality. If a dropout rate is
diff --git a/spacy/pipeline/_edit_tree_internals/schemas.py b/spacy/pipeline/_edit_tree_internals/schemas.py
index d20945dc5f1..ef7a076b6cd 100644
--- a/spacy/pipeline/_edit_tree_internals/schemas.py
+++ b/spacy/pipeline/_edit_tree_internals/schemas.py
@@ -1,7 +1,16 @@
 from collections import defaultdict
 from typing import Any, Dict, List, Union
 
-from pydantic import BaseModel, ConfigDict, Field, RootModel, StrictBool, StrictInt, StrictStr, ValidationError
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    RootModel,
+    StrictBool,
+    StrictInt,
+    StrictStr,
+    ValidationError,
+)
 
 
 class MatchNodeSchema(BaseModel):
diff --git a/spacy/pipeline/attributeruler.py b/spacy/pipeline/attributeruler.py
index fd037480cb2..aeb0672c7c7 100644
--- a/spacy/pipeline/attributeruler.py
+++ b/spacy/pipeline/attributeruler.py
@@ -19,7 +19,9 @@
 from .pipe import Pipe
 
 MatcherPatternType = List[Dict[Union[int, str], Any]]
-AttributeRulerPatternType = Dict[str, Union[List[MatcherPatternType], MatcherPatternType, Dict, int]]
+AttributeRulerPatternType = Dict[
+    str, Union[List[MatcherPatternType], MatcherPatternType, Dict, int]
+]
 TagMapType = Dict[str, Dict[Union[int, str], Union[int, str]]]
 MorphRulesType = Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]
 
@@ -137,7 +139,8 @@ def match(self, doc: Doc):
         matches = self.matcher(doc, allow_missing=True, as_spans=False)
         # Sort by the attribute ID, so that later rules have precedence
         matches = [
-            (int(self.vocab.strings[m_id]), m_id, s, e) for m_id, s, e in matches  # type: ignore
+            (int(self.vocab.strings[m_id]), m_id, s, e)
+            for m_id, s, e in matches  # type: ignore
         ]
         matches.sort()
         return matches
diff --git a/spacy/schemas.py b/spacy/schemas.py
index c0ecee314ee..e3200348013 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -299,7 +299,11 @@ class TokenPattern(BaseModel):
     op: Optional[TokenPatternOperator] = None
     underscore: Optional[Dict[StrictStr, UnderscoreValue]] = Field(None, alias="_")
 
-    model_config = ConfigDict(extra="forbid", populate_by_name=True, alias_generator=lambda value: value.upper())
+    model_config = ConfigDict(
+        extra="forbid",
+        populate_by_name=True,
+        alias_generator=lambda value: value.upper(),
+    )
 
     @field_validator("*", mode="before")
     @classmethod
diff --git a/spacy/tests/lang/et/test_tokenizer.py b/spacy/tests/lang/et/test_tokenizer.py
index f0f8079cae8..8bee2288033 100644
--- a/spacy/tests/lang/et/test_tokenizer.py
+++ b/spacy/tests/lang/et/test_tokenizer.py
@@ -2,8 +2,7 @@
 
 ET_BASIC_TOKENIZATION_TESTS = [
     (
-        "Kedagi ei või piinata ega ebainimlikult või alandavalt kohelda "
-        "ega karistada.",
+        "Kedagi ei või piinata ega ebainimlikult või alandavalt kohelda ega karistada.",
         [
             "Kedagi",
             "ei",
diff --git a/spacy/tests/matcher/test_pattern_validation.py b/spacy/tests/matcher/test_pattern_validation.py
index 554522fe8a1..e0dc7d5a1dd 100644
--- a/spacy/tests/matcher/test_pattern_validation.py
+++ b/spacy/tests/matcher/test_pattern_validation.py
@@ -10,7 +10,11 @@
     # Bad patterns flagged in all cases
     ([{"XX": "foo"}], 1, 1),
     ([{"IS_ALPHA": {"==": True}}, {"LIKE_NUM": None}], 2, 1),
-    ([{"IS_PUNCT": True, "OP": "$"}], 2, 1),  # v2: union reports 2 errors (enum + pattern)
+    (
+        [{"IS_PUNCT": True, "OP": "$"}],
+        2,
+        1,
+    ),  # v2: union reports 2 errors (enum + pattern)
     ([{"_": "foo"}], 1, 1),
     ('[{"TEXT": "foo"}, {"LOWER": "bar"}]', 1, 1),
     ([{"ENT_IOB": "foo"}], 1, 1),
@@ -24,10 +28,22 @@
     ([{"TEXT": "foo", "OP": "{1, 3}"}], 2, 1),  # v2: union reports 2 errors
     ([{"TEXT": "foo", "OP": "{-2}"}], 2, 1),  # v2: union reports 2 errors
     # Bad patterns flagged outside of Matcher
-    ([{"_": {"foo": "bar", "baz": {"IN": "foo"}}}], 7, 0),  # v2: more detailed union errors
+    (
+        [{"_": {"foo": "bar", "baz": {"IN": "foo"}}}],
+        7,
+        0,
+    ),  # v2: more detailed union errors
     # Bad patterns not flagged with minimal checks
-    ([{"LENGTH": "2", "TEXT": 2}, {"LOWER": "test"}], 5, 0),  # v2: more detailed union errors
-    ([{"LENGTH": {"IN": [1, 2, "3"]}}, {"POS": {"IN": "VERB"}}], 5, 0),  # v2: more detailed union errors
+    (
+        [{"LENGTH": "2", "TEXT": 2}, {"LOWER": "test"}],
+        5,
+        0,
+    ),  # v2: more detailed union errors
+    (
+        [{"LENGTH": {"IN": [1, 2, "3"]}}, {"POS": {"IN": "VERB"}}],
+        5,
+        0,
+    ),  # v2: more detailed union errors
     ([{"LENGTH": {"VALUE": 5}}], 3, 0),  # v2: more detailed union errors
     ([{"TEXT": {"VALUE": "foo"}}], 2, 0),
     ([{"IS_DIGIT": -1}], 1, 0),
diff --git a/spacy/tests/package/test_requirements.py b/spacy/tests/package/test_requirements.py
index e7df02e9769..b9276441222 100644
--- a/spacy/tests/package/test_requirements.py
+++ b/spacy/tests/package/test_requirements.py
@@ -59,9 +59,9 @@ def test_build_dependencies():
             lib, v = _parse_req(line)
             if lib and not lib.startswith("cupy") and lib not in libs_ignore_setup:
                 req_v = req_dict.get(lib, None)
-                assert (
-                    req_v is not None
-                ), "{} in setup.cfg but not in requirements.txt".format(lib)
+                assert req_v is not None, (
+                    "{} in setup.cfg but not in requirements.txt".format(lib)
+                )
                 assert (lib + v) == (lib + req_v), (
                     "{} has different version in setup.cfg and in requirements.txt: "
                     "{} and {} respectively".format(lib, v, req_v)
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index 1b6f49f4cde..5e50a4d2801 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -496,15 +496,15 @@ def get_lowercased_candidates_batch(kb, spans):
         return [get_lowercased_candidates(kb, span) for span in spans]
 
     @registry.misc("spacy.LowercaseCandidateGenerator.v1")
-    def create_candidates() -> (
-        Callable[[InMemoryLookupKB, "Span"], Iterable[Candidate]]
-    ):
+    def create_candidates() -> Callable[
+        [InMemoryLookupKB, "Span"], Iterable[Candidate]
+    ]:
         return get_lowercased_candidates
 
     @registry.misc("spacy.LowercaseCandidateBatchGenerator.v1")
-    def create_candidates_batch() -> (
-        Callable[[InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[Candidate]]]
-    ):
+    def create_candidates_batch() -> Callable[
+        [InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[Candidate]]
+    ]:
         return get_lowercased_candidates_batch
 
     # replace the pipe with a new one with with a different candidate generator
diff --git a/spacy/tests/pipeline/test_sentencizer.py b/spacy/tests/pipeline/test_sentencizer.py
index 9b1ddd53012..826086fc7fe 100644
--- a/spacy/tests/pipeline/test_sentencizer.py
+++ b/spacy/tests/pipeline/test_sentencizer.py
@@ -135,14 +135,38 @@ def test_sentencizer_serialize_bytes(en_vocab):
     # fmt: off
     "lang,text",
     [
-        ('bn', 'বাংলা ভাষা (বাঙলা, বাঙ্গলা, তথা বাঙ্গালা নামগুলোতেও পরিচিত) একটি ইন্দো-আর্য ভাষা, যা দক্ষিণ এশিয়ার বাঙালি জাতির প্রধান কথ্য ও লেখ্য ভাষা। মাতৃভাষীর সংখ্যায় বাংলা ইন্দো-ইউরোপীয় ভাষা পরিবারের চতুর্থ ও বিশ্বের ষষ্ঠ বৃহত্তম ভাষা।[৫] মোট ব্যবহারকারীর সংখ্যা অনুসারে বাংলা বিশ্বের সপ্তম বৃহত্তম ভাষা। বাংলা সার্বভৌম ভাষাভিত্তিক জাতিরাষ্ট্র বাংলাদেশের একমাত্র রাষ্ট্রভাষা তথা সরকারি ভাষা[৬] এবং ভারতের পশ্চিমবঙ্গ, ত্রিপুরা, আসামের বরাক উপত্যকার সরকারি ভাষা। বঙ্গোপসাগরে অবস্থিত আন্দামান দ্বীপপুঞ্জের প্রধান কথ্য ভাষা বাংলা। এছাড়া ভারতের ঝাড়খণ্ড, বিহার, মেঘালয়, মিজোরাম, উড়িষ্যা রাজ্যগুলোতে উল্লেখযোগ্য পরিমাণে বাংলাভাষী জনগণ রয়েছে। ভারতে হিন্দির পরেই সর্বাধিক প্রচলিত ভাষা বাংলা।[৭][৮] এছাড়াও মধ্য প্রাচ্য, আমেরিকা ও ইউরোপে উল্লেখযোগ্য পরিমাণে বাংলাভাষী অভিবাসী রয়েছে।[৯] সারা বিশ্বে সব মিলিয়ে ২৬ কোটির অধিক লোক দৈনন্দিন জীবনে বাংলা ব্যবহার করে।[২] বাংলাদেশের জাতীয় সঙ্গীত এবং ভারতের জাতীয় সঙ্গীত ও স্তোত্র বাংলাতে রচিত।'),
-        ('de', 'Die deutsche Sprache bzw. Deutsch ([dɔʏ̯t͡ʃ]; abgekürzt dt. oder dtsch.) ist eine westgermanische Sprache. Ihr Sprachraum umfasst Deutschland, Österreich, die Deutschschweiz, Liechtenstein, Luxemburg, Ostbelgien, Südtirol, das Elsass und Lothringen sowie Nordschleswig. Außerdem ist sie eine Minderheitensprache in einigen europäischen und außereuropäischen Ländern, z. B. in Rumänien und Südafrika, sowie Nationalsprache im afrikanischen Namibia.'),
-        ('hi', 'हिन्दी विश्व की एक प्रमुख भाषा है एवं भारत की राजभाषा है। केन्द्रीय स्तर पर भारत में दूसरी आधिकारिक भाषा अंग्रेजी है। यह हिंदुस्तानी भाषा की एक मानकीकृत रूप है जिसमें संस्कृत के तत्सम तथा तद्भव शब्दों का प्रयोग अधिक है और अरबी-फ़ारसी शब्द कम हैं। हिंदी संवैधानिक रूप से भारत की राजभाषा और भारत की सबसे अधिक बोली और समझी जाने वाली भाषा है। हालाँकि, हिन्दी भारत की राष्ट्रभाषा नहीं है,[3] क्योंकि भारत के संविधान में कोई भी भाषा को ऐसा दर्जा नहीं दिया गया था।[4][5] चीनी के बाद यह विश्व में सबसे अधिक बोली जाने वाली भाषा भी है। विश्व आर्थिक मंच की गणना के अनुसार यह विश्व की दस शक्तिशाली भाषाओं में से एक है।[6]'),
-        ('kn', 'ದ್ರಾವಿಡ ಭಾಷೆಗಳಲ್ಲಿ ಪ್ರಾಮುಖ್ಯವುಳ್ಳ ಭಾಷೆಯೂ ಭಾರತದ ಪುರಾತನವಾದ ಭಾಷೆಗಳಲ್ಲಿ ಒಂದೂ ಆಗಿರುವ ಕನ್ನಡ ಭಾಷೆಯನ್ನು ಅದರ ವಿವಿಧ ರೂಪಗಳಲ್ಲಿ ಸುಮಾರು ೪೫ ದಶಲಕ್ಷ ಜನರು ಆಡು ನುಡಿಯಾಗಿ ಬಳಸುತ್ತಲಿದ್ದಾರೆ. ಕನ್ನಡ ಕರ್ನಾಟಕ ರಾಜ್ಯದ ಆಡಳಿತ ಭಾಷೆ.[೧೧] ಜಗತ್ತಿನಲ್ಲಿ ಅತ್ಯಂತ ಹೆಚ್ಚು ಮಂದಿ ಮಾತನಾಡುವ ಭಾಷೆಯೆಂಬ ನೆಲೆಯಲ್ಲಿ ಇಪ್ಪತೊಂಬತ್ತನೆಯ ಸ್ಥಾನ ಕನ್ನಡಕ್ಕಿದೆ. ೨೦೧೧ರ ಜನಗಣತಿಯ ಪ್ರಕಾರ ಜಗತ್ತಿನಲ್ಲಿ ೬.೪ ಕೋಟಿ ಜನಗಳು ಕನ್ನಡ ಮಾತನಾಡುತ್ತಾರೆ ಎಂದು ತಿಳಿದುಬಂದಿದೆ. ಇವರಲ್ಲಿ ೫.೫ ಕೋಟಿ ಜನಗಳ ಮಾತೃಭಾಷೆ ಕನ್ನಡವಾಗಿದೆ. ಬ್ರಾಹ್ಮಿ ಲಿಪಿಯಿಂದ ರೂಪುಗೊಂಡ ಕನ್ನಡ ಲಿಪಿಯನ್ನು ಉಪಯೋಗಿಸಿ ಕನ್ನಡ ಭಾಷೆಯನ್ನು ಬರೆಯಲಾಗುತ್ತದೆ. ಕನ್ನಡ ಬರಹದ ಮಾದರಿಗಳಿಗೆ ಸಾವಿರದ ಐನೂರು ವರುಷಗಳ ಚರಿತ್ರೆಯಿದೆ. ಕ್ರಿ.ಶ. ಆರನೆಯ ಶತಮಾನದ ಪಶ್ಚಿಮ ಗಂಗ ಸಾಮ್ರಾಜ್ಯದ ಕಾಲದಲ್ಲಿ [೧೨] ಮತ್ತು ಒಂಬತ್ತನೆಯ ಶತಮಾನದ ರಾಷ್ಟ್ರಕೂಟ ಸಾಮ್ರಾಜ್ಯದ ಕಾಲದಲ್ಲಿ ಹಳಗನ್ನಡ ಸಾಹಿತ್ಯ ಅತ್ಯಂತ ಹೆಚ್ಚಿನ ರಾಜಾಶ್ರಯ ಪಡೆಯಿತು.[೧೩][೧೪] ಅದಲ್ಲದೆ ಸಾವಿರ ವರುಷಗಳ ಸಾಹಿತ್ಯ ಪರಂಪರೆ ಕನ್ನಡಕ್ಕಿದೆ.[೧೫]ವಿನೋಬಾ ಭಾವೆ ಕನ್ನಡ ಲಿಪಿಯನ್ನು ಲಿಪಿಗಳ ರಾಣಿಯೆಂದು ಹೊಗಳಿದ್ದಾರೆ.[ಸೂಕ್ತ ಉಲ್ಲೇಖನ ಬೇಕು]'),
-        ('si', 'ශ්‍රී ලංකාවේ ප්‍රධාන ජාතිය වන සිංහල ජනයාගේ මව් බස සිංහල වෙයි. අද වන විට මිලියන 20 කට අධික සිංහල සහ මිලියන 3කට අධික සිංහල නොවන ජනගහනයක් සිංහල භාෂාව භාවිත කරති. සිංහල‍ ඉන්දු-යුරෝපීය භාෂාවල උප ගණයක් වන ඉන්දු-ආර්ය භාෂා ගණයට අයිති වන අතර මාල දිවයින භාවිත කරන දිවෙහි භාෂාව සිංහලයෙන් පැවත එන්නකි. සිංහල ශ්‍රී ලංකාවේ නිල භාෂාවයි .'),
-        ('ta', 'தமிழ் மொழி (Tamil language) தமிழர்களினதும், தமிழ் பேசும் பலரதும் தாய்மொழி ஆகும். தமிழ் திராவிட மொழிக் குடும்பத்தின் முதன்மையான மொழிகளில் ஒன்றும் செம்மொழியும் ஆகும். இந்தியா, இலங்கை, மலேசியா, சிங்கப்பூர் ஆகிய நாடுகளில் அதிக அளவிலும், ஐக்கிய அரபு அமீரகம், தென்னாப்பிரிக்கா, மொரிசியசு, பிஜி, ரீயூனியன், டிரினிடாட் போன்ற நாடுகளில் சிறிய அளவிலும் தமிழ் பேசப்படுகிறது. 1997ஆம் ஆண்டுப் புள்ளி விவரப்படி உலகம் முழுவதிலும் 8 கோடி (80 மில்லியன்) மக்களால் பேசப்படும் தமிழ்[13], ஒரு மொழியைத் தாய்மொழியாகக் கொண்டு பேசும் மக்களின் எண்ணிக்கை அடிப்படையில் பதினெட்டாவது இடத்தில் உள்ளது.[14] இணையத்தில் அதிகம் பயன்படுத்தப்படும் இந்திய மொழிகளில் தமிழ் முதன்மையாக உள்ளதாக 2017 ஆவது ஆண்டில் நடைபெற்ற கூகுள் கணக்கெடுப்பில் தெரிய வந்தது.[15]'),
-        ('te', 'ఆంధ్ర ప్రదేశ్, తెలంగాణ రాష్ట్రాల అధికార భాష తెలుగు. భారత దేశంలో తెలుగు మాతృభాషగా మాట్లాడే 8.7 కోట్ల (2001) జనాభాతో [1] ప్రాంతీయ భాషలలో మొదటి స్థానంలో ఉంది. ప్రపంచంలోని ప్రజలు అత్యధికముగా మాట్లాడే భాషలలో 15 స్థానములోనూ, భారత దేశములో హిందీ, తర్వాత స్థానములోనూ నిలుస్తుంది. పాతవైన ప్రపంచ భాష గణాంకాల (ఎథ్నోలాగ్) ప్రకారం ప్రపంచవ్యాప్తంగా 7.4 కోట్లు మందికి మాతృభాషగా ఉంది.[2] మొదటి భాషగా మాట్లాడతారు. అతి ప్రాచీన దేశ భాషలలో సంస్కృతము తమిళముతో బాటు తెలుగు భాషను 2008 అక్టోబరు 31న భారత ప్రభుత్వము గుర్తించింది.'),
-        ('ur', 'اُردُو لشکری زبان[8] (یا جدید معیاری اردو) برصغیر کی معیاری زبانوں میں سے ایک ہے۔ یہ پاکستان کی قومی اور رابطہ عامہ کی زبان ہے، جبکہ بھارت کی چھے ریاستوں کی دفتری زبان کا درجہ رکھتی ہے۔ آئین ہند کے مطابق اسے 22 دفتری شناخت زبانوں میں شامل کیا جاچکا ہے۔ 2001ء کی مردم شماری کے مطابق اردو کو بطور مادری زبان بھارت میں 5.01% فیصد لوگ بولتے ہیں اور اس لحاظ سے یہ بھارت کی چھٹی بڑی زبان ہے جبکہ پاکستان میں اسے بطور مادری زبان 7.59% فیصد لوگ استعمال کرتے ہیں، یہ پاکستان کی پانچویں بڑی زبان ہے۔ اردو تاریخی طور پر ہندوستان کی مسلم آبادی سے جڑی ہے۔[حوالہ درکار] بعض ذخیرہ الفاظ کے علاوہ یہ زبان معیاری ہندی سے قابل فہم ہے جو اس خطے کی ہندوؤں سے منسوب ہے۔[حوالہ درکار] زبانِ اردو کو پہچان و ترقی اس وقت ملی جب برطانوی دور میں انگریز حکمرانوں نے اسے فارسی کی بجائے انگریزی کے ساتھ شمالی ہندوستان کے علاقوں اور جموں و کشمیر میں اسے سنہ 1846ء اور پنجاب میں سنہ 1849ء میں بطور دفتری زبان نافذ کیا۔ اس کے علاوہ خلیجی، یورپی، ایشیائی اور امریکی علاقوں میں اردو بولنے والوں کی ایک بڑی تعداد آباد ہے جو بنیادی طور پر جنوبی ایشیاء سے کوچ کرنے والے اہلِ اردو ہیں۔ 1999ء کے اعداد وشمار کے مطابق اردو زبان کے مجموعی متکلمین کی تعداد دس کروڑ ساٹھ لاکھ کے لگ بھگ تھی۔ اس لحاظ سے یہ دنیا کی نویں بڑی زبان ہے۔'),
+        (
+            "bn",
+            "বাংলা ভাষা (বাঙলা, বাঙ্গলা, তথা বাঙ্গালা নামগুলোতেও পরিচিত) একটি ইন্দো-আর্য ভাষা, যা দক্ষিণ এশিয়ার বাঙালি জাতির প্রধান কথ্য ও লেখ্য ভাষা। মাতৃভাষীর সংখ্যায় বাংলা ইন্দো-ইউরোপীয় ভাষা পরিবারের চতুর্থ ও বিশ্বের ষষ্ঠ বৃহত্তম ভাষা।[৫] মোট ব্যবহারকারীর সংখ্যা অনুসারে বাংলা বিশ্বের সপ্তম বৃহত্তম ভাষা। বাংলা সার্বভৌম ভাষাভিত্তিক জাতিরাষ্ট্র বাংলাদেশের একমাত্র রাষ্ট্রভাষা তথা সরকারি ভাষা[৬] এবং ভারতের পশ্চিমবঙ্গ, ত্রিপুরা, আসামের বরাক উপত্যকার সরকারি ভাষা। বঙ্গোপসাগরে অবস্থিত আন্দামান দ্বীপপুঞ্জের প্রধান কথ্য ভাষা বাংলা। এছাড়া ভারতের ঝাড়খণ্ড, বিহার, মেঘালয়, মিজোরাম, উড়িষ্যা রাজ্যগুলোতে উল্লেখযোগ্য পরিমাণে বাংলাভাষী জনগণ রয়েছে। ভারতে হিন্দির পরেই সর্বাধিক প্রচলিত ভাষা বাংলা।[৭][৮] এছাড়াও মধ্য প্রাচ্য, আমেরিকা ও ইউরোপে উল্লেখযোগ্য পরিমাণে বাংলাভাষী অভিবাসী রয়েছে।[৯] সারা বিশ্বে সব মিলিয়ে ২৬ কোটির অধিক লোক দৈনন্দিন জীবনে বাংলা ব্যবহার করে।[২] বাংলাদেশের জাতীয় সঙ্গীত এবং ভারতের জাতীয় সঙ্গীত ও স্তোত্র বাংলাতে রচিত।",
+        ),
+        (
+            "de",
+            "Die deutsche Sprache bzw. Deutsch ([dɔʏ̯t͡ʃ]; abgekürzt dt. oder dtsch.) ist eine westgermanische Sprache. Ihr Sprachraum umfasst Deutschland, Österreich, die Deutschschweiz, Liechtenstein, Luxemburg, Ostbelgien, Südtirol, das Elsass und Lothringen sowie Nordschleswig. Außerdem ist sie eine Minderheitensprache in einigen europäischen und außereuropäischen Ländern, z. B. in Rumänien und Südafrika, sowie Nationalsprache im afrikanischen Namibia.",
+        ),
+        (
+            "hi",
+            "हिन्दी विश्व की एक प्रमुख भाषा है एवं भारत की राजभाषा है। केन्द्रीय स्तर पर भारत में दूसरी आधिकारिक भाषा अंग्रेजी है। यह हिंदुस्तानी भाषा की एक मानकीकृत रूप है जिसमें संस्कृत के तत्सम तथा तद्भव शब्दों का प्रयोग अधिक है और अरबी-फ़ारसी शब्द कम हैं। हिंदी संवैधानिक रूप से भारत की राजभाषा और भारत की सबसे अधिक बोली और समझी जाने वाली भाषा है। हालाँकि, हिन्दी भारत की राष्ट्रभाषा नहीं है,[3] क्योंकि भारत के संविधान में कोई भी भाषा को ऐसा दर्जा नहीं दिया गया था।[4][5] चीनी के बाद यह विश्व में सबसे अधिक बोली जाने वाली भाषा भी है। विश्व आर्थिक मंच की गणना के अनुसार यह विश्व की दस शक्तिशाली भाषाओं में से एक है।[6]",
+        ),
+        (
+            "kn",
+            "ದ್ರಾವಿಡ ಭಾಷೆಗಳಲ್ಲಿ ಪ್ರಾಮುಖ್ಯವುಳ್ಳ ಭಾಷೆಯೂ ಭಾರತದ ಪುರಾತನವಾದ ಭಾಷೆಗಳಲ್ಲಿ ಒಂದೂ ಆಗಿರುವ ಕನ್ನಡ ಭಾಷೆಯನ್ನು ಅದರ ವಿವಿಧ ರೂಪಗಳಲ್ಲಿ ಸುಮಾರು ೪೫ ದಶಲಕ್ಷ ಜನರು ಆಡು ನುಡಿಯಾಗಿ ಬಳಸುತ್ತಲಿದ್ದಾರೆ. ಕನ್ನಡ ಕರ್ನಾಟಕ ರಾಜ್ಯದ ಆಡಳಿತ ಭಾಷೆ.[೧೧] ಜಗತ್ತಿನಲ್ಲಿ ಅತ್ಯಂತ ಹೆಚ್ಚು ಮಂದಿ ಮಾತನಾಡುವ ಭಾಷೆಯೆಂಬ ನೆಲೆಯಲ್ಲಿ ಇಪ್ಪತೊಂಬತ್ತನೆಯ ಸ್ಥಾನ ಕನ್ನಡಕ್ಕಿದೆ. ೨೦೧೧ರ ಜನಗಣತಿಯ ಪ್ರಕಾರ ಜಗತ್ತಿನಲ್ಲಿ ೬.೪ ಕೋಟಿ ಜನಗಳು ಕನ್ನಡ ಮಾತನಾಡುತ್ತಾರೆ ಎಂದು ತಿಳಿದುಬಂದಿದೆ. ಇವರಲ್ಲಿ ೫.೫ ಕೋಟಿ ಜನಗಳ ಮಾತೃಭಾಷೆ ಕನ್ನಡವಾಗಿದೆ. ಬ್ರಾಹ್ಮಿ ಲಿಪಿಯಿಂದ ರೂಪುಗೊಂಡ ಕನ್ನಡ ಲಿಪಿಯನ್ನು ಉಪಯೋಗಿಸಿ ಕನ್ನಡ ಭಾಷೆಯನ್ನು ಬರೆಯಲಾಗುತ್ತದೆ. ಕನ್ನಡ ಬರಹದ ಮಾದರಿಗಳಿಗೆ ಸಾವಿರದ ಐನೂರು ವರುಷಗಳ ಚರಿತ್ರೆಯಿದೆ. ಕ್ರಿ.ಶ. ಆರನೆಯ ಶತಮಾನದ ಪಶ್ಚಿಮ ಗಂಗ ಸಾಮ್ರಾಜ್ಯದ ಕಾಲದಲ್ಲಿ [೧೨] ಮತ್ತು ಒಂಬತ್ತನೆಯ ಶತಮಾನದ ರಾಷ್ಟ್ರಕೂಟ ಸಾಮ್ರಾಜ್ಯದ ಕಾಲದಲ್ಲಿ ಹಳಗನ್ನಡ ಸಾಹಿತ್ಯ ಅತ್ಯಂತ ಹೆಚ್ಚಿನ ರಾಜಾಶ್ರಯ ಪಡೆಯಿತು.[೧೩][೧೪] ಅದಲ್ಲದೆ ಸಾವಿರ ವರುಷಗಳ ಸಾಹಿತ್ಯ ಪರಂಪರೆ ಕನ್ನಡಕ್ಕಿದೆ.[೧೫]ವಿನೋಬಾ ಭಾವೆ ಕನ್ನಡ ಲಿಪಿಯನ್ನು ಲಿಪಿಗಳ ರಾಣಿಯೆಂದು ಹೊಗಳಿದ್ದಾರೆ.[ಸೂಕ್ತ ಉಲ್ಲೇಖನ ಬೇಕು]",
+        ),
+        (
+            "si",
+            "ශ්‍රී ලංකාවේ ප්‍රධාන ජාතිය වන සිංහල ජනයාගේ මව් බස සිංහල වෙයි. අද වන විට මිලියන 20 කට අධික සිංහල සහ මිලියන 3කට අධික සිංහල නොවන ජනගහනයක් සිංහල භාෂාව භාවිත කරති. සිංහල‍ ඉන්දු-යුරෝපීය භාෂාවල උප ගණයක් වන ඉන්දු-ආර්ය භාෂා ගණයට අයිති වන අතර මාල දිවයින භාවිත කරන දිවෙහි භාෂාව සිංහලයෙන් පැවත එන්නකි. සිංහල ශ්‍රී ලංකාවේ නිල භාෂාවයි .",
+        ),
+        (
+            "ta",
+            "தமிழ் மொழி (Tamil language) தமிழர்களினதும், தமிழ் பேசும் பலரதும் தாய்மொழி ஆகும். தமிழ் திராவிட மொழிக் குடும்பத்தின் முதன்மையான மொழிகளில் ஒன்றும் செம்மொழியும் ஆகும். இந்தியா, இலங்கை, மலேசியா, சிங்கப்பூர் ஆகிய நாடுகளில் அதிக அளவிலும், ஐக்கிய அரபு அமீரகம், தென்னாப்பிரிக்கா, மொரிசியசு, பிஜி, ரீயூனியன், டிரினிடாட் போன்ற நாடுகளில் சிறிய அளவிலும் தமிழ் பேசப்படுகிறது. 1997ஆம் ஆண்டுப் புள்ளி விவரப்படி உலகம் முழுவதிலும் 8 கோடி (80 மில்லியன்) மக்களால் பேசப்படும் தமிழ்[13], ஒரு மொழியைத் தாய்மொழியாகக் கொண்டு பேசும் மக்களின் எண்ணிக்கை அடிப்படையில் பதினெட்டாவது இடத்தில் உள்ளது.[14] இணையத்தில் அதிகம் பயன்படுத்தப்படும் இந்திய மொழிகளில் தமிழ் முதன்மையாக உள்ளதாக 2017 ஆவது ஆண்டில் நடைபெற்ற கூகுள் கணக்கெடுப்பில் தெரிய வந்தது.[15]",
+        ),
+        (
+            "te",
+            "ఆంధ్ర ప్రదేశ్, తెలంగాణ రాష్ట్రాల అధికార భాష తెలుగు. భారత దేశంలో తెలుగు మాతృభాషగా మాట్లాడే 8.7 కోట్ల (2001) జనాభాతో [1] ప్రాంతీయ భాషలలో మొదటి స్థానంలో ఉంది. ప్రపంచంలోని ప్రజలు అత్యధికముగా మాట్లాడే భాషలలో 15 స్థానములోనూ, భారత దేశములో హిందీ, తర్వాత స్థానములోనూ నిలుస్తుంది. పాతవైన ప్రపంచ భాష గణాంకాల (ఎథ్నోలాగ్) ప్రకారం ప్రపంచవ్యాప్తంగా 7.4 కోట్లు మందికి మాతృభాషగా ఉంది.[2] మొదటి భాషగా మాట్లాడతారు. అతి ప్రాచీన దేశ భాషలలో సంస్కృతము తమిళముతో బాటు తెలుగు భాషను 2008 అక్టోబరు 31న భారత ప్రభుత్వము గుర్తించింది.",
+        ),
+        (
+            "ur",
+            "اُردُو لشکری زبان[8] (یا جدید معیاری اردو) برصغیر کی معیاری زبانوں میں سے ایک ہے۔ یہ پاکستان کی قومی اور رابطہ عامہ کی زبان ہے، جبکہ بھارت کی چھے ریاستوں کی دفتری زبان کا درجہ رکھتی ہے۔ آئین ہند کے مطابق اسے 22 دفتری شناخت زبانوں میں شامل کیا جاچکا ہے۔ 2001ء کی مردم شماری کے مطابق اردو کو بطور مادری زبان بھارت میں 5.01% فیصد لوگ بولتے ہیں اور اس لحاظ سے یہ بھارت کی چھٹی بڑی زبان ہے جبکہ پاکستان میں اسے بطور مادری زبان 7.59% فیصد لوگ استعمال کرتے ہیں، یہ پاکستان کی پانچویں بڑی زبان ہے۔ اردو تاریخی طور پر ہندوستان کی مسلم آبادی سے جڑی ہے۔[حوالہ درکار] بعض ذخیرہ الفاظ کے علاوہ یہ زبان معیاری ہندی سے قابل فہم ہے جو اس خطے کی ہندوؤں سے منسوب ہے۔[حوالہ درکار] زبانِ اردو کو پہچان و ترقی اس وقت ملی جب برطانوی دور میں انگریز حکمرانوں نے اسے فارسی کی بجائے انگریزی کے ساتھ شمالی ہندوستان کے علاقوں اور جموں و کشمیر میں اسے سنہ 1846ء اور پنجاب میں سنہ 1849ء میں بطور دفتری زبان نافذ کیا۔ اس کے علاوہ خلیجی، یورپی، ایشیائی اور امریکی علاقوں میں اردو بولنے والوں کی ایک بڑی تعداد آباد ہے جو بنیادی طور پر جنوبی ایشیاء سے کوچ کرنے والے اہلِ اردو ہیں۔ 1999ء کے اعداد وشمار کے مطابق اردو زبان کے مجموعی متکلمین کی تعداد دس کروڑ ساٹھ لاکھ کے لگ بھگ تھی۔ اس لحاظ سے یہ دنیا کی نویں بڑی زبان ہے۔",
+        ),
     ],
     # fmt: on
 )
diff --git a/spacy/tests/pipeline/test_tok2vec.py b/spacy/tests/pipeline/test_tok2vec.py
index 998f0472c7e..ddd9a990c65 100644
--- a/spacy/tests/pipeline/test_tok2vec.py
+++ b/spacy/tests/pipeline/test_tok2vec.py
@@ -65,10 +65,30 @@ def test_tok2vec_batch_sizes(batch_size, width, embed_size):
     "embed_arch,embed_config",
     # fmt: off
     [
-        ("spacy.MultiHashEmbed.v1", {"rows": [100, 100], "attrs": ["SHAPE", "LOWER"], "include_static_vectors": False}),
-        ("spacy.MultiHashEmbed.v1", {"rows": [100, 20], "attrs": ["ORTH", "PREFIX"], "include_static_vectors": False}),
-        ("spacy.CharacterEmbed.v1", {"rows": 100, "nM": 64, "nC": 8, "include_static_vectors": False}),
-        ("spacy.CharacterEmbed.v1", {"rows": 100, "nM": 16, "nC": 2, "include_static_vectors": False}),
+        (
+            "spacy.MultiHashEmbed.v1",
+            {
+                "rows": [100, 100],
+                "attrs": ["SHAPE", "LOWER"],
+                "include_static_vectors": False,
+            },
+        ),
+        (
+            "spacy.MultiHashEmbed.v1",
+            {
+                "rows": [100, 20],
+                "attrs": ["ORTH", "PREFIX"],
+                "include_static_vectors": False,
+            },
+        ),
+        (
+            "spacy.CharacterEmbed.v1",
+            {"rows": 100, "nM": 64, "nC": 8, "include_static_vectors": False},
+        ),
+        (
+            "spacy.CharacterEmbed.v1",
+            {"rows": 100, "nM": 16, "nC": 2, "include_static_vectors": False},
+        ),
     ],
     # fmt: on
 )
@@ -76,10 +96,26 @@ def test_tok2vec_batch_sizes(batch_size, width, embed_size):
     "tok2vec_arch,encode_arch,encode_config",
     # fmt: off
     [
-        ("spacy.Tok2Vec.v1", "spacy.MaxoutWindowEncoder.v1", {"window_size": 1, "maxout_pieces": 3, "depth": 2}),
-        ("spacy.Tok2Vec.v2", "spacy.MaxoutWindowEncoder.v2", {"window_size": 1, "maxout_pieces": 3, "depth": 2}),
-        ("spacy.Tok2Vec.v1", "spacy.MishWindowEncoder.v1", {"window_size": 1, "depth": 6}),
-        ("spacy.Tok2Vec.v2", "spacy.MishWindowEncoder.v2", {"window_size": 1, "depth": 6}),
+        (
+            "spacy.Tok2Vec.v1",
+            "spacy.MaxoutWindowEncoder.v1",
+            {"window_size": 1, "maxout_pieces": 3, "depth": 2},
+        ),
+        (
+            "spacy.Tok2Vec.v2",
+            "spacy.MaxoutWindowEncoder.v2",
+            {"window_size": 1, "maxout_pieces": 3, "depth": 2},
+        ),
+        (
+            "spacy.Tok2Vec.v1",
+            "spacy.MishWindowEncoder.v1",
+            {"window_size": 1, "depth": 6},
+        ),
+        (
+            "spacy.Tok2Vec.v2",
+            "spacy.MishWindowEncoder.v2",
+            {"window_size": 1, "depth": 6},
+        ),
     ],
     # fmt: on
 )
@@ -164,9 +200,9 @@ def test_init_tok2vec():
 @pytest.mark.parametrize("with_vectors", (False, True))
 def test_tok2vec_listener(with_vectors):
     orig_config = Config().from_str(cfg_string)
-    orig_config["components"]["tok2vec"]["model"]["embed"][
-        "include_static_vectors"
-    ] = with_vectors
+    orig_config["components"]["tok2vec"]["model"]["embed"]["include_static_vectors"] = (
+        with_vectors
+    )
     nlp = util.load_model_from_config(orig_config, auto_fill=True, validate=True)
 
     if with_vectors:
diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index 43d5f62837a..8415e5c92ff 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -153,21 +153,171 @@ def test_issue12566(factory: str, output_file: str):
             "Briana McNaira - Cultural Chaos .",
             "tokens": [
                 # fmt: off
-                {"id": 0, "start": 0, "end": 8, "tag": "ADV", "pos": "ADV", "morph": "Degree=Pos", "lemma": "niedawno", "dep": "advmod", "head": 1, },
-                {"id": 1, "start": 9, "end": 15, "tag": "PRAET", "pos": "VERB", "morph": "Animacy=Hum|Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act", "lemma": "czytać", "dep": "ROOT", "head": 1, },
-                {"id": 2, "start": 16, "end": 18, "tag": "AGLT", "pos": "NOUN", "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing", "lemma": "em", "dep": "iobj", "head": 1, },
-                {"id": 3, "start": 19, "end": 23, "tag": "ADJ", "pos": "ADJ", "morph": "Case=Acc|Degree=Pos|Gender=Fem|Number=Sing", "lemma": "nowy", "dep": "amod", "head": 4, },
-                {"id": 4, "start": 24, "end": 31, "tag": "SUBST", "pos": "NOUN", "morph": "Case=Acc|Gender=Fem|Number=Sing", "lemma": "książka", "dep": "obj", "head": 1, },
-                {"id": 5, "start": 32, "end": 43, "tag": "ADJ", "pos": "ADJ", "morph": "Animacy=Nhum|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing", "lemma": "znakomit", "dep": "acl", "head": 4, },
-                {"id": 6, "start": 44, "end": 54, "tag": "ADJ", "pos": "ADJ", "morph": "Animacy=Hum|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing", "lemma": "szkockiy", "dep": "amod", "head": 7, },
-                {"id": 7, "start": 55, "end": 66, "tag": "SUBST", "pos": "NOUN", "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing", "lemma": "medioznawca", "dep": "iobj", "head": 5, },
-                {"id": 8, "start": 67, "end": 68, "tag": "INTERP", "pos": "PUNCT", "morph": "PunctType=Comm", "lemma": ",", "dep": "punct", "head": 9, },
-                {"id": 9, "start": 69, "end": 75, "tag": "SUBST", "pos": "PROPN", "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing", "lemma": "Brian", "dep": "nmod", "head": 4, },
-                {"id": 10, "start": 76, "end": 83, "tag": "SUBST", "pos": "PROPN", "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing", "lemma": "McNair", "dep": "flat", "head": 9, },
-                {"id": 11, "start": 84, "end": 85, "tag": "INTERP", "pos": "PUNCT", "morph": "PunctType=Dash", "lemma": "-", "dep": "punct", "head": 12, },
-                {"id": 12, "start": 86, "end": 94, "tag": "SUBST", "pos": "PROPN", "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing", "lemma": "Cultural", "dep": "conj", "head": 4, },
-                {"id": 13, "start": 95, "end": 100, "tag": "SUBST", "pos": "NOUN", "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing", "lemma": "Chaos", "dep": "flat", "head": 12, },
-                {"id": 14, "start": 101, "end": 102, "tag": "INTERP", "pos": "PUNCT", "morph": "PunctType=Peri", "lemma": ".", "dep": "punct", "head": 1, },
+                {
+                    "id": 0,
+                    "start": 0,
+                    "end": 8,
+                    "tag": "ADV",
+                    "pos": "ADV",
+                    "morph": "Degree=Pos",
+                    "lemma": "niedawno",
+                    "dep": "advmod",
+                    "head": 1,
+                },
+                {
+                    "id": 1,
+                    "start": 9,
+                    "end": 15,
+                    "tag": "PRAET",
+                    "pos": "VERB",
+                    "morph": "Animacy=Hum|Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act",
+                    "lemma": "czytać",
+                    "dep": "ROOT",
+                    "head": 1,
+                },
+                {
+                    "id": 2,
+                    "start": 16,
+                    "end": 18,
+                    "tag": "AGLT",
+                    "pos": "NOUN",
+                    "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing",
+                    "lemma": "em",
+                    "dep": "iobj",
+                    "head": 1,
+                },
+                {
+                    "id": 3,
+                    "start": 19,
+                    "end": 23,
+                    "tag": "ADJ",
+                    "pos": "ADJ",
+                    "morph": "Case=Acc|Degree=Pos|Gender=Fem|Number=Sing",
+                    "lemma": "nowy",
+                    "dep": "amod",
+                    "head": 4,
+                },
+                {
+                    "id": 4,
+                    "start": 24,
+                    "end": 31,
+                    "tag": "SUBST",
+                    "pos": "NOUN",
+                    "morph": "Case=Acc|Gender=Fem|Number=Sing",
+                    "lemma": "książka",
+                    "dep": "obj",
+                    "head": 1,
+                },
+                {
+                    "id": 5,
+                    "start": 32,
+                    "end": 43,
+                    "tag": "ADJ",
+                    "pos": "ADJ",
+                    "morph": "Animacy=Nhum|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing",
+                    "lemma": "znakomit",
+                    "dep": "acl",
+                    "head": 4,
+                },
+                {
+                    "id": 6,
+                    "start": 44,
+                    "end": 54,
+                    "tag": "ADJ",
+                    "pos": "ADJ",
+                    "morph": "Animacy=Hum|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing",
+                    "lemma": "szkockiy",
+                    "dep": "amod",
+                    "head": 7,
+                },
+                {
+                    "id": 7,
+                    "start": 55,
+                    "end": 66,
+                    "tag": "SUBST",
+                    "pos": "NOUN",
+                    "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing",
+                    "lemma": "medioznawca",
+                    "dep": "iobj",
+                    "head": 5,
+                },
+                {
+                    "id": 8,
+                    "start": 67,
+                    "end": 68,
+                    "tag": "INTERP",
+                    "pos": "PUNCT",
+                    "morph": "PunctType=Comm",
+                    "lemma": ",",
+                    "dep": "punct",
+                    "head": 9,
+                },
+                {
+                    "id": 9,
+                    "start": 69,
+                    "end": 75,
+                    "tag": "SUBST",
+                    "pos": "PROPN",
+                    "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing",
+                    "lemma": "Brian",
+                    "dep": "nmod",
+                    "head": 4,
+                },
+                {
+                    "id": 10,
+                    "start": 76,
+                    "end": 83,
+                    "tag": "SUBST",
+                    "pos": "PROPN",
+                    "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing",
+                    "lemma": "McNair",
+                    "dep": "flat",
+                    "head": 9,
+                },
+                {
+                    "id": 11,
+                    "start": 84,
+                    "end": 85,
+                    "tag": "INTERP",
+                    "pos": "PUNCT",
+                    "morph": "PunctType=Dash",
+                    "lemma": "-",
+                    "dep": "punct",
+                    "head": 12,
+                },
+                {
+                    "id": 12,
+                    "start": 86,
+                    "end": 94,
+                    "tag": "SUBST",
+                    "pos": "PROPN",
+                    "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing",
+                    "lemma": "Cultural",
+                    "dep": "conj",
+                    "head": 4,
+                },
+                {
+                    "id": 13,
+                    "start": 95,
+                    "end": 100,
+                    "tag": "SUBST",
+                    "pos": "NOUN",
+                    "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing",
+                    "lemma": "Chaos",
+                    "dep": "flat",
+                    "head": 12,
+                },
+                {
+                    "id": 14,
+                    "start": 101,
+                    "end": 102,
+                    "tag": "INTERP",
+                    "pos": "PUNCT",
+                    "morph": "PunctType=Peri",
+                    "lemma": ".",
+                    "dep": "punct",
+                    "head": 1,
+                },
                 # fmt: on
             ],
         }
@@ -420,8 +570,14 @@ def test_cli_converters_conll_ner_to_docs():
         (["--x.foo=bar"], {"x.foo": "bar"}),
         (["--x.foo", "--x.bar", "baz"], {"x.foo": True, "x.bar": "baz"}),
         (["--x.foo", "--x.bar=baz"], {"x.foo": True, "x.bar": "baz"}),
-        (["--x.foo", "10.1", "--x.bar", "--x.baz", "false"], {"x.foo": 10.1, "x.bar": True, "x.baz": False}),
-        (["--x.foo", "10.1", "--x.bar", "--x.baz=false"], {"x.foo": 10.1, "x.bar": True, "x.baz": False})
+        (
+            ["--x.foo", "10.1", "--x.bar", "--x.baz", "false"],
+            {"x.foo": 10.1, "x.bar": True, "x.baz": False},
+        ),
+        (
+            ["--x.foo", "10.1", "--x.bar", "--x.baz=false"],
+            {"x.foo": 10.1, "x.bar": True, "x.baz": False},
+        ),
         # fmt: on
     ],
 )
@@ -499,11 +655,11 @@ def test_model_recommendations():
         # fmt: off
         "parser,textcat,tagger",
         " parser, textcat ,tagger ",
-        'parser,textcat,tagger',
-        ' parser, textcat ,tagger ',
+        "parser,textcat,tagger",
+        " parser, textcat ,tagger ",
         ' "parser"," textcat " ,"tagger "',
         " 'parser',' textcat ' ,'tagger '",
-        '[parser,textcat,tagger]',
+        "[parser,textcat,tagger]",
         '["parser","textcat","tagger"]',
         '[" parser" ,"textcat ", " tagger " ]',
         "[parser,textcat,tagger]",
@@ -522,7 +678,7 @@ def test_string_to_list(value):
     [
         # fmt: off
         "1,2,3",
-        '[1,2,3]',
+        "[1,2,3]",
         '["1","2","3"]',
         '[" 1" ,"2 ", " 3 " ]',
         "[' 1' , '2', ' 3 ' ]",
diff --git a/spacy/tests/test_factory_imports.py b/spacy/tests/test_factory_imports.py
index a975af0bbd2..7a1b4a769a8 100644
--- a/spacy/tests/test_factory_imports.py
+++ b/spacy/tests/test_factory_imports.py
@@ -67,16 +67,16 @@ def test_factory_import_compatibility(factory_name, original_module, compat_modu
     # Import from the original module (registrations.py)
     original_module_obj = importlib.import_module(original_module)
     original_factory = getattr(original_module_obj, factory_name)
-    assert (
-        original_factory is not None
-    ), f"Could not import {factory_name} from {original_module}"
+    assert original_factory is not None, (
+        f"Could not import {factory_name} from {original_module}"
+    )
 
     # Import from the compatibility module (component file)
     compat_module_obj = importlib.import_module(compat_module)
     compat_factory = getattr(compat_module_obj, factory_name)
-    assert (
-        compat_factory is not None
-    ), f"Could not import {factory_name} from {compat_module}"
+    assert compat_factory is not None, (
+        f"Could not import {factory_name} from {compat_module}"
+    )
 
     # Test that they're the same function (identity)
     assert original_factory is compat_factory, (
diff --git a/spacy/tests/test_factory_registrations.py b/spacy/tests/test_factory_registrations.py
index 8e93f54f0b0..ab604c3a6d5 100644
--- a/spacy/tests/test_factory_registrations.py
+++ b/spacy/tests/test_factory_registrations.py
@@ -82,9 +82,9 @@ def test_factory_registrations_preserved(reference_factory_registrations):
     missing_registrations = set(reference_factory_registrations.keys()) - set(
         current_registrations.keys()
     )
-    assert (
-        not missing_registrations
-    ), f"Missing factory registrations: {', '.join(sorted(missing_registrations))}"
+    assert not missing_registrations, (
+        f"Missing factory registrations: {', '.join(sorted(missing_registrations))}"
+    )
 
     # Check for new registrations (not an error, but informative)
     new_registrations = set(current_registrations.keys()) - set(
diff --git a/spacy/tests/test_registry_population.py b/spacy/tests/test_registry_population.py
index 592e74dd20a..c67136f9c31 100644
--- a/spacy/tests/test_registry_population.py
+++ b/spacy/tests/test_registry_population.py
@@ -50,6 +50,6 @@ def test_registry_entries(reference_registry):
         # Check for missing entries - these would indicate our new registry population
         # mechanism is missing something
         missing_entries = expected_set - current_set
-        assert (
-            not missing_entries
-        ), f"Registry '{registry_name}' missing entries: {', '.join(missing_entries)}"
+        assert not missing_entries, (
+            f"Registry '{registry_name}' missing entries: {', '.join(missing_entries)}"
+        )
diff --git a/spacy/tokens/doc.pyi b/spacy/tokens/doc.pyi
index d92f04d0564..b8b26ce8b0d 100644
--- a/spacy/tokens/doc.pyi
+++ b/spacy/tokens/doc.pyi
@@ -57,7 +57,9 @@ class Doc:
         force: bool = ...,
     ) -> None: ...
     @classmethod
-    def get_extension(cls, name: str) -> Tuple[
+    def get_extension(
+        cls, name: str
+    ) -> Tuple[
         Optional[Any],
         Optional[DocMethod],
         Optional[Callable[[Doc], Any]],
@@ -66,7 +68,9 @@ class Doc:
     @classmethod
     def has_extension(cls, name: str) -> bool: ...
     @classmethod
-    def remove_extension(cls, name: str) -> Tuple[
+    def remove_extension(
+        cls, name: str
+    ) -> Tuple[
         Optional[Any],
         Optional[DocMethod],
         Optional[Callable[[Doc], Any]],
@@ -144,7 +148,7 @@ class Doc:
         blocked: Optional[List[Span]] = ...,
         missing: Optional[List[Span]] = ...,
         outside: Optional[List[Span]] = ...,
-        default: str = ...
+        default: str = ...,
     ) -> None: ...
     @property
     def noun_chunks(self) -> Iterator[Span]: ...
diff --git a/spacy/tokens/span.pyi b/spacy/tokens/span.pyi
index 070aaffb3a8..b982eb810b8 100644
--- a/spacy/tokens/span.pyi
+++ b/spacy/tokens/span.pyi
@@ -23,7 +23,9 @@ class Span:
         force: bool = ...,
     ) -> None: ...
     @classmethod
-    def get_extension(cls, name: str) -> Tuple[
+    def get_extension(
+        cls, name: str
+    ) -> Tuple[
         Optional[Any],
         Optional[SpanMethod],
         Optional[Callable[[Span], Any]],
@@ -32,7 +34,9 @@ class Span:
     @classmethod
     def has_extension(cls, name: str) -> bool: ...
     @classmethod
-    def remove_extension(cls, name: str) -> Tuple[
+    def remove_extension(
+        cls, name: str
+    ) -> Tuple[
         Optional[Any],
         Optional[SpanMethod],
         Optional[Callable[[Span], Any]],
diff --git a/spacy/tokens/span_group.pyi b/spacy/tokens/span_group.pyi
index d063bb59533..3bd2b6788fb 100644
--- a/spacy/tokens/span_group.pyi
+++ b/spacy/tokens/span_group.pyi
@@ -12,7 +12,7 @@ class SpanGroup:
         *,
         name: str = ...,
         attrs: Dict[str, Any] = ...,
-        spans: Iterable[Span] = ...
+        spans: Iterable[Span] = ...,
     ) -> None: ...
     def __repr__(self) -> str: ...
     @property
diff --git a/spacy/tokens/token.pyi b/spacy/tokens/token.pyi
index 7e56ae3bccd..435ace52707 100644
--- a/spacy/tokens/token.pyi
+++ b/spacy/tokens/token.pyi
@@ -27,7 +27,9 @@ class Token:
         force: bool = ...,
     ) -> None: ...
     @classmethod
-    def get_extension(cls, name: str) -> Tuple[
+    def get_extension(
+        cls, name: str
+    ) -> Tuple[
         Optional[Any],
         Optional[TokenMethod],
         Optional[Callable[[Token], Any]],
@@ -36,7 +38,9 @@ class Token:
     @classmethod
     def has_extension(cls, name: str) -> bool: ...
     @classmethod
-    def remove_extension(cls, name: str) -> Tuple[
+    def remove_extension(
+        cls, name: str
+    ) -> Tuple[
         Optional[Any],
         Optional[TokenMethod],
         Optional[Callable[[Token], Any]],
diff --git a/spacy/training/batchers.py b/spacy/training/batchers.py
index 4a1dfa94515..4f3ac5de795 100644
--- a/spacy/training/batchers.py
+++ b/spacy/training/batchers.py
@@ -24,7 +24,7 @@ def configure_minibatch_by_padded_size(
     size: Sizing,
     buffer: int,
     discard_oversize: bool,
-    get_length: Optional[Callable[[ItemT], int]] = None
+    get_length: Optional[Callable[[ItemT], int]] = None,
 ) -> BatcherT:
     """Create a batcher that uses the `batch_by_padded_size` strategy.
 
@@ -49,7 +49,7 @@ def configure_minibatch_by_padded_size(
         size=size,
         buffer=buffer,
         discard_oversize=discard_oversize,
-        **optionals
+        **optionals,
     )
 
 
@@ -58,7 +58,7 @@ def configure_minibatch_by_words(
     size: Sizing,
     tolerance: float,
     discard_oversize: bool,
-    get_length: Optional[Callable[[ItemT], int]] = None
+    get_length: Optional[Callable[[ItemT], int]] = None,
 ) -> BatcherT:
     """Create a batcher that uses the "minibatch by words" strategy.
 
@@ -76,7 +76,7 @@ def configure_minibatch_by_words(
         size=size,
         tolerance=tolerance,
         discard_oversize=discard_oversize,
-        **optionals
+        **optionals,
     )
 
 
diff --git a/spacy/training/converters/conll_ner_to_docs.py b/spacy/training/converters/conll_ner_to_docs.py
index b19d1791b27..e66a8a8dfed 100644
--- a/spacy/training/converters/conll_ner_to_docs.py
+++ b/spacy/training/converters/conll_ner_to_docs.py
@@ -74,8 +74,7 @@ def conll_ner_to_docs(
     # provide warnings for problematic data
     if "\n\n" not in input_data:
         msg.warn(
-            "No sentence boundaries found. Use `-s` to automatically segment "
-            "sentences."
+            "No sentence boundaries found. Use `-s` to automatically segment sentences."
         )
     if doc_delimiter not in input_data:
         msg.warn(
diff --git a/spacy/training/converters/conllu_to_docs.py b/spacy/training/converters/conllu_to_docs.py
index bda5c88c3d4..3a60c4e024b 100644
--- a/spacy/training/converters/conllu_to_docs.py
+++ b/spacy/training/converters/conllu_to_docs.py
@@ -15,7 +15,7 @@ def conllu_to_docs(
     ner_map=None,
     merge_subtokens=False,
     no_print=False,
-    **_
+    **_,
 ):
     """
     Convert conllu files into JSON format for use with train cli.
diff --git a/spacy/training/loggers.py b/spacy/training/loggers.py
index 488ca4a7136..05c5d3bb681 100644
--- a/spacy/training/loggers.py
+++ b/spacy/training/loggers.py
@@ -176,7 +176,7 @@ def log_step(info: Optional[Dict[str, Any]]) -> None:
                         initial = info["step"]
                     else:
                         total = eval_frequency
-                        desc = f"Epoch {info['epoch']+1}"
+                        desc = f"Epoch {info['epoch'] + 1}"
                         initial = 0
                     # Set disable=None, so that it disables on non-TTY
                     progress = tqdm.tqdm(
diff --git a/spacy/ty.py b/spacy/ty.py
index b37f2e18a1f..c18ce284dc0 100644
--- a/spacy/ty.py
+++ b/spacy/ty.py
@@ -29,7 +29,7 @@ def update(
         *,
         drop: float = 0.0,
         sgd: Optional[Optimizer] = None,
-        losses: Optional[Dict[str, float]] = None
+        losses: Optional[Dict[str, float]] = None,
     ) -> Dict[str, float]: ...
 
     def finish_update(self, sgd: Optimizer) -> None: ...
@@ -41,7 +41,7 @@ def initialize(
         self,
         get_examples: Callable[[], Iterable["Example"]],
         nlp: "Language",
-        **kwargs: Any
+        **kwargs: Any,
     ): ...
 
 
diff --git a/spacy/util.py b/spacy/util.py
index ad5a7e0bada..14d7b539994 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -557,7 +557,9 @@ def load_model_from_package(
     RETURNS (Language): The loaded nlp object.
     """
     cls = importlib.import_module(name)
-    return cls.load(vocab=vocab, disable=disable, enable=enable, exclude=exclude, config=config)  # type: ignore[attr-defined]
+    return cls.load(
+        vocab=vocab, disable=disable, enable=enable, exclude=exclude, config=config
+    )  # type: ignore[attr-defined]
 
 
 def load_model_from_path(

From 79b5f811bf6e403528e6e7e84307af4951df1a03 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sat, 21 Mar 2026 08:39:43 +0100
Subject: [PATCH 26/42] Update CI validation workflow: replace black, isort,
 flake8 with ruff

---
 .github/workflows/tests.yml | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index bb4eb278131..c967be0d128 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -33,24 +33,13 @@ jobs:
         with:
           python-version: "3.10"
 
-      - name: black
-        run: |
-          python -m pip install black -c requirements.txt
-          python -m black spacy --check
-      - name: isort
-        run: |
-          python -m pip install isort -c requirements.txt
-          python -m isort spacy --check
-      - name: flake8
-        run: |
-          python -m pip install flake8==5.0.4
-          python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
-          # Unfortunately cython-lint isn't working after the shift to Cython 3.
-          #- name: cython-lint
-          #  run: |
-          #    python -m pip install cython-lint -c requirements.txt
-          #    # E501: line too log, W291: trailing whitespace, E266: too many leading '#' for block comment
-          #    cython-lint spacy --ignore E501,W291,E266
+      - name: ruff format
+        run: |
+          python -m pip install ruff -c requirements.txt
+          python -m ruff format spacy --check
+      - name: ruff lint
+        run: |
+          python -m ruff check spacy
 
   tests:
     name: Test

From 86f7ce303a2ebedf562227229f75b43bcac6cf46 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sat, 21 Mar 2026 08:41:25 +0100
Subject: [PATCH 27/42] Limit CI ruff lint to isort-only checks for now

---
 .github/workflows/tests.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index c967be0d128..adfce07f50b 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -37,9 +37,9 @@ jobs:
         run: |
           python -m pip install ruff -c requirements.txt
           python -m ruff format spacy --check
-      - name: ruff lint
+      - name: ruff isort
         run: |
-          python -m ruff check spacy
+          python -m ruff check spacy --select I
 
   tests:
     name: Test

From 47b5504e90cf30f1675df112aed1af9330c07986 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sat, 21 Mar 2026 08:43:16 +0100
Subject: [PATCH 28/42] Autofix autofixable things from ruff

---
 spacy/cli/_util.py                             | 11 +----------
 spacy/cli/debug_diff.py                        |  3 +--
 spacy/cli/evaluate.py                          |  3 +--
 spacy/cli/find_threshold.py                    |  6 +-----
 spacy/lang/fa/generate_verbs_exc.py            |  6 +++---
 spacy/lang/ga/lemmatizer.py                    |  2 +-
 spacy/lang/ht/lemmatizer.py                    |  1 -
 spacy/lang/sl/punctuation.py                   |  2 --
 spacy/matcher/phrasematcher.pyi                |  2 +-
 spacy/ml/_character_embed.py                   |  1 -
 spacy/ml/_precomputable_affine.py              |  1 -
 spacy/ml/callbacks.py                          |  4 +---
 spacy/ml/extract_ngrams.py                     |  1 -
 spacy/ml/extract_spans.py                      |  1 -
 spacy/ml/featureextractor.py                   |  2 +-
 spacy/ml/models/entity_linker.py               |  1 -
 spacy/ml/models/multi_task.py                  |  6 +++---
 spacy/ml/models/parser.py                      |  3 +--
 spacy/ml/models/span_finder.py                 |  1 -
 spacy/ml/models/spancat.py                     |  1 -
 spacy/ml/models/tagger.py                      |  1 -
 spacy/ml/models/textcat.py                     |  1 -
 spacy/ml/models/tok2vec.py                     |  3 +--
 spacy/ml/staticvectors.py                      |  4 ++--
 spacy/ml/tb_framework.py                       |  1 -
 spacy/pipe_analysis.py                         |  4 ++--
 spacy/pipeline/_edit_tree_internals/schemas.py |  1 -
 spacy/pipeline/attributeruler.py               |  3 +--
 spacy/pipeline/edit_tree_lemmatizer.py         |  1 -
 spacy/pipeline/entity_linker.py                |  4 +---
 spacy/pipeline/entityruler.py                  |  3 +--
 spacy/pipeline/functions.py                    |  1 -
 spacy/pipeline/lemmatizer.py                   |  3 +--
 spacy/pipeline/pipe.pyi                        |  1 -
 spacy/pipeline/span_finder.py                  |  2 --
 spacy/pipeline/span_ruler.py                   |  3 +--
 spacy/pipeline/spancat.py                      |  2 --
 spacy/pipeline/textcat.py                      |  5 +----
 spacy/pipeline/textcat_multilabel.py           |  6 +-----
 spacy/pipeline/tok2vec.py                      |  1 -
 spacy/tests/doc/test_doc_api.py                |  6 +++---
 spacy/tests/lang/bg/test_tokenizer.py          |  1 -
 spacy/tests/lang/es/test_noun_chunks.py        | 10 +++++-----
 spacy/tests/lang/fr/test_noun_chunks.py        | 12 ++++++------
 spacy/tests/lang/it/test_noun_chunks.py        | 12 ++++++------
 spacy/tests/lang/la/test_exception.py          |  1 -
 spacy/tests/lang/pt/test_noun_chunks.py        | 10 +++++-----
 spacy/tests/lang/sl/test_text.py               |  1 -
 spacy/tests/lang/sq/test_text.py               |  1 -
 spacy/tests/lang/xx/test_text.py               |  1 -
 spacy/tests/pipeline/test_entity_linker.py     |  4 ++--
 spacy/tests/pipeline/test_textcat.py           |  1 -
 spacy/tests/test_cli_app.py                    |  1 -
 spacy/tests/test_factory_registrations.py      |  3 ---
 spacy/tests/test_models.py                     |  2 +-
 spacy/tests/test_registry_population.py        |  1 -
 spacy/tests/training/test_corpus.py            |  3 +--
 spacy/tests/vocab_vectors/test_lexeme.py       |  1 -
 spacy/tokens/_serialize.py                     |  2 +-
 spacy/training/augment.py                      |  1 -
 spacy/training/batchers.py                     |  5 ++---
 spacy/training/callbacks.py                    |  2 +-
 spacy/training/loggers.py                      |  1 -
 spacy/vocab.pyi                                |  1 -
 64 files changed, 60 insertions(+), 131 deletions(-)

diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py
index 5057640a5b9..757c418440b 100644
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@@ -1,15 +1,11 @@
-import hashlib
 import os
-import shutil
 import sys
 from configparser import InterpolationError
 from contextlib import contextmanager
 from pathlib import Path
 from typing import (
-    TYPE_CHECKING,
     Any,
     Dict,
-    Iterable,
     List,
     Optional,
     Tuple,
@@ -21,22 +17,17 @@
 import typer
 from click import NoSuchOption
 from click.shell_completion import split_arg_string
-from thinc.api import Config, ConfigValidationError, require_gpu
+from thinc.api import ConfigValidationError, require_gpu
 from thinc.util import gpu_is_available
 from typer.main import get_command
 from wasabi import Printer, msg
 from weasel import app as project_cli
 
-from .. import about
 from ..compat import Literal
-from ..schemas import validate
 from ..util import (
     ENV_VARS,
-    SimpleFrozenDict,
     import_file,
-    is_compatible_version,
     logger,
-    make_tempdir,
     registry,
     run_command,
 )
diff --git a/spacy/cli/debug_diff.py b/spacy/cli/debug_diff.py
index 08c31b32df3..71d8826bc5e 100644
--- a/spacy/cli/debug_diff.py
+++ b/spacy/cli/debug_diff.py
@@ -2,11 +2,10 @@
 from typing import Optional
 
 import typer
-from thinc.api import Config
 from wasabi import MarkdownRenderer, Printer, diff_strings
 
 from ..util import load_config
-from ._util import Arg, Opt, debug_cli, parse_config_overrides, show_validation_error
+from ._util import Arg, Opt, debug_cli, show_validation_error
 from .init_config import Optimizations, init_config
 
 
diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py
index 62131fe13ea..9704ea44413 100644
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@@ -1,13 +1,12 @@
 import re
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional
 
 import srsly
 from thinc.api import fix_random_seed
 from wasabi import Printer
 
 from .. import displacy, util
-from ..scorer import Scorer
 from ..tokens import Doc
 from ..training import Corpus
 from ._util import Arg, Opt, app, benchmark_cli, import_code, setup_gpu
diff --git a/spacy/cli/find_threshold.py b/spacy/cli/find_threshold.py
index d89b4c27d55..7b2c5f98051 100644
--- a/spacy/cli/find_threshold.py
+++ b/spacy/cli/find_threshold.py
@@ -207,11 +207,7 @@ def filter_config(
             ),
         )
         if hasattr(pipe, "cfg"):
-            setattr(
-                nlp.get_pipe(pipe_name),
-                "cfg",
-                set_nested_item(getattr(pipe, "cfg"), config_keys, threshold),
-            )
+            nlp.get_pipe(pipe_name).cfg = set_nested_item(pipe.cfg, config_keys, threshold)
 
         eval_scores = nlp.evaluate(dev_dataset)
         if scores_key not in eval_scores:
diff --git a/spacy/lang/fa/generate_verbs_exc.py b/spacy/lang/fa/generate_verbs_exc.py
index a6d79a386df..7ef82c3e8a9 100644
--- a/spacy/lang/fa/generate_verbs_exc.py
+++ b/spacy/lang/fa/generate_verbs_exc.py
@@ -611,8 +611,8 @@
 present_ends = ["م", "ی", "د", "یم", "ید", "ند"]
 
 # special case of '#هست':
-VERBS_EXC.update({conj: "هست" for conj in ["هست" + end for end in simple_ends]})
-VERBS_EXC.update({conj: "هست" for conj in ["نیست" + end for end in simple_ends]})
+VERBS_EXC.update(dict.fromkeys(["هست" + end for end in simple_ends], "هست"))
+VERBS_EXC.update(dict.fromkeys(["نیست" + end for end in simple_ends], "هست"))
 
 for verb_root in verb_roots:
     conjugations = []
@@ -648,4 +648,4 @@
             )
         )
 
-    VERBS_EXC.update({conj: (past,) if past else present for conj in conjugations})
+    VERBS_EXC.update(dict.fromkeys(conjugations, (past,) if past else present))
diff --git a/spacy/lang/ga/lemmatizer.py b/spacy/lang/ga/lemmatizer.py
index c9fbfbc193a..cffcf1d3c49 100644
--- a/spacy/lang/ga/lemmatizer.py
+++ b/spacy/lang/ga/lemmatizer.py
@@ -1,4 +1,4 @@
-from typing import Dict, List, Tuple
+from typing import List, Tuple
 
 from ...pipeline import Lemmatizer
 from ...tokens import Token
diff --git a/spacy/lang/ht/lemmatizer.py b/spacy/lang/ht/lemmatizer.py
index 52bf23d2390..7687865c300 100644
--- a/spacy/lang/ht/lemmatizer.py
+++ b/spacy/lang/ht/lemmatizer.py
@@ -1,6 +1,5 @@
 from typing import List, Tuple
 
-from ...lookups import Lookups
 from ...pipeline import Lemmatizer
 from ...tokens import Token
 
diff --git a/spacy/lang/sl/punctuation.py b/spacy/lang/sl/punctuation.py
index dadb54d315c..3be83eba382 100644
--- a/spacy/lang/sl/punctuation.py
+++ b/spacy/lang/sl/punctuation.py
@@ -5,14 +5,12 @@
     CONCAT_QUOTES,
     CURRENCY,
     HYPHENS,
-    LIST_CURRENCY,
     LIST_ELLIPSES,
     LIST_ICONS,
     LIST_PUNCT,
     LIST_QUOTES,
     PUNCT,
     UNITS,
-    merge_chars,
 )
 from ..punctuation import TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES
 
diff --git a/spacy/matcher/phrasematcher.pyi b/spacy/matcher/phrasematcher.pyi
index 27f6ba373fc..0f56699d63f 100644
--- a/spacy/matcher/phrasematcher.pyi
+++ b/spacy/matcher/phrasematcher.pyi
@@ -1,4 +1,4 @@
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union, overload
+from typing import Any, Callable, List, Optional, Tuple, Union, overload
 
 from ..compat import Literal
 from ..tokens import Doc, Span
diff --git a/spacy/ml/_character_embed.py b/spacy/ml/_character_embed.py
index fde73f35b5b..8cc4d25743e 100644
--- a/spacy/ml/_character_embed.py
+++ b/spacy/ml/_character_embed.py
@@ -4,7 +4,6 @@
 from thinc.types import Floats2d
 
 from ..tokens import Doc
-from ..util import registry
 
 
 def CharacterEmbed(nM: int, nC: int) -> Model[List[Doc], List[Floats2d]]:
diff --git a/spacy/ml/_precomputable_affine.py b/spacy/ml/_precomputable_affine.py
index cdcac0c3812..ac2f6bbd3fa 100644
--- a/spacy/ml/_precomputable_affine.py
+++ b/spacy/ml/_precomputable_affine.py
@@ -1,6 +1,5 @@
 from thinc.api import Model, normal_init
 
-from ..util import registry
 
 
 def PrecomputableAffine(nO, nI, nF, nP, dropout=0.1):
diff --git a/spacy/ml/callbacks.py b/spacy/ml/callbacks.py
index fefb170ba21..d9976cea80a 100644
--- a/spacy/ml/callbacks.py
+++ b/spacy/ml/callbacks.py
@@ -2,14 +2,12 @@
 import inspect
 import types
 import warnings
-from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Set, Type
+from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Set
 
 from thinc.layers import with_nvtx_range
-from thinc.model import Model, wrap_model_recursive
 from thinc.util import use_nvtx_range
 
 from ..errors import Warnings
-from ..util import registry
 
 if TYPE_CHECKING:
     # This lets us add type hints for mypy etc. without causing circular imports
diff --git a/spacy/ml/extract_ngrams.py b/spacy/ml/extract_ngrams.py
index d571973122e..9f54b48899e 100644
--- a/spacy/ml/extract_ngrams.py
+++ b/spacy/ml/extract_ngrams.py
@@ -1,7 +1,6 @@
 from thinc.api import Model
 
 from ..attrs import LOWER
-from ..util import registry
 
 
 def extract_ngrams(ngram_size: int, attr: int = LOWER) -> Model:
diff --git a/spacy/ml/extract_spans.py b/spacy/ml/extract_spans.py
index d3456b705a6..2351ddc2cf0 100644
--- a/spacy/ml/extract_spans.py
+++ b/spacy/ml/extract_spans.py
@@ -3,7 +3,6 @@
 from thinc.api import Model, to_numpy
 from thinc.types import Ints1d, Ragged
 
-from ..util import registry
 
 
 def extract_spans() -> Model[Tuple[Ragged, Ragged], Ragged]:
diff --git a/spacy/ml/featureextractor.py b/spacy/ml/featureextractor.py
index fb4e3c39aea..ad376e15f25 100644
--- a/spacy/ml/featureextractor.py
+++ b/spacy/ml/featureextractor.py
@@ -1,6 +1,6 @@
 from typing import Callable, List, Tuple, Union
 
-from thinc.api import Model, registry
+from thinc.api import Model
 from thinc.types import Ints2d
 
 from ..tokens import Doc
diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py
index 752d1c4433c..05ad9a27287 100644
--- a/spacy/ml/models/entity_linker.py
+++ b/spacy/ml/models/entity_linker.py
@@ -23,7 +23,6 @@
     get_candidates_batch,
 )
 from ...tokens import Doc, Span
-from ...util import registry
 from ...vocab import Vocab
 from ..extract_spans import extract_spans
 
diff --git a/spacy/ml/models/multi_task.py b/spacy/ml/models/multi_task.py
index 7c68fe48126..9beecf878ad 100644
--- a/spacy/ml/models/multi_task.py
+++ b/spacy/ml/models/multi_task.py
@@ -1,5 +1,5 @@
 from functools import partial
-from typing import TYPE_CHECKING, Any, Callable, Iterable, List, Optional, Tuple, cast
+from typing import TYPE_CHECKING, Callable, Iterable, List, Optional, Tuple, cast
 
 import numpy
 from thinc.api import (
@@ -21,7 +21,7 @@
 
 from ...attrs import ID, ORTH
 from ...errors import Errors
-from ...util import OOV_RANK, registry
+from ...util import OOV_RANK
 from ...vectors import Mode as VectorsMode
 
 if TYPE_CHECKING:
@@ -199,7 +199,7 @@ def mlm_initialize(model: Model, X=None, Y=None):
         layers=[wrapped_model],
         init=mlm_initialize,
         refs={"wrapped": wrapped_model},
-        dims={dim: None for dim in wrapped_model.dim_names},
+        dims=dict.fromkeys(wrapped_model.dim_names),
     )
     mlm_model.set_ref("wrapped", wrapped_model)
     return mlm_model
diff --git a/spacy/ml/models/parser.py b/spacy/ml/models/parser.py
index 9ff0ac8ba3c..20b8f6d6e80 100644
--- a/spacy/ml/models/parser.py
+++ b/spacy/ml/models/parser.py
@@ -1,4 +1,4 @@
-from typing import List, Optional, cast
+from typing import List, Optional
 
 from thinc.api import Linear, Model, chain, list2array, use_ops, zero_init
 from thinc.types import Floats2d
@@ -6,7 +6,6 @@
 from ...compat import Literal
 from ...errors import Errors
 from ...tokens import Doc
-from ...util import registry
 from .._precomputable_affine import PrecomputableAffine
 from ..tb_framework import TransitionModel
 
diff --git a/spacy/ml/models/span_finder.py b/spacy/ml/models/span_finder.py
index 8081ed92b70..226b736c7eb 100644
--- a/spacy/ml/models/span_finder.py
+++ b/spacy/ml/models/span_finder.py
@@ -4,7 +4,6 @@
 from thinc.types import Floats1d, Floats2d
 
 from ...tokens import Doc
-from ...util import registry
 
 InT = List[Doc]
 OutT = Floats2d
diff --git a/spacy/ml/models/spancat.py b/spacy/ml/models/spancat.py
index 91dfb41ed7f..697d1df4d35 100644
--- a/spacy/ml/models/spancat.py
+++ b/spacy/ml/models/spancat.py
@@ -18,7 +18,6 @@
 from thinc.types import Floats2d, Ragged
 
 from ...tokens import Doc
-from ...util import registry
 from ..extract_spans import extract_spans
 
 
diff --git a/spacy/ml/models/tagger.py b/spacy/ml/models/tagger.py
index aec4276dbd8..d3b090de005 100644
--- a/spacy/ml/models/tagger.py
+++ b/spacy/ml/models/tagger.py
@@ -4,7 +4,6 @@
 from thinc.types import Floats2d
 
 from ...tokens import Doc
-from ...util import registry
 
 
 def build_tagger_model(
diff --git a/spacy/ml/models/textcat.py b/spacy/ml/models/textcat.py
index 49c0dd7077c..8194ab3101e 100644
--- a/spacy/ml/models/textcat.py
+++ b/spacy/ml/models/textcat.py
@@ -36,7 +36,6 @@
 from ...attrs import ORTH
 from ...errors import Errors
 from ...tokens import Doc
-from ...util import registry
 from ..extract_ngrams import extract_ngrams
 from ..staticvectors import StaticVectors
 from .tok2vec import get_tok2vec_width
diff --git a/spacy/ml/models/tok2vec.py b/spacy/ml/models/tok2vec.py
index b2b803b6ed0..ade84274475 100644
--- a/spacy/ml/models/tok2vec.py
+++ b/spacy/ml/models/tok2vec.py
@@ -17,14 +17,13 @@
     with_array,
     with_padded,
 )
-from thinc.types import Floats2d, Ints1d, Ints2d, Ragged
+from thinc.types import Floats2d, Ints2d, Ragged
 
 from ...attrs import intify_attr
 from ...errors import Errors
 from ...ml import _character_embed
 from ...pipeline.tok2vec import Tok2VecListener
 from ...tokens import Doc
-from ...util import registry
 from ..featureextractor import FeatureExtractor
 from ..staticvectors import StaticVectors
 
diff --git a/spacy/ml/staticvectors.py b/spacy/ml/staticvectors.py
index bc69e53ab24..d90acdaf008 100644
--- a/spacy/ml/staticvectors.py
+++ b/spacy/ml/staticvectors.py
@@ -1,7 +1,7 @@
 import warnings
-from typing import Callable, List, Optional, Sequence, Tuple, cast
+from typing import Callable, List, Optional, Tuple, cast
 
-from thinc.api import Model, Ops, registry
+from thinc.api import Model, Ops
 from thinc.initializers import glorot_uniform_init
 from thinc.types import Floats1d, Floats2d, Ints1d, Ragged
 from thinc.util import partial
diff --git a/spacy/ml/tb_framework.py b/spacy/ml/tb_framework.py
index 16c894f6c5c..e538b9e88c0 100644
--- a/spacy/ml/tb_framework.py
+++ b/spacy/ml/tb_framework.py
@@ -1,6 +1,5 @@
 from thinc.api import Model, noop
 
-from ..util import registry
 from .parser_model import ParserStepModel
 
 
diff --git a/spacy/pipe_analysis.py b/spacy/pipe_analysis.py
index d26884487d3..b564b466e50 100644
--- a/spacy/pipe_analysis.py
+++ b/spacy/pipe_analysis.py
@@ -23,7 +23,7 @@ def validate_attrs(values: Iterable[str]) -> Iterable[str]:
     values (Iterable[str]): The string attributes to check, e.g. `["token.pos"]`.
     RETURNS (Iterable[str]): The checked attributes.
     """
-    data = dot_to_dict({value: True for value in values})
+    data = dot_to_dict(dict.fromkeys(values, True))
     objs = {"doc": Doc, "token": Token, "span": Span}
     for obj_key, attrs in data.items():
         if obj_key == "span":
@@ -100,7 +100,7 @@ def analyze_pipes(
         all_attrs.update(meta.requires)
         result["summary"][name] = {key: getattr(meta, key, None) for key in keys}
         prev_pipes = nlp.pipeline[:i]
-        requires = {annot: False for annot in meta.requires}
+        requires = dict.fromkeys(meta.requires, False)
         if requires:
             for prev_name, prev_pipe in prev_pipes:
                 prev_meta = nlp.get_pipe_meta(prev_name)
diff --git a/spacy/pipeline/_edit_tree_internals/schemas.py b/spacy/pipeline/_edit_tree_internals/schemas.py
index ef7a076b6cd..1f12b63607c 100644
--- a/spacy/pipeline/_edit_tree_internals/schemas.py
+++ b/spacy/pipeline/_edit_tree_internals/schemas.py
@@ -6,7 +6,6 @@
     ConfigDict,
     Field,
     RootModel,
-    StrictBool,
     StrictInt,
     StrictStr,
     ValidationError,
diff --git a/spacy/pipeline/attributeruler.py b/spacy/pipeline/attributeruler.py
index aeb0672c7c7..d4f96ec014b 100644
--- a/spacy/pipeline/attributeruler.py
+++ b/spacy/pipeline/attributeruler.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 from pathlib import Path
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
 
@@ -14,7 +13,7 @@
 from ..tokens import Doc, Span
 from ..tokens._retokenize import normalize_token_attrs, set_token_attrs
 from ..training import Example
-from ..util import SimpleFrozenList, registry
+from ..util import SimpleFrozenList
 from ..vocab import Vocab
 from .pipe import Pipe
 
diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py
index 0941b43c1ce..77f033b1c48 100644
--- a/spacy/pipeline/edit_tree_lemmatizer.py
+++ b/spacy/pipeline/edit_tree_lemmatizer.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 from collections import Counter
 from itertools import islice
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, cast
diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index 6a1ed11dfc5..4b23fee6249 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -1,6 +1,5 @@
 import importlib
 import random
-import sys
 from itertools import islice
 from pathlib import Path
 from typing import Any, Callable, Dict, Iterable, List, Optional, Union
@@ -16,9 +15,8 @@
 from ..scorer import Scorer
 from ..tokens import Doc, Span
 from ..training import Example, validate_examples, validate_get_examples
-from ..util import SimpleFrozenList, registry
+from ..util import SimpleFrozenList
 from ..vocab import Vocab
-from .legacy.entity_linker import EntityLinker_v1
 from .pipe import deserialize_config
 from .trainable_pipe import TrainablePipe
 
diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py
index 2b8c9830720..0728c3f0006 100644
--- a/spacy/pipeline/entityruler.py
+++ b/spacy/pipeline/entityruler.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 import warnings
 from collections import defaultdict
 from pathlib import Path
@@ -14,7 +13,7 @@
 from ..scorer import get_ner_prf
 from ..tokens import Doc, Span
 from ..training import Example
-from ..util import SimpleFrozenList, ensure_path, from_disk, registry, to_disk
+from ..util import SimpleFrozenList, ensure_path, from_disk, to_disk
 from .pipe import Pipe
 
 DEFAULT_ENT_ID_SEP = "||"
diff --git a/spacy/pipeline/functions.py b/spacy/pipeline/functions.py
index e4a3d6d1d5b..b2aa8b708c8 100644
--- a/spacy/pipeline/functions.py
+++ b/spacy/pipeline/functions.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 import warnings
 from typing import Any, Dict
 
diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py
index e8d467ef8db..f518e1072ac 100644
--- a/spacy/pipeline/lemmatizer.py
+++ b/spacy/pipeline/lemmatizer.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 import warnings
 from pathlib import Path
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
@@ -13,7 +12,7 @@
 from ..scorer import Scorer
 from ..tokens import Doc, Token
 from ..training import Example
-from ..util import SimpleFrozenList, logger, registry
+from ..util import SimpleFrozenList, logger
 from ..vocab import Vocab
 from .pipe import Pipe
 
diff --git a/spacy/pipeline/pipe.pyi b/spacy/pipeline/pipe.pyi
index 9a1c11cefea..55cfd1fec95 100644
--- a/spacy/pipeline/pipe.pyi
+++ b/spacy/pipeline/pipe.pyi
@@ -7,7 +7,6 @@ from typing import (
     Iterator,
     List,
     NoReturn,
-    Optional,
     Tuple,
     Union,
 )
diff --git a/spacy/pipeline/span_finder.py b/spacy/pipeline/span_finder.py
index 26c9efb6a9d..7ee19de04b0 100644
--- a/spacy/pipeline/span_finder.py
+++ b/spacy/pipeline/span_finder.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
 
 from thinc.api import Config, Model, Optimizer, set_dropout_rate
@@ -10,7 +9,6 @@
 from ..scorer import Scorer
 from ..tokens import Doc, Span
 from ..training import Example
-from ..util import registry
 from .spancat import DEFAULT_SPANS_KEY
 from .trainable_pipe import TrainablePipe
 
diff --git a/spacy/pipeline/span_ruler.py b/spacy/pipeline/span_ruler.py
index 98287ba1d22..703eda61561 100644
--- a/spacy/pipeline/span_ruler.py
+++ b/spacy/pipeline/span_ruler.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 import warnings
 from functools import partial
 from pathlib import Path
@@ -27,7 +26,7 @@
 from ..scorer import Scorer
 from ..tokens import Doc, Span
 from ..training import Example
-from ..util import SimpleFrozenList, ensure_path, registry
+from ..util import SimpleFrozenList, ensure_path
 from .pipe import Pipe
 
 PatternType = Dict[str, Union[str, List[Dict[str, Any]]]]
diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py
index 805a0538f01..9b945df35b5 100644
--- a/spacy/pipeline/spancat.py
+++ b/spacy/pipeline/spancat.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 from dataclasses import dataclass
 from functools import partial
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, cast
@@ -14,7 +13,6 @@
 from ..scorer import Scorer
 from ..tokens import Doc, Span, SpanGroup
 from ..training import Example, validate_examples
-from ..util import registry
 from ..vocab import Vocab
 from .trainable_pipe import TrainablePipe
 
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index 36b569edc63..7b03c7e81d4 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -1,18 +1,15 @@
 import importlib
-import sys
 from itertools import islice
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
 
 import numpy
-from thinc.api import Config, Model, Optimizer, get_array_module, set_dropout_rate
-from thinc.types import Floats2d
+from thinc.api import Config, Model, Optimizer, set_dropout_rate
 
 from ..errors import Errors
 from ..language import Language
 from ..scorer import Scorer
 from ..tokens import Doc
 from ..training import Example, validate_examples, validate_get_examples
-from ..util import registry
 from ..vocab import Vocab
 from .trainable_pipe import TrainablePipe
 
diff --git a/spacy/pipeline/textcat_multilabel.py b/spacy/pipeline/textcat_multilabel.py
index 32845490d4e..cc094bf6197 100644
--- a/spacy/pipeline/textcat_multilabel.py
+++ b/spacy/pipeline/textcat_multilabel.py
@@ -1,17 +1,13 @@
 import importlib
-import sys
 from itertools import islice
-from typing import Any, Callable, Dict, Iterable, List, Optional
+from typing import Any, Callable, Dict, Iterable, Optional
 
 from thinc.api import Config, Model
-from thinc.types import Floats2d
 
 from ..errors import Errors
 from ..language import Language
 from ..scorer import Scorer
-from ..tokens import Doc
 from ..training import Example, validate_get_examples
-from ..util import registry
 from ..vocab import Vocab
 from .textcat import TextCategorizer
 
diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py
index ce0296bf5f3..4e2e5af846f 100644
--- a/spacy/pipeline/tok2vec.py
+++ b/spacy/pipeline/tok2vec.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 from itertools import islice
 from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence
 
diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py
index d72c916efb0..ef098ec1a9f 100644
--- a/spacy/tests/doc/test_doc_api.py
+++ b/spacy/tests/doc/test_doc_api.py
@@ -60,12 +60,12 @@ def test_issue1757():
     """Test comparison against None doesn't cause segfault."""
     doc = Doc(Vocab(), words=["a", "b", "c"])
     assert not doc[0] < None
-    assert not doc[0] is None
+    assert doc[0] is not None
     assert doc[0] >= None
     assert not doc[:2] < None
-    assert not doc[:2] is None
+    assert doc[:2] is not None
     assert doc[:2] >= None
-    assert not doc.vocab["a"] is None
+    assert doc.vocab["a"] is not None
     assert not doc.vocab["a"] < None
 
 
diff --git a/spacy/tests/lang/bg/test_tokenizer.py b/spacy/tests/lang/bg/test_tokenizer.py
index 2e2c45001ef..ec575ec9838 100644
--- a/spacy/tests/lang/bg/test_tokenizer.py
+++ b/spacy/tests/lang/bg/test_tokenizer.py
@@ -1,4 +1,3 @@
-import pytest
 
 
 def test_bg_tokenizer_handles_final_diacritics(bg_tokenizer):
diff --git a/spacy/tests/lang/es/test_noun_chunks.py b/spacy/tests/lang/es/test_noun_chunks.py
index 8e5fe83540c..50d49fcc28e 100644
--- a/spacy/tests/lang/es/test_noun_chunks.py
+++ b/spacy/tests/lang/es/test_noun_chunks.py
@@ -48,13 +48,13 @@
             [(0,4)]
         ),
         # Tengo un gato y un perro -> un gato, un perro
-        ( 
+        (
             ["Tengo", "un", "gato", "y", "un", "perro"],
             [0, 2, 0, 5, 5, 0],
             ["ROOT", "det", "obj", "cc", "det", "conj"],
             ["VERB", "DET", "NOUN", "CCONJ", "DET", "NOUN"],
             [(1,3), (4,6)]
-         
+
         ),
         # Dom Pedro II -> Dom Pedro II
         (
@@ -101,11 +101,11 @@
             [1, 1, 3, 1, 5, 1],
             ['det', 'ROOT', 'case', 'nmod', 'case', 'nmod'],
             ['DET', 'NOUN', 'ADP', 'PROPN', 'ADP', 'NOUN'],
-            [(0,2), (3,4), (5,6)]  
-       
+            [(0,2), (3,4), (5,6)]
+
         ),
         # El gato regordete de Susana y su amigo -> el gato regordete, Susana, su amigo
-        (  
+        (
             ['El', 'gato', 'regordete', 'de', 'Susana', 'y', 'su', 'amigo'],
             [1, 1, 1, 4, 1, 7, 7, 1],
             ['det', 'ROOT', 'amod', 'case', 'nmod', 'cc', 'det', 'conj'],
diff --git a/spacy/tests/lang/fr/test_noun_chunks.py b/spacy/tests/lang/fr/test_noun_chunks.py
index 436e07b29d0..d413f1f2211 100644
--- a/spacy/tests/lang/fr/test_noun_chunks.py
+++ b/spacy/tests/lang/fr/test_noun_chunks.py
@@ -35,7 +35,7 @@
             [(0, 2)],
         ),
         # det + adj + noun
-        # Le vieux Londres  -> Le vieux Londres 
+        # Le vieux Londres  -> Le vieux Londres
         (
             ['Les', 'vieux', 'Londres'],
             [2, 2, 2],
@@ -144,13 +144,13 @@
         ),
         # Two NPs conjuncted
         # Il a un chien et un chat -> Il, un chien, un chat
-        ( 
+        (
             ['Il', 'a', 'un', 'chien', 'et', 'un', 'chat'],
             [1, 1, 3, 1, 6, 6, 3],
             ['nsubj', 'ROOT', 'det', 'obj', 'cc', 'det', 'conj'],
             ['PRON', 'VERB', 'DET', 'NOUN', 'CCONJ', 'DET', 'NOUN'],
             [(0,1), (2,4), (5,7)]
-         
+
         ),
         # Two NPs together
         # l'écrivain brésilien Aníbal Machado -> l'écrivain brésilien, Aníbal Machado
@@ -195,12 +195,12 @@
             [0, 2, 0, 4, 2],
             ['ROOT', 'case', 'nmod', 'case', 'nmod'],
             ['NOUN', 'ADP', 'NOUN', 'ADP', 'PROPN'],
-            [(0,1), (2,3), (4,5)]  
-       
+            [(0,1), (2,3), (4,5)]
+
         ),
         # Several NPs
         # Le gros chat de Susana et son amie -> Le gros chat, Susana, son amie
-        (  
+        (
             ['Le', 'gros', 'chat', 'de', 'Susana', 'et', 'son', 'amie'],
             [2, 2, 2, 4, 2, 7, 7, 2],
             ['det', 'amod', 'ROOT', 'case', 'nmod', 'cc', 'det', 'conj'],
diff --git a/spacy/tests/lang/it/test_noun_chunks.py b/spacy/tests/lang/it/test_noun_chunks.py
index 7f6659ee7bd..5fd39ab01b9 100644
--- a/spacy/tests/lang/it/test_noun_chunks.py
+++ b/spacy/tests/lang/it/test_noun_chunks.py
@@ -62,7 +62,7 @@
             [(0,3)],
         ),
         # noun + adj plural
-        # mucche bianche 
+        # mucche bianche
         (
             ["mucche", "bianche"],
             [0, 0],
@@ -117,13 +117,13 @@
         ),
         # Two NPs conjuncted
         # Ho un cane e un gatto -> un cane, un gatto
-        ( 
+        (
             ['Ho', 'un', 'cane', 'e', 'un', 'gatto'],
             [0, 2, 0, 5, 5, 0],
             ['ROOT', 'det', 'obj', 'cc', 'det', 'conj'],
             ['VERB', 'DET', 'NOUN', 'CCONJ', 'DET', 'NOUN'],
             [(1,3), (4,6)]
-         
+
         ),
         # Two NPs together
         # lo scrittore brasiliano Aníbal Machado -> lo scrittore brasiliano, Aníbal Machado
@@ -177,12 +177,12 @@
             [0, 2, 0, 4, 2],
             ['ROOT', 'case', 'nmod', 'case', 'nmod'],
             ['NOUN', 'ADP', 'NOUN', 'ADP', 'PROPN'],
-            [(0,1), (2,3), (4,5)]  
-       
+            [(0,1), (2,3), (4,5)]
+
         ),
         # Several NPs
         # Il gatto grasso di Susana e la sua amica -> Il gatto grasso, Susana, sua amica
-        (  
+        (
             ['Il', 'gatto', 'grasso', 'di', 'Susana', 'e', 'la', 'sua', 'amica'],
             [1, 1, 1, 4, 1, 8, 8, 8, 1],
             ['det', 'ROOT', 'amod', 'case', 'nmod', 'cc', 'det', 'det:poss', 'conj'],
diff --git a/spacy/tests/lang/la/test_exception.py b/spacy/tests/lang/la/test_exception.py
index 966ae22cfec..a7fb7c85254 100644
--- a/spacy/tests/lang/la/test_exception.py
+++ b/spacy/tests/lang/la/test_exception.py
@@ -1,4 +1,3 @@
-import pytest
 
 
 def test_la_tokenizer_handles_exc_in_text(la_tokenizer):
diff --git a/spacy/tests/lang/pt/test_noun_chunks.py b/spacy/tests/lang/pt/test_noun_chunks.py
index eee96d593b1..5dd7bfd3b82 100644
--- a/spacy/tests/lang/pt/test_noun_chunks.py
+++ b/spacy/tests/lang/pt/test_noun_chunks.py
@@ -126,13 +126,13 @@
         ),
         # Two NPs conjuncted
         # Eu tenho um cachorro e um gato -> Eu, um cacharo, um gato
-        ( 
+        (
             ["Eu", "tenho", "um", "cachorro", "e", "um", "gato"],
             [1, 1, 3, 1, 6, 6, 3],
             ['nsubj', 'ROOT', 'det', 'obj', 'cc', 'det', 'conj'],
             ['PRON', 'VERB', 'DET', 'NOUN', 'CCONJ', 'DET', 'NOUN'],
             [(0,1), (2,4), (5,7)]
-         
+
         ),
         # Two NPs together
         # o escritor brasileiro Aníbal Machado -> o escritor brasileiro, Aníbal Machado
@@ -186,12 +186,12 @@
             [0, 2, 0, 4, 2],
             ['ROOT', 'case', 'nmod', 'case', 'nmod'],
             ['NOUN', 'ADP', 'NOUN', 'ADP', 'PROPN'],
-            [(0,1), (2,3), (4,5)]  
-       
+            [(0,1), (2,3), (4,5)]
+
         ),
         # Several NPs
         # O gato gordo da Susana e seu amigo -> O gato gordo, Susana, seu amigo
-        (  
+        (
             ['O', 'gato', 'gordo', 'da', 'Susana', 'e', 'seu', 'amigo'],
             [1, 1, 1, 4, 1, 7, 7, 1],
             ['det', 'ROOT', 'amod', 'case', 'nmod', 'cc', 'det', 'conj'],
diff --git a/spacy/tests/lang/sl/test_text.py b/spacy/tests/lang/sl/test_text.py
index a2a93207729..741e5cc1003 100644
--- a/spacy/tests/lang/sl/test_text.py
+++ b/spacy/tests/lang/sl/test_text.py
@@ -1,4 +1,3 @@
-import pytest
 
 
 def test_long_text(sl_tokenizer):
diff --git a/spacy/tests/lang/sq/test_text.py b/spacy/tests/lang/sq/test_text.py
index 44eedaa5487..368bb6c4157 100644
--- a/spacy/tests/lang/sq/test_text.py
+++ b/spacy/tests/lang/sq/test_text.py
@@ -1,4 +1,3 @@
-import pytest
 
 
 def test_long_text(sq_tokenizer):
diff --git a/spacy/tests/lang/xx/test_text.py b/spacy/tests/lang/xx/test_text.py
index 477f0ebe271..c28692ee57e 100644
--- a/spacy/tests/lang/xx/test_text.py
+++ b/spacy/tests/lang/xx/test_text.py
@@ -1,4 +1,3 @@
-import pytest
 
 
 def test_long_text(xx_tokenizer):
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index 5e50a4d2801..74dd026e716 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -1,9 +1,9 @@
-from typing import Any, Callable, Dict, Iterable, Tuple
+from typing import Any, Callable, Dict, Iterable
 
 import pytest
 from numpy.testing import assert_equal
 
-from spacy import Language, registry, util
+from spacy import registry, util
 from spacy.attrs import ENT_KB_ID
 from spacy.compat import pickle
 from spacy.kb import Candidate, InMemoryLookupKB, KnowledgeBase, get_candidates
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 4310e41ab47..f2bfe003d39 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -29,7 +29,7 @@
 from spacy.training.initialize import init_nlp
 
 # Ensure that the architecture gets added to the registry.
-from ..tok2vec import build_lazy_init_tok2vec as _
+from ..tok2vec import build_lazy_init_tok2vec as _  # noqa: F401
 from ..util import make_tempdir
 
 TRAIN_DATA_SINGLE_LABEL = [
 TRAIN_DATA_SINGLE_LABEL = [
diff --git a/spacy/tests/test_cli_app.py b/spacy/tests/test_cli_app.py
index 1789d60ea4c..5fe9e9bcf01 100644
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@@ -1,5 +1,4 @@
 import os
-import sys
 from pathlib import Path
 
 import pytest
diff --git a/spacy/tests/test_factory_registrations.py b/spacy/tests/test_factory_registrations.py
index ab604c3a6d5..eb69265e3f3 100644
--- a/spacy/tests/test_factory_registrations.py
+++ b/spacy/tests/test_factory_registrations.py
@@ -1,17 +1,14 @@
-import inspect
 import json
 from pathlib import Path
 
 import pytest
 
-from spacy.language import Language
 from spacy.util import registry
 
 # Path to the reference factory registrations, relative to this file
 REFERENCE_FILE = Path(__file__).parent / "factory_registrations.json"
 
 # Monkey patch the util.is_same_func to handle Cython functions
-import inspect
 
 from spacy import util
 
diff --git a/spacy/tests/test_models.py b/spacy/tests/test_models.py
index 5228b4544fd..706203ffd63 100644
--- a/spacy/tests/test_models.py
+++ b/spacy/tests/test_models.py
@@ -95,7 +95,7 @@ def test_multi_hash_embed():
     hash_embeds = [node for node in embed.walk() if node.name == "hashembed"]
     assert len(hash_embeds) == 3
     # Check they look at different columns.
-    assert list(sorted(he.attrs["column"] for he in hash_embeds)) == [0, 1, 2]
+    assert sorted(he.attrs["column"] for he in hash_embeds) == [0, 1, 2]
     # Check they use different seeds
     assert len(set(he.attrs["seed"] for he in hash_embeds)) == 3
     # Check they all have the same number of rows
diff --git a/spacy/tests/test_registry_population.py b/spacy/tests/test_registry_population.py
index c67136f9c31..e72f3d9f8e8 100644
--- a/spacy/tests/test_registry_population.py
+++ b/spacy/tests/test_registry_population.py
@@ -1,5 +1,4 @@
 import json
-import os
 from pathlib import Path
 
 import pytest
diff --git a/spacy/tests/training/test_corpus.py b/spacy/tests/training/test_corpus.py
index e7cae989384..ded6a53833c 100644
--- a/spacy/tests/training/test_corpus.py
+++ b/spacy/tests/training/test_corpus.py
@@ -1,7 +1,6 @@
-import tempfile
 from contextlib import contextmanager
 from pathlib import Path
-from typing import IO, Generator, Iterable, List, TextIO, Tuple
+from typing import Generator, Iterable, List, Tuple
 
 import pytest
 
diff --git a/spacy/tests/vocab_vectors/test_lexeme.py b/spacy/tests/vocab_vectors/test_lexeme.py
index 156e3391aa2..3c01055b552 100644
--- a/spacy/tests/vocab_vectors/test_lexeme.py
+++ b/spacy/tests/vocab_vectors/test_lexeme.py
@@ -2,7 +2,6 @@
 import pytest
 
 from spacy.attrs import IS_ALPHA, IS_DIGIT
-from spacy.lookups import Lookups
 from spacy.tokens import Doc
 from spacy.util import OOV_RANK
 from spacy.vocab import Vocab
diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py
index 873d85835f0..06d1791ade0 100644
--- a/spacy/tokens/_serialize.py
+++ b/spacy/tokens/_serialize.py
@@ -207,7 +207,7 @@ def to_bytes(self) -> bytes:
             "tokens": tokens.tobytes("C"),
             "spaces": spaces.tobytes("C"),
             "lengths": numpy.asarray(lengths, dtype="int32").tobytes("C"),
-            "strings": list(sorted(self.strings)),
+            "strings": sorted(self.strings),
             "cats": self.cats,
             "flags": self.flags,
             "span_groups": self.span_groups,
diff --git a/spacy/training/augment.py b/spacy/training/augment.py
index da5ae3d087a..ba4368acef1 100644
--- a/spacy/training/augment.py
+++ b/spacy/training/augment.py
@@ -3,7 +3,6 @@
 from functools import partial
 from typing import TYPE_CHECKING, Callable, Dict, Iterator, List, Optional, Tuple
 
-from ..util import registry
 from .example import Example
 from .iob_utils import _doc_to_biluo_tags_with_partial, split_bilu_label
 
diff --git a/spacy/training/batchers.py b/spacy/training/batchers.py
index 4f3ac5de795..8158e9fdf8f 100644
--- a/spacy/training/batchers.py
+++ b/spacy/training/batchers.py
@@ -4,7 +4,6 @@
     Any,
     Callable,
     Iterable,
-    Iterator,
     List,
     Optional,
     Sequence,
@@ -12,7 +11,7 @@
     Union,
 )
 
-from ..util import minibatch, registry
+from ..util import minibatch
 
 Sizing = Union[Sequence[int], int]
 ItemT = TypeVar("ItemT")
@@ -232,6 +231,6 @@ def _batch_by_length(
         batches.append(batch)
     # Check lengths match
     assert sum(len(b) for b in batches) == len(seqs)
-    batches = [list(sorted(batch)) for batch in batches]
+    batches = [sorted(batch) for batch in batches]
     batches.reverse()
     return batches
diff --git a/spacy/training/callbacks.py b/spacy/training/callbacks.py
index 714deea6dcd..19382757a95 100644
--- a/spacy/training/callbacks.py
+++ b/spacy/training/callbacks.py
@@ -1,7 +1,7 @@
 from typing import TYPE_CHECKING, Callable, Optional
 
 from ..errors import Errors
-from ..util import load_model, logger, registry
+from ..util import load_model, logger
 
 if TYPE_CHECKING:
     from ..language import Language
diff --git a/spacy/training/loggers.py b/spacy/training/loggers.py
index 05c5d3bb681..7f200545ca0 100644
--- a/spacy/training/loggers.py
+++ b/spacy/training/loggers.py
@@ -8,7 +8,6 @@
 
 from .. import util
 from ..errors import Errors
-from ..util import registry
 
 if TYPE_CHECKING:
     from ..language import Language  # noqa: F401
diff --git a/spacy/vocab.pyi b/spacy/vocab.pyi
index ee7636f02c8..906a4c0d978 100644
--- a/spacy/vocab.pyi
+++ b/spacy/vocab.pyi
@@ -5,7 +5,6 @@ from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Unio
 from cymem.cymem import Pool
 from thinc.types import Floats1d, FloatsXd
 
-from . import Language
 from .lexeme import Lexeme
 from .lookups import Lookups
 from .morphology import Morphology

From 8e6bd6d1c5ff008adf3ce8a0bb07250e1c6b68ae Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sat, 21 Mar 2026 08:43:52 +0100
Subject: [PATCH 29/42] Apply ruff formatting to 8 files

---
 spacy/cli/find_threshold.py           | 4 +++-
 spacy/ml/_precomputable_affine.py     | 1 -
 spacy/ml/extract_spans.py             | 1 -
 spacy/tests/lang/bg/test_tokenizer.py | 2 --
 spacy/tests/lang/la/test_exception.py | 2 --
 spacy/tests/lang/sl/test_text.py      | 2 --
 spacy/tests/lang/sq/test_text.py      | 2 --
 spacy/tests/lang/xx/test_text.py      | 2 --
 8 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/spacy/cli/find_threshold.py b/spacy/cli/find_threshold.py
index 7b2c5f98051..05b6fdc9517 100644
--- a/spacy/cli/find_threshold.py
+++ b/spacy/cli/find_threshold.py
@@ -207,7 +207,9 @@ def filter_config(
             ),
         )
         if hasattr(pipe, "cfg"):
-            nlp.get_pipe(pipe_name).cfg = set_nested_item(pipe.cfg, config_keys, threshold)
+            nlp.get_pipe(pipe_name).cfg = set_nested_item(
+                pipe.cfg, config_keys, threshold
+            )
 
         eval_scores = nlp.evaluate(dev_dataset)
         if scores_key not in eval_scores:
diff --git a/spacy/ml/_precomputable_affine.py b/spacy/ml/_precomputable_affine.py
index ac2f6bbd3fa..464c32594dc 100644
--- a/spacy/ml/_precomputable_affine.py
+++ b/spacy/ml/_precomputable_affine.py
@@ -1,7 +1,6 @@
 from thinc.api import Model, normal_init
 
 
-
 def PrecomputableAffine(nO, nI, nF, nP, dropout=0.1):
     model = Model(
         "precomputable_affine",
diff --git a/spacy/ml/extract_spans.py b/spacy/ml/extract_spans.py
index 2351ddc2cf0..925bfd45c31 100644
--- a/spacy/ml/extract_spans.py
+++ b/spacy/ml/extract_spans.py
@@ -4,7 +4,6 @@
 from thinc.types import Ints1d, Ragged
 
 
-
 def extract_spans() -> Model[Tuple[Ragged, Ragged], Ragged]:
     """Extract spans from a sequence of source arrays, as specified by an array
     of (start, end) indices. The output is a ragged array of the
diff --git a/spacy/tests/lang/bg/test_tokenizer.py b/spacy/tests/lang/bg/test_tokenizer.py
index ec575ec9838..b16ef12d880 100644
--- a/spacy/tests/lang/bg/test_tokenizer.py
+++ b/spacy/tests/lang/bg/test_tokenizer.py
@@ -1,5 +1,3 @@
-
-
 def test_bg_tokenizer_handles_final_diacritics(bg_tokenizer):
     text = "Ня̀маше яйца̀. Ня̀маше яйца̀."
     tokens = bg_tokenizer(text)
diff --git a/spacy/tests/lang/la/test_exception.py b/spacy/tests/lang/la/test_exception.py
index a7fb7c85254..9a6e6a422c5 100644
--- a/spacy/tests/lang/la/test_exception.py
+++ b/spacy/tests/lang/la/test_exception.py
@@ -1,5 +1,3 @@
-
-
 def test_la_tokenizer_handles_exc_in_text(la_tokenizer):
     text = "scio te omnia facturum, ut nobiscum quam primum sis"
     tokens = la_tokenizer(text)
diff --git a/spacy/tests/lang/sl/test_text.py b/spacy/tests/lang/sl/test_text.py
index 741e5cc1003..4781bebcdcb 100644
--- a/spacy/tests/lang/sl/test_text.py
+++ b/spacy/tests/lang/sl/test_text.py
@@ -1,5 +1,3 @@
-
-
 def test_long_text(sl_tokenizer):
     # Excerpt: European Convention on Human Rights
     text = """
diff --git a/spacy/tests/lang/sq/test_text.py b/spacy/tests/lang/sq/test_text.py
index 368bb6c4157..24d60afdf20 100644
--- a/spacy/tests/lang/sq/test_text.py
+++ b/spacy/tests/lang/sq/test_text.py
@@ -1,5 +1,3 @@
-
-
 def test_long_text(sq_tokenizer):
     # Excerpt: European Convention on Human Rights
     text = """
diff --git a/spacy/tests/lang/xx/test_text.py b/spacy/tests/lang/xx/test_text.py
index c28692ee57e..a4eafdcb98e 100644
--- a/spacy/tests/lang/xx/test_text.py
+++ b/spacy/tests/lang/xx/test_text.py
@@ -1,5 +1,3 @@
-
-
 def test_long_text(xx_tokenizer):
     # Excerpt: Text in Skolt Sami taken from https://www.samediggi.fi
     text = """

From 24255bd1e25c7275553325d989bfcfd3a41fc1d2 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sat, 21 Mar 2026 08:44:43 +0100
Subject: [PATCH 30/42] Fix import sorting for ruff isort compliance

---
 spacy/__init__.py                           | 11 ++++++-----
 spacy/compat.py                             |  4 +++-
 spacy/lang/pt/punctuation.py                |  8 +++++---
 spacy/pipeline/factories.py                 |  2 +-
 spacy/tests/pipeline/test_initialize.py     |  2 --
 spacy/tests/pipeline/test_pipe_factories.py |  2 --
 spacy/tests/test_misc.py                    |  2 --
 spacy/tokens/_serialize.py                  |  3 +--
 spacy/training/initialize.py                |  3 +--
 9 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/spacy/__init__.py b/spacy/__init__.py
index eeab3773591..9d7fdf29ba7 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -10,16 +10,16 @@
 # These are imported as part of the API
 from thinc.api import Config, prefer_gpu, require_cpu, require_gpu  # noqa: F401
 
-from . import pipeline  # noqa: F401
-from . import util
+from . import (
+    pipeline,  # noqa: F401
+    util,
+)
 from .about import __version__  # noqa: F401
 from .cli.info import info  # noqa: F401
 from .errors import Errors
 from .glossary import explain  # noqa: F401
 from .language import Language
 from .registrations import REGISTRY_POPULATED, populate_registry
-from .util import logger, registry  # noqa: F401
-from .vocab import Vocab
 
 # Rebuild pydantic v2 schemas that use forward references to Language/Vocab
 from .schemas import (  # noqa: F401
@@ -29,8 +29,9 @@
     ConfigSchemaPretrain,
     ConfigSchemaTraining,
 )
-
 from .training import Example  # noqa: F401
+from .util import logger, registry  # noqa: F401
+from .vocab import Vocab
 
 _rebuild_ns = {"Language": Language, "Vocab": Vocab, "Example": Example}
 for _schema in (
diff --git a/spacy/compat.py b/spacy/compat.py
index a9e7d5a20b9..abaca49302f 100644
--- a/spacy/compat.py
+++ b/spacy/compat.py
@@ -35,7 +35,9 @@
 try:  # Python 3.8+
     import importlib.metadata as importlib_metadata
 except ImportError:
-    from catalogue import _importlib_metadata as importlib_metadata  # type: ignore[no-redef]    # noqa: F401
+    from catalogue import (
+        _importlib_metadata as importlib_metadata,  # type: ignore[no-redef]    # noqa: F401
+    )
 
 from thinc.api import Optimizer  # noqa: F401
 
diff --git a/spacy/lang/pt/punctuation.py b/spacy/lang/pt/punctuation.py
index b2d63cb3d63..60bd50da1eb 100644
--- a/spacy/lang/pt/punctuation.py
+++ b/spacy/lang/pt/punctuation.py
@@ -1,6 +1,8 @@
-from ..punctuation import TOKENIZER_INFIXES as BASE_TOKENIZER_INFIXES
-from ..punctuation import TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES
-from ..punctuation import TOKENIZER_SUFFIXES as BASE_TOKENIZER_SUFFIXES
+from ..punctuation import (
+    TOKENIZER_INFIXES as BASE_TOKENIZER_INFIXES,
+    TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES,
+    TOKENIZER_SUFFIXES as BASE_TOKENIZER_SUFFIXES,
+)
 
 _prefixes = [r"\w{1,3}\$"] + BASE_TOKENIZER_PREFIXES
 
diff --git a/spacy/pipeline/factories.py b/spacy/pipeline/factories.py
index f796f2dc8a5..e76704237ee 100644
--- a/spacy/pipeline/factories.py
+++ b/spacy/pipeline/factories.py
@@ -24,8 +24,8 @@
 from ..pipeline.sentencizer import Sentencizer
 from ..pipeline.senter import DEFAULT_SENTER_MODEL, SentenceRecognizer
 from ..pipeline.span_finder import DEFAULT_SPAN_FINDER_MODEL, SpanFinder
-from ..pipeline.span_ruler import DEFAULT_SPANS_KEY as SPAN_RULER_DEFAULT_SPANS_KEY
 from ..pipeline.span_ruler import (
+    DEFAULT_SPANS_KEY as SPAN_RULER_DEFAULT_SPANS_KEY,
     SpanRuler,
     prioritize_existing_ents_filter,
     prioritize_new_ents_filter,
diff --git a/spacy/tests/pipeline/test_initialize.py b/spacy/tests/pipeline/test_initialize.py
index acb1c6faa45..71b12227f2c 100644
--- a/spacy/tests/pipeline/test_initialize.py
+++ b/spacy/tests/pipeline/test_initialize.py
@@ -1,7 +1,5 @@
 import pytest
-
 from pydantic import StrictBool
-
 from thinc.api import ConfigValidationError
 
 from spacy.lang.en import English
diff --git a/spacy/tests/pipeline/test_pipe_factories.py b/spacy/tests/pipeline/test_pipe_factories.py
index f8f9ec4b10d..a8a6c7d136a 100644
--- a/spacy/tests/pipeline/test_pipe_factories.py
+++ b/spacy/tests/pipeline/test_pipe_factories.py
@@ -1,7 +1,5 @@
 import pytest
-
 from pydantic import StrictInt, StrictStr
-
 from thinc.api import ConfigValidationError, Linear, Model
 
 import spacy
diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index 2c26952891d..309c57b0926 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -3,9 +3,7 @@
 from pathlib import Path
 
 import pytest
-
 from pydantic import ValidationError
-
 from thinc.api import (
     Config,
     ConfigValidationError,
diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py
index 06d1791ade0..51f5740c25c 100644
--- a/spacy/tokens/_serialize.py
+++ b/spacy/tokens/_serialize.py
@@ -13,8 +13,7 @@
 from ..util import SimpleFrozenList, ensure_path
 from ..vocab import Vocab
 from ._dict_proxies import SpanGroups
-from .doc import DOCBIN_ALL_ATTRS as ALL_ATTRS
-from .doc import Doc
+from .doc import DOCBIN_ALL_ATTRS as ALL_ATTRS, Doc
 
 
 class DocBin:
diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py
index 0621702214c..64cdddf9c43 100644
--- a/spacy/training/initialize.py
+++ b/spacy/training/initialize.py
@@ -25,8 +25,7 @@
     registry,
     resolve_dot_names,
 )
-from ..vectors import Mode as VectorsMode
-from ..vectors import Vectors
+from ..vectors import Mode as VectorsMode, Vectors
 from .pretrain import get_tok2vec_ref
 
 if TYPE_CHECKING:

From f175a51e2df36f16d473c7b62a0b9b8563ebb773 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Mon, 23 Mar 2026 13:45:02 +0100
Subject: [PATCH 31/42] Fully migrate to Pydantic v2 (#13940)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use confection v1.3 and Thinc v8.3.13, which implement custom validation logic in place of Pydantic, allowing us to properly adopt Pydantic v2 and provide full Python 3.14 support.

Our dependency tree used Pydantic v1 in unusual ways, and relied on behaviours that Pydantic v2 reformed. In the time since Pydantic v2 was released there were a few attempts to migrate over to it, but the task has been complicated by the fact that the confection library has a fairly tangled implementation and I had reduced availability for open-source work in 2024 and 2025.

Specifically, our library confection provides the extensible configuration system we use in spaCy and Thinc. The config system allows you to refer to values that will be supplied by arbitrary functions, that e.g. define some neural network model or its sublayers. The functionality in confection is complicated because we aggressively prioritised user experience in the specification, even if it required increased implementation complexity.

Confection's original implementation built a dynamic Pydantic v1 schema for function-supplied values ("promises"). We validate the schema before calling any promises, and then validate the schema again after calling all the promises and substituting in their values. The variable-interpolation system adds further difficulties to the implementation, and we have to do it all subclassing the Python built-in configparser, which ties us to implementation choices I'd do differently if I had a clean slate.

Here's one summary of Pydantic v1-specific behaviours that made the migration to v2 particularly difficult for us. This particular summary was produced during a session with Claude Code Opus 4.6, so nuances of it might be wrong. The full history of attempts at doing this spans different refactors separated by a few months at a time, so I don't have a full record of all the things that I struggled with. It's possible some details of this summary are incorrect though.

The core problem we kept hitting: Pydantic v2 compiles validation schemas upfront and has much stricter immutability. The whole session has been a series of workarounds for this:

```
 1. Schema mutation — v1 let you mutate __fields__ in place; v2 needs model_rebuild() which loses forward ref namespaces, or create_model subclasses which don't propagate to parent schemas.
 2. model_dump vs dict — v2 converts dataclasses to dicts, breaking resolved objects. Needed a custom _model_to_dict helper.
 3. model_construct drops extras — v2 silently drops fields with extra="forbid", needed manual workarounds.
 4. Strict coercion — v2 coerces ndarray to List[Floats1d] via iteration, needed strict=True.
 5. Forward refs — Every schema with TYPE_CHECKING imports needs model_rebuild() with the right namespace, and that breaks when confection re-rebuilds later.
```

In order to adjust for behavioural differences like this, I'd refactored confection to build the different versions of the schema in multiple passes, instead of building all the representations together as we'd been doing. However this refactor itself had problems, further complicating the migration.

~I've now bitten the bullet and rolled back the refactor I'd been attempting of confection, and instead replaced the Pydantic validation with custom logic. This allows Confection to remove Pydantic as a dependency entirely.~ Update: Actually I went back and got the refactor working. All much nicer now.

I've gone to some lengths to explain this because migrating off a dependency after breaking changes can be a sensitive topic. I want to stress that the changes Pydantic made from v1 to v2 are very good, and I greatly appreciate them as a user of FastAPI in our services. It would be very bad for the ecosystem if Pydantic pinned themselves to exactly matching the behaviours they had in v1 just to avoid breaking support for the sort of thing we'd been doing. Instead, users like us who were relying on those behaviours should just find some way to adapt --- either vendor the v1 version we need, or change our behaviours, or implement an alternative. I would have liked to do this sooner but we've ultimately gone with the third option.
---
 .github/workflows/cibuildwheel.yml             |  5 ++++-
 .github/workflows/explosionbot.yml             | 10 +++++++---
 .github/workflows/issue-manager.yml            |  6 +++++-
 .github/workflows/lock.yml                     |  2 +-
 .github/workflows/publish_pypi.yml             |  4 +++-
 .github/workflows/spacy_universe_alert.yml     | 13 ++++---------
 .github/workflows/tests.yml                    | 18 ++++++++++--------
 .github/workflows/universe_validation.yml      |  6 ++++--
 pyproject.toml                                 |  2 +-
 requirements.txt                               | 12 ++++++------
 setup.cfg                                      | 16 ++++++++++------
 spacy/__init__.py                              |  2 +-
 spacy/cli/debug_config.py                      |  4 ++--
 spacy/cli/debug_data.py                        |  2 +-
 spacy/cli/debug_model.py                       |  2 +-
 spacy/cli/find_threshold.py                    |  6 ++++--
 spacy/cli/init_config.py                       |  2 +-
 spacy/compat.py                                |  4 ++--
 spacy/lang/es/lemmatizer.py                    |  2 +-
 spacy/language.py                              | 10 ++++++----
 spacy/matcher/phrasematcher.pyx                |  2 +-
 spacy/pipeline/_edit_tree_internals/schemas.py |  1 +
 spacy/pipeline/factories.py                    |  3 ++-
 spacy/schemas.py                               |  6 +++---
 spacy/tests/lang/zh/test_tokenizer.py          |  2 +-
 spacy/tests/package/test_requirements.py       |  1 +
 spacy/tests/pipeline/test_textcat.py           |  1 +
 spacy/tests/test_cli.py                        |  3 +++
 spacy/training/batchers.py                     |  1 +
 spacy/training/corpus.py                       |  3 ++-
 spacy/training/initialize.py                   |  2 +-
 spacy/training/loop.py                         |  2 +-
 spacy/training/pretrain.py                     |  2 +-
 33 files changed, 93 insertions(+), 64 deletions(-)

diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml
index 5f8ba9285ac..5f731a31595 100644
--- a/.github/workflows/cibuildwheel.yml
+++ b/.github/workflows/cibuildwheel.yml
@@ -7,9 +7,12 @@ on:
       # ** matches 'zero or more of any character'
       - 'release-v[0-9]+.[0-9]+.[0-9]+**'
       - 'prerelease-v[0-9]+.[0-9]+.[0-9]+**'
+
+permissions: {}
+
 jobs:
   build_wheels:
-    uses: explosion/gha-cibuildwheel/.github/workflows/cibuildwheel.yml@main
+    uses: explosion/gha-cibuildwheel/.github/workflows/cibuildwheel.yml@2c98f757f13d112cf73fcf4b627249f1fffb5aae  # main
     permissions:
       contents: write
       actions: read
diff --git a/.github/workflows/explosionbot.yml b/.github/workflows/explosionbot.yml
index 78a27cfa3ba..979385ccb90 100644
--- a/.github/workflows/explosionbot.yml
+++ b/.github/workflows/explosionbot.yml
@@ -6,6 +6,8 @@ on:
       - created
       - edited
 
+permissions: {}
+
 jobs:
   explosion-bot:
     if: github.repository_owner == 'explosion'
@@ -15,13 +17,15 @@ jobs:
         env:
           GITHUB_CONTEXT: ${{ toJson(github) }}
         run: echo "$GITHUB_CONTEXT"
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6
       - name: Install and run explosion-bot
         run: |
-          pip install git+https://${{ secrets.EXPLOSIONBOT_TOKEN }}@github.com/explosion/explosion-bot
+          git config --global url."https://x-access-token:${EXPLOSIONBOT_TOKEN}@github.com/".insteadOf "https://github.com/"
+          pip install git+https://github.com/explosion/explosion-bot
           python -m explosionbot
         env:
+          EXPLOSIONBOT_TOKEN: ${{ secrets.EXPLOSIONBOT_TOKEN }}
           INPUT_TOKEN: ${{ secrets.EXPLOSIONBOT_TOKEN }}
           INPUT_BK_TOKEN: ${{ secrets.BUILDKITE_SECRET }}
           ENABLED_COMMANDS: "test_gpu,test_slow,test_slow_gpu"
diff --git a/.github/workflows/issue-manager.yml b/.github/workflows/issue-manager.yml
index 6c7d7d5a6f8..264707485e7 100644
--- a/.github/workflows/issue-manager.yml
+++ b/.github/workflows/issue-manager.yml
@@ -11,12 +11,16 @@ on:
     types:
       - labeled
 
+permissions: {}
+
 jobs:
   issue-manager:
+    permissions:
+      issues: write
     if: github.repository_owner == 'explosion'
     runs-on: ubuntu-latest
     steps:
-      - uses: tiangolo/issue-manager@0.4.0
+      - uses: tiangolo/issue-manager@4d1b7e05935a404dc8337d30bd23be46be8bb8e5  # 0.4.0
         with:
           token: ${{ secrets.GITHUB_TOKEN }}
           config: >
diff --git a/.github/workflows/lock.yml b/.github/workflows/lock.yml
index 2bbdd64c771..8fcf3028476 100644
--- a/.github/workflows/lock.yml
+++ b/.github/workflows/lock.yml
@@ -16,7 +16,7 @@ jobs:
     if: github.repository_owner == 'explosion'
     runs-on: ubuntu-latest
     steps:
-      - uses: dessant/lock-threads@v5
+      - uses: dessant/lock-threads@1bf7ec25051fe7c00bdd17e6a7cf3d7bfb7dc771  # v5
         with:
           process-only: 'issues'
           issue-inactive-days: '30'
diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml
index 9f432874cc2..fcc6f2a9999 100644
--- a/.github/workflows/publish_pypi.yml
+++ b/.github/workflows/publish_pypi.yml
@@ -8,6 +8,8 @@ on:
     types:
       - published
 
+permissions: {}
+
 jobs:
   upload_pypi:
     runs-on: ubuntu-latest
@@ -21,7 +23,7 @@ jobs:
     # or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this)
     # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
     steps:
-      - uses: robinraju/release-downloader@v1
+      - uses: robinraju/release-downloader@daf26c55d821e836577a15f77d86ddc078948b05  # v1
         with:
           tag: ${{ github.event.release.tag_name }}
           fileName: '*'
diff --git a/.github/workflows/spacy_universe_alert.yml b/.github/workflows/spacy_universe_alert.yml
index 01731ffe0d7..ec0230699be 100644
--- a/.github/workflows/spacy_universe_alert.yml
+++ b/.github/workflows/spacy_universe_alert.yml
@@ -5,21 +5,16 @@ on:
     paths:
       - "website/meta/universe.json"
 
+permissions: {}
+
 jobs:
   build:
     if: github.repository_owner == 'explosion'
     runs-on: ubuntu-latest
 
     steps:
-      - name: Dump GitHub context
-        env:
-          GITHUB_CONTEXT: ${{ toJson(github) }}
-          PR_NUMBER: ${{github.event.number}}
-        run: |
-          echo "$GITHUB_CONTEXT"
-
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6
         with:
           python-version: '3.10'
       - name: Install Bernadette app dependency and send an alert
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index adfce07f50b..b20dba12f04 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -19,6 +19,8 @@ on:
       - "*.mdx"
       - "website/**"
 
+permissions: {}
+
 jobs:
   validate:
     name: Validate
@@ -26,10 +28,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Check out repo
-        uses: actions/checkout@v4
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
 
       - name: Configure Python version
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6
         with:
           python-version: "3.10"
 
@@ -45,19 +47,19 @@ jobs:
     name: Test
     needs: Validate
     strategy:
-      fail-fast: true
+      fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
-        python_version: ["3.10", "3.11", "3.12", "3.13"]
+        python_version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
 
     runs-on: ${{ matrix.os }}
 
     steps:
       - name: Check out repo
-        uses: actions/checkout@v4
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
 
       - name: Configure Python version
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6
         with:
           python-version: ${{ matrix.python_version }}
 
@@ -93,7 +95,7 @@ jobs:
         shell: bash
 
       - name: Test import
-        run: python -W error -c "import spacy"
+        run: python -W error -W 'ignore:Core Pydantic V1:UserWarning:pydantic' -c "import spacy"
 
       - name: "Test download CLI"
         run: |
@@ -154,7 +156,7 @@ jobs:
 
       - name: "Run CPU tests"
         run: |
-          python -m pytest --pyargs spacy -W error
+          python -m pytest --pyargs spacy -W error -W 'ignore:Core Pydantic V1:UserWarning:pydantic'
         if: "!(startsWith(matrix.os, 'macos') && matrix.python_version == '3.11')"
 
       - name: "Run CPU tests with thinc-apple-ops"
diff --git a/.github/workflows/universe_validation.yml b/.github/workflows/universe_validation.yml
index ce7df49dbae..e97850cd4b0 100644
--- a/.github/workflows/universe_validation.yml
+++ b/.github/workflows/universe_validation.yml
@@ -13,6 +13,8 @@ on:
     paths:
       - "website/meta/universe.json"
 
+permissions: {}
+
 jobs:
   validate:
     name: Validate
@@ -20,10 +22,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Check out repo
-        uses: actions/checkout@v4
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
 
       - name: Configure Python version
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6
         with:
           python-version: "3.7"
 
diff --git a/pyproject.toml b/pyproject.toml
index 9e6cab69da3..395c2f7a108 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ requires = [
     "cymem>=2.0.2,<2.1.0",
     "preshed>=3.0.2,<3.1.0",
     "murmurhash>=0.28.0,<1.1.0",
-    "thinc>=8.3.4,<8.4.0",
+    "thinc>=8.3.12,<8.4.0",
     "numpy>=2.0.0,<3.0.0"
 ]
 build-backend = "setuptools.build_meta"
diff --git a/requirements.txt b/requirements.txt
index 866128b31b6..50c6382bea3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,19 +3,19 @@ spacy-legacy>=3.0.11,<3.1.0
 spacy-loggers>=1.0.0,<2.0.0
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
-thinc>=8.3.4,<8.4.0
-ml_datasets>=0.2.0,<0.3.0
+thinc>=8.3.12,<8.4.0
+ml_datasets>=0.2.1,<0.3.0
 murmurhash>=0.28.0,<1.1.0
 wasabi>=0.9.1,<1.2.0
-srsly>=2.4.3,<3.0.0
+srsly>=2.5.3,<3.0.0
 catalogue>=2.0.6,<2.1.0
 typer>=0.3.0,<1.0.0
-weasel>=0.4.2,<0.5.0
+weasel>=1.0.0,<2.0.0
 # Third party dependencies
 numpy>=2.0.0,<3.0.0
 requests>=2.13.0,<3.0.0
 tqdm>=4.38.0,<5.0.0
-pydantic>=1.7.4,!=1.8,!=1.8.1,<3.0.0
+pydantic>=2.0.0,<3.0.0
 jinja2
 # Official Python utilities
 setuptools
@@ -34,4 +34,4 @@ types-requests
 types-setuptools>=57.0.0
 ruff>=0.9.0
 cython-lint>=0.15.0
-confection>=0.0.4,<1.0.0
+confection>=1.1.0,<2.0.0
diff --git a/setup.cfg b/setup.cfg
index 1ef8e303125..66cf942218b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -22,6 +22,7 @@ classifiers =
     Programming Language :: Python :: 3.11
     Programming Language :: Python :: 3.12
     Programming Language :: Python :: 3.13
+    Programming Language :: Python :: 3.14
     Topic :: Scientific/Engineering
 project_urls =
     Release notes = https://github.com/explosion/spaCy/releases
@@ -41,7 +42,7 @@ setup_requires =
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
     murmurhash>=0.28.0,<1.1.0
-    thinc>=8.3.4,<8.4.0
+    thinc>=8.3.12,<8.4.0
 install_requires =
     # Our libraries
     spacy-legacy>=3.0.11,<3.1.0
@@ -49,19 +50,19 @@ install_requires =
     murmurhash>=0.28.0,<1.1.0
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
-    thinc>=8.3.4,<8.4.0
+    thinc>=8.3.12,<8.4.0
     wasabi>=0.9.1,<1.2.0
-    srsly>=2.4.3,<3.0.0
+    srsly>=2.5.3,<3.0.0
     catalogue>=2.0.6,<2.1.0
-    weasel>=0.4.2,<0.5.0
-    confection>=0.0.4,<1.0.0
+    weasel>=1.0.0,<2.0.0
+    confection>=1.1.0,<2.0.0
     # Third-party dependencies
     typer>=0.3.0,<1.0.0
     tqdm>=4.38.0,<5.0.0
     numpy>=1.15.0; python_version < "3.9"
     numpy>=1.19.0; python_version >= "3.9"
     requests>=2.13.0,<3.0.0
-    pydantic>=1.7.4,!=1.8,!=1.8.1,<3.0.0
+    pydantic>=2.0.0,<3.0.0
     jinja2
     # Official Python utilities
     setuptools
@@ -135,6 +136,9 @@ formats = gztar
 markers =
     slow: mark a test as slow
     issue: reference specific issue
+filterwarnings =
+    error
+    ignore:Core Pydantic V1:UserWarning:pydantic
 
 [mypy]
 ignore_missing_imports = True
diff --git a/spacy/__init__.py b/spacy/__init__.py
index 9d7fdf29ba7..5b3ff25c872 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -41,7 +41,7 @@
     ConfigSchemaInit,
     ConfigSchema,
 ):
-    _schema.model_rebuild(_types_namespace=_rebuild_ns)
+    _schema.model_rebuild(_types_namespace=_rebuild_ns)  # type: ignore[attr-defined]
 
 if sys.maxunicode == 65535:
     raise SystemError(Errors.E130)
diff --git a/spacy/cli/debug_config.py b/spacy/cli/debug_config.py
index f049d7fd149..4876b6ff9e1 100644
--- a/spacy/cli/debug_config.py
+++ b/spacy/cli/debug_config.py
@@ -82,10 +82,10 @@ def debug_config(
         config = nlp.config.interpolate()
     msg.divider("Config validation for [initialize]")
     with show_validation_error(config_path):
-        T = registry.resolve(config["initialize"], schema=ConfigSchemaInit)
+        T = registry.resolve(config["initialize"], schema=ConfigSchemaInit)  # type: ignore[arg-type]
     msg.divider("Config validation for [training]")
     with show_validation_error(config_path):
-        T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
+        T = registry.resolve(config["training"], schema=ConfigSchemaTraining)  # type: ignore[arg-type]
         dot_names = [T["train_corpus"], T["dev_corpus"]]
         util.resolve_dot_names(config, dot_names)
     msg.good("Config is valid")
diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index df52250cf90..6ba18e7f224 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -137,7 +137,7 @@ def debug_data(
         cfg = util.load_config(config_path, overrides=config_overrides)
         nlp = util.load_model_from_config(cfg)
         config = nlp.config.interpolate()
-        T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
+        T = registry.resolve(config["training"], schema=ConfigSchemaTraining)  # type: ignore[arg-type]
     # Use original config here, not resolved version
     sourced_components = get_sourced_components(cfg)
     frozen_components = T["frozen_components"]
diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index ec7ffe099d7..dc0de3e1489 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -89,7 +89,7 @@ def debug_model_cli(
     with show_validation_error(config_path):
         nlp = util.load_model_from_config(raw_config)
         config = nlp.config.interpolate()
-        T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
+        T = registry.resolve(config["training"], schema=ConfigSchemaTraining)  # type: ignore[arg-type]
     seed = T["seed"]
     if seed is not None:
         msg.info(f"Fixing random seed: {seed}")
diff --git a/spacy/cli/find_threshold.py b/spacy/cli/find_threshold.py
index 05b6fdc9517..1873f476fcd 100644
--- a/spacy/cli/find_threshold.py
+++ b/spacy/cli/find_threshold.py
@@ -207,8 +207,10 @@ def filter_config(
             ),
         )
         if hasattr(pipe, "cfg"):
-            nlp.get_pipe(pipe_name).cfg = set_nested_item(
-                pipe.cfg, config_keys, threshold
+            nlp.get_pipe(pipe_name).cfg = set_nested_item(  # type: ignore[attr-defined]
+                pipe.cfg,
+                config_keys,
+                threshold,  # type: ignore[attr-defined]
             )
 
         eval_scores = nlp.evaluate(dev_dataset)
diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py
index 2cb39056d5e..c7081040280 100644
--- a/spacy/cli/init_config.py
+++ b/spacy/cli/init_config.py
@@ -216,7 +216,7 @@ def init_config(
     # Filter out duplicates since tok2vec and transformer are added by template
     pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")]
     defaults = RECOMMENDATIONS["__default__"]
-    reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, defaults)).dict()
+    reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, defaults)).model_dump()
     variables = {
         "lang": lang,
         "components": pipeline,
diff --git a/spacy/compat.py b/spacy/compat.py
index abaca49302f..828ed1ba62e 100644
--- a/spacy/compat.py
+++ b/spacy/compat.py
@@ -35,8 +35,8 @@
 try:  # Python 3.8+
     import importlib.metadata as importlib_metadata
 except ImportError:
-    from catalogue import (
-        _importlib_metadata as importlib_metadata,  # type: ignore[no-redef]    # noqa: F401
+    from catalogue import (  # type: ignore[no-redef]
+        _importlib_metadata as importlib_metadata,  # noqa: F401
     )
 
 from thinc.api import Optimizer  # noqa: F401
diff --git a/spacy/lang/es/lemmatizer.py b/spacy/lang/es/lemmatizer.py
index 05238a75b70..3102f3b9bc4 100644
--- a/spacy/lang/es/lemmatizer.py
+++ b/spacy/lang/es/lemmatizer.py
@@ -416,7 +416,7 @@ def lemmatize_verb_pron(
             rule = self.select_rule("verb", features)
             verb_lemma = self.lemmatize_verb(
                 verb,
-                features - {"PronType=Prs"},
+                features - {"PronType=Prs"},  # type: ignore[operator]
                 rule,
                 index,  # type: ignore[operator]
             )[0]
diff --git a/spacy/language.py b/spacy/language.py
index ea9ba3cae8b..8e91018254e 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1323,7 +1323,7 @@ def get_examples():
         # Make sure the config is interpolated so we can resolve subsections
         config = self.config.interpolate()
         # These are the settings provided in the [initialize] block in the config
-        I = registry.resolve(config["initialize"], schema=ConfigSchemaInit)
+        I = registry.resolve(config["initialize"], schema=ConfigSchemaInit)  # type: ignore[arg-type]
         before_init = I["before_init"]
         if before_init is not None:
             before_init(self)
@@ -1353,7 +1353,7 @@ def get_examples():
                 proc.initialize(get_examples, nlp=self, **p_settings)
         pretrain_cfg = config.get("pretraining")
         if pretrain_cfg:
-            P = registry.resolve(pretrain_cfg, schema=ConfigSchemaPretrain)
+            P = registry.resolve(pretrain_cfg, schema=ConfigSchemaPretrain)  # type: ignore[arg-type]
             init_tok2vec(self, P, I)
         self._link_components()
         self._optimizer = sgd
@@ -1823,7 +1823,7 @@ def from_config(
         orig_pretraining = config.pop("pretraining", None)
         config["components"] = {}
         if auto_fill:
-            filled = registry.fill(config, validate=validate, schema=ConfigSchema)
+            filled = registry.fill(config, validate=validate, schema=ConfigSchema)  # type: ignore[arg-type]
         else:
             filled = config
         filled["components"] = orig_pipeline
@@ -1832,7 +1832,9 @@ def from_config(
             filled["pretraining"] = orig_pretraining
             config["pretraining"] = orig_pretraining
         resolved_nlp = registry.resolve(
-            filled["nlp"], validate=validate, schema=ConfigSchemaNlp
+            filled["nlp"],
+            validate=validate,
+            schema=ConfigSchemaNlp,  # type: ignore[arg-type]
         )
         create_tokenizer = resolved_nlp["tokenizer"]
         create_vectors = resolved_nlp["vectors"]
diff --git a/spacy/matcher/phrasematcher.pyx b/spacy/matcher/phrasematcher.pyx
index ccc830e35c1..a71f85f6e63 100644
--- a/spacy/matcher/phrasematcher.pyx
+++ b/spacy/matcher/phrasematcher.pyx
@@ -57,7 +57,7 @@ cdef class PhraseMatcher:
                 attr = "ORTH"
             if attr == "IS_SENT_START":
                 attr = "SENT_START"
-            if attr.lower() not in TokenPattern().dict():
+            if attr.lower() not in TokenPattern().model_dump():
                 raise ValueError(Errors.E152.format(attr=attr))
             self.attr = IDS.get(attr)
 
diff --git a/spacy/pipeline/_edit_tree_internals/schemas.py b/spacy/pipeline/_edit_tree_internals/schemas.py
index 1f12b63607c..ef7a076b6cd 100644
--- a/spacy/pipeline/_edit_tree_internals/schemas.py
+++ b/spacy/pipeline/_edit_tree_internals/schemas.py
@@ -6,6 +6,7 @@
     ConfigDict,
     Field,
     RootModel,
+    StrictBool,
     StrictInt,
     StrictStr,
     ValidationError,
diff --git a/spacy/pipeline/factories.py b/spacy/pipeline/factories.py
index e76704237ee..8c71067b32e 100644
--- a/spacy/pipeline/factories.py
+++ b/spacy/pipeline/factories.py
@@ -14,9 +14,10 @@
 )
 
 # Import factory default configurations
-from ..pipeline.entity_linker import DEFAULT_NEL_MODEL, EntityLinker, EntityLinker_v1
+from ..pipeline.entity_linker import DEFAULT_NEL_MODEL, EntityLinker
 from ..pipeline.entityruler import DEFAULT_ENT_ID_SEP, EntityRuler
 from ..pipeline.functions import DocCleaner, TokenSplitter
+from ..pipeline.legacy import EntityLinker_v1
 from ..pipeline.lemmatizer import Lemmatizer
 from ..pipeline.morphologizer import DEFAULT_MORPH_MODEL, Morphologizer
 from ..pipeline.multitask import DEFAULT_MT_MODEL, MultitaskObjective
diff --git a/spacy/schemas.py b/spacy/schemas.py
index e3200348013..359c3fd0f83 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -251,7 +251,7 @@ class TokenPatternOperatorSimple(str, Enum):
 TokenPatternOperatorMinMax = constr(pattern=r"^(\{\d+\}|\{\d+,\d*\}|\{\d*,\d+\})$")
 
 
-TokenPatternOperator = Union[TokenPatternOperatorSimple, TokenPatternOperatorMinMax]
+TokenPatternOperator = Union[TokenPatternOperatorSimple, TokenPatternOperatorMinMax]  # type: ignore[valid-type]
 StringValue = Union[TokenPatternString, StrictStr]
 NumberValue = Union[TokenPatternNumber, StrictInt, StrictFloat]
 UnderscoreValue = Union[
@@ -420,8 +420,8 @@ class ConfigSchemaInit(BaseModel):
     lookups: Optional[Lookups] = Field(..., title="Vocabulary lookups, e.g. lexeme normalization")
     vectors: Optional[StrictStr] = Field(..., title="Path to vectors")
     init_tok2vec: Optional[StrictStr] = Field(..., title="Path to pretrained tok2vec weights")
-    tokenizer: Dict[StrictStr, Any] = Field(..., help="Arguments to be passed into Tokenizer.initialize")
-    components: Dict[StrictStr, Dict[StrictStr, Any]] = Field(..., help="Arguments for TrainablePipe.initialize methods of pipeline components, keyed by component")
+    tokenizer: Dict[StrictStr, Any] = Field(..., title="Arguments to be passed into Tokenizer.initialize")
+    components: Dict[StrictStr, Dict[StrictStr, Any]] = Field(..., title="Arguments for TrainablePipe.initialize methods of pipeline components, keyed by component")
     before_init: Optional[Callable[["Language"], "Language"]] = Field(..., title="Optional callback to modify nlp object before initialization")
     after_init: Optional[Callable[["Language"], "Language"]] = Field(..., title="Optional callback to modify nlp object after initialization")
     # fmt: on
diff --git a/spacy/tests/lang/zh/test_tokenizer.py b/spacy/tests/lang/zh/test_tokenizer.py
index cdba5e39709..cb9b4ec539a 100644
--- a/spacy/tests/lang/zh/test_tokenizer.py
+++ b/spacy/tests/lang/zh/test_tokenizer.py
@@ -1,5 +1,5 @@
 import pytest
-from thinc.api import ConfigValidationError
+from confection import ConfigValidationError
 
 from spacy.lang.zh import Chinese, _get_pkuseg_trie_data
 
diff --git a/spacy/tests/package/test_requirements.py b/spacy/tests/package/test_requirements.py
index b9276441222..f4c6f056aa2 100644
--- a/spacy/tests/package/test_requirements.py
+++ b/spacy/tests/package/test_requirements.py
@@ -15,6 +15,7 @@ def test_build_dependencies():
         "cython-lint",
         "black",
         "isort",
+        "ruff",
         "mypy",
         "types-dataclasses",
         "types-mock",
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index f2bfe003d39..e7499404f63 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -24,6 +24,7 @@
 )
 from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
 from spacy.scorer import Scorer
+from spacy.tests.tok2vec import build_lazy_init_tok2vec as _  # noqa: F401
 from spacy.tokens import Doc, DocBin
 from spacy.training import Example
 from spacy.training.initialize import init_nlp
diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index 8415e5c92ff..4bac40f0b89 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -1229,6 +1229,9 @@ def test_download_rejects_relative_urls(monkeypatch):
     relative path in the filename"""
 
     monkeypatch.setattr(download_module, "run_command", lambda cmd: None)
+    monkeypatch.setattr(
+        download_module, "_get_pip_install_cmd", lambda: ["pip", "install"]
+    )
 
     # Check that normal download works
     download_module.download("en_core_web_sm-3.7.1", direct=True)
diff --git a/spacy/training/batchers.py b/spacy/training/batchers.py
index 8158e9fdf8f..40e437dcc8c 100644
--- a/spacy/training/batchers.py
+++ b/spacy/training/batchers.py
@@ -4,6 +4,7 @@
     Any,
     Callable,
     Iterable,
+    Iterator,
     List,
     Optional,
     Sequence,
diff --git a/spacy/training/corpus.py b/spacy/training/corpus.py
index 5cc2733a540..30e32911e6b 100644
--- a/spacy/training/corpus.py
+++ b/spacy/training/corpus.py
@@ -50,9 +50,10 @@ def create_jsonl_reader(
 
 
 @util.registry.readers("spacy.read_labels.v1")
-def read_labels(path: Path, *, require: bool = False):
+def read_labels(path: Union[str, Path], *, require: bool = False):
     # I decided not to give this a generic name, because I don't want people to
     # use it for arbitrary stuff, as I want this require arg with default False.
+    path = Path(path)
     if not require and not path.exists():
         return None
     return srsly.read_json(path)
diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py
index 64cdddf9c43..164a0867494 100644
--- a/spacy/training/initialize.py
+++ b/spacy/training/initialize.py
@@ -50,7 +50,7 @@ def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
     logger.info("Set up nlp object from config")
     config = nlp.config.interpolate()
     # Resolve all training-relevant sections using the filled nlp config
-    T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
+    T = registry.resolve(config["training"], schema=ConfigSchemaTraining)  # type: ignore[arg-type]
     dot_names = [T["train_corpus"], T["dev_corpus"]]
     if not isinstance(T["train_corpus"], str):
         raise ConfigValidationError(
diff --git a/spacy/training/loop.py b/spacy/training/loop.py
index 6f5099858f1..d6f1ad7d608 100644
--- a/spacy/training/loop.py
+++ b/spacy/training/loop.py
@@ -62,7 +62,7 @@ def train(
     allocator = config["training"]["gpu_allocator"]
     if use_gpu >= 0 and allocator:
         set_gpu_allocator(allocator)
-    T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
+    T = registry.resolve(config["training"], schema=ConfigSchemaTraining)  # type: ignore[arg-type]
     dot_names = [T["train_corpus"], T["dev_corpus"]]
     train_corpus, dev_corpus = resolve_dot_names(config, dot_names)
     optimizer = T["optimizer"]
diff --git a/spacy/training/pretrain.py b/spacy/training/pretrain.py
index 14a813a0993..32eada4d749 100644
--- a/spacy/training/pretrain.py
+++ b/spacy/training/pretrain.py
@@ -42,7 +42,7 @@ def pretrain(
     config["initialize"]["init_tok2vec"] = None
     nlp = load_model_from_config(config)
     _config = nlp.config.interpolate()
-    P = registry.resolve(_config["pretraining"], schema=ConfigSchemaPretrain)
+    P = registry.resolve(_config["pretraining"], schema=ConfigSchemaPretrain)  # type: ignore[arg-type]
     corpus = dot_to_object(_config, P["corpus"])
     corpus = registry.resolve({"corpus": corpus})["corpus"]
     batcher = P["batcher"]

From 188c90d72a7dde6fd4d504bb49e24bc315d570bc Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Mon, 9 Mar 2026 13:12:10 +0100
Subject: [PATCH 32/42] Add lazy spaCy CLI loading and static launcher

---
 MANIFEST.in                      |   1 +
 setup.cfg                        |   2 +-
 setup.py                         |   2 +-
 spacy/cli/__init__.py            | 110 +++++++++++++++++++---------
 spacy/cli/_dispatch.py           | 105 +++++++++++++++++++++++++++
 spacy/cli/_util.py               |  18 ++---
 spacy/tests/test_cli_app.py      |   4 ++
 spacy/tests/test_cli_launcher.py | 102 ++++++++++++++++++++++++++
 spacy_cli/__init__.py            |   1 +
 spacy_cli/build_manifest.py      |  99 ++++++++++++++++++++++++++
 spacy_cli/cli_manifest.json      | 118 +++++++++++++++++++++++++++++++
 spacy_cli/main.py                |  69 ++++++++++++++++++
 spacy_cli/static.py              |  24 +++++++
 13 files changed, 610 insertions(+), 45 deletions(-)
 create mode 100644 spacy/cli/_dispatch.py
 create mode 100644 spacy/tests/test_cli_launcher.py
 create mode 100644 spacy_cli/__init__.py
 create mode 100644 spacy_cli/build_manifest.py
 create mode 100644 spacy_cli/cli_manifest.json
 create mode 100644 spacy_cli/main.py
 create mode 100644 spacy_cli/static.py

diff --git a/MANIFEST.in b/MANIFEST.in
index 1caf758464f..36465ea94a0 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,5 @@
 recursive-include spacy *.pyi *.pyx *.pxd *.txt *.cfg *.jinja *.toml *.hh
+recursive-include spacy_cli *.json
 include LICENSE
 include README.md
 include pyproject.toml
diff --git a/setup.cfg b/setup.cfg
index 66cf942218b..83147ad0d48 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -70,7 +70,7 @@ install_requires =
 
 [options.entry_points]
 console_scripts =
-    spacy = spacy.cli:setup_cli
+    spacy = spacy_cli.main:main
 
 [options.extras_require]
 lookups =
diff --git a/setup.py b/setup.py
index 6f28b7340d0..2de619c720a 100755
--- a/setup.py
+++ b/setup.py
@@ -213,7 +213,7 @@ def setup_package():
         version=about["__version__"],
         ext_modules=ext_modules,
         cmdclass={"build_ext": build_ext_subclass},
-        package_data={"": ["*.pyx", "*.pxd", "*.pxi"]},
+        package_data={"": ["*.pyx", "*.pxd", "*.pxi"], "spacy_cli": ["*.json"]},
     )
 
 
diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py
index 3095778fe22..dcfb4b8a92e 100644
--- a/spacy/cli/__init__.py
+++ b/spacy/cli/__init__.py
@@ -1,40 +1,82 @@
+import sys
+from importlib import import_module
+from typing import Iterable
+
+from typer.main import get_command
 from wasabi import msg
 
-# Needed for testing
-from . import download as download_module  # noqa: F401
-from ._util import app, setup_cli  # noqa: F401
-from .apply import apply  # noqa: F401
-from .assemble import assemble_cli  # noqa: F401
-
-# These are the actual functions, NOT the wrapped CLI commands. The CLI commands
-# are registered automatically and won't have to be imported here.
-from .benchmark_speed import benchmark_speed_cli  # noqa: F401
-from .convert import convert  # noqa: F401
-from .debug_config import debug_config  # noqa: F401
-from .debug_data import debug_data  # noqa: F401
-from .debug_diff import debug_diff  # noqa: F401
-from .debug_model import debug_model  # noqa: F401
-from .download import download  # noqa: F401
-from .evaluate import evaluate  # noqa: F401
-from .find_function import find_function  # noqa: F401
-from .find_threshold import find_threshold  # noqa: F401
-from .info import info  # noqa: F401
-from .init_config import fill_config, init_config  # noqa: F401
-from .init_pipeline import init_pipeline_cli  # noqa: F401
-from .package import package  # noqa: F401
-from .pretrain import pretrain  # noqa: F401
-from .profile import profile  # noqa: F401
-from .project.assets import project_assets  # type: ignore[attr-defined]  # noqa: F401
-from .project.clone import project_clone  # type: ignore[attr-defined]  # noqa: F401
-from .project.document import (  # type: ignore[attr-defined]  # noqa: F401
-    project_document,
+from ..util import registry
+from ._dispatch import (
+    GROUP_MODULES,
+    PUBLIC_ATTRS,
+    SUBCOMMAND_MODULES,
+    TOP_LEVEL_MODULES,
 )
-from .project.dvc import project_update_dvc  # type: ignore[attr-defined]  # noqa: F401
-from .project.pull import project_pull  # type: ignore[attr-defined]  # noqa: F401
-from .project.push import project_push  # type: ignore[attr-defined]  # noqa: F401
-from .project.run import project_run  # type: ignore[attr-defined]  # noqa: F401
-from .train import train_cli  # type: ignore[attr-defined]  # noqa: F401
-from .validate import validate  # type: ignore[attr-defined]  # noqa: F401
+from ._dispatch import iter_builtin_modules
+from ._util import COMMAND, add_project_cli, app
+
+HELP_OPTIONS = {"--help", "-h"}
+ROOT_OPTIONS = HELP_OPTIONS | {"--install-completion", "--show-completion"}
+
+__all__ = [
+    "app",
+    "load_all_commands",
+    "load_for_argv",
+    "setup_cli",
+    *sorted(PUBLIC_ATTRS),
+]
+
+
+def _import_modules(module_names: Iterable[str]) -> None:
+    for module_name in module_names:
+        import_module(module_name)
+
+
+def load_all_commands() -> None:
+    _import_modules(iter_builtin_modules())
+    add_project_cli()
+
+
+def load_for_argv(argv: Iterable[str]) -> None:
+    """Import only the CLI modules needed to dispatch the given arguments.
+
+    argv (Iterable[str]): Command-line arguments without the program name.
+    """
+    tokens = list(argv)
+    first = tokens[0] if tokens else None
+    if first is None or first in ROOT_OPTIONS or first.startswith("-"):
+        load_all_commands()
+    elif first == "project":
+        add_project_cli()
+    elif first in GROUP_MODULES:
+        # A known "<group> <subcommand>" pair needs just that subcommand's modules.
+        has_sub = len(tokens) > 1 and not tokens[1].startswith("-")
+        path = (first, tokens[1]) if has_sub else ()
+        _import_modules(SUBCOMMAND_MODULES.get(path, GROUP_MODULES[first]))
+    elif first in TOP_LEVEL_MODULES:
+        _import_modules(TOP_LEVEL_MODULES[first])
+
+
+def setup_cli() -> None:
+    # Make sure entry-point CLI integrations are imported before command dispatch.
+    registry.cli.get_all()
+    load_for_argv(sys.argv[1:])
+    command = get_command(app)
+    command(prog_name=COMMAND)
+
+
+def __getattr__(name: str):
+    # PEP 562 hook: import a public CLI attribute on first access, then cache it.
+    target = PUBLIC_ATTRS.get(name)
+    if target is None:
+        raise AttributeError(f"module 'spacy.cli' has no attribute {name!r}")
+    module = import_module(target[0])
+    globals()[name] = module if target[1] is None else getattr(module, target[1])
+    return globals()[name]
+
+
+def __dir__():
+    return sorted(set(globals()) | set(PUBLIC_ATTRS))
 
 
 @app.command("link", no_args_is_help=True, deprecated=True, hidden=True)
diff --git a/spacy/cli/_dispatch.py b/spacy/cli/_dispatch.py
new file mode 100644
index 00000000000..e1975dd7e1d
--- /dev/null
+++ b/spacy/cli/_dispatch.py
@@ -0,0 +1,105 @@
+from typing import Dict, Iterable, Optional, Tuple
+
+
+CommandPath = Tuple[str, ...]
+
+
+TOP_LEVEL_MODULES: Dict[str, Tuple[str, ...]] = {
+    "apply": ("spacy.cli.apply",),
+    "assemble": ("spacy.cli.assemble",),
+    "convert": ("spacy.cli.convert",),
+    "debug-data": ("spacy.cli.debug_data",),
+    "download": ("spacy.cli.download",),
+    "evaluate": ("spacy.cli.evaluate",),
+    "find-function": ("spacy.cli.find_function",),
+    "find-threshold": ("spacy.cli.find_threshold",),
+    "info": ("spacy.cli.info",),
+    "package": ("spacy.cli.package",),
+    "pretrain": ("spacy.cli.pretrain",),
+    "profile": ("spacy.cli.profile",),
+    "train": ("spacy.cli.train",),
+    "validate": ("spacy.cli.validate",),
+}
+
+
+GROUP_MODULES: Dict[str, Tuple[str, ...]] = {
+    "benchmark": (
+        "spacy.cli.benchmark_speed",
+        "spacy.cli.evaluate",
+    ),
+    "debug": (
+        "spacy.cli.debug_config",
+        "spacy.cli.debug_data",
+        "spacy.cli.debug_diff",
+        "spacy.cli.debug_model",
+        "spacy.cli.profile",
+    ),
+    "init": (
+        "spacy.cli.init_config",
+        "spacy.cli.init_pipeline",
+    ),
+}
+
+
+SUBCOMMAND_MODULES: Dict[CommandPath, Tuple[str, ...]] = {
+    ("benchmark", "accuracy"): ("spacy.cli.evaluate",),
+    ("benchmark", "speed"): ("spacy.cli.benchmark_speed",),
+    ("debug", "config"): ("spacy.cli.debug_config",),
+    ("debug", "data"): ("spacy.cli.debug_data",),
+    ("debug", "diff-config"): ("spacy.cli.debug_diff",),
+    ("debug", "model"): ("spacy.cli.debug_model",),
+    ("debug", "profile"): ("spacy.cli.profile",),
+    ("init", "config"): ("spacy.cli.init_config",),
+    ("init", "fill-config"): ("spacy.cli.init_config",),
+    ("init", "labels"): ("spacy.cli.init_pipeline",),
+    ("init", "nlp"): ("spacy.cli.init_pipeline",),
+    ("init", "vectors"): ("spacy.cli.init_pipeline",),
+}
+
+
+PUBLIC_ATTRS: Dict[str, Tuple[str, Optional[str]]] = {
+    "app": ("spacy.cli._util", "app"),
+    "apply": ("spacy.cli.apply", "apply"),
+    "assemble_cli": ("spacy.cli.assemble", "assemble_cli"),
+    "benchmark_speed_cli": ("spacy.cli.benchmark_speed", "benchmark_speed_cli"),
+    "convert": ("spacy.cli.convert", "convert"),
+    "debug_config": ("spacy.cli.debug_config", "debug_config"),
+    "debug_data": ("spacy.cli.debug_data", "debug_data"),
+    "debug_diff": ("spacy.cli.debug_diff", "debug_diff"),
+    "debug_model": ("spacy.cli.debug_model", "debug_model"),
+    "download": ("spacy.cli.download", "download"),
+    "download_module": ("spacy.cli.download", None),
+    "evaluate": ("spacy.cli.evaluate", "evaluate"),
+    "fill_config": ("spacy.cli.init_config", "fill_config"),
+    "find_function": ("spacy.cli.find_function", "find_function"),
+    "find_threshold": ("spacy.cli.find_threshold", "find_threshold"),
+    "info": ("spacy.cli.info", "info"),
+    "init_config": ("spacy.cli.init_config", "init_config"),
+    "init_pipeline_cli": ("spacy.cli.init_pipeline", "init_pipeline_cli"),
+    "package": ("spacy.cli.package", "package"),
+    "pretrain": ("spacy.cli.pretrain", "pretrain"),
+    "profile": ("spacy.cli.profile", "profile"),
+    "project_assets": ("spacy.cli.project.assets", "project_assets"),
+    "project_clone": ("spacy.cli.project.clone", "project_clone"),
+    "project_document": ("spacy.cli.project.document", "project_document"),
+    "project_pull": ("spacy.cli.project.pull", "project_pull"),
+    "project_push": ("spacy.cli.project.push", "project_push"),
+    "project_run": ("spacy.cli.project.run", "project_run"),
+    "project_update_dvc": ("spacy.cli.project.dvc", "project_update_dvc"),
+    "train_cli": ("spacy.cli.train", "train_cli"),
+    "validate": ("spacy.cli.validate", "validate"),
+}
+
+
+def iter_builtin_modules() -> Iterable[str]:
+    """Yield every built-in CLI module name exactly once.
+
+    Top-level command modules come first, then group modules, de-duplicated.
+    """
+    seen = set()
+    for table in (TOP_LEVEL_MODULES, GROUP_MODULES):
+        for modules in table.values():
+            for module in modules:
+                if module not in seen:
+                    seen.add(module)
+                    yield module
diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py
index 757c418440b..8a8b4e0f417 100644
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@@ -19,9 +19,7 @@
 from click.shell_completion import split_arg_string
 from thinc.api import ConfigValidationError, require_gpu
 from thinc.util import gpu_is_available
-from typer.main import get_command
 from wasabi import Printer, msg
-from weasel import app as project_cli
 
 from ..compat import Literal
 from ..util import (
@@ -63,19 +61,21 @@
 benchmark_cli = typer.Typer(name="benchmark", help=BENCHMARK_HELP, no_args_is_help=True)
 debug_cli = typer.Typer(name="debug", help=DEBUG_HELP, no_args_is_help=True)
 init_cli = typer.Typer(name="init", help=INIT_HELP, no_args_is_help=True)
+_PROJECT_CLI_ADDED = False
 
-app.add_typer(project_cli, name="project", help=PROJECT_HELP, no_args_is_help=True)
 app.add_typer(debug_cli)
 app.add_typer(benchmark_cli)
 app.add_typer(init_cli)
 
 
-def setup_cli() -> None:
-    # Make sure the entry-point for CLI runs, so that they get imported.
-    registry.cli.get_all()
-    # Ensure that the help messages always display the correct prompt
-    command = get_command(app)
-    command(prog_name=COMMAND)
+def add_project_cli() -> None:
+    global _PROJECT_CLI_ADDED
+    if _PROJECT_CLI_ADDED:
+        return  # "project" group is already mounted on the app
+    from weasel import app as project_cli  # deferred so weasel loads on demand
+
+    app.add_typer(project_cli, name="project", help=PROJECT_HELP, no_args_is_help=True)
+    _PROJECT_CLI_ADDED = True
 
 
 def parse_config_overrides(
diff --git a/spacy/tests/test_cli_app.py b/spacy/tests/test_cli_app.py
index 5fe9e9bcf01..7ff3f755a31 100644
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@@ -5,12 +5,16 @@
 import srsly
 from typer.testing import CliRunner
 
+from spacy.cli import load_all_commands
 from spacy.cli._util import app, get_git_version
 from spacy.tokens import Doc, DocBin, Span
 
 from .util import make_tempdir, normalize_whitespace
 
 
+load_all_commands()
+
+
 def has_git():
     try:
         get_git_version()
diff --git a/spacy/tests/test_cli_launcher.py b/spacy/tests/test_cli_launcher.py
new file mode 100644
index 00000000000..392572fdeec
--- /dev/null
+++ b/spacy/tests/test_cli_launcher.py
@@ -0,0 +1,102 @@
+import importlib
+import subprocess
+import sys
+
+import pytest
+
+from spacy_cli.build_manifest import build_manifest
+from spacy_cli.static import load_manifest
+
+launcher_module = importlib.import_module("spacy_cli.main")
+
+
+def _run_python(code: str) -> str:
+    """Run code in a fresh interpreter and return its stripped stdout.
+
+    A non-zero exit status raises subprocess.CalledProcessError (check=True).
+    """
+    argv = [sys.executable, "-c", code]
+    proc = subprocess.run(argv, check=True, capture_output=True, text=True)
+    return proc.stdout.strip()
+
+
+def test_cli_package_import_is_lazy():
+    output = _run_python(
+        "import sys; import spacy.cli; "
+        "print('spacy.cli.train' in sys.modules); print('weasel' in sys.modules)"
+    )
+    assert output.splitlines() == ["False", "False"]
+
+
+def test_load_for_argv_imports_only_requested_command():
+    output = _run_python(
+        "import sys; from spacy.cli import load_for_argv; "
+        "load_for_argv(['train', '--help']); "
+        "print('spacy.cli.train' in sys.modules); print('weasel' in sys.modules)"
+    )
+    assert output.splitlines() == ["True", "False"]
+
+
+def test_load_for_argv_imports_project_on_demand():
+    output = _run_python(
+        "import sys; from spacy.cli import load_for_argv; "
+        "load_for_argv(['project', '--help']); print('weasel' in sys.modules)"
+    )
+    assert output == "True"
+
+
+def test_manifest_is_current():
+    assert build_manifest() == load_manifest()
+
+
+def test_launcher_root_help_uses_static(capsys, monkeypatch):
+    monkeypatch.setattr(
+        launcher_module, "_run_live", lambda: (_ for _ in ()).throw(AssertionError)
+    )
+    with pytest.raises(SystemExit) as exc:
+        launcher_module.main(["--help"])
+    assert exc.value.code == 0
+    assert capsys.readouterr().out == load_manifest()["root_help"]
+
+
+def test_launcher_command_help_uses_static(capsys, monkeypatch):
+    monkeypatch.setattr(
+        launcher_module, "_run_live", lambda: (_ for _ in ()).throw(AssertionError)
+    )
+    with pytest.raises(SystemExit) as exc:
+        launcher_module.main(["train", "--help"])
+    assert exc.value.code == 0
+    assert capsys.readouterr().out == load_manifest()["command_help"]["train"]
+
+
+def test_launcher_unknown_command_uses_static_error(capsys, monkeypatch):
+    monkeypatch.setattr(
+        launcher_module, "_run_live", lambda: (_ for _ in ()).throw(AssertionError)
+    )
+    with pytest.raises(SystemExit) as exc:
+        launcher_module.main(["definitely-not-a-command"])
+    assert exc.value.code == 2
+    assert "No such command 'definitely-not-a-command'" in capsys.readouterr().out
+
+
+def test_launcher_non_help_command_falls_back_to_live(monkeypatch):
+    called = []
+
+    def fake_run_live():
+        called.append(True)
+
+    monkeypatch.setattr(launcher_module, "_run_live", fake_run_live)
+    launcher_module.main(["train", "config.cfg"])
+    assert called == [True]
+
+
+def test_launcher_root_help_falls_back_with_plugins(monkeypatch):
+    called = []
+
+    def fake_run_live():
+        called.append(True)
+
+    monkeypatch.setattr(launcher_module, "_run_live", fake_run_live)
+    monkeypatch.setattr(launcher_module, "get_plugin_command_names", lambda: {"custom"})
+    launcher_module.main(["--help"])
+    assert called == [True]
diff --git a/spacy_cli/__init__.py b/spacy_cli/__init__.py
new file mode 100644
index 00000000000..a2cb1f66b78
--- /dev/null
+++ b/spacy_cli/__init__.py
@@ -0,0 +1 @@
+"""Lightweight launcher package for the spaCy console script."""
diff --git a/spacy_cli/build_manifest.py b/spacy_cli/build_manifest.py
new file mode 100644
index 00000000000..6e019bdcb95
--- /dev/null
+++ b/spacy_cli/build_manifest.py
@@ -0,0 +1,99 @@
+import json
+from pathlib import Path
+from typing import Dict, Iterable, List
+
+from typer.main import get_command
+from typer.testing import CliRunner
+
+from spacy.cli import load_all_commands
+from spacy.cli._util import COMMAND, app
+
+from .static import MANIFEST_FILE, UNKNOWN_COMMAND_TOKEN, UNKNOWN_SUBCOMMAND_TOKEN
+
+# Terminal-related variables passed as the environment of every CLI invocation
+# made through `_invoke`.
+DEFAULT_ENV = {"COLUMNS": "100", "LINES": "40", "TERM": "xterm-256color"}
+
+
+def _invoke(runner: CliRunner, cli, args: Iterable[str]):
+    """Invoke `cli` with `args` through `runner` and return the runner's result.
+
+    The program name is always `COMMAND` and the environment is `DEFAULT_ENV`.
+    """
+    argv = [*args]
+    return runner.invoke(cli, argv, prog_name=COMMAND, env=DEFAULT_ENV)
+
+
+def _get_help(runner: CliRunner, cli, args: Iterable[str]) -> str:
+    """Return the `--help` output for the command path given by `args`.
+
+    An empty `args` requests the root help.
+
+    Raises:
+        RuntimeError: if the help invocation exits with a non-zero status. The
+            exception recorded on the invocation result, if any, is chained as
+            the cause.
+    """
+    # Materialise once: `args` may be a one-shot iterable, and it is needed both
+    # for the invocation and for the error message.
+    path = list(args)
+    result = _invoke(runner, cli, [*path, "--help"])
+    if result.exit_code != 0:
+        err = f"Could not render help for: {' '.join(path) or '<root>'}"
+        raise RuntimeError(err) from result.exception
+    return result.stdout
+
+
+def _maybe_get_help(runner: CliRunner, cli, args: Iterable[str]):
+    """Return the `--help` output for `args`, or None if the invocation fails.
+
+    "Fails" means the invocation exits with a non-zero status.
+    """
+    outcome = _invoke(runner, cli, [*args, "--help"])
+    return outcome.stdout if outcome.exit_code == 0 else None
+
+
+def build_manifest() -> Dict[str, object]:
+    """Render the CLI's help and error output into one manifest dict.
+
+    Every top-level command of `app` is visited. Plain commands contribute their
+    `--help` text. Groups (commands exposing a `commands` mapping) contribute
+    their own help, the help of each subcommand that renders successfully, and
+    the output produced for an unknown subcommand. Root help and the root-level
+    "missing command" / "unknown command" output are captured last.
+
+    Returns:
+        A dict holding the command name, the known and hidden command names,
+        and the captured help and error texts.
+
+    Raises:
+        RuntimeError: via `_get_help`, if root, group or plain-command help
+            exits with a non-zero status.
+    """
+    # NOTE(review): presumably registers every command on `app` first — confirm.
+    load_all_commands()
+    root = get_command(app)
+    runner = CliRunner()
+    known_top_level: List[str] = sorted(root.commands)
+    groups: Dict[str, List[str]] = {}
+    hidden_top: List[str] = []
+    hidden_in_group: Dict[str, List[str]] = {}
+    help_by_group: Dict[str, str] = {}
+    help_by_command: Dict[str, str] = {}
+    unknown_sub_errors: Dict[str, str] = {}
+
+    for name, command in root.commands.items():
+        if getattr(command, "hidden", False):
+            hidden_top.append(name)
+        if not hasattr(command, "commands"):
+            # Plain command: its help must render, otherwise `_get_help` raises.
+            help_by_command[name] = _get_help(runner, app, [name])
+            continue
+
+        children = command.commands
+        child_names = sorted(children)
+        groups[name] = child_names
+        hidden_in_group[name] = sorted(
+            child
+            for child, child_cmd in children.items()
+            if getattr(child_cmd, "hidden", False)
+        )
+        help_by_group[name] = _get_help(runner, app, [name])
+        unknown_sub = _invoke(runner, app, [name, UNKNOWN_SUBCOMMAND_TOKEN])
+        unknown_sub_errors[name] = unknown_sub.stdout
+        for child in child_names:
+            # Subcommand help is best-effort: entries that fail are left out.
+            child_help = _maybe_get_help(runner, app, [name, child])
+            if child_help is not None:
+                help_by_command[f"{name} {child}"] = child_help
+
+    root_help = _get_help(runner, app, [])
+    missing_command = _invoke(runner, app, []).stdout
+    unknown_command = _invoke(runner, app, [UNKNOWN_COMMAND_TOKEN]).stdout
+    return {
+        "command": COMMAND,
+        "known_top_level": known_top_level,
+        "known_groups": groups,
+        "hidden_top_level": hidden_top,
+        "hidden_group_commands": hidden_in_group,
+        "root_help": root_help,
+        "group_help": help_by_group,
+        "command_help": help_by_command,
+        "errors": {
+            "missing_command": missing_command,
+            "unknown_command": unknown_command,
+            "unknown_subcommand": unknown_sub_errors,
+        },
+    }
+
+
+def write_manifest(path: Path) -> Path:
+    """Build the manifest and write it to `path` as UTF-8 encoded JSON.
+
+    The JSON is indented by two spaces, has sorted keys and ends with a newline.
+
+    Returns:
+        The `path` that was written.
+    """
+    data = build_manifest()
+    text = json.dumps(data, indent=2, ensure_ascii=False, sort_keys=True) + "\n"
+    # `ensure_ascii=False` leaves non-ASCII characters (e.g. the box-drawing
+    # borders in rendered help) unescaped, so the encoding is set explicitly
+    # instead of depending on the platform's locale default.
+    path.write_text(text, encoding="utf-8")
+    return path
+
+
+def main() -> None:
+    """Regenerate the manifest file located next to this module."""
+    target = Path(__file__).with_name(MANIFEST_FILE)
+    write_manifest(target)
+
+
+# Regenerate the manifest when executed as a module, e.g.
+# `python -m spacy_cli.build_manifest`; the relative `.static` import above needs
+# the package context.
+if __name__ == "__main__":
+    main()
diff --git a/spacy_cli/cli_manifest.json b/spacy_cli/cli_manifest.json
new file mode 100644
index 00000000000..361a10dca16
--- /dev/null
+++ b/spacy_cli/cli_manifest.json
@@ -0,0 +1,118 @@
+{
+  "command": "python -m spacy",
+  "command_help": {
+    "apply": "                                                                                                    \n Usage: python -m spacy apply [OPTIONS] MODEL DATA_PATH OUTPUT_FILE                                 \n                                                                                                    \n Apply a trained pipeline to documents to get predictions. Expects a loadable spaCy pipeline and    \n path to the data, which can be a directory or a file. The data files can be provided in multiple   \n formats:     1. .spacy files     2. .jsonl files with a specified \"field\" to read the text from.   \n 3. Files with any other extension are assumed to be containing        a single document. DOCS:     \n https://spacy.io/api/cli#apply                                                                     \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model            TEXT  Model name or path [default: None] [required]                        │\n│ *    data_path        PATH  Location of the documents to predict on. Can be a single file in     │\n│                             .spacy format or a .jsonl file. Files with other extensions are      │\n│                             treated as single plain text documents. If a directory is provided   │\n│                             it is traversed recursively to grab all files to be processed. The   │\n│                             files can be a mixture of .spacy, .jsonl and text files. If .jsonl   │\n│                             is provided the specified field is going to be grabbed (\"text\" by    │\n│                             default).                                                            
│\n│                             [default: None]                                                      │\n│                             [required]                                                           │\n│ *    output_file      FILE  Path to save the resulting .spacy file [default: None] [required]    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code        -c       PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│                                 [default: None]                                                  │\n│ --text-key    -tk      TEXT     Key containing text string for JSONL [default: text]             │\n│ --force       -F                Force overwriting the output file                                │\n│ --gpu-id      -g       INTEGER  GPU ID or -1 for CPU. [default: -1]                              │\n│ --batch-size  -b       INTEGER  Batch size. [default: 1]                                         │\n│ --n-process   -n       INTEGER  number of processors to use. [default: 1]                        │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "assemble": "                                                                                                    \n Usage: python -m spacy assemble [OPTIONS] CONFIG_PATH OUTPUT_PATH                                  \n                                                                                                    \n Assemble a spaCy pipeline from a config file. The config file includes all settings for            \n initializing the pipeline. To override settings in the config, e.g. settings that point to local   \n paths or that you want to experiment with, you can override them as command line options. The      \n --code argument lets you pass in a Python file that can be used to register custom functions that  \n are referenced in the config.                                                                      \n                                                                                                    \n DOCS: https://spacy.io/api/cli#assemble                                                            \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n│ *    output_path      PATH  Output directory to store assembled pipeline in [default: None]      │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH  Path to Python file with additional code (registered functions) to  │\n│                              be imported                                                         │\n│                              [default: 
None]                                                     │\n│ --verbose  -V,-VV            Display more information for debugging purposes                     │\n│ --help                       Show this message and exit.                                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "benchmark accuracy": "                                                                                                    \n Usage: python -m spacy benchmark accuracy [OPTIONS] MODEL DATA_PATH                                \n                                                                                                    \n Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in the binary   \n .spacy format. The --gold-preproc option sets up the evaluation examples with gold-standard        \n sentences and tokens for the predictions. Gold preprocessing helps the annotations align to the    \n tokenization, and may result in sequences of more consistent length. However, it may reduce        \n runtime accuracy due to train/test skew. To render a sample of dependency parses in a HTML file,   \n set as output directory as the displacy_path argument.                                             \n                                                                                                    \n DOCS: https://spacy.io/api/cli#benchmark-accuracy                                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [default: None] [required]                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [default: None]    │\n│                           [required]                                                             │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output          -o       FILE       Output JSON file for metrics [default: None]               │\n│ --code            -c       PATH 
      Path to Python file with additional code (registered       │\n│                                       functions) to be imported                                  │\n│                                       [default: None]                                            │\n│ --gpu-id          -g       INTEGER    GPU ID or -1 for CPU [default: -1]                         │\n│ --gold-preproc    -G                  Use gold preprocessing                                     │\n│ --displacy-path   -dp      DIRECTORY  Directory to output rendered parses as HTML                │\n│                                       [default: None]                                            │\n│ --displacy-limit  -dl      INTEGER    Limit of parses to render as HTML [default: 25]            │\n│ --per-component   -P                  Return scores per component, only applicable when an       │\n│                                       output JSON file is specified.                             │\n│ --spans-key       -sk      TEXT       Spans key to use when evaluating Doc.spans [default: sc]   │\n│ --help                                Show this message and exit.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "benchmark speed": "                                                                                                    \n Usage: python -m spacy benchmark speed [OPTIONS] MODEL DATA_PATH                                   \n                                                                                                    \n Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark data in the binary .spacy    \n format.                                                                                            \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [default: None] [required]                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [default: None]    │\n│                           [required]                                                             │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --batch-size  -b      INTEGER RANGE [x>=1]   Override the pipeline batch size [default: None]    │\n│ --no-shuffle                                 Do not shuffle benchmark data                       │\n│ --gpu-id      -g      INTEGER                GPU ID or -1 for CPU [default: -1]                  │\n│ --batches             INTEGER RANGE [x>=30]  Minimum number of batches to benchmark              │\n│                                              [default: 50]                                       │\n│ --warmup      -w      INTEGER RANGE [x>=0]   Number of iterations over the data for warmup       │\n│                                    
          [default: 3]                                        │\n│ --code        -c      PATH                   Path to Python file with additional code            │\n│                                              (registered functions) to be imported               │\n│                                              [default: None]                                     │\n│ --help                                       Show this message and exit.                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "convert": "                                                                                                    \n Usage: python -m spacy convert [OPTIONS] INPUT_PATH [OUTPUT_DIR]                                   \n                                                                                                    \n Convert files into json or DocBin format for training. The resulting .spacy file can be used with  \n the train command and other experiment management functions.                                       \n                                                                                                    \n If no output_dir is specified and the output format is JSON, the data is written to stdout, so you \n can pipe them forward to a JSON file: $ spacy convert some_file.conllu --file-type json >          \n some_file.json                                                                                     \n DOCS: https://spacy.io/api/cli#convert                                                             \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    input_path      TEXT          Input file or directory [default: None] [required]            │\n│      output_dir      [OUTPUT_DIR]  Output directory. '-' for stdout. 
[default: -]                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --file-type        -t       [json|spacy]  Type of data to produce [default: spacy]               │\n│ --n-sents          -n       INTEGER       Number of sentences per doc (0 to disable)             │\n│                                           [default: 1]                                           │\n│ --seg-sents        -s                     Segment sentences (for -c ner)                         │\n│ --model,--base     -b       TEXT          Trained spaCy pipeline for sentence segmentation to    │\n│                                           use as base (for --seg-sents)                          │\n│                                           [default: None]                                        │\n│ --morphology       -m                     Enable appending morphology to tags                    │\n│ --merge-subtokens  -T                     Merge CoNLL-U subtokens                                │\n│ --converter        -c       TEXT          Converter: ('conllubio', 'conllu', 'conll', 'ner',     │\n│                                           'iob', 'json')                                         │\n│                                           [default: auto]                                        │\n│ --ner-map          -nm      PATH          NER tag mapping (as JSON-encoded dict of entity types) │\n│                                           [default: None]                                        │\n│ --lang             -l       TEXT          Language (if tokenizer required) [default: None]       │\n│ --concatenate      -C                     Concatenate output to a single file                    │\n│ --help                                    Show this message and exit.                            
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug config": "                                                                                                    \n Usage: python -m spacy debug config [OPTIONS] CONFIG_PATH                                          \n                                                                                                    \n Debug a config file and show validation errors. The command will create all objects in the tree    \n and validate them. Note that some config validation errors are blocking and will prevent the rest  \n of the config from being resolved. This means that you may not see all validation errors at once   \n and some issues are only shown once previous errors have been fixed. Similar as with the 'train'   \n command, you can override settings from the config as command line options. For instance,          \n --training.batch_size 128 overrides the value of \"batch_size\" in the block \"[training]\".           \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-config                                                        \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c      PATH  Path to Python file with additional code (registered           │\n│                                   functions) to be imported                                      │\n│                                   [default: None]                                                │\n│ --show-functions    -F            
Show an overview of all registered functions used in the       │\n│                                   config and where they come from (modules, files etc.)          │\n│ --show-variables    -V            Show an overview of all variables referenced in the config and │\n│                                   their values. This will also reflect variables overwritten on  │\n│                                   the CLI.                                                       │\n│ --help                            Show this message and exit.                                    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug data": "                                                                                                    \n Usage: python -m spacy debug data [OPTIONS] CONFIG_PATH                                            \n                                                                                                    \n Analyze, debug and validate your training and development data. Outputs useful stats, and can help \n you find problems like invalid entity annotations, cyclic dependencies, low data labels and more.  \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-data                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│                                    [default: None]                                               │\n│ --ignore-warnings   -IW            Ignore warnings, only show stats and errors                   │\n│ --verbose           -V             Print additional information and explanations                 │\n│ --no-format         -NF            Don't pretty-print the results                                │\n│ --help                             Show this message and exit.                                   
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug diff-config": "                                                                                                    \n Usage: python -m spacy debug diff-config [OPTIONS] CONFIG_PATH                                     \n                                                                                                    \n Show a diff of a config file with respect to spaCy's defaults or another config file. If           \n additional settings were used in the creation of the config file, then you must supply these as    \n extra parameters to the command when comparing to the default settings. The generated diff can     \n also be used when posting to the discussion forum to provide more information for the maintainers. \n                                                                                                    \n The `optimize`, `gpu`, and `pretraining` options are only relevant when comparing against the      \n default configuration (or specifically when `compare_to` is None).                                 
\n DOCS: https://spacy.io/api/cli#debug-diff                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --compare-to                 PATH                   Path to a config file to diff against, or    │\n│                                                     `None` to compare against default settings   │\n│                                                     [default: None]                              │\n│ --optimize          -o       [efficiency|accuracy]  Whether the user config was optimized for    │\n│                                                     efficiency or accuracy. Only relevant when   │\n│                                                     comparing against the default config.        │\n│                                                     [default: efficiency]                        │\n│ --gpu               -G                              Whether the original config can run on a     │\n│                                                     GPU. Only relevant when comparing against    │\n│                                                     the default config.                          │\n│ --pretraining,--pt                                  Whether to compare on a config with          │\n│                                                     pretraining involved. Only relevant when     │\n│                                                     comparing against the default config.        
│\n│ --markdown          -md                             Generate Markdown for GitHub issues          │\n│ --help                                              Show this message and exit.                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug model": "                                                                                                    \n Usage: python -m spacy debug model [OPTIONS] CONFIG_PATH COMPONENT                                 \n                                                                                                    \n Analyze a Thinc model implementation. Includes checks for internal structure and activations       \n during training.                                                                                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-model                                                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n│ *    component        TEXT  Name of the pipeline component of which the model should be analysed │\n│                             [default: None]                                                      │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --layers       -l         TEXT     Comma-separated names of layer IDs to print                   │\n│ --dimensions   -DIM                Show dimensions                                               │\n│ --parameters   -PAR                Show parameters                                               │\n│ --gradients    -GRAD               Show gradients                                                │\n│ --attributes   -ATTR               
Show attributes                                               │\n│ --print-step0  -P0                 Print model before training                                   │\n│ --print-step1  -P1                 Print model after initialization                              │\n│ --print-step2  -P2                 Print model after training                                    │\n│ --print-step3  -P3                 Print final predictions                                       │\n│ --gpu-id       -g         INTEGER  GPU ID or -1 for CPU [default: -1]                            │\n│ --help                             Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug profile": "                                                                                                    \n Usage: python -m spacy debug profile [OPTIONS] MODEL [INPUTS]                                      \n                                                                                                    \n Profile which functions take the most time in a spaCy pipeline. Input should be formatted as one   \n JSON object per line with a key \"text\". It can either be provided as a JSONL file, or be read from \n sys.sytdin. If no input file is specified, the IMDB dataset is loaded via Thinc.                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-profile                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model       TEXT      Trained pipeline to load [default: None] [required]                   │\n│      inputs      [INPUTS]  Location of input file. '-' for stdin. [default: None]                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n-texts  -n      INTEGER  Maximum number of texts to use if available [default: 10000]         │\n│ --help                      Show this message and exit.                                          │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug-data": "                                                                                                    \n Usage: python -m spacy debug-data [OPTIONS] CONFIG_PATH                                            \n                                                                                                    \n Analyze, debug and validate your training and development data. Outputs useful stats, and can help \n you find problems like invalid entity annotations, cyclic dependencies, low data labels and more.  \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-data                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│                                    [default: None]                                               │\n│ --ignore-warnings   -IW            Ignore warnings, only show stats and errors                   │\n│ --verbose           -V             Print additional information and explanations                 │\n│ --no-format         -NF            Don't pretty-print the results                                │\n│ --help                             Show this message and exit.                                   
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "download": "                                                                                                    \n Usage: python -m spacy download [OPTIONS] MODEL                                                    \n                                                                                                    \n Download compatible trained pipeline from the default download path using pip. If --direct flag is \n set, the command expects the full package name with version. For direct downloads, the             \n compatibility check will be skipped. All additional arguments provided to this command will be     \n passed to `pip install` on package installation.                                                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#download AVAILABLE PACKAGES: https://spacy.io/models                \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model      TEXT  Name of pipeline package to download [default: None] [required]            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --direct  -d,-D            Force direct download of name + version                               │\n│ --sdist   -S               Download sdist (.tar.gz) archive instead of pre-built binary wheel    │\n│ --url     -U         TEXT  Download from given url [default: None]                               │\n│ --help                     Show this message and exit.                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "evaluate": "                                                                                                    \n Usage: python -m spacy evaluate [OPTIONS] MODEL DATA_PATH                                          \n                                                                                                    \n Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in the binary   \n .spacy format. The --gold-preproc option sets up the evaluation examples with gold-standard        \n sentences and tokens for the predictions. Gold preprocessing helps the annotations align to the    \n tokenization, and may result in sequences of more consistent length. However, it may reduce        \n runtime accuracy due to train/test skew. To render a sample of dependency parses in a HTML file,   \n set as output directory as the displacy_path argument.                                             \n                                                                                                    \n DOCS: https://spacy.io/api/cli#benchmark-accuracy                                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [default: None] [required]                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [default: None]    │\n│                           [required]                                                             │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output          -o       FILE       Output JSON file for metrics [default: None]               │\n│ --code            -c       PATH       
Path to Python file with additional code (registered       │\n│                                       functions) to be imported                                  │\n│                                       [default: None]                                            │\n│ --gpu-id          -g       INTEGER    GPU ID or -1 for CPU [default: -1]                         │\n│ --gold-preproc    -G                  Use gold preprocessing                                     │\n│ --displacy-path   -dp      DIRECTORY  Directory to output rendered parses as HTML                │\n│                                       [default: None]                                            │\n│ --displacy-limit  -dl      INTEGER    Limit of parses to render as HTML [default: 25]            │\n│ --per-component   -P                  Return scores per component, only applicable when an       │\n│                                       output JSON file is specified.                             │\n│ --spans-key       -sk      TEXT       Spans key to use when evaluating Doc.spans [default: sc]   │\n│ --help                                Show this message and exit.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "find-function": "                                                                                                    \n Usage: python -m spacy find-function [OPTIONS] FUNC_NAME                                           \n                                                                                                    \n Find the module, path and line number to the file the registered function is defined in, if        \n available.                                                                                         \n                                                                                                    \n func_name (str): Name of the registered function. registry_name (Optional[str]): Name of the       \n catalogue registry.                                                                                \n DOCS: https://spacy.io/api/cli#find-function                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    func_name      TEXT  Name of the registered function. [default: None] [required]            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --registry  -r      TEXT  Name of the catalogue registry. [default: None]                        │\n│ --help                    Show this message and exit.                                            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "find-threshold": "                                                                                                    \n Usage: python -m spacy find-threshold [OPTIONS] MODEL DATA_PATH PIPE_NAME                          \n                                       THRESHOLD_KEY SCORES_KEY                                     \n                                                                                                    \n Runs prediction trials for a trained model with varying thresholds to maximize the specified       \n metric. The search space for the threshold is traversed linearly from 0 to 1 in `n_trials` steps.  \n Results are displayed in a table on `stdout` (the corresponding API call to                        \n `spacy.cli.find_threshold.find_threshold()` returns all results).                                  \n                                                                                                    \n This is applicable only for components whose predictions are influenced by thresholds - e.g.       \n `textcat_multilabel` and `spancat`, but not `textcat`. Note that the full path to the              \n corresponding threshold attribute in the config has to be provided.                                
\n DOCS: https://spacy.io/api/cli#find-threshold                                                      \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model              TEXT  Model name or path [default: None] [required]                      │\n│ *    data_path          PATH  Location of binary evaluation data in .spacy format                │\n│                               [default: None]                                                    │\n│                               [required]                                                         │\n│ *    pipe_name          TEXT  Name of pipe to examine thresholds for [default: None] [required]  │\n│ *    threshold_key      TEXT  Key of threshold attribute in component's configuration            │\n│                               [default: None]                                                    │\n│                               [required]                                                         │\n│ *    scores_key         TEXT  Metric to optimize [default: None] [required]                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n_trials      -n          INTEGER  Number of trials to determine optimal thresholds            │\n│                                      [default: 11]                                               │\n│ --code          -c          PATH     Path to Python file with additional code (registered        │\n│                                      functions) to be imported                                   │\n│                                      [default: None]                                             │\n│ --gpu-id        -g          INTEGER  GPU ID or -1 for CPU 
[default: -1]                          │\n│ --gold-preproc  -G                   Use gold preprocessing                                      │\n│ --verbose       -V,-VV               Display more information for debugging purposes             │\n│ --help                               Show this message and exit.                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "info": "                                                                                                    \n Usage: python -m spacy info [OPTIONS] [MODEL]                                                      \n                                                                                                    \n Print info about spaCy installation. If a pipeline is specified as an argument, print its meta     \n information. Flag --markdown prints details in Markdown for easy copy-pasting to GitHub issues.    \n                                                                                                    \n Flag --url prints only the download URL of the most recent compatible version of the pipeline.     \n DOCS: https://spacy.io/api/cli#info                                                                \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   model      [MODEL]  Optional loadable spaCy pipeline [default: None]                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --markdown  -md              Generate Markdown for GitHub issues                                 │\n│ --silent    -s,-S            Don't print anything (just return)                                  │\n│ --exclude   -e         TEXT  Comma-separated keys to exclude from the print-out                  │\n│                              [default: labels]                                                   │\n│ --url       -u               Print the URL to download the most recent compatible version of the │\n│                              pipeline                                                            │\n│ --help                       Show this 
message and exit.                                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init config": "                                                                                                    \n Usage: python -m spacy init config [OPTIONS] OUTPUT_FILE                                           \n                                                                                                    \n Generate a starter config file for training. Based on your requirements specified via the CLI      \n arguments, this command generates a config with the optimal settings for your use case. This       \n includes the choice of architecture, pretrained weights and related hyperparameters.               \n                                                                                                    \n DOCS: https://spacy.io/api/cli#init-config                                                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    output_file      PATH  File to save the config to or - for stdout (will only output config  │\n│                             and no additional logging info)                                      │\n│                             [default: None]                                                      │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --lang         -l       TEXT                   Two-letter code of the language to use            │\n│                                                [default: en]                                     │\n│ --pipeline     -p       TEXT                   Comma-separated names of trainable pipeline       │\n│                                        
        components to include (without 'tok2vec' or       │\n│                                                'transformer')                                    │\n│                                                [default: tagger,parser,ner]                      │\n│ --optimize     -o       [efficiency|accuracy]  Whether to optimize for efficiency (faster        │\n│                                                inference, smaller model, lower memory            │\n│                                                consumption) or higher accuracy (potentially      │\n│                                                larger and slower model). This will impact the    │\n│                                                choice of architecture, pretrained weights and    │\n│                                                related hyperparameters.                          │\n│                                                [default: efficiency]                             │\n│ --gpu          -G                              Whether the model can run on GPU. This will       │\n│                                                impact the choice of architecture, pretrained     │\n│                                                weights and related hyperparameters.              │\n│ --pretraining  -pt                             Include config for pretraining (with 'spacy       │\n│                                                pretrain')                                        │\n│ --force        -F                              Force overwriting the output file                 │\n│ --help                                         Show this message and exit.                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init fill-config": "                                                                                                    \n Usage: python -m spacy init fill-config [OPTIONS] BASE_PATH [OUTPUT_FILE]                          \n                                                                                                    \n Fill partial config file with default values. Will add all missing settings from the default       \n config and will create all objects, check the registered functions for their default values and    \n update the base config. This command can be used with a config generated via the training          \n quickstart widget: https://spacy.io/usage/training#quickstart                                      \n                                                                                                    \n DOCS: https://spacy.io/api/cli#init-fill-config                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    base_path        FILE           Path to base config to fill [default: None] [required]      │\n│      output_file      [OUTPUT_FILE]  Path to output .cfg file (or - for stdout) [default: -]     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --pretraining       -pt            Include config for pretraining (with 'spacy pretrain')        │\n│ --diff              -D             Print a visual diff highlighting the changes                  │\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│                                   
 [default: None]                                               │\n│ --help                             Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init labels": "                                                                                                    \n Usage: python -m spacy init labels [OPTIONS] CONFIG_PATH OUTPUT_PATH                               \n                                                                                                    \n Generate JSON files for the labels in the data. This helps speed up the training process, since    \n spaCy won't have to preprocess the data to extract the labels.                                     \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n│ *    output_path      PATH  Output directory for the labels [default: None] [required]           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│                                 [default: None]                                                  │\n│ --verbose  -V,-VV               Display more information for debugging purposes                  │\n│ --gpu-id   -g          INTEGER  GPU ID or -1 for CPU [default: -1]                               │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init nlp": "                                                                                                    \n Usage: python -m spacy init nlp [OPTIONS] CONFIG_PATH OUTPUT_PATH                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n│ *    output_path      PATH  Output directory for the prepared data [default: None] [required]    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│                                 [default: None]                                                  │\n│ --verbose  -V,-VV               Display more information for debugging purposes                  │\n│ --gpu-id   -g          INTEGER  GPU ID or -1 for CPU [default: -1]                               │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init vectors": "                                                                                                    \n Usage: python -m spacy init vectors [OPTIONS] LANG VECTORS_LOC OUTPUT_DIR                          \n                                                                                                    \n Convert word vectors for use with spaCy. Will export an nlp object that you can use in the         \n [initialize] block of your config to initialize a model with vectors.                              \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    lang             TEXT  The language of the nlp object to create [default: None] [required]  │\n│ *    vectors_loc      PATH  Vectors file in Word2Vec format [default: None] [required]           │\n│ *    output_dir       PATH  Pipeline output directory [default: None] [required]                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --prune     -p          INTEGER  Optional number of vectors to prune to [default: -1]            │\n│ --truncate  -t          INTEGER  Optional number of vectors to truncate to when reading in       │\n│                                  vectors file                                                    │\n│                                  [default: 0]                                                    │\n│ --mode      -m          TEXT     Vectors mode: default or floret [default: default]              │\n│ --name      -n          TEXT     Optional name for the word vectors, e.g. 
en_core_web_lg.vectors │\n│                                  [default: None]                                                 │\n│ --verbose   -V,-VV               Display more information for debugging purposes                 │\n│ --attr      -a          TEXT     Optional token attribute to use for vectors, e.g. LOWER or NORM │\n│                                  [default: ORTH]                                                 │\n│ --help                           Show this message and exit.                                     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "link": "                                                                                                    \n Usage: python -m spacy link [OPTIONS] ARGS KWARGS                                                  \n                                                                                                    \n (deprecated)                                                                                       \n As of spaCy v3.0, symlinks like \"en\" are not supported anymore. You can load trained pipeline      \n packages using their full names or from a directory path.                                          \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    args        TEXT  [default: None] [required]                                                │\n│ *    kwargs      TEXT  [default: None] [required]                                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "package": "                                                                                                    \n Usage: python -m spacy package [OPTIONS] INPUT_DIR OUTPUT_DIR                                      \n                                                                                                    \n Generate an installable Python package for a pipeline. Includes binary data, meta and required     \n installation files. A new directory will be created in the specified output directory, and the     \n data will be copied over. If --create-meta is set and a meta.json already exists in the output     \n directory, the existing values will be used as the defaults in the command-line prompt. After      \n packaging, \"python -m build --sdist\" is run in the package directory, which will create a .tar.gz  \n archive that can be installed via \"pip install\".                                                   \n                                                                                                    \n If additional code files are provided (e.g. Python files containing custom registered functions    \n like pipeline components), they are copied into the package and imported in the __init__.py.       
\n DOCS: https://spacy.io/api/cli#package                                                             \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    input_dir       DIRECTORY  Directory with pipeline data [default: None] [required]          │\n│ *    output_dir      DIRECTORY  Output parent directory [default: None] [required]               │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code              -c                            TEXT  Comma-separated paths to Python file     │\n│                                                         with additional code (registered         │\n│                                                         functions) to be included in the package │\n│ --meta-path,--meta  -m                            FILE  Path to meta.json [default: None]        │\n│ --create-meta       -C                                  Create meta.json, even if one exists     │\n│ --name              -n                            TEXT  Package name to override meta            │\n│                                                         [default: None]                          │\n│ --version           -v                            TEXT  Package version to override meta         │\n│                                                         [default: None]                          │\n│ --build             -b                            TEXT  Comma-separated formats to build: sdist  │\n│                                                         and/or wheel, or none.                   
│\n│                                                         [default: sdist]                         │\n│ --force             -f,-F                               Force overwriting existing data in       │\n│                                                         output directory                         │\n│ --require-parent    -R,-R  --no-require-parent          Include the parent package (e.g. spacy)  │\n│                                                         in the requirements                      │\n│                                                         [default: require-parent]                │\n│ --help                                                  Show this message and exit.              │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "pretrain": "                                                                                                    \n Usage: python -m spacy pretrain [OPTIONS] CONFIG_PATH OUTPUT_DIR                                   \n                                                                                                    \n Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components, using an approximate       \n language-modelling objective. Two objective types are available, vector-based and character-based. \n                                                                                                    \n In the vector-based objective, we load word vectors that have been trained using a word2vec-style  \n distributional similarity algorithm, and train a component like a CNN, BiLSTM, etc to predict      \n vectors which match the pretrained ones. The weights are saved to a directory after each epoch.    \n You can then pass a path to one of these pretrained weights files to the 'spacy train' command.    \n This technique may be especially helpful if you have little labelled data. However, it's still     \n quite experimental, so your mileage may vary.                                                      \n To load the weights back in during 'spacy train', you need to ensure all settings are the same     \n between pretraining and training. Ideally, this is done by using the same config file for both     \n commands.                                                                                          
\n DOCS: https://spacy.io/api/cli#pretrain                                                            \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      FILE  Path to config file [default: None] [required]                       │\n│ *    output_dir       PATH  Directory to write weights to on each epoch [default: None]          │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code          -c       PATH     Path to Python file with additional code (registered           │\n│                                   functions) to be imported                                      │\n│                                   [default: None]                                                │\n│ --resume-path   -r       PATH     Path to pretrained weights from which to resume pretraining    │\n│                                   [default: None]                                                │\n│ --epoch-resume  -er      INTEGER  The epoch to resume counting from when using --resume-path.    │\n│                                   Prevents unintended overwriting of existing weight files.      │\n│                                   [default: None]                                                │\n│ --gpu-id        -g       INTEGER  GPU ID or -1 for CPU [default: -1]                             │\n│ --skip-last     -L                Skip saving model-last.bin                                     │\n│ --help                            Show this message and exit.                                    
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "profile": "                                                                                                    \n Usage: python -m spacy profile [OPTIONS] MODEL [INPUTS]                                            \n                                                                                                    \n Profile which functions take the most time in a spaCy pipeline. Input should be formatted as one   \n JSON object per line with a key \"text\". It can either be provided as a JSONL file, or be read from \n sys.sytdin. If no input file is specified, the IMDB dataset is loaded via Thinc.                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-profile                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model       TEXT      Trained pipeline to load [default: None] [required]                   │\n│      inputs      [INPUTS]  Location of input file. '-' for stdin. [default: None]                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n-texts  -n      INTEGER  Maximum number of texts to use if available [default: 10000]         │\n│ --help                      Show this message and exit.                                          │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project assets": "                                                                                                    \n Usage: python -m spacy project assets [OPTIONS] [PROJECT_DIR]                                      \n                                                                                                    \n Fetch project assets like datasets and pretrained weights. Assets are defined in the \"assets\"      \n section of the project.yml. If a checksum is provided in the project.yml, the file is only         \n downloaded if no local file with the same checksum exists.                                         \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/tutorial/directory-and-assets.md          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   project_dir      [PROJECT_DIR]  Path to cloned project. Defaults to current working directory. │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --sparse  -S        Use sparse checkout for assets provided via Git, to only check out and clone │\n│                     the files needed. Requires Git v22.2+.                                       │\n│ --extra   -e        Download all assets, including those marked as 'extra'.                      │\n│ --help              Show this message and exit.                                                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project clone": "                                                                                                    \n Usage: python -m spacy project clone [OPTIONS] NAME [DEST]                                         \n                                                                                                    \n Clone a project template from a repository. Calls into \"git\" and will only download the files from \n the given subdirectory. The GitHub repo defaults to the official Weasel template repo, but can be  \n customized (including using a private repo).                                                       \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#clipboard-clone                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    name      TEXT    The name of the template to clone [default: None] [required]              │\n│      dest      [DEST]  Where to clone the project. Defaults to current working directory         │\n│                        [default: None]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --repo    -r      TEXT  The repository to clone from                                             │\n│                         [default: https://github.com/explosion/projects]                         │\n│ --branch  -b      TEXT  The branch to clone from. 
If not provided, will attempt main, master     │\n│                         [default: None]                                                          │\n│ --sparse  -S            Use sparse Git checkout to only check out and clone the files needed.    │\n│                         Requires Git v22.2+.                                                     │\n│ --help                  Show this message and exit.                                              │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project document": "                                                                                                    \n Usage: python -m spacy project document [OPTIONS] [PROJECT_DIR]                                    \n                                                                                                    \n Auto-generate a README.md for a project. If the content is saved to a file, hidden markers are     \n added so you can add custom content before or after the auto-generated section and only the        \n auto-generated docs will be replaced when you re-run the command.                                  \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#closed_book-document               \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   project_dir      [PROJECT_DIR]  Path to cloned project. Defaults to current working directory. │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output    -o       PATH  Path to output Markdown file for output. Defaults to - for standard   │\n│                            output                                                                │\n│                            [default: -]                                                          │\n│ --no-emoji  -NE            Don't use emoji                                                       │\n│ --help                     Show this message and exit.                                           
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project dvc": "                                                                                                    \n Usage: python -m spacy project dvc [OPTIONS] [PROJECT_DIR] [WORKFLOW]                              \n                                                                                                    \n Auto-generate Data Version Control (DVC) config. A DVC project can only define one pipeline, so    \n you need to specify one workflow defined in the project.yml. If no workflow is specified, the      \n first defined workflow is used. The DVC config will only be updated if the project.yml changed.    \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#repeat-dvc                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   project_dir      [PROJECT_DIR]  Location of project directory. Defaults to current working     │\n│                                   directory.                                                     │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n│   workflow         [WORKFLOW]     Name of workflow defined in project.yml. Defaults to first     │\n│                                   workflow if not set.                                           
│\n│                                   [default: None]                                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --verbose  -V        Print more info                                                             │\n│ --quiet    -q        Print less info                                                             │\n│ --force    -F        Force update DVC config                                                     │\n│ --help               Show this message and exit.                                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project pull": "                                                                                                    \n Usage: python -m spacy project pull [OPTIONS] [REMOTE] [PROJECT_DIR]                               \n                                                                                                    \n Retrieve available precomputed outputs from a remote storage. You can alias remotes in your        \n project.yml by mapping them to storage paths. A storage can be anything that the smart_open        \n library can upload to, e.g. AWS, Google Cloud Storage, SSH, local directories etc.                 \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#arrow_down-push                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   remote           [REMOTE]       Name or path of remote storage [default: default]              │\n│   project_dir      [PROJECT_DIR]  Location of project directory. Defaults to current working     │\n│                                   directory.                                                     │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project push": "                                                                                                    \n Usage: python -m spacy project push [OPTIONS] [REMOTE] [PROJECT_DIR]                               \n                                                                                                    \n Persist outputs to a remote storage. You can alias remotes in your project.yml by mapping them to  \n storage paths. A storage can be anything that the smart_open library can upload to, e.g. AWS,      \n Google Cloud Storage, SSH, local directories etc.                                                  \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#arrow_up-push                      \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   remote           [REMOTE]       Name or path of remote storage [default: default]              │\n│   project_dir      [PROJECT_DIR]  Location of project directory. Defaults to current working     │\n│                                   directory.                                                     │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "train": "                                                                                                    \n Usage: python -m spacy train [OPTIONS] CONFIG_PATH                                                 \n                                                                                                    \n Train or update a spaCy pipeline. Requires data in spaCy's binary format. To convert data from     \n other formats, use the `spacy convert` command. The config file includes all settings and          \n hyperparameters used during training. To override settings in the config, e.g. settings that point \n to local paths or that you want to experiment with, you can override them as command line options. \n For instance, --training.batch_size 128 overrides the value of \"batch_size\" in the block           \n \"[training]\". The --code argument lets you pass in a Python file that's imported before training.  \n It can be used to register custom functions and architectures that can then be referenced in the   \n config.                                                                                            
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#train                                                               \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output,--output-path  -o          PATH     Output directory to store trained pipeline in       │\n│                                              [default: None]                                     │\n│ --code                  -c          PATH     Path to Python file with additional code            │\n│                                              (registered functions) to be imported               │\n│                                              [default: None]                                     │\n│ --verbose               -V,-VV               Display more information for debugging purposes     │\n│ --gpu-id                -g          INTEGER  GPU ID or -1 for CPU [default: -1]                  │\n│ --help                                       Show this message and exit.                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "validate": "                                                                                                    \n Usage: python -m spacy validate [OPTIONS]                                                          \n                                                                                                    \n Validate the currently installed pipeline packages and spaCy version. Checks if the installed      \n packages are compatible and shows upgrade instructions if available. Should be run after `pip      \n install -U spacy`.                                                                                 \n                                                                                                    \n DOCS: https://spacy.io/api/cli#validate                                                            \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
+  },
+  "errors": {
+    "missing_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ Missing command.                                                                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+    "unknown_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_COMMAND__'.                                                     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+    "unknown_subcommand": {
+      "benchmark": "                                                                                                    \n Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...                                       \n                                                                                                    \n Commands for benchmarking pipelines.                                                               \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ accuracy   Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in │\n│            the binary .spacy format. The --gold-preproc option sets up the evaluation examples   │\n│            with gold-standard sentences and tokens for the predictions. Gold preprocessing helps │\n│            the annotations align to the tokenization, and may result in sequences of more        │\n│            consistent length. However, it may reduce runtime accuracy due to train/test skew. To │\n│            render a sample of dependency parses in a HTML file, set as output directory as the   │\n│            displacy_path argument.                                                               │\n│ speed      Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark data in the     │\n│            binary .spacy format.                                                                 
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+      "debug": "                                                                                                    \n Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...                                           \n                                                                                                    \n Suite of helpful commands for debugging and profiling. Includes commands to check and validate     \n your config files, training and evaluation data, and custom model implementations.                 \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ data          Analyze, debug and validate your training and development data. Outputs useful     │\n│               stats, and can help you find problems like invalid entity annotations, cyclic      │\n│               dependencies, low data labels and more.                                            │\n│ profile       Profile which functions take the most time in a spaCy pipeline. Input should be    │\n│               formatted as one JSON object per line with a key \"text\". It can either be provided │\n│               as a JSONL file, or be read from sys.sytdin. If no input file is specified, the    │\n│               IMDB dataset is loaded via Thinc.                                                  │\n│ config        Debug a config file and show validation errors. 
The command will create all        │\n│               objects in the tree and validate them. Note that some config validation errors are │\n│               blocking and will prevent the rest of the config from being resolved. This means   │\n│               that you may not see all validation errors at once and some issues are only shown  │\n│               once previous errors have been fixed. Similar as with the 'train' command, you can │\n│               override settings from the config as command line options. For instance,           │\n│               --training.batch_size 128 overrides the value of \"batch_size\" in the block         │\n│               \"[training]\".                                                                      │\n│ diff-config   Show a diff of a config file with respect to spaCy's defaults or another config    │\n│               file. If additional settings were used in the creation of the config file, then    │\n│               you must supply these as extra parameters to the command when comparing to the     │\n│               default settings. The generated diff can also be used when posting to the          │\n│               discussion forum to provide more information for the maintainers.                  │\n│ model         Analyze a Thinc model implementation. Includes checks for internal structure and   │\n│               activations during training.                                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+      "init": "                                                                                                    \n Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...                                            \n                                                                                                    \n Commands for initializing configs and pipeline packages.                                           \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ config        Generate a starter config file for training. Based on your requirements specified  │\n│               via the CLI arguments, this command generates a config with the optimal settings   │\n│               for your use case. This includes the choice of architecture, pretrained weights    │\n│               and related hyperparameters.                                                       │\n│ fill-config   Fill partial config file with default values. Will add all missing settings from   │\n│               the default config and will create all objects, check the registered functions for │\n│               their default values and update the base config. 
This command can be used with a   │\n│               config generated via the training quickstart widget:                               │\n│               https://spacy.io/usage/training#quickstart                                         │\n│ vectors       Convert word vectors for use with spaCy. Will export an nlp object that you can    │\n│               use in the [initialize] block of your config to initialize a model with vectors.   │\n│ labels        Generate JSON files for the labels in the data. This helps speed up the training   │\n│               process, since spaCy won't have to preprocess the data to extract the labels.      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+      "project": "                                                                                                    \n Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...                                         \n                                                                                                    \n Command-line interface for spaCy projects and templates. You'd typically start by cloning a        \n project template to a local directory and fetching its assets like datasets etc. See the project's \n project.yml for the available commands.                                                            \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ assets     Fetch project assets like datasets and pretrained weights. Assets are defined in the  │\n│            \"assets\" section of the project.yml. If a checksum is provided in the project.yml,    │\n│            the file is only downloaded if no local file with the same checksum exists.           │\n│ clone      Clone a project template from a repository. Calls into \"git\" and will only download   │\n│            the files from the given subdirectory. The GitHub repo defaults to the official       │\n│            Weasel template repo, but can be customized (including using a private repo).         │\n│ document   Auto-generate a README.md for a project. 
If the content is saved to a file, hidden    │\n│            markers are added so you can add custom content before or after the auto-generated    │\n│            section and only the auto-generated docs will be replaced when you re-run the         │\n│            command.                                                                              │\n│ dvc        Auto-generate Data Version Control (DVC) config. A DVC project can only define one    │\n│            pipeline, so you need to specify one workflow defined in the project.yml. If no       │\n│            workflow is specified, the first defined workflow is used. The DVC config will only   │\n│            be updated if the project.yml changed.                                                │\n│ run        Run a named command or workflow defined in the project.yml. If a workflow name is     │\n│            specified, all commands in the workflow are run, in order. If commands define         │\n│            dependencies and/or outputs, they will only be re-run if state has changed.           │\n│ pull       Retrieve available precomputed outputs from a remote storage. You can alias remotes   │\n│            in your project.yml by mapping them to storage paths. A storage can be anything that  │\n│            the smart_open library can upload to, e.g. AWS, Google Cloud Storage, SSH, local      │\n│            directories etc.                                                                      │\n│ push       Persist outputs to a remote storage. You can alias remotes in your project.yml by     │\n│            mapping them to storage paths. A storage can be anything that the smart_open library  │\n│            can upload to, e.g. AWS, Google Cloud Storage, SSH, local directories etc.            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
+    }
+  },
+  "group_help": {
+    "benchmark": "                                                                                                    \n Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...                                       \n                                                                                                    \n Commands for benchmarking pipelines.                                                               \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ accuracy   Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in │\n│            the binary .spacy format. The --gold-preproc option sets up the evaluation examples   │\n│            with gold-standard sentences and tokens for the predictions. Gold preprocessing helps │\n│            the annotations align to the tokenization, and may result in sequences of more        │\n│            consistent length. However, it may reduce runtime accuracy due to train/test skew. To │\n│            render a sample of dependency parses in a HTML file, set as output directory as the   │\n│            displacy_path argument.                                                               │\n│ speed      Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark data in the     │\n│            binary .spacy format.                                                                 
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug": "                                                                                                    \n Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...                                           \n                                                                                                    \n Suite of helpful commands for debugging and profiling. Includes commands to check and validate     \n your config files, training and evaluation data, and custom model implementations.                 \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ data          Analyze, debug and validate your training and development data. Outputs useful     │\n│               stats, and can help you find problems like invalid entity annotations, cyclic      │\n│               dependencies, low data labels and more.                                            │\n│ profile       Profile which functions take the most time in a spaCy pipeline. Input should be    │\n│               formatted as one JSON object per line with a key \"text\". It can either be provided │\n│               as a JSONL file, or be read from sys.sytdin. If no input file is specified, the    │\n│               IMDB dataset is loaded via Thinc.                                                  │\n│ config        Debug a config file and show validation errors. 
The command will create all        │\n│               objects in the tree and validate them. Note that some config validation errors are │\n│               blocking and will prevent the rest of the config from being resolved. This means   │\n│               that you may not see all validation errors at once and some issues are only shown  │\n│               once previous errors have been fixed. Similar as with the 'train' command, you can │\n│               override settings from the config as command line options. For instance,           │\n│               --training.batch_size 128 overrides the value of \"batch_size\" in the block         │\n│               \"[training]\".                                                                      │\n│ diff-config   Show a diff of a config file with respect to spaCy's defaults or another config    │\n│               file. If additional settings were used in the creation of the config file, then    │\n│               you must supply these as extra parameters to the command when comparing to the     │\n│               default settings. The generated diff can also be used when posting to the          │\n│               discussion forum to provide more information for the maintainers.                  │\n│ model         Analyze a Thinc model implementation. Includes checks for internal structure and   │\n│               activations during training.                                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init": "                                                                                                    \n Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...                                            \n                                                                                                    \n Commands for initializing configs and pipeline packages.                                           \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ config        Generate a starter config file for training. Based on your requirements specified  │\n│               via the CLI arguments, this command generates a config with the optimal settings   │\n│               for your use case. This includes the choice of architecture, pretrained weights    │\n│               and related hyperparameters.                                                       │\n│ fill-config   Fill partial config file with default values. Will add all missing settings from   │\n│               the default config and will create all objects, check the registered functions for │\n│               their default values and update the base config. 
This command can be used with a   │\n│               config generated via the training quickstart widget:                               │\n│               https://spacy.io/usage/training#quickstart                                         │\n│ vectors       Convert word vectors for use with spaCy. Will export an nlp object that you can    │\n│               use in the [initialize] block of your config to initialize a model with vectors.   │\n│ labels        Generate JSON files for the labels in the data. This helps speed up the training   │\n│               process, since spaCy won't have to preprocess the data to extract the labels.      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project": "                                                                                                    \n Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...                                         \n                                                                                                    \n Command-line interface for spaCy projects and templates. You'd typically start by cloning a        \n project template to a local directory and fetching its assets like datasets etc. See the project's \n project.yml for the available commands.                                                            \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ assets     Fetch project assets like datasets and pretrained weights. Assets are defined in the  │\n│            \"assets\" section of the project.yml. If a checksum is provided in the project.yml,    │\n│            the file is only downloaded if no local file with the same checksum exists.           │\n│ clone      Clone a project template from a repository. Calls into \"git\" and will only download   │\n│            the files from the given subdirectory. The GitHub repo defaults to the official       │\n│            Weasel template repo, but can be customized (including using a private repo).         │\n│ document   Auto-generate a README.md for a project. 
If the content is saved to a file, hidden    │\n│            markers are added so you can add custom content before or after the auto-generated    │\n│            section and only the auto-generated docs will be replaced when you re-run the         │\n│            command.                                                                              │\n│ dvc        Auto-generate Data Version Control (DVC) config. A DVC project can only define one    │\n│            pipeline, so you need to specify one workflow defined in the project.yml. If no       │\n│            workflow is specified, the first defined workflow is used. The DVC config will only   │\n│            be updated if the project.yml changed.                                                │\n│ run        Run a named command or workflow defined in the project.yml. If a workflow name is     │\n│            specified, all commands in the workflow are run, in order. If commands define         │\n│            dependencies and/or outputs, they will only be re-run if state has changed.           │\n│ pull       Retrieve available precomputed outputs from a remote storage. You can alias remotes   │\n│            in your project.yml by mapping them to storage paths. A storage can be anything that  │\n│            the smart_open library can upload to, e.g. AWS, Google Cloud Storage, SSH, local      │\n│            directories etc.                                                                      │\n│ push       Persist outputs to a remote storage. You can alias remotes in your project.yml by     │\n│            mapping them to storage paths. A storage can be anything that the smart_open library  │\n│            can upload to, e.g. AWS, Google Cloud Storage, SSH, local directories etc.            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
+  },
+  "hidden_group_commands": {
+    "benchmark": [],
+    "debug": [],
+    "init": [
+      "nlp"
+    ],
+    "project": []
+  },
+  "hidden_top_level": [
+    "link",
+    "debug-data",
+    "profile"
+  ],
+  "known_groups": {
+    "benchmark": [
+      "accuracy",
+      "speed"
+    ],
+    "debug": [
+      "config",
+      "data",
+      "diff-config",
+      "model",
+      "profile"
+    ],
+    "init": [
+      "config",
+      "fill-config",
+      "labels",
+      "nlp",
+      "vectors"
+    ],
+    "project": [
+      "assets",
+      "clone",
+      "document",
+      "dvc",
+      "pull",
+      "push",
+      "run"
+    ]
+  },
+  "known_top_level": [
+    "apply",
+    "assemble",
+    "benchmark",
+    "convert",
+    "debug",
+    "debug-data",
+    "download",
+    "evaluate",
+    "find-function",
+    "find-threshold",
+    "info",
+    "init",
+    "link",
+    "package",
+    "pretrain",
+    "profile",
+    "project",
+    "train",
+    "validate"
+  ],
+  "root_help": "                                                                                                    \n Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...                                                 \n                                                                                                    \n spaCy Command-line Interface                                                                       \n                                                                                                    \n DOCS: https://spacy.io/api/cli                                                                     \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --install-completion          Install completion for the current shell.                          │\n│ --show-completion             Show completion for the current shell, to copy it or customize the │\n│                               installation.                                                      │\n│ --help                        Show this message and exit.                                        │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ download         Download compatible trained pipeline from the default download path using pip.  │\n│                  If --direct flag is set, the command expects the full package name with         │\n│                  version. For direct downloads, the compatibility check will be skipped. All     │\n│                  additional arguments provided to this command will be passed to `pip install`   │\n│                  on package installation.                                                        
│\n│ info             Print info about spaCy installation. If a pipeline is specified as an argument, │\n│                  print its meta information. Flag --markdown prints details in Markdown for easy │\n│                  copy-pasting to GitHub issues.                                                  │\n│ apply            Apply a trained pipeline to documents to get predictions. Expects a loadable    │\n│                  spaCy pipeline and path to the data, which can be a directory or a file. The    │\n│                  data files can be provided in multiple formats:     1. .spacy files     2.      │\n│                  .jsonl files with a specified \"field\" to read the text from.     3. Files with  │\n│                  any other extension are assumed to be containing        a single document.      │\n│                  DOCS: https://spacy.io/api/cli#apply                                            │\n│ assemble         Assemble a spaCy pipeline from a config file. The config file includes all      │\n│                  settings for initializing the pipeline. To override settings in the config,     │\n│                  e.g. settings that point to local paths or that you want to experiment with,    │\n│                  you can override them as command line options. The --code argument lets you     │\n│                  pass in a Python file that can be used to register custom functions that are    │\n│                  referenced in the config.                                                       │\n│ convert          Convert files into json or DocBin format for training. The resulting .spacy     │\n│                  file can be used with the train command and other experiment management         │\n│                  functions.                                                                      │\n│ evaluate         Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation   │\n│                  data in the binary .spacy format. 
The --gold-preproc option sets up the         │\n│                  evaluation examples with gold-standard sentences and tokens for the             │\n│                  predictions. Gold preprocessing helps the annotations align to the              │\n│                  tokenization, and may result in sequences of more consistent length. However,   │\n│                  it may reduce runtime accuracy due to train/test skew. To render a sample of    │\n│                  dependency parses in a HTML file, set as output directory as the displacy_path  │\n│                  argument.                                                                       │\n│ find-function    Find the module, path and line number to the file the registered function is    │\n│                  defined in, if available.                                                       │\n│ find-threshold   Runs prediction trials for a trained model with varying thresholds to maximize  │\n│                  the specified metric. The search space for the threshold is traversed linearly  │\n│                  from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`   │\n│                  (the corresponding API call to `spacy.cli.find_threshold.find_threshold()`      │\n│                  returns all results).                                                           │\n│ package          Generate an installable Python package for a pipeline. Includes binary data,    │\n│                  meta and required installation files. A new directory will be created in the    │\n│                  specified output directory, and the data will be copied over. If --create-meta  │\n│                  is set and a meta.json already exists in the output directory, the existing     │\n│                  values will be used as the defaults in the command-line prompt. 
After           │\n│                  packaging, \"python -m build --sdist\" is run in the package directory, which     │\n│                  will create a .tar.gz archive that can be installed via \"pip install\".          │\n│ pretrain         Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components, using   │\n│                  an approximate language-modelling objective. Two objective types are available, │\n│                  vector-based and character-based.                                               │\n│ train            Train or update a spaCy pipeline. Requires data in spaCy's binary format. To    │\n│                  convert data from other formats, use the `spacy convert` command. The config    │\n│                  file includes all settings and hyperparameters used during training. To         │\n│                  override settings in the config, e.g. settings that point to local paths or     │\n│                  that you want to experiment with, you can override them as command line         │\n│                  options. For instance, --training.batch_size 128 overrides the value of         │\n│                  \"batch_size\" in the block \"[training]\". The --code argument lets you pass in a  │\n│                  Python file that's imported before training. It can be used to register custom  │\n│                  functions and architectures that can then be referenced in the config.          │\n│ validate         Validate the currently installed pipeline packages and spaCy version. Checks if │\n│                  the installed packages are compatible and shows upgrade instructions if         │\n│                  available. Should be run after `pip install -U spacy`.                          │\n│ debug            Suite of helpful commands for debugging and profiling. 
Includes commands to     │\n│                  check and validate your config files, training and evaluation data, and custom  │\n│                  model implementations.                                                          │\n│ benchmark        Commands for benchmarking pipelines.                                            │\n│ init             Commands for initializing configs and pipeline packages.                        │\n│ project          Command-line interface for spaCy projects and templates. You'd typically start  │\n│                  by cloning a project template to a local directory and fetching its assets like │\n│                  datasets etc. See the project's project.yml for the available commands.         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
+}
diff --git a/spacy_cli/main.py b/spacy_cli/main.py
new file mode 100644
index 00000000000..cbe6e376c58
--- /dev/null
+++ b/spacy_cli/main.py
@@ -0,0 +1,81 @@
+import sys
+from typing import Iterable, Optional
+
+from .static import HELP_OPTIONS, UNKNOWN_COMMAND_TOKEN, UNKNOWN_SUBCOMMAND_TOKEN
+from .static import get_plugin_command_names, load_manifest
+
+
+def _write_output(text: str) -> None:
+    """Write `text` to stdout, adding a trailing newline if it has none."""
+    sys.stdout.write(text)
+    if text.endswith("\n"):
+        return
+    sys.stdout.write("\n")
+
+
+def _run_live() -> None:
+    """Run the full CLI; `spacy.cli` is imported here, not at module import."""
+    from spacy.cli import setup_cli
+
+    setup_cli()
+
+
+def _try_static(argv: Iterable[str]):
+    """Try to answer an invocation from the pre-built manifest.
+
+    Returns a `(text, exit_code)` tuple when the manifest holds the answer,
+    or None when the caller should fall back to the live CLI.
+    """
+    args = list(argv)
+    manifest = load_manifest()
+    plugins = get_plugin_command_names()
+    groups = manifest["known_groups"]
+    top_level = set(manifest["known_top_level"])
+    if not args:
+        return manifest["errors"]["missing_command"], 2
+    first, rest = args[0], args[1:]
+    if first in HELP_OPTIONS:
+        # Root help goes to the live CLI as soon as any plugin is installed.
+        return None if plugins else (manifest["root_help"], 0)
+    if first.startswith("-"):
+        return None
+    if first not in top_level:
+        if first in plugins:
+            return None
+        message = manifest["errors"]["unknown_command"]
+        return message.replace(UNKNOWN_COMMAND_TOKEN, first), 2
+    if first not in groups:
+        # Plain command: a static answer exists only for a help request.
+        if any(arg in HELP_OPTIONS for arg in rest):
+            return manifest["command_help"][first], 0
+        return None
+    if not rest or rest[0] in HELP_OPTIONS:
+        return None if plugins else (manifest["group_help"][first], 0)
+    second = rest[0]
+    if second not in groups[first]:
+        if plugins:
+            return None
+        message = manifest["errors"]["unknown_subcommand"][first]
+        return message.replace(UNKNOWN_SUBCOMMAND_TOKEN, second), 2
+    if any(arg in HELP_OPTIONS for arg in rest[1:]):
+        return manifest["command_help"][f"{first} {second}"], 0
+    return None
+
+
+def main(argv: Optional[Iterable[str]] = None) -> None:
+    """Entry point: print a static answer and exit, or run the live CLI.
+
+    `argv` defaults to `sys.argv[1:]`. It is only passed to `_try_static`;
+    `_run_live()` is called without arguments.
+    """
+    args = list(argv) if argv is not None else sys.argv[1:]
+    try:
+        outcome = _try_static(args)
+    except Exception:
+        # Any exception on the static path falls back to the live CLI.
+        return _run_live()
+    if outcome is None:
+        return _run_live()
+    text, code = outcome
+    _write_output(text)
+    raise SystemExit(code)
diff --git a/spacy_cli/static.py b/spacy_cli/static.py
new file mode 100644
index 00000000000..51594ceef9a
--- /dev/null
+++ b/spacy_cli/static.py
@@ -0,0 +1,37 @@
+import json
+from functools import lru_cache
+from importlib.metadata import entry_points
+from importlib.resources import files
+from typing import Any, Dict, Set
+
+
+HELP_OPTIONS = {"--help", "-h"}
+PLUGIN_ENTRY_POINT_GROUP = "spacy_cli"
+MANIFEST_FILE = "cli_manifest.json"
+UNKNOWN_COMMAND_TOKEN = "__SPACY_UNKNOWN_COMMAND__"
+UNKNOWN_SUBCOMMAND_TOKEN = "__SPACY_UNKNOWN_SUBCOMMAND__"
+
+
+@lru_cache(maxsize=1)
+def load_manifest() -> Dict[str, Any]:
+    """Load and parse the JSON manifest bundled with the `spacy_cli` package.
+
+    The parsed dict is cached, so every caller shares one (mutable) object.
+    """
+    data = files("spacy_cli").joinpath(MANIFEST_FILE).read_text(encoding="utf8")
+    return json.loads(data)
+
+
+def get_plugin_command_names() -> Set[str]:
+    """Return the names of entry points registered in the plugin group.
+
+    Before Python 3.10, `entry_points()` accepts no arguments and returns a
+    plain dict keyed by group; from 3.10 on the result offers `select()`.
+    Both shapes are handled so the lookup does not raise TypeError on 3.9.
+    """
+    all_entry_points = entry_points()
+    if hasattr(all_entry_points, "select"):
+        selected = all_entry_points.select(group=PLUGIN_ENTRY_POINT_GROUP)
+    else:
+        selected = all_entry_points.get(PLUGIN_ENTRY_POINT_GROUP, ())
+    return {entry_point.name for entry_point in selected}

From 0a45289512d668a1beddbf11c2b163a0533e3a7a Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Mon, 9 Mar 2026 14:32:31 +0100
Subject: [PATCH 33/42] Fix lazy load on modules where the function shadows

---
 spacy/cli/__init__.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py
index dcfb4b8a92e..ded45efe9f7 100644
--- a/spacy/cli/__init__.py
+++ b/spacy/cli/__init__.py
@@ -1,4 +1,5 @@
 import sys
+import types
 from importlib import import_module
 from typing import Iterable
 
@@ -79,6 +80,19 @@ def __dir__():
     return sorted(set(globals()) | set(PUBLIC_ATTRS))
 
 
+class _CLIModule(types.ModuleType):
+    def __setattr__(self, name, value):
+        # Bind the mapped attribute, not the submodule, for public names.
+        if isinstance(value, types.ModuleType) and name in PUBLIC_ATTRS:
+            _module_name, attr_name = PUBLIC_ATTRS[name]
+            if attr_name is not None:
+                value = getattr(value, attr_name)
+        super().__setattr__(name, value)
+
+
+sys.modules[__name__].__class__ = _CLIModule
+
+
 @app.command("link", no_args_is_help=True, deprecated=True, hidden=True)
 def link(*args, **kwargs):
     """As of spaCy v3.0, symlinks like "en" are not supported anymore. You can load trained

From f0abcf7bc0d3c072a3a4c3dff4fc9329ba88d5f1 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Mon, 9 Mar 2026 14:32:41 +0100
Subject: [PATCH 34/42] Update manifest

---
 spacy_cli/cli_manifest.json | 78 ++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

diff --git a/spacy_cli/cli_manifest.json b/spacy_cli/cli_manifest.json
index 361a10dca16..12d760416c5 100644
--- a/spacy_cli/cli_manifest.json
+++ b/spacy_cli/cli_manifest.json
@@ -1,55 +1,55 @@
 {
   "command": "python -m spacy",
   "command_help": {
-    "apply": "                                                                                                    \n Usage: python -m spacy apply [OPTIONS] MODEL DATA_PATH OUTPUT_FILE                                 \n                                                                                                    \n Apply a trained pipeline to documents to get predictions. Expects a loadable spaCy pipeline and    \n path to the data, which can be a directory or a file. The data files can be provided in multiple   \n formats:     1. .spacy files     2. .jsonl files with a specified \"field\" to read the text from.   \n 3. Files with any other extension are assumed to be containing        a single document. DOCS:     \n https://spacy.io/api/cli#apply                                                                     \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model            TEXT  Model name or path [default: None] [required]                        │\n│ *    data_path        PATH  Location of the documents to predict on. Can be a single file in     │\n│                             .spacy format or a .jsonl file. Files with other extensions are      │\n│                             treated as single plain text documents. If a directory is provided   │\n│                             it is traversed recursively to grab all files to be processed. The   │\n│                             files can be a mixture of .spacy, .jsonl and text files. If .jsonl   │\n│                             is provided the specified field is going to be grabbed (\"text\" by    │\n│                             default).                                                            
│\n│                             [default: None]                                                      │\n│                             [required]                                                           │\n│ *    output_file      FILE  Path to save the resulting .spacy file [default: None] [required]    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code        -c       PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│                                 [default: None]                                                  │\n│ --text-key    -tk      TEXT     Key containing text string for JSONL [default: text]             │\n│ --force       -F                Force overwriting the output file                                │\n│ --gpu-id      -g       INTEGER  GPU ID or -1 for CPU. [default: -1]                              │\n│ --batch-size  -b       INTEGER  Batch size. [default: 1]                                         │\n│ --n-process   -n       INTEGER  number of processors to use. [default: 1]                        │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "assemble": "                                                                                                    \n Usage: python -m spacy assemble [OPTIONS] CONFIG_PATH OUTPUT_PATH                                  \n                                                                                                    \n Assemble a spaCy pipeline from a config file. The config file includes all settings for            \n initializing the pipeline. To override settings in the config, e.g. settings that point to local   \n paths or that you want to experiment with, you can override them as command line options. The      \n --code argument lets you pass in a Python file that can be used to register custom functions that  \n are referenced in the config.                                                                      \n                                                                                                    \n DOCS: https://spacy.io/api/cli#assemble                                                            \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n│ *    output_path      PATH  Output directory to store assembled pipeline in [default: None]      │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH  Path to Python file with additional code (registered functions) to  │\n│                              be imported                                                         │\n│                              [default: 
None]                                                     │\n│ --verbose  -V,-VV            Display more information for debugging purposes                     │\n│ --help                       Show this message and exit.                                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "benchmark accuracy": "                                                                                                    \n Usage: python -m spacy benchmark accuracy [OPTIONS] MODEL DATA_PATH                                \n                                                                                                    \n Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in the binary   \n .spacy format. The --gold-preproc option sets up the evaluation examples with gold-standard        \n sentences and tokens for the predictions. Gold preprocessing helps the annotations align to the    \n tokenization, and may result in sequences of more consistent length. However, it may reduce        \n runtime accuracy due to train/test skew. To render a sample of dependency parses in a HTML file,   \n set as output directory as the displacy_path argument.                                             \n                                                                                                    \n DOCS: https://spacy.io/api/cli#benchmark-accuracy                                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [default: None] [required]                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [default: None]    │\n│                           [required]                                                             │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output          -o       FILE       Output JSON file for metrics [default: None]               │\n│ --code            -c       PATH 
      Path to Python file with additional code (registered       │\n│                                       functions) to be imported                                  │\n│                                       [default: None]                                            │\n│ --gpu-id          -g       INTEGER    GPU ID or -1 for CPU [default: -1]                         │\n│ --gold-preproc    -G                  Use gold preprocessing                                     │\n│ --displacy-path   -dp      DIRECTORY  Directory to output rendered parses as HTML                │\n│                                       [default: None]                                            │\n│ --displacy-limit  -dl      INTEGER    Limit of parses to render as HTML [default: 25]            │\n│ --per-component   -P                  Return scores per component, only applicable when an       │\n│                                       output JSON file is specified.                             │\n│ --spans-key       -sk      TEXT       Spans key to use when evaluating Doc.spans [default: sc]   │\n│ --help                                Show this message and exit.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "benchmark speed": "                                                                                                    \n Usage: python -m spacy benchmark speed [OPTIONS] MODEL DATA_PATH                                   \n                                                                                                    \n Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark data in the binary .spacy    \n format.                                                                                            \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [default: None] [required]                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [default: None]    │\n│                           [required]                                                             │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --batch-size  -b      INTEGER RANGE [x>=1]   Override the pipeline batch size [default: None]    │\n│ --no-shuffle                                 Do not shuffle benchmark data                       │\n│ --gpu-id      -g      INTEGER                GPU ID or -1 for CPU [default: -1]                  │\n│ --batches             INTEGER RANGE [x>=30]  Minimum number of batches to benchmark              │\n│                                              [default: 50]                                       │\n│ --warmup      -w      INTEGER RANGE [x>=0]   Number of iterations over the data for warmup       │\n│                                    
          [default: 3]                                        │\n│ --code        -c      PATH                   Path to Python file with additional code            │\n│                                              (registered functions) to be imported               │\n│                                              [default: None]                                     │\n│ --help                                       Show this message and exit.                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "convert": "                                                                                                    \n Usage: python -m spacy convert [OPTIONS] INPUT_PATH [OUTPUT_DIR]                                   \n                                                                                                    \n Convert files into json or DocBin format for training. The resulting .spacy file can be used with  \n the train command and other experiment management functions.                                       \n                                                                                                    \n If no output_dir is specified and the output format is JSON, the data is written to stdout, so you \n can pipe them forward to a JSON file: $ spacy convert some_file.conllu --file-type json >          \n some_file.json                                                                                     \n DOCS: https://spacy.io/api/cli#convert                                                             \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    input_path      TEXT          Input file or directory [default: None] [required]            │\n│      output_dir      [OUTPUT_DIR]  Output directory. '-' for stdout. 
[default: -]                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --file-type        -t       [json|spacy]  Type of data to produce [default: spacy]               │\n│ --n-sents          -n       INTEGER       Number of sentences per doc (0 to disable)             │\n│                                           [default: 1]                                           │\n│ --seg-sents        -s                     Segment sentences (for -c ner)                         │\n│ --model,--base     -b       TEXT          Trained spaCy pipeline for sentence segmentation to    │\n│                                           use as base (for --seg-sents)                          │\n│                                           [default: None]                                        │\n│ --morphology       -m                     Enable appending morphology to tags                    │\n│ --merge-subtokens  -T                     Merge CoNLL-U subtokens                                │\n│ --converter        -c       TEXT          Converter: ('conllubio', 'conllu', 'conll', 'ner',     │\n│                                           'iob', 'json')                                         │\n│                                           [default: auto]                                        │\n│ --ner-map          -nm      PATH          NER tag mapping (as JSON-encoded dict of entity types) │\n│                                           [default: None]                                        │\n│ --lang             -l       TEXT          Language (if tokenizer required) [default: None]       │\n│ --concatenate      -C                     Concatenate output to a single file                    │\n│ --help                                    Show this message and exit.                            
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug config": "                                                                                                    \n Usage: python -m spacy debug config [OPTIONS] CONFIG_PATH                                          \n                                                                                                    \n Debug a config file and show validation errors. The command will create all objects in the tree    \n and validate them. Note that some config validation errors are blocking and will prevent the rest  \n of the config from being resolved. This means that you may not see all validation errors at once   \n and some issues are only shown once previous errors have been fixed. Similar as with the 'train'   \n command, you can override settings from the config as command line options. For instance,          \n --training.batch_size 128 overrides the value of \"batch_size\" in the block \"[training]\".           \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-config                                                        \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c      PATH  Path to Python file with additional code (registered           │\n│                                   functions) to be imported                                      │\n│                                   [default: None]                                                │\n│ --show-functions    -F            
Show an overview of all registered functions used in the       │\n│                                   config and where they come from (modules, files etc.)          │\n│ --show-variables    -V            Show an overview of all variables referenced in the config and │\n│                                   their values. This will also reflect variables overwritten on  │\n│                                   the CLI.                                                       │\n│ --help                            Show this message and exit.                                    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug data": "                                                                                                    \n Usage: python -m spacy debug data [OPTIONS] CONFIG_PATH                                            \n                                                                                                    \n Analyze, debug and validate your training and development data. Outputs useful stats, and can help \n you find problems like invalid entity annotations, cyclic dependencies, low data labels and more.  \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-data                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│                                    [default: None]                                               │\n│ --ignore-warnings   -IW            Ignore warnings, only show stats and errors                   │\n│ --verbose           -V             Print additional information and explanations                 │\n│ --no-format         -NF            Don't pretty-print the results                                │\n│ --help                             Show this message and exit.                                   
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug diff-config": "                                                                                                    \n Usage: python -m spacy debug diff-config [OPTIONS] CONFIG_PATH                                     \n                                                                                                    \n Show a diff of a config file with respect to spaCy's defaults or another config file. If           \n additional settings were used in the creation of the config file, then you must supply these as    \n extra parameters to the command when comparing to the default settings. The generated diff can     \n also be used when posting to the discussion forum to provide more information for the maintainers. \n                                                                                                    \n The `optimize`, `gpu`, and `pretraining` options are only relevant when comparing against the      \n default configuration (or specifically when `compare_to` is None).                                 
\n DOCS: https://spacy.io/api/cli#debug-diff                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --compare-to                 PATH                   Path to a config file to diff against, or    │\n│                                                     `None` to compare against default settings   │\n│                                                     [default: None]                              │\n│ --optimize          -o       [efficiency|accuracy]  Whether the user config was optimized for    │\n│                                                     efficiency or accuracy. Only relevant when   │\n│                                                     comparing against the default config.        │\n│                                                     [default: efficiency]                        │\n│ --gpu               -G                              Whether the original config can run on a     │\n│                                                     GPU. Only relevant when comparing against    │\n│                                                     the default config.                          │\n│ --pretraining,--pt                                  Whether to compare on a config with          │\n│                                                     pretraining involved. Only relevant when     │\n│                                                     comparing against the default config.        
│\n│ --markdown          -md                             Generate Markdown for GitHub issues          │\n│ --help                                              Show this message and exit.                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug model": "                                                                                                    \n Usage: python -m spacy debug model [OPTIONS] CONFIG_PATH COMPONENT                                 \n                                                                                                    \n Analyze a Thinc model implementation. Includes checks for internal structure and activations       \n during training.                                                                                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-model                                                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n│ *    component        TEXT  Name of the pipeline component of which the model should be analysed │\n│                             [default: None]                                                      │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --layers       -l         TEXT     Comma-separated names of layer IDs to print                   │\n│ --dimensions   -DIM                Show dimensions                                               │\n│ --parameters   -PAR                Show parameters                                               │\n│ --gradients    -GRAD               Show gradients                                                │\n│ --attributes   -ATTR               
Show attributes                                               │\n│ --print-step0  -P0                 Print model before training                                   │\n│ --print-step1  -P1                 Print model after initialization                              │\n│ --print-step2  -P2                 Print model after training                                    │\n│ --print-step3  -P3                 Print final predictions                                       │\n│ --gpu-id       -g         INTEGER  GPU ID or -1 for CPU [default: -1]                            │\n│ --help                             Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug profile": "                                                                                                    \n Usage: python -m spacy debug profile [OPTIONS] MODEL [INPUTS]                                      \n                                                                                                    \n Profile which functions take the most time in a spaCy pipeline. Input should be formatted as one   \n JSON object per line with a key \"text\". It can either be provided as a JSONL file, or be read from \n sys.sytdin. If no input file is specified, the IMDB dataset is loaded via Thinc.                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-profile                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model       TEXT      Trained pipeline to load [default: None] [required]                   │\n│      inputs      [INPUTS]  Location of input file. '-' for stdin. [default: None]                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n-texts  -n      INTEGER  Maximum number of texts to use if available [default: 10000]         │\n│ --help                      Show this message and exit.                                          │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug-data": "                                                                                                    \n Usage: python -m spacy debug-data [OPTIONS] CONFIG_PATH                                            \n                                                                                                    \n Analyze, debug and validate your training and development data. Outputs useful stats, and can help \n you find problems like invalid entity annotations, cyclic dependencies, low data labels and more.  \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-data                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│                                    [default: None]                                               │\n│ --ignore-warnings   -IW            Ignore warnings, only show stats and errors                   │\n│ --verbose           -V             Print additional information and explanations                 │\n│ --no-format         -NF            Don't pretty-print the results                                │\n│ --help                             Show this message and exit.                                   
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "download": "                                                                                                    \n Usage: python -m spacy download [OPTIONS] MODEL                                                    \n                                                                                                    \n Download compatible trained pipeline from the default download path using pip. If --direct flag is \n set, the command expects the full package name with version. For direct downloads, the             \n compatibility check will be skipped. All additional arguments provided to this command will be     \n passed to `pip install` on package installation.                                                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#download AVAILABLE PACKAGES: https://spacy.io/models                \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model      TEXT  Name of pipeline package to download [default: None] [required]            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --direct  -d,-D            Force direct download of name + version                               │\n│ --sdist   -S               Download sdist (.tar.gz) archive instead of pre-built binary wheel    │\n│ --url     -U         TEXT  Download from given url [default: None]                               │\n│ --help                     Show this message and exit.                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "evaluate": "                                                                                                    \n Usage: python -m spacy evaluate [OPTIONS] MODEL DATA_PATH                                          \n                                                                                                    \n Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in the binary   \n .spacy format. The --gold-preproc option sets up the evaluation examples with gold-standard        \n sentences and tokens for the predictions. Gold preprocessing helps the annotations align to the    \n tokenization, and may result in sequences of more consistent length. However, it may reduce        \n runtime accuracy due to train/test skew. To render a sample of dependency parses in a HTML file,   \n set as output directory as the displacy_path argument.                                             \n                                                                                                    \n DOCS: https://spacy.io/api/cli#benchmark-accuracy                                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [default: None] [required]                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [default: None]    │\n│                           [required]                                                             │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output          -o       FILE       Output JSON file for metrics [default: None]               │\n│ --code            -c       PATH       
Path to Python file with additional code (registered       │\n│                                       functions) to be imported                                  │\n│                                       [default: None]                                            │\n│ --gpu-id          -g       INTEGER    GPU ID or -1 for CPU [default: -1]                         │\n│ --gold-preproc    -G                  Use gold preprocessing                                     │\n│ --displacy-path   -dp      DIRECTORY  Directory to output rendered parses as HTML                │\n│                                       [default: None]                                            │\n│ --displacy-limit  -dl      INTEGER    Limit of parses to render as HTML [default: 25]            │\n│ --per-component   -P                  Return scores per component, only applicable when an       │\n│                                       output JSON file is specified.                             │\n│ --spans-key       -sk      TEXT       Spans key to use when evaluating Doc.spans [default: sc]   │\n│ --help                                Show this message and exit.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "find-function": "                                                                                                    \n Usage: python -m spacy find-function [OPTIONS] FUNC_NAME                                           \n                                                                                                    \n Find the module, path and line number to the file the registered function is defined in, if        \n available.                                                                                         \n                                                                                                    \n func_name (str): Name of the registered function. registry_name (Optional[str]): Name of the       \n catalogue registry.                                                                                \n DOCS: https://spacy.io/api/cli#find-function                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    func_name      TEXT  Name of the registered function. [default: None] [required]            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --registry  -r      TEXT  Name of the catalogue registry. [default: None]                        │\n│ --help                    Show this message and exit.                                            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "find-threshold": "                                                                                                    \n Usage: python -m spacy find-threshold [OPTIONS] MODEL DATA_PATH PIPE_NAME                          \n                                       THRESHOLD_KEY SCORES_KEY                                     \n                                                                                                    \n Runs prediction trials for a trained model with varying thresholds to maximize the specified       \n metric. The search space for the threshold is traversed linearly from 0 to 1 in `n_trials` steps.  \n Results are displayed in a table on `stdout` (the corresponding API call to                        \n `spacy.cli.find_threshold.find_threshold()` returns all results).                                  \n                                                                                                    \n This is applicable only for components whose predictions are influenced by thresholds - e.g.       \n `textcat_multilabel` and `spancat`, but not `textcat`. Note that the full path to the              \n corresponding threshold attribute in the config has to be provided.                                
\n DOCS: https://spacy.io/api/cli#find-threshold                                                      \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model              TEXT  Model name or path [default: None] [required]                      │\n│ *    data_path          PATH  Location of binary evaluation data in .spacy format                │\n│                               [default: None]                                                    │\n│                               [required]                                                         │\n│ *    pipe_name          TEXT  Name of pipe to examine thresholds for [default: None] [required]  │\n│ *    threshold_key      TEXT  Key of threshold attribute in component's configuration            │\n│                               [default: None]                                                    │\n│                               [required]                                                         │\n│ *    scores_key         TEXT  Metric to optimize [default: None] [required]                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n_trials      -n          INTEGER  Number of trials to determine optimal thresholds            │\n│                                      [default: 11]                                               │\n│ --code          -c          PATH     Path to Python file with additional code (registered        │\n│                                      functions) to be imported                                   │\n│                                      [default: None]                                             │\n│ --gpu-id        -g          INTEGER  GPU ID or -1 for CPU 
[default: -1]                          │\n│ --gold-preproc  -G                   Use gold preprocessing                                      │\n│ --verbose       -V,-VV               Display more information for debugging purposes             │\n│ --help                               Show this message and exit.                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "info": "                                                                                                    \n Usage: python -m spacy info [OPTIONS] [MODEL]                                                      \n                                                                                                    \n Print info about spaCy installation. If a pipeline is specified as an argument, print its meta     \n information. Flag --markdown prints details in Markdown for easy copy-pasting to GitHub issues.    \n                                                                                                    \n Flag --url prints only the download URL of the most recent compatible version of the pipeline.     \n DOCS: https://spacy.io/api/cli#info                                                                \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   model      [MODEL]  Optional loadable spaCy pipeline [default: None]                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --markdown  -md              Generate Markdown for GitHub issues                                 │\n│ --silent    -s,-S            Don't print anything (just return)                                  │\n│ --exclude   -e         TEXT  Comma-separated keys to exclude from the print-out                  │\n│                              [default: labels]                                                   │\n│ --url       -u               Print the URL to download the most recent compatible version of the │\n│                              pipeline                                                            │\n│ --help                       Show this 
message and exit.                                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init config": "                                                                                                    \n Usage: python -m spacy init config [OPTIONS] OUTPUT_FILE                                           \n                                                                                                    \n Generate a starter config file for training. Based on your requirements specified via the CLI      \n arguments, this command generates a config with the optimal settings for your use case. This       \n includes the choice of architecture, pretrained weights and related hyperparameters.               \n                                                                                                    \n DOCS: https://spacy.io/api/cli#init-config                                                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    output_file      PATH  File to save the config to or - for stdout (will only output config  │\n│                             and no additional logging info)                                      │\n│                             [default: None]                                                      │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --lang         -l       TEXT                   Two-letter code of the language to use            │\n│                                                [default: en]                                     │\n│ --pipeline     -p       TEXT                   Comma-separated names of trainable pipeline       │\n│                                        
        components to include (without 'tok2vec' or       │\n│                                                'transformer')                                    │\n│                                                [default: tagger,parser,ner]                      │\n│ --optimize     -o       [efficiency|accuracy]  Whether to optimize for efficiency (faster        │\n│                                                inference, smaller model, lower memory            │\n│                                                consumption) or higher accuracy (potentially      │\n│                                                larger and slower model). This will impact the    │\n│                                                choice of architecture, pretrained weights and    │\n│                                                related hyperparameters.                          │\n│                                                [default: efficiency]                             │\n│ --gpu          -G                              Whether the model can run on GPU. This will       │\n│                                                impact the choice of architecture, pretrained     │\n│                                                weights and related hyperparameters.              │\n│ --pretraining  -pt                             Include config for pretraining (with 'spacy       │\n│                                                pretrain')                                        │\n│ --force        -F                              Force overwriting the output file                 │\n│ --help                                         Show this message and exit.                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init fill-config": "                                                                                                    \n Usage: python -m spacy init fill-config [OPTIONS] BASE_PATH [OUTPUT_FILE]                          \n                                                                                                    \n Fill partial config file with default values. Will add all missing settings from the default       \n config and will create all objects, check the registered functions for their default values and    \n update the base config. This command can be used with a config generated via the training          \n quickstart widget: https://spacy.io/usage/training#quickstart                                      \n                                                                                                    \n DOCS: https://spacy.io/api/cli#init-fill-config                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    base_path        FILE           Path to base config to fill [default: None] [required]      │\n│      output_file      [OUTPUT_FILE]  Path to output .cfg file (or - for stdout) [default: -]     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --pretraining       -pt            Include config for pretraining (with 'spacy pretrain')        │\n│ --diff              -D             Print a visual diff highlighting the changes                  │\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│                                   
 [default: None]                                               │\n│ --help                             Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init labels": "                                                                                                    \n Usage: python -m spacy init labels [OPTIONS] CONFIG_PATH OUTPUT_PATH                               \n                                                                                                    \n Generate JSON files for the labels in the data. This helps speed up the training process, since    \n spaCy won't have to preprocess the data to extract the labels.                                     \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n│ *    output_path      PATH  Output directory for the labels [default: None] [required]           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│                                 [default: None]                                                  │\n│ --verbose  -V,-VV               Display more information for debugging purposes                  │\n│ --gpu-id   -g          INTEGER  GPU ID or -1 for CPU [default: -1]                               │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init nlp": "                                                                                                    \n Usage: python -m spacy init nlp [OPTIONS] CONFIG_PATH OUTPUT_PATH                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n│ *    output_path      PATH  Output directory for the prepared data [default: None] [required]    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│                                 [default: None]                                                  │\n│ --verbose  -V,-VV               Display more information for debugging purposes                  │\n│ --gpu-id   -g          INTEGER  GPU ID or -1 for CPU [default: -1]                               │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init vectors": "                                                                                                    \n Usage: python -m spacy init vectors [OPTIONS] LANG VECTORS_LOC OUTPUT_DIR                          \n                                                                                                    \n Convert word vectors for use with spaCy. Will export an nlp object that you can use in the         \n [initialize] block of your config to initialize a model with vectors.                              \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    lang             TEXT  The language of the nlp object to create [default: None] [required]  │\n│ *    vectors_loc      PATH  Vectors file in Word2Vec format [default: None] [required]           │\n│ *    output_dir       PATH  Pipeline output directory [default: None] [required]                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --prune     -p          INTEGER  Optional number of vectors to prune to [default: -1]            │\n│ --truncate  -t          INTEGER  Optional number of vectors to truncate to when reading in       │\n│                                  vectors file                                                    │\n│                                  [default: 0]                                                    │\n│ --mode      -m          TEXT     Vectors mode: default or floret [default: default]              │\n│ --name      -n          TEXT     Optional name for the word vectors, e.g. 
en_core_web_lg.vectors │\n│                                  [default: None]                                                 │\n│ --verbose   -V,-VV               Display more information for debugging purposes                 │\n│ --attr      -a          TEXT     Optional token attribute to use for vectors, e.g. LOWER or NORM │\n│                                  [default: ORTH]                                                 │\n│ --help                           Show this message and exit.                                     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "link": "                                                                                                    \n Usage: python -m spacy link [OPTIONS] ARGS KWARGS                                                  \n                                                                                                    \n (deprecated)                                                                                       \n As of spaCy v3.0, symlinks like \"en\" are not supported anymore. You can load trained pipeline      \n packages using their full names or from a directory path.                                          \n                                                                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    args        TEXT  [default: None] [required]                                                │\n│ *    kwargs      TEXT  [default: None] [required]                                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "package": "                                                                                                    \n Usage: python -m spacy package [OPTIONS] INPUT_DIR OUTPUT_DIR                                      \n                                                                                                    \n Generate an installable Python package for a pipeline. Includes binary data, meta and required     \n installation files. A new directory will be created in the specified output directory, and the     \n data will be copied over. If --create-meta is set and a meta.json already exists in the output     \n directory, the existing values will be used as the defaults in the command-line prompt. After      \n packaging, \"python -m build --sdist\" is run in the package directory, which will create a .tar.gz  \n archive that can be installed via \"pip install\".                                                   \n                                                                                                    \n If additional code files are provided (e.g. Python files containing custom registered functions    \n like pipeline components), they are copied into the package and imported in the __init__.py.       
\n DOCS: https://spacy.io/api/cli#package                                                             \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    input_dir       DIRECTORY  Directory with pipeline data [default: None] [required]          │\n│ *    output_dir      DIRECTORY  Output parent directory [default: None] [required]               │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code              -c                            TEXT  Comma-separated paths to Python file     │\n│                                                         with additional code (registered         │\n│                                                         functions) to be included in the package │\n│ --meta-path,--meta  -m                            FILE  Path to meta.json [default: None]        │\n│ --create-meta       -C                                  Create meta.json, even if one exists     │\n│ --name              -n                            TEXT  Package name to override meta            │\n│                                                         [default: None]                          │\n│ --version           -v                            TEXT  Package version to override meta         │\n│                                                         [default: None]                          │\n│ --build             -b                            TEXT  Comma-separated formats to build: sdist  │\n│                                                         and/or wheel, or none.                   
│\n│                                                         [default: sdist]                         │\n│ --force             -f,-F                               Force overwriting existing data in       │\n│                                                         output directory                         │\n│ --require-parent    -R,-R  --no-require-parent          Include the parent package (e.g. spacy)  │\n│                                                         in the requirements                      │\n│                                                         [default: require-parent]                │\n│ --help                                                  Show this message and exit.              │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "pretrain": "                                                                                                    \n Usage: python -m spacy pretrain [OPTIONS] CONFIG_PATH OUTPUT_DIR                                   \n                                                                                                    \n Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components, using an approximate       \n language-modelling objective. Two objective types are available, vector-based and character-based. \n                                                                                                    \n In the vector-based objective, we load word vectors that have been trained using a word2vec-style  \n distributional similarity algorithm, and train a component like a CNN, BiLSTM, etc to predict      \n vectors which match the pretrained ones. The weights are saved to a directory after each epoch.    \n You can then pass a path to one of these pretrained weights files to the 'spacy train' command.    \n This technique may be especially helpful if you have little labelled data. However, it's still     \n quite experimental, so your mileage may vary.                                                      \n To load the weights back in during 'spacy train', you need to ensure all settings are the same     \n between pretraining and training. Ideally, this is done by using the same config file for both     \n commands.                                                                                          
\n DOCS: https://spacy.io/api/cli#pretrain                                                            \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      FILE  Path to config file [default: None] [required]                       │\n│ *    output_dir       PATH  Directory to write weights to on each epoch [default: None]          │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code          -c       PATH     Path to Python file with additional code (registered           │\n│                                   functions) to be imported                                      │\n│                                   [default: None]                                                │\n│ --resume-path   -r       PATH     Path to pretrained weights from which to resume pretraining    │\n│                                   [default: None]                                                │\n│ --epoch-resume  -er      INTEGER  The epoch to resume counting from when using --resume-path.    │\n│                                   Prevents unintended overwriting of existing weight files.      │\n│                                   [default: None]                                                │\n│ --gpu-id        -g       INTEGER  GPU ID or -1 for CPU [default: -1]                             │\n│ --skip-last     -L                Skip saving model-last.bin                                     │\n│ --help                            Show this message and exit.                                    
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "profile": "                                                                                                    \n Usage: python -m spacy profile [OPTIONS] MODEL [INPUTS]                                            \n                                                                                                    \n Profile which functions take the most time in a spaCy pipeline. Input should be formatted as one   \n JSON object per line with a key \"text\". It can either be provided as a JSONL file, or be read from \n sys.sytdin. If no input file is specified, the IMDB dataset is loaded via Thinc.                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-profile                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model       TEXT      Trained pipeline to load [default: None] [required]                   │\n│      inputs      [INPUTS]  Location of input file. '-' for stdin. [default: None]                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n-texts  -n      INTEGER  Maximum number of texts to use if available [default: 10000]         │\n│ --help                      Show this message and exit.                                          │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "apply": "                                                                                                    \n Usage: python -m spacy apply [OPTIONS] MODEL DATA_PATH OUTPUT_FILE                                 \n                                                                                                    \n Apply a trained pipeline to documents to get predictions. Expects a loadable spaCy pipeline and    \n path to the data, which can be a directory or a file. The data files can be provided in multiple   \n formats:     1. .spacy files     2. .jsonl files with a specified \"field\" to read the text from.   \n 3. Files with any other extension are assumed to be containing        a single document. DOCS:     \n https://spacy.io/api/cli#apply                                                                     \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model            TEXT  Model name or path [required]                                        │\n│ *    data_path        PATH  Location of the documents to predict on. Can be a single file in     │\n│                             .spacy format or a .jsonl file. Files with other extensions are      │\n│                             treated as single plain text documents. If a directory is provided   │\n│                             it is traversed recursively to grab all files to be processed. The   │\n│                             files can be a mixture of .spacy, .jsonl and text files. If .jsonl   │\n│                             is provided the specified field is going to be grabbed (\"text\" by    │\n│                             default).                                                            
│\n│                             [required]                                                           │\n│ *    output_file      FILE  Path to save the resulting .spacy file [required]                    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code        -c       PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│ --text-key    -tk      TEXT     Key containing text string for JSONL [default: text]             │\n│ --force       -F                Force overwriting the output file                                │\n│ --gpu-id      -g       INTEGER  GPU ID or -1 for CPU. [default: -1]                              │\n│ --batch-size  -b       INTEGER  Batch size. [default: 1]                                         │\n│ --n-process   -n       INTEGER  number of processors to use. [default: 1]                        │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "assemble": "                                                                                                    \n Usage: python -m spacy assemble [OPTIONS] CONFIG_PATH OUTPUT_PATH                                  \n                                                                                                    \n Assemble a spaCy pipeline from a config file. The config file includes all settings for            \n initializing the pipeline. To override settings in the config, e.g. settings that point to local   \n paths or that you want to experiment with, you can override them as command line options. The      \n --code argument lets you pass in a Python file that can be used to register custom functions that  \n are referenced in the config.                                                                      \n                                                                                                    \n DOCS: https://spacy.io/api/cli#assemble                                                            \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n│ *    output_path      PATH  Output directory to store assembled pipeline in [required]           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH  Path to Python file with additional code (registered functions) to  │\n│                              be imported                                                         │\n│ --verbose  -V,-VV            Display more information for debugging purposes                     │\n│ --help                       Show this 
message and exit.                                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "benchmark accuracy": "                                                                                                    \n Usage: python -m spacy benchmark accuracy [OPTIONS] MODEL DATA_PATH                                \n                                                                                                    \n Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in the binary   \n .spacy format. The --gold-preproc option sets up the evaluation examples with gold-standard        \n sentences and tokens for the predictions. Gold preprocessing helps the annotations align to the    \n tokenization, and may result in sequences of more consistent length. However, it may reduce        \n runtime accuracy due to train/test skew. To render a sample of dependency parses in a HTML file,   \n set as output directory as the displacy_path argument.                                             \n                                                                                                    \n DOCS: https://spacy.io/api/cli#benchmark-accuracy                                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [required]                                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [required]         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output          -o       FILE       Output JSON file for metrics                               │\n│ --code            -c       PATH       Path to Python file with additional code (registered       │\n│                                 
      functions) to be imported                                  │\n│ --gpu-id          -g       INTEGER    GPU ID or -1 for CPU [default: -1]                         │\n│ --gold-preproc    -G                  Use gold preprocessing                                     │\n│ --displacy-path   -dp      DIRECTORY  Directory to output rendered parses as HTML                │\n│ --displacy-limit  -dl      INTEGER    Limit of parses to render as HTML [default: 25]            │\n│ --per-component   -P                  Return scores per component, only applicable when an       │\n│                                       output JSON file is specified.                             │\n│ --spans-key       -sk      TEXT       Spans key to use when evaluating Doc.spans [default: sc]   │\n│ --help                                Show this message and exit.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "benchmark speed": "                                                                                                    \n Usage: python -m spacy benchmark speed [OPTIONS] MODEL DATA_PATH                                   \n                                                                                                    \n Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark data in the binary .spacy    \n format.                                                                                            \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [required]                                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [required]         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --batch-size  -b      INTEGER RANGE [x>=1]   Override the pipeline batch size                    │\n│ --no-shuffle                                 Do not shuffle benchmark data                       │\n│ --gpu-id      -g      INTEGER                GPU ID or -1 for CPU [default: -1]                  │\n│ --batches             INTEGER RANGE [x>=30]  Minimum number of batches to benchmark              │\n│                                              [default: 50]                                       │\n│ --warmup      -w      INTEGER RANGE [x>=0]   Number of iterations over the data for warmup       │\n│                                              [default: 3]                                        │\n│ --code        -c      PATH                   Path to Python file with additional code            │\n│                                    
          (registered functions) to be imported               │\n│ --help                                       Show this message and exit.                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "convert": "                                                                                                    \n Usage: python -m spacy convert [OPTIONS] INPUT_PATH [OUTPUT_DIR]                                   \n                                                                                                    \n Convert files into json or DocBin format for training. The resulting .spacy file can be used with  \n the train command and other experiment management functions.                                       \n                                                                                                    \n If no output_dir is specified and the output format is JSON, the data                              \n is written to stdout, so you can pipe them forward to a JSON file:                                 \n $ spacy convert some_file.conllu --file-type json > some_file.json                                 \n                                                                                                    \n DOCS: https://spacy.io/api/cli#convert                                                             \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    input_path      TEXT          Input file or directory [required]                            │\n│      output_dir      [OUTPUT_DIR]  Output directory. '-' for stdout. 
[default: -]                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --file-type        -t       [json|spacy]  Type of data to produce [default: spacy]               │\n│ --n-sents          -n       INTEGER       Number of sentences per doc (0 to disable)             │\n│                                           [default: 1]                                           │\n│ --seg-sents        -s                     Segment sentences (for -c ner)                         │\n│ --model,--base     -b       TEXT          Trained spaCy pipeline for sentence segmentation to    │\n│                                           use as base (for --seg-sents)                          │\n│ --morphology       -m                     Enable appending morphology to tags                    │\n│ --merge-subtokens  -T                     Merge CoNLL-U subtokens                                │\n│ --converter        -c       TEXT          Converter: ('conllubio', 'conllu', 'conll', 'ner',     │\n│                                           'iob', 'json')                                         │\n│                                           [default: auto]                                        │\n│ --ner-map          -nm      PATH          NER tag mapping (as JSON-encoded dict of entity types) │\n│ --lang             -l       TEXT          Language (if tokenizer required)                       │\n│ --concatenate      -C                     Concatenate output to a single file                    │\n│ --help                                    Show this message and exit.                            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug config": "                                                                                                    \n Usage: python -m spacy debug config [OPTIONS] CONFIG_PATH                                          \n                                                                                                    \n Debug a config file and show validation errors. The command will create all objects in the tree    \n and validate them. Note that some config validation errors are blocking and will prevent the rest  \n of the config from being resolved. This means that you may not see all validation errors at once   \n and some issues are only shown once previous errors have been fixed. Similar as with the 'train'   \n command, you can override settings from the config as command line options. For instance,          \n --training.batch_size 128 overrides the value of \"batch_size\" in the block \"\".                     \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-config                                                        \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c      PATH  Path to Python file with additional code (registered           │\n│                                   functions) to be imported                                      │\n│ --show-functions    -F            Show an overview of all registered functions used in the       │\n│                                   
config and where they come from (modules, files etc.)          │\n│ --show-variables    -V            Show an overview of all variables referenced in the config and │\n│                                   their values. This will also reflect variables overwritten on  │\n│                                   the CLI.                                                       │\n│ --help                            Show this message and exit.                                    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug data": "                                                                                                    \n Usage: python -m spacy debug data [OPTIONS] CONFIG_PATH                                            \n                                                                                                    \n Analyze, debug and validate your training and development data. Outputs useful stats, and can help \n you find problems like invalid entity annotations, cyclic dependencies, low data labels and more.  \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-data                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│ --ignore-warnings   -IW            Ignore warnings, only show stats and errors                   │\n│ --verbose           -V             Print additional information and explanations                 │\n│ --no-format         -NF            Don't pretty-print the results                                │\n│ --help                             Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug diff-config": "                                                                                                    \n Usage: python -m spacy debug diff-config [OPTIONS] CONFIG_PATH                                     \n                                                                                                    \n Show a diff of a config file with respect to spaCy's defaults or another config file. If           \n additional settings were used in the creation of the config file, then you must supply these as    \n extra parameters to the command when comparing to the default settings. The generated diff can     \n also be used when posting to the discussion forum to provide more information for the maintainers. \n                                                                                                    \n The `optimize`, `gpu`, and `pretraining` options are only relevant when                            \n comparing against the default configuration (or specifically when `compare_to` is None).           
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-diff                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --compare-to                 PATH                   Path to a config file to diff against, or    │\n│                                                     `None` to compare against default settings   │\n│ --optimize          -o       [efficiency|accuracy]  Whether the user config was optimized for    │\n│                                                     efficiency or accuracy. Only relevant when   │\n│                                                     comparing against the default config.        │\n│                                                     [default: efficiency]                        │\n│ --gpu               -G                              Whether the original config can run on a     │\n│                                                     GPU. Only relevant when comparing against    │\n│                                                     the default config.                          │\n│ --pretraining,--pt                                  Whether to compare on a config with          │\n│                                                     pretraining involved. Only relevant when     │\n│                                                     comparing against the default config.        
│\n│ --markdown          -md                             Generate Markdown for GitHub issues          │\n│ --help                                              Show this message and exit.                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug model": "                                                                                                    \n Usage: python -m spacy debug model [OPTIONS] CONFIG_PATH COMPONENT                                 \n                                                                                                    \n Analyze a Thinc model implementation. Includes checks for internal structure and activations       \n during training.                                                                                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-model                                                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n│ *    component        TEXT  Name of the pipeline component of which the model should be analysed │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --layers       -l         TEXT     Comma-separated names of layer IDs to print                   │\n│ --dimensions   -DIM                Show dimensions                                               │\n│ --parameters   -PAR                Show parameters                                               │\n│ --gradients    -GRAD               Show gradients                                                │\n│ --attributes   -ATTR               Show attributes                                               │\n│ --print-step0  -P0                 
Print model before training                                   │\n│ --print-step1  -P1                 Print model after initialization                              │\n│ --print-step2  -P2                 Print model after training                                    │\n│ --print-step3  -P3                 Print final predictions                                       │\n│ --gpu-id       -g         INTEGER  GPU ID or -1 for CPU [default: -1]                            │\n│ --help                             Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug profile": "                                                                                                    \n Usage: python -m spacy debug profile [OPTIONS] MODEL [INPUTS]                                      \n                                                                                                    \n Profile which functions take the most time in a spaCy pipeline. Input should be formatted as one   \n JSON object per line with a key \"text\". It can either be provided as a JSONL file, or be read from \n sys.sytdin. If no input file is specified, the IMDB dataset is loaded via Thinc.                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-profile                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model       TEXT      Trained pipeline to load [required]                                   │\n│      inputs      [INPUTS]  Location of input file. '-' for stdin.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n-texts  -n      INTEGER  Maximum number of texts to use if available [default: 10000]         │\n│ --help                      Show this message and exit.                                          │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug-data": "                                                                                                    \n Usage: python -m spacy debug-data [OPTIONS] CONFIG_PATH                                            \n                                                                                                    \n Analyze, debug and validate your training and development data. Outputs useful stats, and can help \n you find problems like invalid entity annotations, cyclic dependencies, low data labels and more.  \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-data                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│ --ignore-warnings   -IW            Ignore warnings, only show stats and errors                   │\n│ --verbose           -V             Print additional information and explanations                 │\n│ --no-format         -NF            Don't pretty-print the results                                │\n│ --help                             Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "download": "                                                                                                    \n Usage: python -m spacy download [OPTIONS] MODEL                                                    \n                                                                                                    \n Download compatible trained pipeline from the default download path using pip. If --direct flag is \n set, the command expects the full package name with version. For direct downloads, the             \n compatibility check will be skipped. All additional arguments provided to this command will be     \n passed to `pip install` on package installation.                                                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#download                                                            \n AVAILABLE PACKAGES: https://spacy.io/models                                                        \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model      TEXT  Name of pipeline package to download [required]                            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --direct  -d,-D            Force direct download of name + version                               │\n│ --sdist   -S               Download sdist (.tar.gz) archive instead of pre-built binary wheel    │\n│ --url     -U         TEXT  Download from given url                                               │\n│ --help                     Show this message and exit.                                           
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "evaluate": "                                                                                                    \n Usage: python -m spacy evaluate [OPTIONS] MODEL DATA_PATH                                          \n                                                                                                    \n Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in the binary   \n .spacy format. The --gold-preproc option sets up the evaluation examples with gold-standard        \n sentences and tokens for the predictions. Gold preprocessing helps the annotations align to the    \n tokenization, and may result in sequences of more consistent length. However, it may reduce        \n runtime accuracy due to train/test skew. To render a sample of dependency parses in a HTML file,   \n set as output directory as the displacy_path argument.                                             \n                                                                                                    \n DOCS: https://spacy.io/api/cli#benchmark-accuracy                                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [required]                                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [required]         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output          -o       FILE       Output JSON file for metrics                               │\n│ --code            -c       PATH       Path to Python file with additional code (registered       │\n│                                       
functions) to be imported                                  │\n│ --gpu-id          -g       INTEGER    GPU ID or -1 for CPU [default: -1]                         │\n│ --gold-preproc    -G                  Use gold preprocessing                                     │\n│ --displacy-path   -dp      DIRECTORY  Directory to output rendered parses as HTML                │\n│ --displacy-limit  -dl      INTEGER    Limit of parses to render as HTML [default: 25]            │\n│ --per-component   -P                  Return scores per component, only applicable when an       │\n│                                       output JSON file is specified.                             │\n│ --spans-key       -sk      TEXT       Spans key to use when evaluating Doc.spans [default: sc]   │\n│ --help                                Show this message and exit.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "find-function": "                                                                                                    \n Usage: python -m spacy find-function [OPTIONS] FUNC_NAME                                           \n                                                                                                    \n Find the module, path and line number to the file the registered function is defined in, if        \n available.                                                                                         \n                                                                                                    \n func_name (str): Name of the registered function.                                                  \n registry_name (Optional): Name of the catalogue registry.                                          \n                                                                                                    \n DOCS: https://spacy.io/api/cli#find-function                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    func_name      TEXT  Name of the registered function. [required]                            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --registry  -r      TEXT  Name of the catalogue registry.                                        │\n│ --help                    Show this message and exit.                                            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "find-threshold": "                                                                                                    \n Usage: python -m spacy find-threshold [OPTIONS] MODEL DATA_PATH PIPE_NAME                          \n                                       THRESHOLD_KEY SCORES_KEY                                     \n                                                                                                    \n Runs prediction trials for a trained model with varying thresholds to maximize the specified       \n metric. The search space for the threshold is traversed linearly from 0 to 1 in `n_trials` steps.  \n Results are displayed in a table on `stdout` (the corresponding API call to                        \n `spacy.cli.find_threshold.find_threshold()` returns all results).                                  \n                                                                                                    \n This is applicable only for components whose predictions are influenced by                         \n thresholds - e.g. `textcat_multilabel` and `spancat`, but not `textcat`. Note                      \n that the full path to the corresponding threshold attribute in the config has to                   \n be provided.                                                                                       
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#find-threshold                                                      \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model              TEXT  Model name or path [required]                                      │\n│ *    data_path          PATH  Location of binary evaluation data in .spacy format [required]     │\n│ *    pipe_name          TEXT  Name of pipe to examine thresholds for [required]                  │\n│ *    threshold_key      TEXT  Key of threshold attribute in component's configuration [required] │\n│ *    scores_key         TEXT  Metric to optimize [required]                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n_trials      -n          INTEGER  Number of trials to determine optimal thresholds            │\n│                                      [default: 11]                                               │\n│ --code          -c          PATH     Path to Python file with additional code (registered        │\n│                                      functions) to be imported                                   │\n│ --gpu-id        -g          INTEGER  GPU ID or -1 for CPU [default: -1]                          │\n│ --gold-preproc  -G                   Use gold preprocessing                                      │\n│ --verbose       -V,-VV               Display more information for debugging purposes             │\n│ --help                               Show this message and exit.                                 
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "info": "                                                                                                    \n Usage: python -m spacy info [OPTIONS] [MODEL]                                                      \n                                                                                                    \n Print info about spaCy installation. If a pipeline is specified as an argument, print its meta     \n information. Flag --markdown prints details in Markdown for easy copy-pasting to GitHub issues.    \n                                                                                                    \n Flag --url prints only the download URL of the most recent compatible                              \n version of the pipeline.                                                                           \n                                                                                                    \n DOCS: https://spacy.io/api/cli#info                                                                \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   model      [MODEL]  Optional loadable spaCy pipeline                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --markdown  -md              Generate Markdown for GitHub issues                                 │\n│ --silent    -s,-S            Don't print anything (just return)                                  │\n│ --exclude   -e         TEXT  Comma-separated keys to exclude from the print-out                  │\n│                              [default: labels]                                                   │\n│ --url       -u               Print the URL to 
download the most recent compatible version of the │\n│                              pipeline                                                            │\n│ --help                       Show this message and exit.                                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init config": "                                                                                                    \n Usage: python -m spacy init config [OPTIONS] OUTPUT_FILE                                           \n                                                                                                    \n Generate a starter config file for training. Based on your requirements specified via the CLI      \n arguments, this command generates a config with the optimal settings for your use case. This       \n includes the choice of architecture, pretrained weights and related hyperparameters.               \n                                                                                                    \n DOCS: https://spacy.io/api/cli#init-config                                                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    output_file      PATH  File to save the config to or - for stdout (will only output config  │\n│                             and no additional logging info)                                      │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --lang         -l       TEXT                   Two-letter code of the language to use            │\n│                                                [default: en]                                     │\n│ --pipeline     -p       TEXT                   Comma-separated names of trainable pipeline       │\n│                                                components to include (without 'tok2vec' or       │\n│                                        
        'transformer')                                    │\n│                                                [default: tagger,parser,ner]                      │\n│ --optimize     -o       [efficiency|accuracy]  Whether to optimize for efficiency (faster        │\n│                                                inference, smaller model, lower memory            │\n│                                                consumption) or higher accuracy (potentially      │\n│                                                larger and slower model). This will impact the    │\n│                                                choice of architecture, pretrained weights and    │\n│                                                related hyperparameters.                          │\n│                                                [default: efficiency]                             │\n│ --gpu          -G                              Whether the model can run on GPU. This will       │\n│                                                impact the choice of architecture, pretrained     │\n│                                                weights and related hyperparameters.              │\n│ --pretraining  -pt                             Include config for pretraining (with 'spacy       │\n│                                                pretrain')                                        │\n│ --force        -F                              Force overwriting the output file                 │\n│ --help                                         Show this message and exit.                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init fill-config": "                                                                                                    \n Usage: python -m spacy init fill-config [OPTIONS] BASE_PATH [OUTPUT_FILE]                          \n                                                                                                    \n Fill partial config file with default values. Will add all missing settings from the default       \n config and will create all objects, check the registered functions for their default values and    \n update the base config. This command can be used with a config generated via the training          \n quickstart widget: https://spacy.io/usage/training#quickstart                                      \n                                                                                                    \n DOCS: https://spacy.io/api/cli#init-fill-config                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    base_path        FILE           Path to base config to fill [required]                      │\n│      output_file      [OUTPUT_FILE]  Path to output .cfg file (or - for stdout) [default: -]     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --pretraining       -pt            Include config for pretraining (with 'spacy pretrain')        │\n│ --diff              -D             Print a visual diff highlighting the changes                  │\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│ --help                            
 Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init labels": "                                                                                                    \n Usage: python -m spacy init labels [OPTIONS] CONFIG_PATH OUTPUT_PATH                               \n                                                                                                    \n Generate JSON files for the labels in the data. This helps speed up the training process, since    \n spaCy won't have to preprocess the data to extract the labels.                                     \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n│ *    output_path      PATH  Output directory for the labels [required]                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│ --verbose  -V,-VV               Display more information for debugging purposes                  │\n│ --gpu-id   -g          INTEGER  GPU ID or -1 for CPU [default: -1]                               │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init nlp": "                                                                                                    \n Usage: python -m spacy init nlp [OPTIONS] CONFIG_PATH OUTPUT_PATH                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n│ *    output_path      PATH  Output directory for the prepared data [required]                    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│ --verbose  -V,-VV               Display more information for debugging purposes                  │\n│ --gpu-id   -g          INTEGER  GPU ID or -1 for CPU [default: -1]                               │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init vectors": "                                                                                                    \n Usage: python -m spacy init vectors [OPTIONS] LANG VECTORS_LOC OUTPUT_DIR                          \n                                                                                                    \n Convert word vectors for use with spaCy. Will export an nlp object that you can use in the  block  \n of your config to initialize a model with vectors.                                                 \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    lang             TEXT  The language of the nlp object to create [required]                  │\n│ *    vectors_loc      PATH  Vectors file in Word2Vec format [required]                           │\n│ *    output_dir       PATH  Pipeline output directory [required]                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --prune     -p          INTEGER  Optional number of vectors to prune to [default: -1]            │\n│ --truncate  -t          INTEGER  Optional number of vectors to truncate to when reading in       │\n│                                  vectors file                                                    │\n│                                  [default: 0]                                                    │\n│ --mode      -m          TEXT     Vectors mode: default or floret [default: default]              │\n│ --name      -n          TEXT     Optional name for the word vectors, e.g. 
en_core_web_lg.vectors │\n│ --verbose   -V,-VV               Display more information for debugging purposes                 │\n│ --attr      -a          TEXT     Optional token attribute to use for vectors, e.g. LOWER or NORM │\n│                                  [default: ORTH]                                                 │\n│ --help                           Show this message and exit.                                     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "link": "                                                                                                    \n Usage: python -m spacy link [OPTIONS] ARGS KWARGS                                                  \n                                                                                                    \n (deprecated)                                                                                       \n As of spaCy v3.0, symlinks like \"en\" are not supported anymore. You can load trained pipeline      \n packages using their full names or from a directory path.                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    args        TEXT  [required]                                                                │\n│ *    kwargs      TEXT  [required]                                                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "package": "                                                                                                    \n Usage: python -m spacy package [OPTIONS] INPUT_DIR OUTPUT_DIR                                      \n                                                                                                    \n Generate an installable Python package for a pipeline. Includes binary data, meta and required     \n installation files. A new directory will be created in the specified output directory, and the     \n data will be copied over. If --create-meta is set and a meta.json already exists in the output     \n directory, the existing values will be used as the defaults in the command-line prompt. After      \n packaging, \"python -m build --sdist\" is run in the package directory, which will create a .tar.gz  \n archive that can be installed via \"pip install\".                                                   \n                                                                                                    \n If additional code files are provided (e.g. Python files containing custom                         \n registered functions like pipeline components), they are copied into the                           \n package and imported in the __init__.py.                                                           
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#package                                                             \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    input_dir       DIRECTORY  Directory with pipeline data [required]                          │\n│ *    output_dir      DIRECTORY  Output parent directory [required]                               │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code              -c                            TEXT  Comma-separated paths to Python file     │\n│                                                         with additional code (registered         │\n│                                                         functions) to be included in the package │\n│ --meta-path,--meta  -m                            FILE  Path to meta.json                        │\n│ --create-meta       -C                                  Create meta.json, even if one exists     │\n│ --name              -n                            TEXT  Package name to override meta            │\n│ --version           -v                            TEXT  Package version to override meta         │\n│ --build             -b                            TEXT  Comma-separated formats to build: sdist  │\n│                                                         and/or wheel, or none.                   
│\n│                                                         [default: sdist]                         │\n│ --force             -f,-F                               Force overwriting existing data in       │\n│                                                         output directory                         │\n│ --require-parent    -R,-R  --no-require-parent          Include the parent package (e.g. spacy)  │\n│                                                         in the requirements                      │\n│                                                         [default: require-parent]                │\n│ --help                                                  Show this message and exit.              │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "pretrain": "                                                                                                    \n Usage: python -m spacy pretrain [OPTIONS] CONFIG_PATH OUTPUT_DIR                                   \n                                                                                                    \n Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components, using an approximate       \n language-modelling objective. Two objective types are available, vector-based and character-based. \n                                                                                                    \n In the vector-based objective, we load word vectors that have been trained                         \n using a word2vec-style distributional similarity algorithm, and train a                            \n component like a CNN, BiLSTM, etc to predict vectors which match the                               \n pretrained ones. The weights are saved to a directory after each epoch. You                        \n can then pass a path to one of these pretrained weights files to the                               \n 'spacy train' command.                                                                             \n                                                                                                    \n This technique may be especially helpful if you have little labelled data.                         \n However, it's still quite experimental, so your mileage may vary.                                  \n                                                                                                    \n To load the weights back in during 'spacy train', you need to ensure                               \n all settings are the same between pretraining and training. Ideally,                               \n this is done by using the same config file for both commands.                                      
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#pretrain                                                            \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      FILE  Path to config file [required]                                       │\n│ *    output_dir       PATH  Directory to write weights to on each epoch [required]               │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code          -c       PATH     Path to Python file with additional code (registered           │\n│                                   functions) to be imported                                      │\n│ --resume-path   -r       PATH     Path to pretrained weights from which to resume pretraining    │\n│ --epoch-resume  -er      INTEGER  The epoch to resume counting from when using --resume-path.    │\n│                                   Prevents unintended overwriting of existing weight files.      │\n│ --gpu-id        -g       INTEGER  GPU ID or -1 for CPU [default: -1]                             │\n│ --skip-last     -L                Skip saving model-last.bin                                     │\n│ --help                            Show this message and exit.                                    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "profile": "                                                                                                    \n Usage: python -m spacy profile [OPTIONS] MODEL [INPUTS]                                            \n                                                                                                    \n Profile which functions take the most time in a spaCy pipeline. Input should be formatted as one   \n JSON object per line with a key \"text\". It can either be provided as a JSONL file, or be read from \n sys.sytdin. If no input file is specified, the IMDB dataset is loaded via Thinc.                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-profile                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model       TEXT      Trained pipeline to load [required]                                   │\n│      inputs      [INPUTS]  Location of input file. '-' for stdin.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n-texts  -n      INTEGER  Maximum number of texts to use if available [default: 10000]         │\n│ --help                      Show this message and exit.                                          │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
     "project assets": "                                                                                                    \n Usage: python -m spacy project assets [OPTIONS] [PROJECT_DIR]                                      \n                                                                                                    \n Fetch project assets like datasets and pretrained weights. Assets are defined in the \"assets\"      \n section of the project.yml. If a checksum is provided in the project.yml, the file is only         \n downloaded if no local file with the same checksum exists.                                         \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/tutorial/directory-and-assets.md          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   project_dir      [PROJECT_DIR]  Path to cloned project. Defaults to current working directory. │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --sparse  -S        Use sparse checkout for assets provided via Git, to only check out and clone │\n│                     the files needed. Requires Git v22.2+.                                       │\n│ --extra   -e        Download all assets, including those marked as 'extra'.                      │\n│ --help              Show this message and exit.                                                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "project clone": "                                                                                                    \n Usage: python -m spacy project clone [OPTIONS] NAME [DEST]                                         \n                                                                                                    \n Clone a project template from a repository. Calls into \"git\" and will only download the files from \n the given subdirectory. The GitHub repo defaults to the official Weasel template repo, but can be  \n customized (including using a private repo).                                                       \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#clipboard-clone                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    name      TEXT    The name of the template to clone [default: None] [required]              │\n│      dest      [DEST]  Where to clone the project. Defaults to current working directory         │\n│                        [default: None]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --repo    -r      TEXT  The repository to clone from                                             │\n│                         [default: https://github.com/explosion/projects]                         │\n│ --branch  -b      TEXT  The branch to clone from. 
If not provided, will attempt main, master     │\n│                         [default: None]                                                          │\n│ --sparse  -S            Use sparse Git checkout to only check out and clone the files needed.    │\n│                         Requires Git v22.2+.                                                     │\n│ --help                  Show this message and exit.                                              │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project clone": "                                                                                                    \n Usage: python -m spacy project clone [OPTIONS] NAME [DEST]                                         \n                                                                                                    \n Clone a project template from a repository. Calls into \"git\" and will only download the files from \n the given subdirectory. The GitHub repo defaults to the official Weasel template repo, but can be  \n customized (including using a private repo).                                                       \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#clipboard-clone                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    name      TEXT    The name of the template to clone [required]                              │\n│      dest      [DEST]  Where to clone the project. Defaults to current working directory         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --repo    -r      TEXT  The repository to clone from                                             │\n│                         [default: https://github.com/explosion/projects]                         │\n│ --branch  -b      TEXT  The branch to clone from. If not provided, will attempt main, master     │\n│ --sparse  -S            Use sparse Git checkout to only check out and clone the files needed.    │\n│                         Requires Git v22.2+.                                                     
│\n│ --help                  Show this message and exit.                                              │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
     "project document": "                                                                                                    \n Usage: python -m spacy project document [OPTIONS] [PROJECT_DIR]                                    \n                                                                                                    \n Auto-generate a README.md for a project. If the content is saved to a file, hidden markers are     \n added so you can add custom content before or after the auto-generated section and only the        \n auto-generated docs will be replaced when you re-run the command.                                  \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#closed_book-document               \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   project_dir      [PROJECT_DIR]  Path to cloned project. Defaults to current working directory. │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output    -o       PATH  Path to output Markdown file for output. Defaults to - for standard   │\n│                            output                                                                │\n│                            [default: -]                                                          │\n│ --no-emoji  -NE            Don't use emoji                                                       │\n│ --help                     Show this message and exit.                                           
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "project dvc": "                                                                                                    \n Usage: python -m spacy project dvc [OPTIONS] [PROJECT_DIR] [WORKFLOW]                              \n                                                                                                    \n Auto-generate Data Version Control (DVC) config. A DVC project can only define one pipeline, so    \n you need to specify one workflow defined in the project.yml. If no workflow is specified, the      \n first defined workflow is used. The DVC config will only be updated if the project.yml changed.    \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#repeat-dvc                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   project_dir      [PROJECT_DIR]  Location of project directory. Defaults to current working     │\n│                                   directory.                                                     │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n│   workflow         [WORKFLOW]     Name of workflow defined in project.yml. Defaults to first     │\n│                                   workflow if not set.                                           
│\n│                                   [default: None]                                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --verbose  -V        Print more info                                                             │\n│ --quiet    -q        Print less info                                                             │\n│ --force    -F        Force update DVC config                                                     │\n│ --help               Show this message and exit.                                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project dvc": "                                                                                                    \n Usage: python -m spacy project dvc [OPTIONS] [PROJECT_DIR] [WORKFLOW]                              \n                                                                                                    \n Auto-generate Data Version Control (DVC) config. A DVC project can only define one pipeline, so    \n you need to specify one workflow defined in the project.yml. If no workflow is specified, the      \n first defined workflow is used. The DVC config will only be updated if the project.yml changed.    \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#repeat-dvc                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   project_dir      [PROJECT_DIR]  Location of project directory. Defaults to current working     │\n│                                   directory.                                                     │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n│   workflow         [WORKFLOW]     Name of workflow defined in project.yml. Defaults to first     │\n│                                   workflow if not set.                                           
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --verbose  -V        Print more info                                                             │\n│ --quiet    -q        Print less info                                                             │\n│ --force    -F        Force update DVC config                                                     │\n│ --help               Show this message and exit.                                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
     "project pull": "                                                                                                    \n Usage: python -m spacy project pull [OPTIONS] [REMOTE] [PROJECT_DIR]                               \n                                                                                                    \n Retrieve available precomputed outputs from a remote storage. You can alias remotes in your        \n project.yml by mapping them to storage paths. A storage can be anything that the smart_open        \n library can upload to, e.g. AWS, Google Cloud Storage, SSH, local directories etc.                 \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#arrow_down-push                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   remote           [REMOTE]       Name or path of remote storage [default: default]              │\n│   project_dir      [PROJECT_DIR]  Location of project directory. Defaults to current working     │\n│                                   directory.                                                     │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
     "project push": "                                                                                                    \n Usage: python -m spacy project push [OPTIONS] [REMOTE] [PROJECT_DIR]                               \n                                                                                                    \n Persist outputs to a remote storage. You can alias remotes in your project.yml by mapping them to  \n storage paths. A storage can be anything that the smart_open library can upload to, e.g. AWS,      \n Google Cloud Storage, SSH, local directories etc.                                                  \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#arrow_up-push                      \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   remote           [REMOTE]       Name or path of remote storage [default: default]              │\n│   project_dir      [PROJECT_DIR]  Location of project directory. Defaults to current working     │\n│                                   directory.                                                     │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "train": "                                                                                                    \n Usage: python -m spacy train [OPTIONS] CONFIG_PATH                                                 \n                                                                                                    \n Train or update a spaCy pipeline. Requires data in spaCy's binary format. To convert data from     \n other formats, use the `spacy convert` command. The config file includes all settings and          \n hyperparameters used during training. To override settings in the config, e.g. settings that point \n to local paths or that you want to experiment with, you can override them as command line options. \n For instance, --training.batch_size 128 overrides the value of \"batch_size\" in the block           \n \"[training]\". The --code argument lets you pass in a Python file that's imported before training.  \n It can be used to register custom functions and architectures that can then be referenced in the   \n config.                                                                                            
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#train                                                               \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [default: None] [required]                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output,--output-path  -o          PATH     Output directory to store trained pipeline in       │\n│                                              [default: None]                                     │\n│ --code                  -c          PATH     Path to Python file with additional code            │\n│                                              (registered functions) to be imported               │\n│                                              [default: None]                                     │\n│ --verbose               -V,-VV               Display more information for debugging purposes     │\n│ --gpu-id                -g          INTEGER  GPU ID or -1 for CPU [default: -1]                  │\n│ --help                                       Show this message and exit.                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "train": "                                                                                                    \n Usage: python -m spacy train [OPTIONS] CONFIG_PATH                                                 \n                                                                                                    \n Train or update a spaCy pipeline. Requires data in spaCy's binary format. To convert data from     \n other formats, use the `spacy convert` command. The config file includes all settings and          \n hyperparameters used during training. To override settings in the config, e.g. settings that point \n to local paths or that you want to experiment with, you can override them as command line options. \n For instance, --training.batch_size 128 overrides the value of \"batch_size\" in the block \"\". The   \n --code argument lets you pass in a Python file that's imported before training. It can be used to  \n register custom functions and architectures that can then be referenced in the config.             
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#train                                                               \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output,--output-path  -o          PATH     Output directory to store trained pipeline in       │\n│ --code                  -c          PATH     Path to Python file with additional code            │\n│                                              (registered functions) to be imported               │\n│ --verbose               -V,-VV               Display more information for debugging purposes     │\n│ --gpu-id                -g          INTEGER  GPU ID or -1 for CPU [default: -1]                  │\n│ --help                                       Show this message and exit.                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
     "validate": "                                                                                                    \n Usage: python -m spacy validate [OPTIONS]                                                          \n                                                                                                    \n Validate the currently installed pipeline packages and spaCy version. Checks if the installed      \n packages are compatible and shows upgrade instructions if available. Should be run after `pip      \n install -U spacy`.                                                                                 \n                                                                                                    \n DOCS: https://spacy.io/api/cli#validate                                                            \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
   },
   "errors": {
-    "missing_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ Missing command.                                                                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-    "unknown_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_COMMAND__'.                                                     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+    "missing_command": "",
+    "unknown_command": "",
     "unknown_subcommand": {
-      "benchmark": "                                                                                                    \n Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...                                       \n                                                                                                    \n Commands for benchmarking pipelines.                                                               \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ accuracy   Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in │\n│            the binary .spacy format. The --gold-preproc option sets up the evaluation examples   │\n│            with gold-standard sentences and tokens for the predictions. Gold preprocessing helps │\n│            the annotations align to the tokenization, and may result in sequences of more        │\n│            consistent length. However, it may reduce runtime accuracy due to train/test skew. To │\n│            render a sample of dependency parses in a HTML file, set as output directory as the   │\n│            displacy_path argument.                                                               │\n│ speed      Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark data in the     │\n│            binary .spacy format.                                                                 
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-      "debug": "                                                                                                    \n Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...                                           \n                                                                                                    \n Suite of helpful commands for debugging and profiling. Includes commands to check and validate     \n your config files, training and evaluation data, and custom model implementations.                 \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ data          Analyze, debug and validate your training and development data. Outputs useful     │\n│               stats, and can help you find problems like invalid entity annotations, cyclic      │\n│               dependencies, low data labels and more.                                            │\n│ profile       Profile which functions take the most time in a spaCy pipeline. Input should be    │\n│               formatted as one JSON object per line with a key \"text\". It can either be provided │\n│               as a JSONL file, or be read from sys.sytdin. If no input file is specified, the    │\n│               IMDB dataset is loaded via Thinc.                                                  │\n│ config        Debug a config file and show validation errors. 
The command will create all        │\n│               objects in the tree and validate them. Note that some config validation errors are │\n│               blocking and will prevent the rest of the config from being resolved. This means   │\n│               that you may not see all validation errors at once and some issues are only shown  │\n│               once previous errors have been fixed. Similar as with the 'train' command, you can │\n│               override settings from the config as command line options. For instance,           │\n│               --training.batch_size 128 overrides the value of \"batch_size\" in the block         │\n│               \"[training]\".                                                                      │\n│ diff-config   Show a diff of a config file with respect to spaCy's defaults or another config    │\n│               file. If additional settings were used in the creation of the config file, then    │\n│               you must supply these as extra parameters to the command when comparing to the     │\n│               default settings. The generated diff can also be used when posting to the          │\n│               discussion forum to provide more information for the maintainers.                  │\n│ model         Analyze a Thinc model implementation. Includes checks for internal structure and   │\n│               activations during training.                                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-      "init": "                                                                                                    \n Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...                                            \n                                                                                                    \n Commands for initializing configs and pipeline packages.                                           \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ config        Generate a starter config file for training. Based on your requirements specified  │\n│               via the CLI arguments, this command generates a config with the optimal settings   │\n│               for your use case. This includes the choice of architecture, pretrained weights    │\n│               and related hyperparameters.                                                       │\n│ fill-config   Fill partial config file with default values. Will add all missing settings from   │\n│               the default config and will create all objects, check the registered functions for │\n│               their default values and update the base config. 
This command can be used with a   │\n│               config generated via the training quickstart widget:                               │\n│               https://spacy.io/usage/training#quickstart                                         │\n│ vectors       Convert word vectors for use with spaCy. Will export an nlp object that you can    │\n│               use in the [initialize] block of your config to initialize a model with vectors.   │\n│ labels        Generate JSON files for the labels in the data. This helps speed up the training   │\n│               process, since spaCy won't have to preprocess the data to extract the labels.      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-      "project": "                                                                                                    \n Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...                                         \n                                                                                                    \n Command-line interface for spaCy projects and templates. You'd typically start by cloning a        \n project template to a local directory and fetching its assets like datasets etc. See the project's \n project.yml for the available commands.                                                            \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ assets     Fetch project assets like datasets and pretrained weights. Assets are defined in the  │\n│            \"assets\" section of the project.yml. If a checksum is provided in the project.yml,    │\n│            the file is only downloaded if no local file with the same checksum exists.           │\n│ clone      Clone a project template from a repository. Calls into \"git\" and will only download   │\n│            the files from the given subdirectory. The GitHub repo defaults to the official       │\n│            Weasel template repo, but can be customized (including using a private repo).         │\n│ document   Auto-generate a README.md for a project. 
If the content is saved to a file, hidden    │\n│            markers are added so you can add custom content before or after the auto-generated    │\n│            section and only the auto-generated docs will be replaced when you re-run the         │\n│            command.                                                                              │\n│ dvc        Auto-generate Data Version Control (DVC) config. A DVC project can only define one    │\n│            pipeline, so you need to specify one workflow defined in the project.yml. If no       │\n│            workflow is specified, the first defined workflow is used. The DVC config will only   │\n│            be updated if the project.yml changed.                                                │\n│ run        Run a named command or workflow defined in the project.yml. If a workflow name is     │\n│            specified, all commands in the workflow are run, in order. If commands define         │\n│            dependencies and/or outputs, they will only be re-run if state has changed.           │\n│ pull       Retrieve available precomputed outputs from a remote storage. You can alias remotes   │\n│            in your project.yml by mapping them to storage paths. A storage can be anything that  │\n│            the smart_open library can upload to, e.g. AWS, Google Cloud Storage, SSH, local      │\n│            directories etc.                                                                      │\n│ push       Persist outputs to a remote storage. You can alias remotes in your project.yml by     │\n│            mapping them to storage paths. A storage can be anything that the smart_open library  │\n│            can upload to, e.g. AWS, Google Cloud Storage, SSH, local directories etc.            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
+      "benchmark": "",
+      "debug": "",
+      "init": "",
+      "project": ""
     }
   },
   "group_help": {
-    "benchmark": "                                                                                                    \n Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...                                       \n                                                                                                    \n Commands for benchmarking pipelines.                                                               \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ accuracy   Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in │\n│            the binary .spacy format. The --gold-preproc option sets up the evaluation examples   │\n│            with gold-standard sentences and tokens for the predictions. Gold preprocessing helps │\n│            the annotations align to the tokenization, and may result in sequences of more        │\n│            consistent length. However, it may reduce runtime accuracy due to train/test skew. To │\n│            render a sample of dependency parses in a HTML file, set as output directory as the   │\n│            displacy_path argument.                                                               │\n│ speed      Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark data in the     │\n│            binary .spacy format.                                                                 
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug": "                                                                                                    \n Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...                                           \n                                                                                                    \n Suite of helpful commands for debugging and profiling. Includes commands to check and validate     \n your config files, training and evaluation data, and custom model implementations.                 \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ data          Analyze, debug and validate your training and development data. Outputs useful     │\n│               stats, and can help you find problems like invalid entity annotations, cyclic      │\n│               dependencies, low data labels and more.                                            │\n│ profile       Profile which functions take the most time in a spaCy pipeline. Input should be    │\n│               formatted as one JSON object per line with a key \"text\". It can either be provided │\n│               as a JSONL file, or be read from sys.sytdin. If no input file is specified, the    │\n│               IMDB dataset is loaded via Thinc.                                                  │\n│ config        Debug a config file and show validation errors. 
The command will create all        │\n│               objects in the tree and validate them. Note that some config validation errors are │\n│               blocking and will prevent the rest of the config from being resolved. This means   │\n│               that you may not see all validation errors at once and some issues are only shown  │\n│               once previous errors have been fixed. Similar as with the 'train' command, you can │\n│               override settings from the config as command line options. For instance,           │\n│               --training.batch_size 128 overrides the value of \"batch_size\" in the block         │\n│               \"[training]\".                                                                      │\n│ diff-config   Show a diff of a config file with respect to spaCy's defaults or another config    │\n│               file. If additional settings were used in the creation of the config file, then    │\n│               you must supply these as extra parameters to the command when comparing to the     │\n│               default settings. The generated diff can also be used when posting to the          │\n│               discussion forum to provide more information for the maintainers.                  │\n│ model         Analyze a Thinc model implementation. Includes checks for internal structure and   │\n│               activations during training.                                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init": "                                                                                                    \n Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...                                            \n                                                                                                    \n Commands for initializing configs and pipeline packages.                                           \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ config        Generate a starter config file for training. Based on your requirements specified  │\n│               via the CLI arguments, this command generates a config with the optimal settings   │\n│               for your use case. This includes the choice of architecture, pretrained weights    │\n│               and related hyperparameters.                                                       │\n│ fill-config   Fill partial config file with default values. Will add all missing settings from   │\n│               the default config and will create all objects, check the registered functions for │\n│               their default values and update the base config. 
This command can be used with a   │\n│               config generated via the training quickstart widget:                               │\n│               https://spacy.io/usage/training#quickstart                                         │\n│ vectors       Convert word vectors for use with spaCy. Will export an nlp object that you can    │\n│               use in the [initialize] block of your config to initialize a model with vectors.   │\n│ labels        Generate JSON files for the labels in the data. This helps speed up the training   │\n│               process, since spaCy won't have to preprocess the data to extract the labels.      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "project": "                                                                                                    \n Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...                                         \n                                                                                                    \n Command-line interface for spaCy projects and templates. You'd typically start by cloning a        \n project template to a local directory and fetching its assets like datasets etc. See the project's \n project.yml for the available commands.                                                            \n                                                                                                    \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ assets     Fetch project assets like datasets and pretrained weights. Assets are defined in the  │\n│            \"assets\" section of the project.yml. If a checksum is provided in the project.yml,    │\n│            the file is only downloaded if no local file with the same checksum exists.           │\n│ clone      Clone a project template from a repository. Calls into \"git\" and will only download   │\n│            the files from the given subdirectory. The GitHub repo defaults to the official       │\n│            Weasel template repo, but can be customized (including using a private repo).         │\n│ document   Auto-generate a README.md for a project. 
If the content is saved to a file, hidden    │\n│            markers are added so you can add custom content before or after the auto-generated    │\n│            section and only the auto-generated docs will be replaced when you re-run the         │\n│            command.                                                                              │\n│ dvc        Auto-generate Data Version Control (DVC) config. A DVC project can only define one    │\n│            pipeline, so you need to specify one workflow defined in the project.yml. If no       │\n│            workflow is specified, the first defined workflow is used. The DVC config will only   │\n│            be updated if the project.yml changed.                                                │\n│ run        Run a named command or workflow defined in the project.yml. If a workflow name is     │\n│            specified, all commands in the workflow are run, in order. If commands define         │\n│            dependencies and/or outputs, they will only be re-run if state has changed.           │\n│ pull       Retrieve available precomputed outputs from a remote storage. You can alias remotes   │\n│            in your project.yml by mapping them to storage paths. A storage can be anything that  │\n│            the smart_open library can upload to, e.g. AWS, Google Cloud Storage, SSH, local      │\n│            directories etc.                                                                      │\n│ push       Persist outputs to a remote storage. You can alias remotes in your project.yml by     │\n│            mapping them to storage paths. A storage can be anything that the smart_open library  │\n│            can upload to, e.g. AWS, Google Cloud Storage, SSH, local directories etc.            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
+    "benchmark": "                                                                                                    \n Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...                                       \n                                                                                                    \n Commands for benchmarking pipelines.                                                               \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ accuracy   Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation         │\n│            data in the binary .spacy format. The --gold-preproc option sets up the               │\n│            evaluation examples with gold-standard sentences and tokens for the                   │\n│            predictions. Gold preprocessing helps the annotations align to the                    │\n│            tokenization, and may result in sequences of more consistent length. However,         │\n│            it may reduce runtime accuracy due to train/test skew. To render a sample of          │\n│            dependency parses in a HTML file, set as output directory as the                      │\n│            displacy_path argument.                                                               │\n│ speed      Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark                 │\n│            data in the binary .spacy format.                                                     
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "debug": "                                                                                                    \n Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...                                           \n                                                                                                    \n Suite of helpful commands for debugging and profiling. Includes commands to check and validate     \n your config files, training and evaluation data, and custom model implementations.                 \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ data          Analyze, debug and validate your training and development data. Outputs            │\n│               useful stats, and can help you find problems like invalid entity annotations,      │\n│               cyclic dependencies, low data labels and more.                                     │\n│ profile       Profile which functions take the most time in a spaCy pipeline.                    │\n│               Input should be formatted as one JSON object per line with a key \"text\".           │\n│               It can either be provided as a JSONL file, or be read from sys.sytdin.             │\n│               If no input file is specified, the IMDB dataset is loaded via Thinc.               │\n│ config        Debug a config file and show validation errors. The command will                   │\n│               create all objects in the tree and validate them. 
Note that some config            │\n│               validation errors are blocking and will prevent the rest of the config from        │\n│               being resolved. This means that you may not see all validation errors at           │\n│               once and some issues are only shown once previous errors have been fixed.          │\n│               Similar as with the 'train' command, you can override settings from the config     │\n│               as command line options. For instance, --training.batch_size 128 overrides         │\n│               the value of \"batch_size\" in the block \"\".                                         │\n│ diff-config   Show a diff of a config file with respect to spaCy's defaults or another config    │\n│               file. If                                                                           │\n│               additional settings were used in the creation of the config file, then you         │\n│               must supply these as extra parameters to the command when comparing to the default │\n│               settings. The generated diff                                                       │\n│               can also be used when posting to the discussion forum to provide more              │\n│               information for the maintainers.                                                   │\n│ model         Analyze a Thinc model implementation. Includes checks for internal structure       │\n│               and activations during training.                                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "init": "                                                                                                    \n Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...                                            \n                                                                                                    \n Commands for initializing configs and pipeline packages.                                           \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ config        Generate a starter config file for training. Based on your requirements            │\n│               specified via the CLI arguments, this command generates a config with the          │\n│               optimal settings for your use case. This includes the choice of architecture,      │\n│               pretrained weights and related hyperparameters.                                    │\n│ fill-config   Fill partial config file with default values. Will add all missing settings        │\n│               from the default config and will create all objects, check the registered          │\n│               functions for their default values and update the base config. This command        │\n│               can be used with a config generated via the training quickstart widget:            │\n│               https://spacy.io/usage/training#quickstart                                         │\n│ vectors       Convert word vectors for use with spaCy. 
Will export an nlp object that            │\n│               you can use in the  block of your config to initialize                             │\n│               a model with vectors.                                                              │\n│ labels        Generate JSON files for the labels in the data. This helps speed up the            │\n│               training process, since spaCy won't have to preprocess the data to                 │\n│               extract the labels.                                                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
+    "project": "                                                                                                    \n Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...                                         \n                                                                                                    \n Command-line interface for spaCy projects and templates. You'd typically start by cloning a        \n project template to a local directory and fetching its assets like datasets etc. See the project's \n project.yml for the available commands.                                                            \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ assets     Fetch project assets like datasets and pretrained weights. Assets are                 │\n│            defined in the \"assets\" section of the project.yml. If a checksum is                  │\n│            provided in the project.yml, the file is only downloaded if no local file             │\n│            with the same checksum exists.                                                        │\n│ clone      Clone a project template from a repository. Calls into \"git\" and will                 │\n│            only download the files from the given subdirectory. The GitHub repo                  │\n│            defaults to the official Weasel template repo, but can be customized                  │\n│            (including using a private repo).                                                     
│\n│ document   Auto-generate a README.md for a project. If the content is saved to a file,           │\n│            hidden markers are added so you can add custom content before or after the            │\n│            auto-generated section and only the auto-generated docs will be replaced              │\n│            when you re-run the command.                                                          │\n│ dvc        Auto-generate Data Version Control (DVC) config. A DVC                                │\n│            project can only define one pipeline, so you need to specify one workflow             │\n│            defined in the project.yml. If no workflow is specified, the first defined            │\n│            workflow is used. The DVC config will only be updated if the project.yml              │\n│            changed.                                                                              │\n│ run        Run a named command or workflow defined in the project.yml. If a workflow             │\n│            name is specified, all commands in the workflow are run, in order. If                 │\n│            commands define dependencies and/or outputs, they will only be re-run if              │\n│            state has changed.                                                                    │\n│ pull       Retrieve available precomputed outputs from a remote storage.                         │\n│            You can alias remotes in your project.yml by mapping them to storage paths.           │\n│            A storage can be anything that the smart_open library can upload to, e.g.             │\n│            AWS, Google Cloud Storage, SSH, local directories etc.                                │\n│ push       Persist outputs to a remote storage. You can alias remotes in your                    │\n│            project.yml by mapping them to storage paths. A storage can be anything that          │\n│            the smart_open library can upload to, e.g. 
AWS, Google Cloud Storage, SSH,            │\n│            local directories etc.                                                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
   },
   "hidden_group_commands": {
     "benchmark": [],
@@ -114,5 +114,5 @@
     "train",
     "validate"
   ],
-  "root_help": "                                                                                                    \n Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...                                                 \n                                                                                                    \n spaCy Command-line Interface                                                                       \n                                                                                                    \n DOCS: https://spacy.io/api/cli                                                                     \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --install-completion          Install completion for the current shell.                          │\n│ --show-completion             Show completion for the current shell, to copy it or customize the │\n│                               installation.                                                      │\n│ --help                        Show this message and exit.                                        │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ download         Download compatible trained pipeline from the default download path using pip.  │\n│                  If --direct flag is set, the command expects the full package name with         │\n│                  version. For direct downloads, the compatibility check will be skipped. All     │\n│                  additional arguments provided to this command will be passed to `pip install`   │\n│                  on package installation.                                                        
│\n│ info             Print info about spaCy installation. If a pipeline is specified as an argument, │\n│                  print its meta information. Flag --markdown prints details in Markdown for easy │\n│                  copy-pasting to GitHub issues.                                                  │\n│ apply            Apply a trained pipeline to documents to get predictions. Expects a loadable    │\n│                  spaCy pipeline and path to the data, which can be a directory or a file. The    │\n│                  data files can be provided in multiple formats:     1. .spacy files     2.      │\n│                  .jsonl files with a specified \"field\" to read the text from.     3. Files with  │\n│                  any other extension are assumed to be containing        a single document.      │\n│                  DOCS: https://spacy.io/api/cli#apply                                            │\n│ assemble         Assemble a spaCy pipeline from a config file. The config file includes all      │\n│                  settings for initializing the pipeline. To override settings in the config,     │\n│                  e.g. settings that point to local paths or that you want to experiment with,    │\n│                  you can override them as command line options. The --code argument lets you     │\n│                  pass in a Python file that can be used to register custom functions that are    │\n│                  referenced in the config.                                                       │\n│ convert          Convert files into json or DocBin format for training. The resulting .spacy     │\n│                  file can be used with the train command and other experiment management         │\n│                  functions.                                                                      │\n│ evaluate         Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation   │\n│                  data in the binary .spacy format. 
The --gold-preproc option sets up the         │\n│                  evaluation examples with gold-standard sentences and tokens for the             │\n│                  predictions. Gold preprocessing helps the annotations align to the              │\n│                  tokenization, and may result in sequences of more consistent length. However,   │\n│                  it may reduce runtime accuracy due to train/test skew. To render a sample of    │\n│                  dependency parses in a HTML file, set as output directory as the displacy_path  │\n│                  argument.                                                                       │\n│ find-function    Find the module, path and line number to the file the registered function is    │\n│                  defined in, if available.                                                       │\n│ find-threshold   Runs prediction trials for a trained model with varying thresholds to maximize  │\n│                  the specified metric. The search space for the threshold is traversed linearly  │\n│                  from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`   │\n│                  (the corresponding API call to `spacy.cli.find_threshold.find_threshold()`      │\n│                  returns all results).                                                           │\n│ package          Generate an installable Python package for a pipeline. Includes binary data,    │\n│                  meta and required installation files. A new directory will be created in the    │\n│                  specified output directory, and the data will be copied over. If --create-meta  │\n│                  is set and a meta.json already exists in the output directory, the existing     │\n│                  values will be used as the defaults in the command-line prompt. 
After           │\n│                  packaging, \"python -m build --sdist\" is run in the package directory, which     │\n│                  will create a .tar.gz archive that can be installed via \"pip install\".          │\n│ pretrain         Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components, using   │\n│                  an approximate language-modelling objective. Two objective types are available, │\n│                  vector-based and character-based.                                               │\n│ train            Train or update a spaCy pipeline. Requires data in spaCy's binary format. To    │\n│                  convert data from other formats, use the `spacy convert` command. The config    │\n│                  file includes all settings and hyperparameters used during training. To         │\n│                  override settings in the config, e.g. settings that point to local paths or     │\n│                  that you want to experiment with, you can override them as command line         │\n│                  options. For instance, --training.batch_size 128 overrides the value of         │\n│                  \"batch_size\" in the block \"[training]\". The --code argument lets you pass in a  │\n│                  Python file that's imported before training. It can be used to register custom  │\n│                  functions and architectures that can then be referenced in the config.          │\n│ validate         Validate the currently installed pipeline packages and spaCy version. Checks if │\n│                  the installed packages are compatible and shows upgrade instructions if         │\n│                  available. Should be run after `pip install -U spacy`.                          │\n│ debug            Suite of helpful commands for debugging and profiling. 
Includes commands to     │\n│                  check and validate your config files, training and evaluation data, and custom  │\n│                  model implementations.                                                          │\n│ benchmark        Commands for benchmarking pipelines.                                            │\n│ init             Commands for initializing configs and pipeline packages.                        │\n│ project          Command-line interface for spaCy projects and templates. You'd typically start  │\n│                  by cloning a project template to a local directory and fetching its assets like │\n│                  datasets etc. See the project's project.yml for the available commands.         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
+  "root_help": "                                                                                                    \n Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...                                                 \n                                                                                                    \n spaCy Command-line Interface                                                                       \n                                                                                                    \n DOCS: https://spacy.io/api/cli                                                                     \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --install-completion          Install completion for the current shell.                          │\n│ --show-completion             Show completion for the current shell, to copy it or customize the │\n│                               installation.                                                      │\n│ --help                        Show this message and exit.                                        │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ download         Download compatible trained pipeline from the default download path using       │\n│                  pip. If --direct flag is set, the command expects the full package name with    │\n│                  version. For direct downloads, the compatibility check will be skipped. All     │\n│                  additional arguments provided to this command will be passed to `pip install`   │\n│                  on package installation.                                                        
│\n│ info             Print info about spaCy installation. If a pipeline is specified as an argument, │\n│                  print its meta information. Flag --markdown prints details in Markdown for easy │\n│                  copy-pasting to GitHub issues.                                                  │\n│ apply            Apply a trained pipeline to documents to get predictions.                       │\n│                  Expects a loadable spaCy pipeline and path to the data, which                   │\n│                  can be a directory or a file.                                                   │\n│                  The data files can be provided in multiple formats:                             │\n│                      1. .spacy files                                                             │\n│                      2. .jsonl files with a specified \"field\" to read the text from.             │\n│                      3. Files with any other extension are assumed to be containing              │\n│                         a single document.                                                       │\n│                  DOCS: https://spacy.io/api/cli#apply                                            │\n│ assemble         Assemble a spaCy pipeline from a config file. The config file includes          │\n│                  all settings for initializing the pipeline. To override settings in the         │\n│                  config, e.g. settings that point to local paths or that you want to             │\n│                  experiment with, you can override them as command line options. The             │\n│                  --code argument lets you pass in a Python file that can be used to              │\n│                  register custom functions that are referenced in the config.                    │\n│ convert          Convert files into json or DocBin format for training. 
The resulting .spacy     │\n│                  file can be used with the train command and other experiment management         │\n│                  functions.                                                                      │\n│ evaluate         Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation   │\n│                  data in the binary .spacy format. The --gold-preproc option sets up the         │\n│                  evaluation examples with gold-standard sentences and tokens for the             │\n│                  predictions. Gold preprocessing helps the annotations align to the              │\n│                  tokenization, and may result in sequences of more consistent length. However,   │\n│                  it may reduce runtime accuracy due to train/test skew. To render a sample of    │\n│                  dependency parses in a HTML file, set as output directory as the                │\n│                  displacy_path argument.                                                         │\n│ find-function    Find the module, path and line number to the file the registered                │\n│                  function is defined in, if available.                                           │\n│ find-threshold   Runs prediction trials for a trained model with varying thresholds to maximize  │\n│                  the specified metric. The search space for the threshold is traversed linearly  │\n│                  from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`   │\n│                  (the corresponding API call to `spacy.cli.find_threshold.find_threshold()`      │\n│                  returns all results).                                                           │\n│ package          Generate an installable Python package for a pipeline. Includes binary data,    │\n│                  meta and required installation files. 
A new directory will be created in the    │\n│                  specified output directory, and the data will be copied over. If                │\n│                  --create-meta is set and a meta.json already exists in the output directory,    │\n│                  the existing values will be used as the defaults in the command-line prompt.    │\n│                  After packaging, \"python -m build --sdist\" is run in the package directory,     │\n│                  which will create a .tar.gz archive that can be installed via \"pip install\".    │\n│ pretrain         Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components,         │\n│                  using an approximate language-modelling objective. Two objective types          │\n│                  are available, vector-based and character-based.                                │\n│ train            Train or update a spaCy pipeline. Requires data in spaCy's binary format. To    │\n│                  convert data from other formats, use the `spacy convert` command. The           │\n│                  config file includes all settings and hyperparameters used during training.     │\n│                  To override settings in the config, e.g. settings that point to local           │\n│                  paths or that you want to experiment with, you can override them as             │\n│                  command line options. For instance, --training.batch_size 128 overrides         │\n│                  the value of \"batch_size\" in the block \"\". The --code argument                  │\n│                  lets you pass in a Python file that's imported before training. It can be       │\n│                  used to register custom functions and architectures that can then be            │\n│                  referenced in the config.                                                       │\n│ validate         Validate the currently installed pipeline packages and spaCy version. 
Checks    │\n│                  if the installed packages are compatible and shows upgrade instructions if      │\n│                  available. Should be run after `pip install -U spacy`.                          │\n│ debug            Suite of helpful commands for debugging and profiling. Includes                 │\n│                  commands to check and validate your config files, training and evaluation data, │\n│                  and custom model implementations.                                               │\n│ benchmark        Commands for benchmarking pipelines.                                            │\n│ init             Commands for initializing configs and pipeline packages.                        │\n│ project          Command-line interface for spaCy projects and templates.                        │\n│                  You'd typically start by cloning a project template to a local directory and    │\n│                  fetching its assets like datasets etc. See the project's project.yml for the    │\n│                  available commands.                                                             │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
 }

From 8a318dbae5eaa8119a7f0be9ee8a3f168221ae27 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Mon, 9 Mar 2026 16:12:26 +0100
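Subject: [PATCH 35/42] Fix manifest

Read the captured CLI error text from `result.output` instead of
`result.stdout` in build_manifest.py, and regenerate cli_manifest.json so
that the "errors" entries contain the actual usage/error messages rather
than empty strings.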

---
 spacy_cli/build_manifest.py |  6 +++---
 spacy_cli/cli_manifest.json | 12 ++++++------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/spacy_cli/build_manifest.py b/spacy_cli/build_manifest.py
index 6e019bdcb95..71982d82d77 100644
--- a/spacy_cli/build_manifest.py
+++ b/spacy_cli/build_manifest.py
@@ -58,7 +58,7 @@ def build_manifest() -> Dict[str, object]:
             group_help[name] = _get_help(runner, app, [name])
             unknown_subcommand[name] = _invoke(
                 runner, app, [name, UNKNOWN_SUBCOMMAND_TOKEN]
-            ).stdout
+            ).output
             for sub_name in subcommands:
                 help_text = _maybe_get_help(runner, app, [name, sub_name])
                 if help_text is not None:
@@ -76,8 +76,8 @@ def build_manifest() -> Dict[str, object]:
         "group_help": group_help,
         "command_help": command_help,
         "errors": {
-            "missing_command": _invoke(runner, app, []).stdout,
-            "unknown_command": _invoke(runner, app, [UNKNOWN_COMMAND_TOKEN]).stdout,
+            "missing_command": _invoke(runner, app, []).output,
+            "unknown_command": _invoke(runner, app, [UNKNOWN_COMMAND_TOKEN]).output,
             "unknown_subcommand": unknown_subcommand,
         },
     }
diff --git a/spacy_cli/cli_manifest.json b/spacy_cli/cli_manifest.json
index 12d760416c5..9aa7da9b973 100644
--- a/spacy_cli/cli_manifest.json
+++ b/spacy_cli/cli_manifest.json
@@ -36,13 +36,13 @@
     "validate": "                                                                                                    \n Usage: python -m spacy validate [OPTIONS]                                                          \n                                                                                                    \n Validate the currently installed pipeline packages and spaCy version. Checks if the installed      \n packages are compatible and shows upgrade instructions if available. Should be run after `pip      \n install -U spacy`.                                                                                 \n                                                                                                    \n DOCS: https://spacy.io/api/cli#validate                                                            \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
   },
   "errors": {
-    "missing_command": "",
-    "unknown_command": "",
+    "missing_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ Missing command.                                                                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+    "unknown_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_COMMAND__'.                                                     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
     "unknown_subcommand": {
-      "benchmark": "",
-      "debug": "",
-      "init": "",
-      "project": ""
+      "benchmark": "Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy benchmark --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.                                                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+      "debug": "Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy debug --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.                                                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+      "init": "Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy init --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.                                                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+      "project": "Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy project --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.                                                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n"
     }
   },
   "group_help": {

From 4967496dbdd7696334cb8e5489d853711247eb7f Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 20 Mar 2026 09:12:08 +0100
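Subject: [PATCH 36/42] Update test_cli_launcher

Run the manifest freshness check in a subprocess so that the command
registration order is not affected by other test modules importing CLI
submodules, which register commands as a side effect of import.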

---
 spacy/tests/test_cli_launcher.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/spacy/tests/test_cli_launcher.py b/spacy/tests/test_cli_launcher.py
index 392572fdeec..1fd6e3a6fc6 100644
--- a/spacy/tests/test_cli_launcher.py
+++ b/spacy/tests/test_cli_launcher.py
@@ -4,7 +4,6 @@
 
 import pytest
 
-from spacy_cli.build_manifest import build_manifest
 from spacy_cli.static import load_manifest
 
 launcher_module = importlib.import_module("spacy_cli.main")
@@ -46,7 +45,21 @@ def test_load_for_argv_imports_project_on_demand():
 
 
 def test_manifest_is_current():
-    assert build_manifest() == load_manifest()
+    # Run in a subprocess to avoid command registration order being affected
+    # by other test modules importing CLI submodules (which register commands
+    # as a side effect of import).
+    result = subprocess.run(
+        [
+            sys.executable,
+            "-c",
+            "from spacy_cli.build_manifest import build_manifest; "
+            "from spacy_cli.static import load_manifest; "
+            "assert build_manifest() == load_manifest()",
+        ],
+        capture_output=True,
+        text=True,
+    )
+    assert result.returncode == 0, result.stderr
 
 
 def test_launcher_root_help_uses_static(capsys, monkeypatch):

From 93547fe2e8ceae6cae66f3caf31c0148b42d9dea Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Mon, 23 Mar 2026 14:56:27 +0100
Subject: [PATCH 37/42] Fix lint issues across PR files

- setup.py: rename loop variable shadowing parameter (B020)
- _util.py: remove unused registry import (F401), use specific except clause (E722, B904)
- test_cli_app.py: use dict literals instead of dict() (C408)
- main.py: extract _try_static_group to reduce complexity (C901)
---
 setup.py                    |  8 +++----
 spacy/cli/_util.py          |  5 ++--
 spacy/tests/test_cli_app.py | 48 ++++++++++++++++++-------------------
 spacy_cli/main.py           | 30 +++++++++++++----------
 4 files changed, 47 insertions(+), 44 deletions(-)

diff --git a/setup.py b/setup.py
index 2de619c720a..e18e98b9249 100755
--- a/setup.py
+++ b/setup.py
@@ -158,10 +158,10 @@ def _minimal_ext_cmd(cmd):
 
 
 def clean(path):
-    for path in path.glob("**/*"):
-        if path.is_file() and path.suffix in (".so", ".cpp", ".html"):
-            print(f"Deleting {path.name}")
-            path.unlink()
+    for child in path.glob("**/*"):
+        if child.is_file() and child.suffix in (".so", ".cpp", ".html"):
+            print(f"Deleting {child.name}")
+            child.unlink()
 
 
 def setup_package():
diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py
index 8a8b4e0f417..35f899b2cf8 100644
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@@ -26,7 +26,6 @@
     ENV_VARS,
     import_file,
     logger,
-    registry,
     run_command,
 )
 
@@ -206,8 +205,8 @@ def get_git_version(
     """
     try:
         ret = run_command("git --version", capture=True)
-    except:
-        raise RuntimeError(error)
+    except Exception as err:
+        raise RuntimeError(error) from err
     stdout = ret.stdout.strip()
     if not stdout or not stdout.startswith("git version"):
         return 0, 0
diff --git a/spacy/tests/test_cli_app.py b/spacy/tests/test_cli_app.py
index 7ff3f755a31..2b3aed04060 100644
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@@ -288,30 +288,30 @@ def test_find_function_invalid():
 example_ents = ["O", "O", "I-ANIMAL"]
 example_spans = [(2, 3, "ANIMAL")]
 
-TRAIN_EXAMPLE_1 = dict(
-    words=example_words_1,
-    lemmas=example_lemmas_1,
-    tags=example_tags,
-    morphs=example_morphs,
-    deps=example_deps,
-    heads=[1, 1, 1],
-    pos=example_pos,
-    ents=example_ents,
-    spans=example_spans,
-    cats={"CAT": 1.0, "DOG": 0.0},
-)
-TRAIN_EXAMPLE_2 = dict(
-    words=example_words_2,
-    lemmas=example_lemmas_2,
-    tags=example_tags,
-    morphs=example_morphs,
-    deps=example_deps,
-    heads=[1, 1, 1],
-    pos=example_pos,
-    ents=example_ents,
-    spans=example_spans,
-    cats={"CAT": 0.0, "DOG": 1.0},
-)
+TRAIN_EXAMPLE_1 = {
+    "words": example_words_1,
+    "lemmas": example_lemmas_1,
+    "tags": example_tags,
+    "morphs": example_morphs,
+    "deps": example_deps,
+    "heads": [1, 1, 1],
+    "pos": example_pos,
+    "ents": example_ents,
+    "spans": example_spans,
+    "cats": {"CAT": 1.0, "DOG": 0.0},
+}
+TRAIN_EXAMPLE_2 = {
+    "words": example_words_2,
+    "lemmas": example_lemmas_2,
+    "tags": example_tags,
+    "morphs": example_morphs,
+    "deps": example_deps,
+    "heads": [1, 1, 1],
+    "pos": example_pos,
+    "ents": example_ents,
+    "spans": example_spans,
+    "cats": {"CAT": 0.0, "DOG": 1.0},
+}
 
 
 @pytest.mark.slow
diff --git a/spacy_cli/main.py b/spacy_cli/main.py
index cbe6e376c58..f8e6cabe808 100644
--- a/spacy_cli/main.py
+++ b/spacy_cli/main.py
@@ -38,24 +38,28 @@ def _try_static(argv: Iterable[str]):
         template = manifest["errors"]["unknown_command"]
         return template.replace(UNKNOWN_COMMAND_TOKEN, first), 2
     if first in known_groups:
-        if len(args) == 1 or args[1] in HELP_OPTIONS:
-            if plugin_command_names:
-                return None
-            return manifest["group_help"][first], 0
-        second = args[1]
-        if second not in known_groups[first]:
-            if plugin_command_names:
-                return None
-            template = manifest["errors"]["unknown_subcommand"][first]
-            return template.replace(UNKNOWN_SUBCOMMAND_TOKEN, second), 2
-        if any(arg in HELP_OPTIONS for arg in args[2:]):
-            return manifest["command_help"][f"{first} {second}"], 0
-        return None
+        return _try_static_group(args, first, manifest, known_groups, plugin_command_names)
     if any(arg in HELP_OPTIONS for arg in args[1:]):
         return manifest["command_help"][first], 0
     return None
 
 
+def _try_static_group(args, first, manifest, known_groups, plugin_command_names):
+    if len(args) == 1 or args[1] in HELP_OPTIONS:
+        if plugin_command_names:
+            return None
+        return manifest["group_help"][first], 0
+    second = args[1]
+    if second not in known_groups[first]:
+        if plugin_command_names:
+            return None
+        template = manifest["errors"]["unknown_subcommand"][first]
+        return template.replace(UNKNOWN_SUBCOMMAND_TOKEN, second), 2
+    if any(arg in HELP_OPTIONS for arg in args[2:]):
+        return manifest["command_help"][f"{first} {second}"], 0
+    return None
+
+
 def main(argv: Optional[Iterable[str]] = None) -> None:
     args = sys.argv[1:] if argv is None else list(argv)
     try:

From c5bcffdbf89df6a3c262467d93d30d7a15e6203a Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Mon, 23 Mar 2026 14:58:26 +0100
Subject: [PATCH 38/42] Fix import sorting (ruff I001) for CI validation

---
 spacy/cli/__init__.py       | 2 +-
 spacy/cli/_dispatch.py      | 1 -
 spacy/tests/test_cli_app.py | 1 -
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py
index ded45efe9f7..f176a2eabad 100644
--- a/spacy/cli/__init__.py
+++ b/spacy/cli/__init__.py
@@ -12,8 +12,8 @@
     PUBLIC_ATTRS,
     SUBCOMMAND_MODULES,
     TOP_LEVEL_MODULES,
+    iter_builtin_modules,
 )
-from ._dispatch import iter_builtin_modules
 from ._util import COMMAND, add_project_cli, app
 
 HELP_OPTIONS = {"--help", "-h"}
diff --git a/spacy/cli/_dispatch.py b/spacy/cli/_dispatch.py
index e1975dd7e1d..5ee4f654d39 100644
--- a/spacy/cli/_dispatch.py
+++ b/spacy/cli/_dispatch.py
@@ -1,6 +1,5 @@
 from typing import Dict, Iterable, Optional, Tuple
 
-
 CommandPath = Tuple[str, ...]
 
 
diff --git a/spacy/tests/test_cli_app.py b/spacy/tests/test_cli_app.py
index 2b3aed04060..c72e26c3444 100644
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@@ -11,7 +11,6 @@
 
 from .util import make_tempdir, normalize_whitespace
 
-
 load_all_commands()
 
 

From ec786c85ad8252b9e93351a17ea9886f310205d0 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Mon, 23 Mar 2026 14:59:58 +0100
Subject: [PATCH 39/42] Add local lint script matching CI validate + mypy
 checks

---
 lint.sh | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100755 lint.sh

diff --git a/lint.sh b/lint.sh
new file mode 100755
index 00000000000..0ec0bda3f6b
--- /dev/null
+++ b/lint.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# Local lint script matching the CI Validate job + mypy type checks.
+# Fixes formatting and import sorting in-place, then re-verifies in
+# check mode to catch any conflicts between the two, and runs mypy.
+set -euo pipefail  # unguarded command failures and unset variables abort the script
+
+err=0  # flipped to 1 by any failed check below; decides the exit status at the end
+
+echo "==> ruff format (auto-fixing)"
+python -m ruff format spacy  # NOTE(review): only spacy/ is covered, not spacy_cli/ - confirm this matches CI
+
+echo "==> ruff isort (auto-fixing)"
+python -m ruff check spacy --select I --fix  # "I" restricts the run to the import-sorting rules
+
+echo "==> ruff format (verify)"
+if ! python -m ruff format spacy --check; then  # inside `if`, so a failure does not trip `set -e`
+    echo "FAIL: isort fix broke formatting"
+    err=1  # record the failure and keep going so the remaining checks still run
+fi
+
+echo "==> ruff isort (verify)"
+if ! python -m ruff check spacy --select I; then  # same rules as the fix step, without --fix
+    echo "FAIL: format fix broke import sorting"
+    err=1
+fi
+
+echo "==> mypy"
+if ! python -m mypy spacy; then  # type-check failures are recorded like the lint failures above
+    err=1
+fi
+
+if [ "$err" -ne 0 ]; then  # single exit point for all recorded failures
+    echo "FAIL: see errors above"
+    exit 1
+fi
+
+echo "OK: all checks passed"

From cb67fe175af7becd715db442a84d1de2a892509f Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Mon, 23 Mar 2026 15:19:01 +0100
Subject: [PATCH 40/42] Regenerate CLI manifest for typer 0.24.1 plain-text
 output

---
 spacy_cli/cli_manifest.json | 88 ++++++++++++++++++-------------------
 1 file changed, 44 insertions(+), 44 deletions(-)

diff --git a/spacy_cli/cli_manifest.json b/spacy_cli/cli_manifest.json
index 9aa7da9b973..e756c058eb3 100644
--- a/spacy_cli/cli_manifest.json
+++ b/spacy_cli/cli_manifest.json
@@ -1,55 +1,55 @@
 {
   "command": "python -m spacy",
   "command_help": {
-    "apply": "                                                                                                    \n Usage: python -m spacy apply [OPTIONS] MODEL DATA_PATH OUTPUT_FILE                                 \n                                                                                                    \n Apply a trained pipeline to documents to get predictions. Expects a loadable spaCy pipeline and    \n path to the data, which can be a directory or a file. The data files can be provided in multiple   \n formats:     1. .spacy files     2. .jsonl files with a specified \"field\" to read the text from.   \n 3. Files with any other extension are assumed to be containing        a single document. DOCS:     \n https://spacy.io/api/cli#apply                                                                     \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model            TEXT  Model name or path [required]                                        │\n│ *    data_path        PATH  Location of the documents to predict on. Can be a single file in     │\n│                             .spacy format or a .jsonl file. Files with other extensions are      │\n│                             treated as single plain text documents. If a directory is provided   │\n│                             it is traversed recursively to grab all files to be processed. The   │\n│                             files can be a mixture of .spacy, .jsonl and text files. If .jsonl   │\n│                             is provided the specified field is going to be grabbed (\"text\" by    │\n│                             default).                                                            
│\n│                             [required]                                                           │\n│ *    output_file      FILE  Path to save the resulting .spacy file [required]                    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code        -c       PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│ --text-key    -tk      TEXT     Key containing text string for JSONL [default: text]             │\n│ --force       -F                Force overwriting the output file                                │\n│ --gpu-id      -g       INTEGER  GPU ID or -1 for CPU. [default: -1]                              │\n│ --batch-size  -b       INTEGER  Batch size. [default: 1]                                         │\n│ --n-process   -n       INTEGER  number of processors to use. [default: 1]                        │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "assemble": "                                                                                                    \n Usage: python -m spacy assemble [OPTIONS] CONFIG_PATH OUTPUT_PATH                                  \n                                                                                                    \n Assemble a spaCy pipeline from a config file. The config file includes all settings for            \n initializing the pipeline. To override settings in the config, e.g. settings that point to local   \n paths or that you want to experiment with, you can override them as command line options. The      \n --code argument lets you pass in a Python file that can be used to register custom functions that  \n are referenced in the config.                                                                      \n                                                                                                    \n DOCS: https://spacy.io/api/cli#assemble                                                            \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n│ *    output_path      PATH  Output directory to store assembled pipeline in [required]           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH  Path to Python file with additional code (registered functions) to  │\n│                              be imported                                                         │\n│ --verbose  -V,-VV            Display more information for debugging purposes                     │\n│ --help                       Show this 
message and exit.                                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "benchmark accuracy": "                                                                                                    \n Usage: python -m spacy benchmark accuracy [OPTIONS] MODEL DATA_PATH                                \n                                                                                                    \n Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in the binary   \n .spacy format. The --gold-preproc option sets up the evaluation examples with gold-standard        \n sentences and tokens for the predictions. Gold preprocessing helps the annotations align to the    \n tokenization, and may result in sequences of more consistent length. However, it may reduce        \n runtime accuracy due to train/test skew. To render a sample of dependency parses in a HTML file,   \n set as output directory as the displacy_path argument.                                             \n                                                                                                    \n DOCS: https://spacy.io/api/cli#benchmark-accuracy                                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [required]                                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [required]         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output          -o       FILE       Output JSON file for metrics                               │\n│ --code            -c       PATH       Path to Python file with additional code (registered       │\n│                                 
      functions) to be imported                                  │\n│ --gpu-id          -g       INTEGER    GPU ID or -1 for CPU [default: -1]                         │\n│ --gold-preproc    -G                  Use gold preprocessing                                     │\n│ --displacy-path   -dp      DIRECTORY  Directory to output rendered parses as HTML                │\n│ --displacy-limit  -dl      INTEGER    Limit of parses to render as HTML [default: 25]            │\n│ --per-component   -P                  Return scores per component, only applicable when an       │\n│                                       output JSON file is specified.                             │\n│ --spans-key       -sk      TEXT       Spans key to use when evaluating Doc.spans [default: sc]   │\n│ --help                                Show this message and exit.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "benchmark speed": "                                                                                                    \n Usage: python -m spacy benchmark speed [OPTIONS] MODEL DATA_PATH                                   \n                                                                                                    \n Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark data in the binary .spacy    \n format.                                                                                            \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [required]                                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [required]         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --batch-size  -b      INTEGER RANGE [x>=1]   Override the pipeline batch size                    │\n│ --no-shuffle                                 Do not shuffle benchmark data                       │\n│ --gpu-id      -g      INTEGER                GPU ID or -1 for CPU [default: -1]                  │\n│ --batches             INTEGER RANGE [x>=30]  Minimum number of batches to benchmark              │\n│                                              [default: 50]                                       │\n│ --warmup      -w      INTEGER RANGE [x>=0]   Number of iterations over the data for warmup       │\n│                                              [default: 3]                                        │\n│ --code        -c      PATH                   Path to Python file with additional code            │\n│                                    
          (registered functions) to be imported               │\n│ --help                                       Show this message and exit.                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "convert": "                                                                                                    \n Usage: python -m spacy convert [OPTIONS] INPUT_PATH [OUTPUT_DIR]                                   \n                                                                                                    \n Convert files into json or DocBin format for training. The resulting .spacy file can be used with  \n the train command and other experiment management functions.                                       \n                                                                                                    \n If no output_dir is specified and the output format is JSON, the data                              \n is written to stdout, so you can pipe them forward to a JSON file:                                 \n $ spacy convert some_file.conllu --file-type json > some_file.json                                 \n                                                                                                    \n DOCS: https://spacy.io/api/cli#convert                                                             \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    input_path      TEXT          Input file or directory [required]                            │\n│      output_dir      [OUTPUT_DIR]  Output directory. '-' for stdout. 
[default: -]                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --file-type        -t       [json|spacy]  Type of data to produce [default: spacy]               │\n│ --n-sents          -n       INTEGER       Number of sentences per doc (0 to disable)             │\n│                                           [default: 1]                                           │\n│ --seg-sents        -s                     Segment sentences (for -c ner)                         │\n│ --model,--base     -b       TEXT          Trained spaCy pipeline for sentence segmentation to    │\n│                                           use as base (for --seg-sents)                          │\n│ --morphology       -m                     Enable appending morphology to tags                    │\n│ --merge-subtokens  -T                     Merge CoNLL-U subtokens                                │\n│ --converter        -c       TEXT          Converter: ('conllubio', 'conllu', 'conll', 'ner',     │\n│                                           'iob', 'json')                                         │\n│                                           [default: auto]                                        │\n│ --ner-map          -nm      PATH          NER tag mapping (as JSON-encoded dict of entity types) │\n│ --lang             -l       TEXT          Language (if tokenizer required)                       │\n│ --concatenate      -C                     Concatenate output to a single file                    │\n│ --help                                    Show this message and exit.                            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug config": "                                                                                                    \n Usage: python -m spacy debug config [OPTIONS] CONFIG_PATH                                          \n                                                                                                    \n Debug a config file and show validation errors. The command will create all objects in the tree    \n and validate them. Note that some config validation errors are blocking and will prevent the rest  \n of the config from being resolved. This means that you may not see all validation errors at once   \n and some issues are only shown once previous errors have been fixed. Similar as with the 'train'   \n command, you can override settings from the config as command line options. For instance,          \n --training.batch_size 128 overrides the value of \"batch_size\" in the block \"\".                     \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-config                                                        \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c      PATH  Path to Python file with additional code (registered           │\n│                                   functions) to be imported                                      │\n│ --show-functions    -F            Show an overview of all registered functions used in the       │\n│                                   
config and where they come from (modules, files etc.)          │\n│ --show-variables    -V            Show an overview of all variables referenced in the config and │\n│                                   their values. This will also reflect variables overwritten on  │\n│                                   the CLI.                                                       │\n│ --help                            Show this message and exit.                                    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug data": "                                                                                                    \n Usage: python -m spacy debug data [OPTIONS] CONFIG_PATH                                            \n                                                                                                    \n Analyze, debug and validate your training and development data. Outputs useful stats, and can help \n you find problems like invalid entity annotations, cyclic dependencies, low data labels and more.  \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-data                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│ --ignore-warnings   -IW            Ignore warnings, only show stats and errors                   │\n│ --verbose           -V             Print additional information and explanations                 │\n│ --no-format         -NF            Don't pretty-print the results                                │\n│ --help                             Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug diff-config": "                                                                                                    \n Usage: python -m spacy debug diff-config [OPTIONS] CONFIG_PATH                                     \n                                                                                                    \n Show a diff of a config file with respect to spaCy's defaults or another config file. If           \n additional settings were used in the creation of the config file, then you must supply these as    \n extra parameters to the command when comparing to the default settings. The generated diff can     \n also be used when posting to the discussion forum to provide more information for the maintainers. \n                                                                                                    \n The `optimize`, `gpu`, and `pretraining` options are only relevant when                            \n comparing against the default configuration (or specifically when `compare_to` is None).           
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-diff                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --compare-to                 PATH                   Path to a config file to diff against, or    │\n│                                                     `None` to compare against default settings   │\n│ --optimize          -o       [efficiency|accuracy]  Whether the user config was optimized for    │\n│                                                     efficiency or accuracy. Only relevant when   │\n│                                                     comparing against the default config.        │\n│                                                     [default: efficiency]                        │\n│ --gpu               -G                              Whether the original config can run on a     │\n│                                                     GPU. Only relevant when comparing against    │\n│                                                     the default config.                          │\n│ --pretraining,--pt                                  Whether to compare on a config with          │\n│                                                     pretraining involved. Only relevant when     │\n│                                                     comparing against the default config.        
│\n│ --markdown          -md                             Generate Markdown for GitHub issues          │\n│ --help                                              Show this message and exit.                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug model": "                                                                                                    \n Usage: python -m spacy debug model [OPTIONS] CONFIG_PATH COMPONENT                                 \n                                                                                                    \n Analyze a Thinc model implementation. Includes checks for internal structure and activations       \n during training.                                                                                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-model                                                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n│ *    component        TEXT  Name of the pipeline component of which the model should be analysed │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --layers       -l         TEXT     Comma-separated names of layer IDs to print                   │\n│ --dimensions   -DIM                Show dimensions                                               │\n│ --parameters   -PAR                Show parameters                                               │\n│ --gradients    -GRAD               Show gradients                                                │\n│ --attributes   -ATTR               Show attributes                                               │\n│ --print-step0  -P0                 
Print model before training                                   │\n│ --print-step1  -P1                 Print model after initialization                              │\n│ --print-step2  -P2                 Print model after training                                    │\n│ --print-step3  -P3                 Print final predictions                                       │\n│ --gpu-id       -g         INTEGER  GPU ID or -1 for CPU [default: -1]                            │\n│ --help                             Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug profile": "                                                                                                    \n Usage: python -m spacy debug profile [OPTIONS] MODEL [INPUTS]                                      \n                                                                                                    \n Profile which functions take the most time in a spaCy pipeline. Input should be formatted as one   \n JSON object per line with a key \"text\". It can either be provided as a JSONL file, or be read from \n sys.sytdin. If no input file is specified, the IMDB dataset is loaded via Thinc.                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-profile                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model       TEXT      Trained pipeline to load [required]                                   │\n│      inputs      [INPUTS]  Location of input file. '-' for stdin.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n-texts  -n      INTEGER  Maximum number of texts to use if available [default: 10000]         │\n│ --help                      Show this message and exit.                                          │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug-data": "                                                                                                    \n Usage: python -m spacy debug-data [OPTIONS] CONFIG_PATH                                            \n                                                                                                    \n Analyze, debug and validate your training and development data. Outputs useful stats, and can help \n you find problems like invalid entity annotations, cyclic dependencies, low data labels and more.  \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-data                                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│ --ignore-warnings   -IW            Ignore warnings, only show stats and errors                   │\n│ --verbose           -V             Print additional information and explanations                 │\n│ --no-format         -NF            Don't pretty-print the results                                │\n│ --help                             Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "download": "                                                                                                    \n Usage: python -m spacy download [OPTIONS] MODEL                                                    \n                                                                                                    \n Download compatible trained pipeline from the default download path using pip. If --direct flag is \n set, the command expects the full package name with version. For direct downloads, the             \n compatibility check will be skipped. All additional arguments provided to this command will be     \n passed to `pip install` on package installation.                                                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#download                                                            \n AVAILABLE PACKAGES: https://spacy.io/models                                                        \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model      TEXT  Name of pipeline package to download [required]                            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --direct  -d,-D            Force direct download of name + version                               │\n│ --sdist   -S               Download sdist (.tar.gz) archive instead of pre-built binary wheel    │\n│ --url     -U         TEXT  Download from given url                                               │\n│ --help                     Show this message and exit.                                           
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "evaluate": "                                                                                                    \n Usage: python -m spacy evaluate [OPTIONS] MODEL DATA_PATH                                          \n                                                                                                    \n Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation data in the binary   \n .spacy format. The --gold-preproc option sets up the evaluation examples with gold-standard        \n sentences and tokens for the predictions. Gold preprocessing helps the annotations align to the    \n tokenization, and may result in sequences of more consistent length. However, it may reduce        \n runtime accuracy due to train/test skew. To render a sample of dependency parses in a HTML file,   \n set as output directory as the displacy_path argument.                                             \n                                                                                                    \n DOCS: https://spacy.io/api/cli#benchmark-accuracy                                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model          TEXT  Model name or path [required]                                          │\n│ *    data_path      PATH  Location of binary evaluation data in .spacy format [required]         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output          -o       FILE       Output JSON file for metrics                               │\n│ --code            -c       PATH       Path to Python file with additional code (registered       │\n│                                       
functions) to be imported                                  │\n│ --gpu-id          -g       INTEGER    GPU ID or -1 for CPU [default: -1]                         │\n│ --gold-preproc    -G                  Use gold preprocessing                                     │\n│ --displacy-path   -dp      DIRECTORY  Directory to output rendered parses as HTML                │\n│ --displacy-limit  -dl      INTEGER    Limit of parses to render as HTML [default: 25]            │\n│ --per-component   -P                  Return scores per component, only applicable when an       │\n│                                       output JSON file is specified.                             │\n│ --spans-key       -sk      TEXT       Spans key to use when evaluating Doc.spans [default: sc]   │\n│ --help                                Show this message and exit.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "find-function": "                                                                                                    \n Usage: python -m spacy find-function [OPTIONS] FUNC_NAME                                           \n                                                                                                    \n Find the module, path and line number to the file the registered function is defined in, if        \n available.                                                                                         \n                                                                                                    \n func_name (str): Name of the registered function.                                                  \n registry_name (Optional): Name of the catalogue registry.                                          \n                                                                                                    \n DOCS: https://spacy.io/api/cli#find-function                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    func_name      TEXT  Name of the registered function. [required]                            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --registry  -r      TEXT  Name of the catalogue registry.                                        │\n│ --help                    Show this message and exit.                                            │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "find-threshold": "                                                                                                    \n Usage: python -m spacy find-threshold [OPTIONS] MODEL DATA_PATH PIPE_NAME                          \n                                       THRESHOLD_KEY SCORES_KEY                                     \n                                                                                                    \n Runs prediction trials for a trained model with varying thresholds to maximize the specified       \n metric. The search space for the threshold is traversed linearly from 0 to 1 in `n_trials` steps.  \n Results are displayed in a table on `stdout` (the corresponding API call to                        \n `spacy.cli.find_threshold.find_threshold()` returns all results).                                  \n                                                                                                    \n This is applicable only for components whose predictions are influenced by                         \n thresholds - e.g. `textcat_multilabel` and `spancat`, but not `textcat`. Note                      \n that the full path to the corresponding threshold attribute in the config has to                   \n be provided.                                                                                       
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#find-threshold                                                      \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model              TEXT  Model name or path [required]                                      │\n│ *    data_path          PATH  Location of binary evaluation data in .spacy format [required]     │\n│ *    pipe_name          TEXT  Name of pipe to examine thresholds for [required]                  │\n│ *    threshold_key      TEXT  Key of threshold attribute in component's configuration [required] │\n│ *    scores_key         TEXT  Metric to optimize [required]                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n_trials      -n          INTEGER  Number of trials to determine optimal thresholds            │\n│                                      [default: 11]                                               │\n│ --code          -c          PATH     Path to Python file with additional code (registered        │\n│                                      functions) to be imported                                   │\n│ --gpu-id        -g          INTEGER  GPU ID or -1 for CPU [default: -1]                          │\n│ --gold-preproc  -G                   Use gold preprocessing                                      │\n│ --verbose       -V,-VV               Display more information for debugging purposes             │\n│ --help                               Show this message and exit.                                 
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "info": "                                                                                                    \n Usage: python -m spacy info [OPTIONS] [MODEL]                                                      \n                                                                                                    \n Print info about spaCy installation. If a pipeline is specified as an argument, print its meta     \n information. Flag --markdown prints details in Markdown for easy copy-pasting to GitHub issues.    \n                                                                                                    \n Flag --url prints only the download URL of the most recent compatible                              \n version of the pipeline.                                                                           \n                                                                                                    \n DOCS: https://spacy.io/api/cli#info                                                                \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   model      [MODEL]  Optional loadable spaCy pipeline                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --markdown  -md              Generate Markdown for GitHub issues                                 │\n│ --silent    -s,-S            Don't print anything (just return)                                  │\n│ --exclude   -e         TEXT  Comma-separated keys to exclude from the print-out                  │\n│                              [default: labels]                                                   │\n│ --url       -u               Print the URL to 
download the most recent compatible version of the │\n│                              pipeline                                                            │\n│ --help                       Show this message and exit.                                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init config": "                                                                                                    \n Usage: python -m spacy init config [OPTIONS] OUTPUT_FILE                                           \n                                                                                                    \n Generate a starter config file for training. Based on your requirements specified via the CLI      \n arguments, this command generates a config with the optimal settings for your use case. This       \n includes the choice of architecture, pretrained weights and related hyperparameters.               \n                                                                                                    \n DOCS: https://spacy.io/api/cli#init-config                                                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    output_file      PATH  File to save the config to or - for stdout (will only output config  │\n│                             and no additional logging info)                                      │\n│                             [required]                                                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --lang         -l       TEXT                   Two-letter code of the language to use            │\n│                                                [default: en]                                     │\n│ --pipeline     -p       TEXT                   Comma-separated names of trainable pipeline       │\n│                                                components to include (without 'tok2vec' or       │\n│                                        
        'transformer')                                    │\n│                                                [default: tagger,parser,ner]                      │\n│ --optimize     -o       [efficiency|accuracy]  Whether to optimize for efficiency (faster        │\n│                                                inference, smaller model, lower memory            │\n│                                                consumption) or higher accuracy (potentially      │\n│                                                larger and slower model). This will impact the    │\n│                                                choice of architecture, pretrained weights and    │\n│                                                related hyperparameters.                          │\n│                                                [default: efficiency]                             │\n│ --gpu          -G                              Whether the model can run on GPU. This will       │\n│                                                impact the choice of architecture, pretrained     │\n│                                                weights and related hyperparameters.              │\n│ --pretraining  -pt                             Include config for pretraining (with 'spacy       │\n│                                                pretrain')                                        │\n│ --force        -F                              Force overwriting the output file                 │\n│ --help                                         Show this message and exit.                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init fill-config": "                                                                                                    \n Usage: python -m spacy init fill-config [OPTIONS] BASE_PATH [OUTPUT_FILE]                          \n                                                                                                    \n Fill partial config file with default values. Will add all missing settings from the default       \n config and will create all objects, check the registered functions for their default values and    \n update the base config. This command can be used with a config generated via the training          \n quickstart widget: https://spacy.io/usage/training#quickstart                                      \n                                                                                                    \n DOCS: https://spacy.io/api/cli#init-fill-config                                                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    base_path        FILE           Path to base config to fill [required]                      │\n│      output_file      [OUTPUT_FILE]  Path to output .cfg file (or - for stdout) [default: -]     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --pretraining       -pt            Include config for pretraining (with 'spacy pretrain')        │\n│ --diff              -D             Print a visual diff highlighting the changes                  │\n│ --code-path,--code  -c       PATH  Path to Python file with additional code (registered          │\n│                                    functions) to be imported                                     │\n│ --help                            
 Show this message and exit.                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init labels": "                                                                                                    \n Usage: python -m spacy init labels [OPTIONS] CONFIG_PATH OUTPUT_PATH                               \n                                                                                                    \n Generate JSON files for the labels in the data. This helps speed up the training process, since    \n spaCy won't have to preprocess the data to extract the labels.                                     \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n│ *    output_path      PATH  Output directory for the labels [required]                           │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│ --verbose  -V,-VV               Display more information for debugging purposes                  │\n│ --gpu-id   -g          INTEGER  GPU ID or -1 for CPU [default: -1]                               │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init nlp": "                                                                                                    \n Usage: python -m spacy init nlp [OPTIONS] CONFIG_PATH OUTPUT_PATH                                  \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n│ *    output_path      PATH  Output directory for the prepared data [required]                    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code     -c          PATH     Path to Python file with additional code (registered functions)  │\n│                                 to be imported                                                   │\n│ --verbose  -V,-VV               Display more information for debugging purposes                  │\n│ --gpu-id   -g          INTEGER  GPU ID or -1 for CPU [default: -1]                               │\n│ --help                          Show this message and exit.                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init vectors": "                                                                                                    \n Usage: python -m spacy init vectors [OPTIONS] LANG VECTORS_LOC OUTPUT_DIR                          \n                                                                                                    \n Convert word vectors for use with spaCy. Will export an nlp object that you can use in the  block  \n of your config to initialize a model with vectors.                                                 \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    lang             TEXT  The language of the nlp object to create [required]                  │\n│ *    vectors_loc      PATH  Vectors file in Word2Vec format [required]                           │\n│ *    output_dir       PATH  Pipeline output directory [required]                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --prune     -p          INTEGER  Optional number of vectors to prune to [default: -1]            │\n│ --truncate  -t          INTEGER  Optional number of vectors to truncate to when reading in       │\n│                                  vectors file                                                    │\n│                                  [default: 0]                                                    │\n│ --mode      -m          TEXT     Vectors mode: default or floret [default: default]              │\n│ --name      -n          TEXT     Optional name for the word vectors, e.g. 
en_core_web_lg.vectors │\n│ --verbose   -V,-VV               Display more information for debugging purposes                 │\n│ --attr      -a          TEXT     Optional token attribute to use for vectors, e.g. LOWER or NORM │\n│                                  [default: ORTH]                                                 │\n│ --help                           Show this message and exit.                                     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "link": "                                                                                                    \n Usage: python -m spacy link [OPTIONS] ARGS KWARGS                                                  \n                                                                                                    \n (deprecated)                                                                                       \n As of spaCy v3.0, symlinks like \"en\" are not supported anymore. You can load trained pipeline      \n packages using their full names or from a directory path.                                          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    args        TEXT  [required]                                                                │\n│ *    kwargs      TEXT  [required]                                                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "package": "                                                                                                    \n Usage: python -m spacy package [OPTIONS] INPUT_DIR OUTPUT_DIR                                      \n                                                                                                    \n Generate an installable Python package for a pipeline. Includes binary data, meta and required     \n installation files. A new directory will be created in the specified output directory, and the     \n data will be copied over. If --create-meta is set and a meta.json already exists in the output     \n directory, the existing values will be used as the defaults in the command-line prompt. After      \n packaging, \"python -m build --sdist\" is run in the package directory, which will create a .tar.gz  \n archive that can be installed via \"pip install\".                                                   \n                                                                                                    \n If additional code files are provided (e.g. Python files containing custom                         \n registered functions like pipeline components), they are copied into the                           \n package and imported in the __init__.py.                                                           
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#package                                                             \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    input_dir       DIRECTORY  Directory with pipeline data [required]                          │\n│ *    output_dir      DIRECTORY  Output parent directory [required]                               │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code              -c                            TEXT  Comma-separated paths to Python file     │\n│                                                         with additional code (registered         │\n│                                                         functions) to be included in the package │\n│ --meta-path,--meta  -m                            FILE  Path to meta.json                        │\n│ --create-meta       -C                                  Create meta.json, even if one exists     │\n│ --name              -n                            TEXT  Package name to override meta            │\n│ --version           -v                            TEXT  Package version to override meta         │\n│ --build             -b                            TEXT  Comma-separated formats to build: sdist  │\n│                                                         and/or wheel, or none.                   
│\n│                                                         [default: sdist]                         │\n│ --force             -f,-F                               Force overwriting existing data in       │\n│                                                         output directory                         │\n│ --require-parent    -R,-R  --no-require-parent          Include the parent package (e.g. spacy)  │\n│                                                         in the requirements                      │\n│                                                         [default: require-parent]                │\n│ --help                                                  Show this message and exit.              │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "pretrain": "                                                                                                    \n Usage: python -m spacy pretrain [OPTIONS] CONFIG_PATH OUTPUT_DIR                                   \n                                                                                                    \n Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components, using an approximate       \n language-modelling objective. Two objective types are available, vector-based and character-based. \n                                                                                                    \n In the vector-based objective, we load word vectors that have been trained                         \n using a word2vec-style distributional similarity algorithm, and train a                            \n component like a CNN, BiLSTM, etc to predict vectors which match the                               \n pretrained ones. The weights are saved to a directory after each epoch. You                        \n can then pass a path to one of these pretrained weights files to the                               \n 'spacy train' command.                                                                             \n                                                                                                    \n This technique may be especially helpful if you have little labelled data.                         \n However, it's still quite experimental, so your mileage may vary.                                  \n                                                                                                    \n To load the weights back in during 'spacy train', you need to ensure                               \n all settings are the same between pretraining and training. Ideally,                               \n this is done by using the same config file for both commands.                                      
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#pretrain                                                            \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      FILE  Path to config file [required]                                       │\n│ *    output_dir       PATH  Directory to write weights to on each epoch [required]               │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --code          -c       PATH     Path to Python file with additional code (registered           │\n│                                   functions) to be imported                                      │\n│ --resume-path   -r       PATH     Path to pretrained weights from which to resume pretraining    │\n│ --epoch-resume  -er      INTEGER  The epoch to resume counting from when using --resume-path.    │\n│                                   Prevents unintended overwriting of existing weight files.      │\n│ --gpu-id        -g       INTEGER  GPU ID or -1 for CPU [default: -1]                             │\n│ --skip-last     -L                Skip saving model-last.bin                                     │\n│ --help                            Show this message and exit.                                    │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "profile": "                                                                                                    \n Usage: python -m spacy profile [OPTIONS] MODEL [INPUTS]                                            \n                                                                                                    \n Profile which functions take the most time in a spaCy pipeline. Input should be formatted as one   \n JSON object per line with a key \"text\". It can either be provided as a JSONL file, or be read from \n sys.sytdin. If no input file is specified, the IMDB dataset is loaded via Thinc.                   \n                                                                                                    \n DOCS: https://spacy.io/api/cli#debug-profile                                                       \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    model       TEXT      Trained pipeline to load [required]                                   │\n│      inputs      [INPUTS]  Location of input file. '-' for stdin.                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --n-texts  -n      INTEGER  Maximum number of texts to use if available [default: 10000]         │\n│ --help                      Show this message and exit.                                          │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "project assets": "                                                                                                    \n Usage: python -m spacy project assets [OPTIONS] [PROJECT_DIR]                                      \n                                                                                                    \n Fetch project assets like datasets and pretrained weights. Assets are defined in the \"assets\"      \n section of the project.yml. If a checksum is provided in the project.yml, the file is only         \n downloaded if no local file with the same checksum exists.                                         \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/tutorial/directory-and-assets.md          \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   project_dir      [PROJECT_DIR]  Path to cloned project. Defaults to current working directory. │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --sparse  -S        Use sparse checkout for assets provided via Git, to only check out and clone │\n│                     the files needed. Requires Git v22.2+.                                       │\n│ --extra   -e        Download all assets, including those marked as 'extra'.                      │\n│ --help              Show this message and exit.                                                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "project clone": "                                                                                                    \n Usage: python -m spacy project clone [OPTIONS] NAME [DEST]                                         \n                                                                                                    \n Clone a project template from a repository. Calls into \"git\" and will only download the files from \n the given subdirectory. The GitHub repo defaults to the official Weasel template repo, but can be  \n customized (including using a private repo).                                                       \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#clipboard-clone                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    name      TEXT    The name of the template to clone [required]                              │\n│      dest      [DEST]  Where to clone the project. Defaults to current working directory         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --repo    -r      TEXT  The repository to clone from                                             │\n│                         [default: https://github.com/explosion/projects]                         │\n│ --branch  -b      TEXT  The branch to clone from. If not provided, will attempt main, master     │\n│ --sparse  -S            Use sparse Git checkout to only check out and clone the files needed.    │\n│                         Requires Git v22.2+.                                                     
│\n│ --help                  Show this message and exit.                                              │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "project document": "                                                                                                    \n Usage: python -m spacy project document [OPTIONS] [PROJECT_DIR]                                    \n                                                                                                    \n Auto-generate a README.md for a project. If the content is saved to a file, hidden markers are     \n added so you can add custom content before or after the auto-generated section and only the        \n auto-generated docs will be replaced when you re-run the command.                                  \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#closed_book-document               \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   project_dir      [PROJECT_DIR]  Path to cloned project. Defaults to current working directory. │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output    -o       PATH  Path to output Markdown file for output. Defaults to - for standard   │\n│                            output                                                                │\n│                            [default: -]                                                          │\n│ --no-emoji  -NE            Don't use emoji                                                       │\n│ --help                     Show this message and exit.                                           
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "project dvc": "                                                                                                    \n Usage: python -m spacy project dvc [OPTIONS] [PROJECT_DIR] [WORKFLOW]                              \n                                                                                                    \n Auto-generate Data Version Control (DVC) config. A DVC project can only define one pipeline, so    \n you need to specify one workflow defined in the project.yml. If no workflow is specified, the      \n first defined workflow is used. The DVC config will only be updated if the project.yml changed.    \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#repeat-dvc                         \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   project_dir      [PROJECT_DIR]  Location of project directory. Defaults to current working     │\n│                                   directory.                                                     │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n│   workflow         [WORKFLOW]     Name of workflow defined in project.yml. Defaults to first     │\n│                                   workflow if not set.                                           
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --verbose  -V        Print more info                                                             │\n│ --quiet    -q        Print less info                                                             │\n│ --force    -F        Force update DVC config                                                     │\n│ --help               Show this message and exit.                                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "project pull": "                                                                                                    \n Usage: python -m spacy project pull [OPTIONS] [REMOTE] [PROJECT_DIR]                               \n                                                                                                    \n Retrieve available precomputed outputs from a remote storage. You can alias remotes in your        \n project.yml by mapping them to storage paths. A storage can be anything that the smart_open        \n library can upload to, e.g. AWS, Google Cloud Storage, SSH, local directories etc.                 \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#arrow_down-push                    \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   remote           [REMOTE]       Name or path of remote storage [default: default]              │\n│   project_dir      [PROJECT_DIR]  Location of project directory. Defaults to current working     │\n│                                   directory.                                                     │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "project push": "                                                                                                    \n Usage: python -m spacy project push [OPTIONS] [REMOTE] [PROJECT_DIR]                               \n                                                                                                    \n Persist outputs to a remote storage. You can alias remotes in your project.yml by mapping them to  \n storage paths. A storage can be anything that the smart_open library can upload to, e.g. AWS,      \n Google Cloud Storage, SSH, local directories etc.                                                  \n                                                                                                    \n DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#arrow_up-push                      \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│   remote           [REMOTE]       Name or path of remote storage [default: default]              │\n│   project_dir      [PROJECT_DIR]  Location of project directory. Defaults to current working     │\n│                                   directory.                                                     │\n│                                   [default: /Users/matt/repos/explosion/spaCy]                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "train": "                                                                                                    \n Usage: python -m spacy train [OPTIONS] CONFIG_PATH                                                 \n                                                                                                    \n Train or update a spaCy pipeline. Requires data in spaCy's binary format. To convert data from     \n other formats, use the `spacy convert` command. The config file includes all settings and          \n hyperparameters used during training. To override settings in the config, e.g. settings that point \n to local paths or that you want to experiment with, you can override them as command line options. \n For instance, --training.batch_size 128 overrides the value of \"batch_size\" in the block \"\". The   \n --code argument lets you pass in a Python file that's imported before training. It can be used to  \n register custom functions and architectures that can then be referenced in the config.             
\n                                                                                                    \n DOCS: https://spacy.io/api/cli#train                                                               \n                                                                                                    \n╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮\n│ *    config_path      PATH  Path to config file [required]                                       │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --output,--output-path  -o          PATH     Output directory to store trained pipeline in       │\n│ --code                  -c          PATH     Path to Python file with additional code            │\n│                                              (registered functions) to be imported               │\n│ --verbose               -V,-VV               Display more information for debugging purposes     │\n│ --gpu-id                -g          INTEGER  GPU ID or -1 for CPU [default: -1]                  │\n│ --help                                       Show this message and exit.                         │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "validate": "                                                                                                    \n Usage: python -m spacy validate [OPTIONS]                                                          \n                                                                                                    \n Validate the currently installed pipeline packages and spaCy version. Checks if the installed      \n packages are compatible and shows upgrade instructions if available. Should be run after `pip      \n install -U spacy`.                                                                                 \n                                                                                                    \n DOCS: https://spacy.io/api/cli#validate                                                            \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
+    "apply": "Usage: python -m spacy apply [OPTIONS] MODEL DATA_PATH OUTPUT_FILE\n\n  Apply a trained pipeline to documents to get predictions. Expects a loadable\n  spaCy pipeline and path to the data, which can be a directory or a file. The\n  data files can be provided in multiple formats:     1. .spacy files     2.\n  .jsonl files with a specified \"field\" to read the text from.     3. Files with\n  any other extension are assumed to be containing        a single document.\n  DOCS: https://spacy.io/api/cli#apply\n\nArguments:\n  MODEL        Model name or path  [required]\n  DATA_PATH    Location of the documents to predict on. Can be a single file in\n               .spacy format or a .jsonl file. Files with other extensions are\n               treated as single plain text documents. If a directory is\n               provided it is traversed recursively to grab all files to be\n               processed. The files can be a mixture of .spacy, .jsonl and text\n               files. If .jsonl is provided the specified field is going to be\n               grabbed (\"text\" by default).  [required]\n  OUTPUT_FILE  Path to save the resulting .spacy file  [required]\n\nOptions:\n  -c, --code PATH           Path to Python file with additional code (registered\n                            functions) to be imported\n  -tk, --text-key TEXT      Key containing text string for JSONL  [default:\n                            text]\n  -F, --force               Force overwriting the output file\n  -g, --gpu-id INTEGER      GPU ID or -1 for CPU.  [default: -1]\n  -b, --batch-size INTEGER  Batch size.  [default: 1]\n  -n, --n-process INTEGER   number of processors to use.  [default: 1]\n  --help                    Show this message and exit.\n",
+    "assemble": "Usage: python -m spacy assemble [OPTIONS] CONFIG_PATH OUTPUT_PATH\n\n  Assemble a spaCy pipeline from a config file. The config file includes all\n  settings for initializing the pipeline. To override settings in the config,\n  e.g. settings that point to local paths or that you want to experiment with,\n  you can override them as command line options. The --code argument lets you\n  pass in a Python file that can be used to register custom functions that are\n  referenced in the config.\n\n  DOCS: https://spacy.io/api/cli#assemble\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n  OUTPUT_PATH  Output directory to store assembled pipeline in  [required]\n\nOptions:\n  -c, --code PATH     Path to Python file with additional code (registered\n                      functions) to be imported\n  -V, -VV, --verbose  Display more information for debugging purposes\n  --help              Show this message and exit.\n",
+    "benchmark accuracy": "Usage: python -m spacy benchmark accuracy [OPTIONS] MODEL DATA_PATH\n\n  Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation\n  data in the binary .spacy format. The --gold-preproc option sets up the\n  evaluation examples with gold-standard sentences and tokens for the\n  predictions. Gold preprocessing helps the annotations align to the\n  tokenization, and may result in sequences of more consistent length. However,\n  it may reduce runtime accuracy due to train/test skew. To render a sample of\n  dependency parses in a HTML file, set as output directory as the displacy_path\n  argument.\n\n  DOCS: https://spacy.io/api/cli#benchmark-accuracy\n\nArguments:\n  MODEL      Model name or path  [required]\n  DATA_PATH  Location of binary evaluation data in .spacy format  [required]\n\nOptions:\n  -o, --output FILE               Output JSON file for metrics\n  -c, --code PATH                 Path to Python file with additional code\n                                  (registered functions) to be imported\n  -g, --gpu-id INTEGER            GPU ID or -1 for CPU  [default: -1]\n  -G, --gold-preproc              Use gold preprocessing\n  -dp, --displacy-path DIRECTORY  Directory to output rendered parses as HTML\n  -dl, --displacy-limit INTEGER   Limit of parses to render as HTML  [default:\n                                  25]\n  -P, --per-component             Return scores per component, only applicable\n                                  when an output JSON file is specified.\n  -sk, --spans-key TEXT           Spans key to use when evaluating Doc.spans\n                                  [default: sc]\n  --help                          Show this message and exit.\n",
+    "benchmark speed": "Usage: python -m spacy benchmark speed [OPTIONS] MODEL DATA_PATH\n\n  Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark data in\n  the binary .spacy format.\n\nArguments:\n  MODEL      Model name or path  [required]\n  DATA_PATH  Location of binary evaluation data in .spacy format  [required]\n\nOptions:\n  -b, --batch-size INTEGER RANGE  Override the pipeline batch size  [x>=1]\n  --no-shuffle                    Do not shuffle benchmark data\n  -g, --gpu-id INTEGER            GPU ID or -1 for CPU  [default: -1]\n  --batches INTEGER RANGE         Minimum number of batches to benchmark\n                                  [default: 50; x>=30]\n  -w, --warmup INTEGER RANGE      Number of iterations over the data for warmup\n                                  [default: 3; x>=0]\n  -c, --code PATH                 Path to Python file with additional code\n                                  (registered functions) to be imported\n  --help                          Show this message and exit.\n",
+    "convert": "Usage: python -m spacy convert [OPTIONS] INPUT_PATH [OUTPUT_DIR]\n\n  Convert files into json or DocBin format for training. The resulting .spacy\n  file can be used with the train command and other experiment management\n  functions.\n\n  If no output_dir is specified and the output format is JSON, the data is\n  written to stdout, so you can pipe them forward to a JSON file: $ spacy\n  convert some_file.conllu --file-type json > some_file.json\n\n  DOCS: https://spacy.io/api/cli#convert\n\nArguments:\n  INPUT_PATH    Input file or directory  [required]\n  [OUTPUT_DIR]  Output directory. '-' for stdout.  [default: -]\n\nOptions:\n  -t, --file-type [json|spacy]  Type of data to produce  [default: spacy]\n  -n, --n-sents INTEGER         Number of sentences per doc (0 to disable)\n                                [default: 1]\n  -s, --seg-sents               Segment sentences (for -c ner)\n  -b, --model, --base TEXT      Trained spaCy pipeline for sentence segmentation\n                                to use as base (for --seg-sents)\n  -m, --morphology              Enable appending morphology to tags\n  -T, --merge-subtokens         Merge CoNLL-U subtokens\n  -c, --converter TEXT          Converter: ('conllubio', 'conllu', 'conll',\n                                'ner', 'iob', 'json')  [default: auto]\n  -nm, --ner-map PATH           NER tag mapping (as JSON-encoded dict of entity\n                                types)\n  -l, --lang TEXT               Language (if tokenizer required)\n  -C, --concatenate             Concatenate output to a single file\n  --help                        Show this message and exit.\n",
+    "debug config": "Usage: python -m spacy debug config [OPTIONS] CONFIG_PATH\n\n  Debug a config file and show validation errors. The command will create all\n  objects in the tree and validate them. Note that some config validation errors\n  are blocking and will prevent the rest of the config from being resolved. This\n  means that you may not see all validation errors at once and some issues are\n  only shown once previous errors have been fixed. Similar as with the 'train'\n  command, you can override settings from the config as command line options.\n  For instance, --training.batch_size 128 overrides the value of \"batch_size\" in\n  the block \"[training]\".\n\n  DOCS: https://spacy.io/api/cli#debug-config\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n\nOptions:\n  -c, --code-path, --code PATH  Path to Python file with additional code\n                                (registered functions) to be imported\n  -F, --show-functions          Show an overview of all registered functions\n                                used in the config and where they come from\n                                (modules, files etc.)\n  -V, --show-variables          Show an overview of all variables referenced in\n                                the config and their values. This will also\n                                reflect variables overwritten on the CLI.\n  --help                        Show this message and exit.\n",
+    "debug data": "Usage: python -m spacy debug data [OPTIONS] CONFIG_PATH\n\n  Analyze, debug and validate your training and development data. Outputs useful\n  stats, and can help you find problems like invalid entity annotations, cyclic\n  dependencies, low data labels and more.\n\n  DOCS: https://spacy.io/api/cli#debug-data\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n\nOptions:\n  -c, --code-path, --code PATH  Path to Python file with additional code\n                                (registered functions) to be imported\n  -IW, --ignore-warnings        Ignore warnings, only show stats and errors\n  -V, --verbose                 Print additional information and explanations\n  -NF, --no-format              Don't pretty-print the results\n  --help                        Show this message and exit.\n",
+    "debug diff-config": "Usage: python -m spacy debug diff-config [OPTIONS] CONFIG_PATH\n\n  Show a diff of a config file with respect to spaCy's defaults or another\n  config file. If additional settings were used in the creation of the config\n  file, then you must supply these as extra parameters to the command when\n  comparing to the default settings. The generated diff can also be used when\n  posting to the discussion forum to provide more information for the\n  maintainers.\n\n  The `optimize`, `gpu`, and `pretraining` options are only relevant when\n  comparing against the default configuration (or specifically when `compare_to`\n  is None).\n\n  DOCS: https://spacy.io/api/cli#debug-diff\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n\nOptions:\n  --compare-to PATH               Path to a config file to diff against, or\n                                  `None` to compare against default settings\n  -o, --optimize [efficiency|accuracy]\n                                  Whether the user config was optimized for\n                                  efficiency or accuracy. Only relevant when\n                                  comparing against the default config.\n                                  [default: efficiency]\n  -G, --gpu                       Whether the original config can run on a GPU.\n                                  Only relevant when comparing against the\n                                  default config.\n  --pretraining, --pt             Whether to compare on a config with\n                                  pretraining involved. Only relevant when\n                                  comparing against the default config.\n  -md, --markdown                 Generate Markdown for GitHub issues\n  --help                          Show this message and exit.\n",
+    "debug model": "Usage: python -m spacy debug model [OPTIONS] CONFIG_PATH COMPONENT\n\n  Analyze a Thinc model implementation. Includes checks for internal structure\n  and activations during training.\n\n  DOCS: https://spacy.io/api/cli#debug-model\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n  COMPONENT    Name of the pipeline component of which the model should be\n               analysed  [required]\n\nOptions:\n  -l, --layers TEXT     Comma-separated names of layer IDs to print\n  -DIM, --dimensions    Show dimensions\n  -PAR, --parameters    Show parameters\n  -GRAD, --gradients    Show gradients\n  -ATTR, --attributes   Show attributes\n  -P0, --print-step0    Print model before training\n  -P1, --print-step1    Print model after initialization\n  -P2, --print-step2    Print model after training\n  -P3, --print-step3    Print final predictions\n  -g, --gpu-id INTEGER  GPU ID or -1 for CPU  [default: -1]\n  --help                Show this message and exit.\n",
+    "debug profile": "Usage: python -m spacy debug profile [OPTIONS] MODEL [INPUTS]\n\n  Profile which functions take the most time in a spaCy pipeline. Input should\n  be formatted as one JSON object per line with a key \"text\". It can either be\n  provided as a JSONL file, or be read from sys.sytdin. If no input file is\n  specified, the IMDB dataset is loaded via Thinc.\n\n  DOCS: https://spacy.io/api/cli#debug-profile\n\nArguments:\n  MODEL     Trained pipeline to load  [required]\n  [INPUTS]  Location of input file. '-' for stdin.\n\nOptions:\n  -n, --n-texts INTEGER  Maximum number of texts to use if available  [default:\n                         10000]\n  --help                 Show this message and exit.\n",
+    "debug-data": "Usage: python -m spacy debug-data [OPTIONS] CONFIG_PATH\n\n  Analyze, debug and validate your training and development data. Outputs useful\n  stats, and can help you find problems like invalid entity annotations, cyclic\n  dependencies, low data labels and more.\n\n  DOCS: https://spacy.io/api/cli#debug-data\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n\nOptions:\n  -c, --code-path, --code PATH  Path to Python file with additional code\n                                (registered functions) to be imported\n  -IW, --ignore-warnings        Ignore warnings, only show stats and errors\n  -V, --verbose                 Print additional information and explanations\n  -NF, --no-format              Don't pretty-print the results\n  --help                        Show this message and exit.\n",
+    "download": "Usage: python -m spacy download [OPTIONS] MODEL\n\n  Download compatible trained pipeline from the default download path using pip.\n  If --direct flag is set, the command expects the full package name with\n  version. For direct downloads, the compatibility check will be skipped. All\n  additional arguments provided to this command will be passed to `pip install`\n  on package installation.\n\n  DOCS: https://spacy.io/api/cli#download AVAILABLE PACKAGES:\n  https://spacy.io/models\n\nArguments:\n  MODEL  Name of pipeline package to download  [required]\n\nOptions:\n  -d, -D, --direct  Force direct download of name + version\n  -S, --sdist       Download sdist (.tar.gz) archive instead of pre-built binary\n                    wheel\n  -U, --url TEXT    Download from given url\n  --help            Show this message and exit.\n",
+    "evaluate": "Usage: python -m spacy evaluate [OPTIONS] MODEL DATA_PATH\n\n  Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation\n  data in the binary .spacy format. The --gold-preproc option sets up the\n  evaluation examples with gold-standard sentences and tokens for the\n  predictions. Gold preprocessing helps the annotations align to the\n  tokenization, and may result in sequences of more consistent length. However,\n  it may reduce runtime accuracy due to train/test skew. To render a sample of\n  dependency parses in a HTML file, set as output directory as the displacy_path\n  argument.\n\n  DOCS: https://spacy.io/api/cli#benchmark-accuracy\n\nArguments:\n  MODEL      Model name or path  [required]\n  DATA_PATH  Location of binary evaluation data in .spacy format  [required]\n\nOptions:\n  -o, --output FILE               Output JSON file for metrics\n  -c, --code PATH                 Path to Python file with additional code\n                                  (registered functions) to be imported\n  -g, --gpu-id INTEGER            GPU ID or -1 for CPU  [default: -1]\n  -G, --gold-preproc              Use gold preprocessing\n  -dp, --displacy-path DIRECTORY  Directory to output rendered parses as HTML\n  -dl, --displacy-limit INTEGER   Limit of parses to render as HTML  [default:\n                                  25]\n  -P, --per-component             Return scores per component, only applicable\n                                  when an output JSON file is specified.\n  -sk, --spans-key TEXT           Spans key to use when evaluating Doc.spans\n                                  [default: sc]\n  --help                          Show this message and exit.\n",
+    "find-function": "Usage: python -m spacy find-function [OPTIONS] FUNC_NAME\n\n  Find the module, path and line number to the file the registered function is\n  defined in, if available.\n\n  func_name (str): Name of the registered function. registry_name\n  (Optional[str]): Name of the catalogue registry.\n\n  DOCS: https://spacy.io/api/cli#find-function\n\nArguments:\n  FUNC_NAME  Name of the registered function.  [required]\n\nOptions:\n  -r, --registry TEXT  Name of the catalogue registry.\n  --help               Show this message and exit.\n",
+    "find-threshold": "Usage: python -m spacy find-threshold [OPTIONS] MODEL DATA_PATH PIPE_NAME\n                                      THRESHOLD_KEY SCORES_KEY\n\n  Runs prediction trials for a trained model with varying thresholds to maximize\n  the specified metric. The search space for the threshold is traversed linearly\n  from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`\n  (the corresponding API call to `spacy.cli.find_threshold.find_threshold()`\n  returns all results).\n\n  This is applicable only for components whose predictions are influenced by\n  thresholds - e.g. `textcat_multilabel` and `spancat`, but not `textcat`. Note\n  that the full path to the corresponding threshold attribute in the config has\n  to be provided.\n\n  DOCS: https://spacy.io/api/cli#find-threshold\n\nArguments:\n  MODEL          Model name or path  [required]\n  DATA_PATH      Location of binary evaluation data in .spacy format  [required]\n  PIPE_NAME      Name of pipe to examine thresholds for  [required]\n  THRESHOLD_KEY  Key of threshold attribute in component's configuration\n                 [required]\n  SCORES_KEY     Metric to optimize  [required]\n\nOptions:\n  -n, --n_trials INTEGER  Number of trials to determine optimal thresholds\n                          [default: 11]\n  -c, --code PATH         Path to Python file with additional code (registered\n                          functions) to be imported\n  -g, --gpu-id INTEGER    GPU ID or -1 for CPU  [default: -1]\n  -G, --gold-preproc      Use gold preprocessing\n  -V, -VV, --verbose      Display more information for debugging purposes\n  --help                  Show this message and exit.\n",
+    "info": "Usage: python -m spacy info [OPTIONS] [MODEL]\n\n  Print info about spaCy installation. If a pipeline is specified as an\n  argument, print its meta information. Flag --markdown prints details in\n  Markdown for easy copy-pasting to GitHub issues.\n\n  Flag --url prints only the download URL of the most recent compatible version\n  of the pipeline.\n\n  DOCS: https://spacy.io/api/cli#info\n\nArguments:\n  [MODEL]  Optional loadable spaCy pipeline\n\nOptions:\n  -md, --markdown     Generate Markdown for GitHub issues\n  -s, -S, --silent    Don't print anything (just return)\n  -e, --exclude TEXT  Comma-separated keys to exclude from the print-out\n                      [default: labels]\n  -u, --url           Print the URL to download the most recent compatible\n                      version of the pipeline\n  --help              Show this message and exit.\n",
+    "init config": "Usage: python -m spacy init config [OPTIONS] OUTPUT_FILE\n\n  Generate a starter config file for training. Based on your requirements\n  specified via the CLI arguments, this command generates a config with the\n  optimal settings for your use case. This includes the choice of architecture,\n  pretrained weights and related hyperparameters.\n\n  DOCS: https://spacy.io/api/cli#init-config\n\nArguments:\n  OUTPUT_FILE  File to save the config to or - for stdout (will only output\n               config and no additional logging info)  [required]\n\nOptions:\n  -l, --lang TEXT                 Two-letter code of the language to use\n                                  [default: en]\n  -p, --pipeline TEXT             Comma-separated names of trainable pipeline\n                                  components to include (without 'tok2vec' or\n                                  'transformer')  [default: tagger,parser,ner]\n  -o, --optimize [efficiency|accuracy]\n                                  Whether to optimize for efficiency (faster\n                                  inference, smaller model, lower memory\n                                  consumption) or higher accuracy (potentially\n                                  larger and slower model). This will impact the\n                                  choice of architecture, pretrained weights and\n                                  related hyperparameters.  [default:\n                                  efficiency]\n  -G, --gpu                       Whether the model can run on GPU. This will\n                                  impact the choice of architecture, pretrained\n                                  weights and related hyperparameters.\n  -pt, --pretraining              Include config for pretraining (with 'spacy\n                                  pretrain')\n  -F, --force                     Force overwriting the output file\n  --help                          Show this message and exit.\n",
+    "init fill-config": "Usage: python -m spacy init fill-config [OPTIONS] BASE_PATH [OUTPUT_FILE]\n\n  Fill partial config file with default values. Will add all missing settings\n  from the default config and will create all objects, check the registered\n  functions for their default values and update the base config. This command\n  can be used with a config generated via the training quickstart widget:\n  https://spacy.io/usage/training#quickstart\n\n  DOCS: https://spacy.io/api/cli#init-fill-config\n\nArguments:\n  BASE_PATH      Path to base config to fill  [required]\n  [OUTPUT_FILE]  Path to output .cfg file (or - for stdout)  [default: -]\n\nOptions:\n  -pt, --pretraining            Include config for pretraining (with 'spacy\n                                pretrain')\n  -D, --diff                    Print a visual diff highlighting the changes\n  -c, --code-path, --code PATH  Path to Python file with additional code\n                                (registered functions) to be imported\n  --help                        Show this message and exit.\n",
+    "init labels": "Usage: python -m spacy init labels [OPTIONS] CONFIG_PATH OUTPUT_PATH\n\n  Generate JSON files for the labels in the data. This helps speed up the\n  training process, since spaCy won't have to preprocess the data to extract the\n  labels.\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n  OUTPUT_PATH  Output directory for the labels  [required]\n\nOptions:\n  -c, --code PATH       Path to Python file with additional code (registered\n                        functions) to be imported\n  -V, -VV, --verbose    Display more information for debugging purposes\n  -g, --gpu-id INTEGER  GPU ID or -1 for CPU  [default: -1]\n  --help                Show this message and exit.\n",
+    "init nlp": "Usage: python -m spacy init nlp [OPTIONS] CONFIG_PATH OUTPUT_PATH\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n  OUTPUT_PATH  Output directory for the prepared data  [required]\n\nOptions:\n  -c, --code PATH       Path to Python file with additional code (registered\n                        functions) to be imported\n  -V, -VV, --verbose    Display more information for debugging purposes\n  -g, --gpu-id INTEGER  GPU ID or -1 for CPU  [default: -1]\n  --help                Show this message and exit.\n",
+    "init vectors": "Usage: python -m spacy init vectors [OPTIONS] LANG VECTORS_LOC OUTPUT_DIR\n\n  Convert word vectors for use with spaCy. Will export an nlp object that you\n  can use in the [initialize] block of your config to initialize a model with\n  vectors.\n\nArguments:\n  LANG         The language of the nlp object to create  [required]\n  VECTORS_LOC  Vectors file in Word2Vec format  [required]\n  OUTPUT_DIR   Pipeline output directory  [required]\n\nOptions:\n  -p, --prune INTEGER     Optional number of vectors to prune to  [default: -1]\n  -t, --truncate INTEGER  Optional number of vectors to truncate to when reading\n                          in vectors file  [default: 0]\n  -m, --mode TEXT         Vectors mode: default or floret  [default: default]\n  -n, --name TEXT         Optional name for the word vectors, e.g.\n                          en_core_web_lg.vectors\n  -V, -VV, --verbose      Display more information for debugging purposes\n  -a, --attr TEXT         Optional token attribute to use for vectors, e.g.\n                          LOWER or NORM  [default: ORTH]\n  --help                  Show this message and exit.\n",
+    "link": "Usage: python -m spacy link [OPTIONS] ARGS KWARGS\n\n  As of spaCy v3.0, symlinks like \"en\" are not supported anymore. You can load\n  trained pipeline packages using their full names or from a directory path.\n  (DEPRECATED)\n\nArguments:\n  ARGS    [required]\n  KWARGS  [required]\n\nOptions:\n  --help  Show this message and exit.\n",
+    "package": "Usage: python -m spacy package [OPTIONS] INPUT_DIR OUTPUT_DIR\n\n  Generate an installable Python package for a pipeline. Includes binary data,\n  meta and required installation files. A new directory will be created in the\n  specified output directory, and the data will be copied over. If --create-meta\n  is set and a meta.json already exists in the output directory, the existing\n  values will be used as the defaults in the command-line prompt. After\n  packaging, \"python -m build --sdist\" is run in the package directory, which\n  will create a .tar.gz archive that can be installed via \"pip install\".\n\n  If additional code files are provided (e.g. Python files containing custom\n  registered functions like pipeline components), they are copied into the\n  package and imported in the __init__.py.\n\n  DOCS: https://spacy.io/api/cli#package\n\nArguments:\n  INPUT_DIR   Directory with pipeline data  [required]\n  OUTPUT_DIR  Output parent directory  [required]\n\nOptions:\n  -c, --code TEXT                 Comma-separated paths to Python file with\n                                  additional code (registered functions) to be\n                                  included in the package\n  -m, --meta-path, --meta FILE    Path to meta.json\n  -C, --create-meta               Create meta.json, even if one exists\n  -n, --name TEXT                 Package name to override meta\n  -v, --version TEXT              Package version to override meta\n  -b, --build TEXT                Comma-separated formats to build: sdist and/or\n                                  wheel, or none.  [default: sdist]\n  -f, -F, --force                 Force overwriting existing data in output\n                                  directory\n  -R, -R, --require-parent / --no-require-parent\n                                  Include the parent package (e.g. 
spacy) in the\n                                  requirements  [default: require-parent]\n  --help                          Show this message and exit.\n",
+    "pretrain": "Usage: python -m spacy pretrain [OPTIONS] CONFIG_PATH OUTPUT_DIR\n\n  Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components, using\n  an approximate language-modelling objective. Two objective types are\n  available, vector-based and character-based.\n\n  In the vector-based objective, we load word vectors that have been trained\n  using a word2vec-style distributional similarity algorithm, and train a\n  component like a CNN, BiLSTM, etc to predict vectors which match the\n  pretrained ones. The weights are saved to a directory after each epoch. You\n  can then pass a path to one of these pretrained weights files to the 'spacy\n  train' command.\n\n  This technique may be especially helpful if you have little labelled data.\n  However, it's still quite experimental, so your mileage may vary.\n\n  To load the weights back in during 'spacy train', you need to ensure all\n  settings are the same between pretraining and training. Ideally, this is done\n  by using the same config file for both commands.\n\n  DOCS: https://spacy.io/api/cli#pretrain\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n  OUTPUT_DIR   Directory to write weights to on each epoch  [required]\n\nOptions:\n  -c, --code PATH              Path to Python file with additional code\n                               (registered functions) to be imported\n  -r, --resume-path PATH       Path to pretrained weights from which to resume\n                               pretraining\n  -er, --epoch-resume INTEGER  The epoch to resume counting from when using\n                               --resume-path. Prevents unintended overwriting of\n                               existing weight files.\n  -g, --gpu-id INTEGER         GPU ID or -1 for CPU  [default: -1]\n  -L, --skip-last              Skip saving model-last.bin\n  --help                       Show this message and exit.\n",
+    "profile": "Usage: python -m spacy profile [OPTIONS] MODEL [INPUTS]\n\n  Profile which functions take the most time in a spaCy pipeline. Input should\n  be formatted as one JSON object per line with a key \"text\". It can either be\n  provided as a JSONL file, or be read from sys.sytdin. If no input file is\n  specified, the IMDB dataset is loaded via Thinc.\n\n  DOCS: https://spacy.io/api/cli#debug-profile\n\nArguments:\n  MODEL     Trained pipeline to load  [required]\n  [INPUTS]  Location of input file. '-' for stdin.\n\nOptions:\n  -n, --n-texts INTEGER  Maximum number of texts to use if available  [default:\n                         10000]\n  --help                 Show this message and exit.\n",
+    "project assets": "Usage: python -m spacy project assets [OPTIONS] [PROJECT_DIR]\n\n  Fetch project assets like datasets and pretrained weights. Assets are defined\n  in the \"assets\" section of the project.yml. If a checksum is provided in the\n  project.yml, the file is only downloaded if no local file with the same\n  checksum exists.\n\n  DOCS: https://github.com/explosion/weasel/tree/main/docs/tutorial/directory-\n  and-assets.md\n\nArguments:\n  [PROJECT_DIR]  Path to cloned project. Defaults to current working directory.\n                 [default: /Users/matt/repos/spacy-monorepo/spacy]\n\nOptions:\n  -S, --sparse  Use sparse checkout for assets provided via Git, to only check\n                out and clone the files needed. Requires Git v22.2+.\n  -e, --extra   Download all assets, including those marked as 'extra'.\n  --help        Show this message and exit.\n",
+    "project clone": "Usage: python -m spacy project clone [OPTIONS] NAME [DEST]\n\n  Clone a project template from a repository. Calls into \"git\" and will only\n  download the files from the given subdirectory. The GitHub repo defaults to\n  the official Weasel template repo, but can be customized (including using a\n  private repo).\n\n  DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#clipboard-\n  clone\n\nArguments:\n  NAME    The name of the template to clone  [required]\n  [DEST]  Where to clone the project. Defaults to current working directory\n\nOptions:\n  -r, --repo TEXT    The repository to clone from  [default:\n                     https://github.com/explosion/projects]\n  -b, --branch TEXT  The branch to clone from. If not provided, will attempt\n                     main, master\n  -S, --sparse       Use sparse Git checkout to only check out and clone the\n                     files needed. Requires Git v22.2+.\n  --help             Show this message and exit.\n",
+    "project document": "Usage: python -m spacy project document [OPTIONS] [PROJECT_DIR]\n\n  Auto-generate a README.md for a project. If the content is saved to a file,\n  hidden markers are added so you can add custom content before or after the\n  auto-generated section and only the auto-generated docs will be replaced when\n  you re-run the command.\n\n  DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#closed_book-\n  document\n\nArguments:\n  [PROJECT_DIR]  Path to cloned project. Defaults to current working directory.\n                 [default: /Users/matt/repos/spacy-monorepo/spacy]\n\nOptions:\n  -o, --output PATH  Path to output Markdown file for output. Defaults to - for\n                     standard output  [default: -]\n  -NE, --no-emoji    Don't use emoji\n  --help             Show this message and exit.\n",
+    "project dvc": "Usage: python -m spacy project dvc [OPTIONS] [PROJECT_DIR] [WORKFLOW]\n\n  Auto-generate Data Version Control (DVC) config. A DVC project can only define\n  one pipeline, so you need to specify one workflow defined in the project.yml.\n  If no workflow is specified, the first defined workflow is used. The DVC\n  config will only be updated if the project.yml changed.\n\n  DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#repeat-dvc\n\nArguments:\n  [PROJECT_DIR]  Location of project directory. Defaults to current working\n                 directory.  [default: /Users/matt/repos/spacy-monorepo/spacy]\n  [WORKFLOW]     Name of workflow defined in project.yml. Defaults to first\n                 workflow if not set.\n\nOptions:\n  -V, --verbose  Print more info\n  -q, --quiet    Print less info\n  -F, --force    Force update DVC config\n  --help         Show this message and exit.\n",
+    "project pull": "Usage: python -m spacy project pull [OPTIONS] [REMOTE] [PROJECT_DIR]\n\n  Retrieve available precomputed outputs from a remote storage. You can alias\n  remotes in your project.yml by mapping them to storage paths. A storage can be\n  anything that the smart_open library can upload to, e.g. AWS, Google Cloud\n  Storage, SSH, local directories etc.\n\n  DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#arrow_down-\n  push\n\nArguments:\n  [REMOTE]       Name or path of remote storage  [default: default]\n  [PROJECT_DIR]  Location of project directory. Defaults to current working\n                 directory.  [default: /Users/matt/repos/spacy-monorepo/spacy]\n\nOptions:\n  --help  Show this message and exit.\n",
+    "project push": "Usage: python -m spacy project push [OPTIONS] [REMOTE] [PROJECT_DIR]\n\n  Persist outputs to a remote storage. You can alias remotes in your project.yml\n  by mapping them to storage paths. A storage can be anything that the\n  smart_open library can upload to, e.g. AWS, Google Cloud Storage, SSH, local\n  directories etc.\n\n  DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#arrow_up-push\n\nArguments:\n  [REMOTE]       Name or path of remote storage  [default: default]\n  [PROJECT_DIR]  Location of project directory. Defaults to current working\n                 directory.  [default: /Users/matt/repos/spacy-monorepo/spacy]\n\nOptions:\n  --help  Show this message and exit.\n",
+    "train": "Usage: python -m spacy train [OPTIONS] CONFIG_PATH\n\n  Train or update a spaCy pipeline. Requires data in spaCy's binary format. To\n  convert data from other formats, use the `spacy convert` command. The config\n  file includes all settings and hyperparameters used during training. To\n  override settings in the config, e.g. settings that point to local paths or\n  that you want to experiment with, you can override them as command line\n  options. For instance, --training.batch_size 128 overrides the value of\n  \"batch_size\" in the block \"[training]\". The --code argument lets you pass in a\n  Python file that's imported before training. It can be used to register custom\n  functions and architectures that can then be referenced in the config.\n\n  DOCS: https://spacy.io/api/cli#train\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n\nOptions:\n  -o, --output, --output-path PATH\n                                  Output directory to store trained pipeline in\n  -c, --code PATH                 Path to Python file with additional code\n                                  (registered functions) to be imported\n  -V, -VV, --verbose              Display more information for debugging\n                                  purposes\n  -g, --gpu-id INTEGER            GPU ID or -1 for CPU  [default: -1]\n  --help                          Show this message and exit.\n",
+    "validate": "Usage: python -m spacy validate [OPTIONS]\n\n  Validate the currently installed pipeline packages and spaCy version. Checks\n  if the installed packages are compatible and shows upgrade instructions if\n  available. Should be run after `pip install -U spacy`.\n\n  DOCS: https://spacy.io/api/cli#validate\n\nOptions:\n  --help  Show this message and exit.\n"
   },
   "errors": {
-    "missing_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ Missing command.                                                                                 │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-    "unknown_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_COMMAND__'.                                                     │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+    "missing_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n\nError: Missing command.\n",
+    "unknown_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n\nError: No such command '__SPACY_UNKNOWN_COMMAND__'.\n",
     "unknown_subcommand": {
-      "benchmark": "Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy benchmark --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.                                                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-      "debug": "Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy debug --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.                                                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-      "init": "Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy init --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.                                                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-      "project": "Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy project --help' for help.\n╭─ Error ──────────────────────────────────────────────────────────────────────────────────────────╮\n│ No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.                                                  │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n"
+      "benchmark": "Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy benchmark --help' for help.\n\nError: No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.\n",
+      "debug": "Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy debug --help' for help.\n\nError: No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.\n",
+      "init": "Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy init --help' for help.\n\nError: No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.\n",
+      "project": "Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy project --help' for help.\n\nError: No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.\n"
     }
   },
   "group_help": {
-    "benchmark": "                                                                                                    \n Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...                                       \n                                                                                                    \n Commands for benchmarking pipelines.                                                               \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ accuracy   Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation         │\n│            data in the binary .spacy format. The --gold-preproc option sets up the               │\n│            evaluation examples with gold-standard sentences and tokens for the                   │\n│            predictions. Gold preprocessing helps the annotations align to the                    │\n│            tokenization, and may result in sequences of more consistent length. However,         │\n│            it may reduce runtime accuracy due to train/test skew. To render a sample of          │\n│            dependency parses in a HTML file, set as output directory as the                      │\n│            displacy_path argument.                                                               │\n│ speed      Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark                 │\n│            data in the binary .spacy format.                                                     
│\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "debug": "                                                                                                    \n Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...                                           \n                                                                                                    \n Suite of helpful commands for debugging and profiling. Includes commands to check and validate     \n your config files, training and evaluation data, and custom model implementations.                 \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ data          Analyze, debug and validate your training and development data. Outputs            │\n│               useful stats, and can help you find problems like invalid entity annotations,      │\n│               cyclic dependencies, low data labels and more.                                     │\n│ profile       Profile which functions take the most time in a spaCy pipeline.                    │\n│               Input should be formatted as one JSON object per line with a key \"text\".           │\n│               It can either be provided as a JSONL file, or be read from sys.sytdin.             │\n│               If no input file is specified, the IMDB dataset is loaded via Thinc.               │\n│ config        Debug a config file and show validation errors. The command will                   │\n│               create all objects in the tree and validate them. 
Note that some config            │\n│               validation errors are blocking and will prevent the rest of the config from        │\n│               being resolved. This means that you may not see all validation errors at           │\n│               once and some issues are only shown once previous errors have been fixed.          │\n│               Similar as with the 'train' command, you can override settings from the config     │\n│               as command line options. For instance, --training.batch_size 128 overrides         │\n│               the value of \"batch_size\" in the block \"\".                                         │\n│ diff-config   Show a diff of a config file with respect to spaCy's defaults or another config    │\n│               file. If                                                                           │\n│               additional settings were used in the creation of the config file, then you         │\n│               must supply these as extra parameters to the command when comparing to the default │\n│               settings. The generated diff                                                       │\n│               can also be used when posting to the discussion forum to provide more              │\n│               information for the maintainers.                                                   │\n│ model         Analyze a Thinc model implementation. Includes checks for internal structure       │\n│               and activations during training.                                                   │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "init": "                                                                                                    \n Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...                                            \n                                                                                                    \n Commands for initializing configs and pipeline packages.                                           \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ config        Generate a starter config file for training. Based on your requirements            │\n│               specified via the CLI arguments, this command generates a config with the          │\n│               optimal settings for your use case. This includes the choice of architecture,      │\n│               pretrained weights and related hyperparameters.                                    │\n│ fill-config   Fill partial config file with default values. Will add all missing settings        │\n│               from the default config and will create all objects, check the registered          │\n│               functions for their default values and update the base config. This command        │\n│               can be used with a config generated via the training quickstart widget:            │\n│               https://spacy.io/usage/training#quickstart                                         │\n│ vectors       Convert word vectors for use with spaCy. 
Will export an nlp object that            │\n│               you can use in the  block of your config to initialize                             │\n│               a model with vectors.                                                              │\n│ labels        Generate JSON files for the labels in the data. This helps speed up the            │\n│               training process, since spaCy won't have to preprocess the data to                 │\n│               extract the labels.                                                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n",
-    "project": "                                                                                                    \n Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...                                         \n                                                                                                    \n Command-line interface for spaCy projects and templates. You'd typically start by cloning a        \n project template to a local directory and fetching its assets like datasets etc. See the project's \n project.yml for the available commands.                                                            \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --help          Show this message and exit.                                                      │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ assets     Fetch project assets like datasets and pretrained weights. Assets are                 │\n│            defined in the \"assets\" section of the project.yml. If a checksum is                  │\n│            provided in the project.yml, the file is only downloaded if no local file             │\n│            with the same checksum exists.                                                        │\n│ clone      Clone a project template from a repository. Calls into \"git\" and will                 │\n│            only download the files from the given subdirectory. The GitHub repo                  │\n│            defaults to the official Weasel template repo, but can be customized                  │\n│            (including using a private repo).                                                     
│\n│ document   Auto-generate a README.md for a project. If the content is saved to a file,           │\n│            hidden markers are added so you can add custom content before or after the            │\n│            auto-generated section and only the auto-generated docs will be replaced              │\n│            when you re-run the command.                                                          │\n│ dvc        Auto-generate Data Version Control (DVC) config. A DVC                                │\n│            project can only define one pipeline, so you need to specify one workflow             │\n│            defined in the project.yml. If no workflow is specified, the first defined            │\n│            workflow is used. The DVC config will only be updated if the project.yml              │\n│            changed.                                                                              │\n│ run        Run a named command or workflow defined in the project.yml. If a workflow             │\n│            name is specified, all commands in the workflow are run, in order. If                 │\n│            commands define dependencies and/or outputs, they will only be re-run if              │\n│            state has changed.                                                                    │\n│ pull       Retrieve available precomputed outputs from a remote storage.                         │\n│            You can alias remotes in your project.yml by mapping them to storage paths.           │\n│            A storage can be anything that the smart_open library can upload to, e.g.             │\n│            AWS, Google Cloud Storage, SSH, local directories etc.                                │\n│ push       Persist outputs to a remote storage. You can alias remotes in your                    │\n│            project.yml by mapping them to storage paths. A storage can be anything that          │\n│            the smart_open library can upload to, e.g. 
AWS, Google Cloud Storage, SSH,            │\n│            local directories etc.                                                                │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
+    "benchmark": "Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...\n\n  Commands for benchmarking pipelines.\n\nOptions:\n  --help  Show this message and exit.\n\nCommands:\n  accuracy  Evaluate a trained pipeline.\n  speed     Benchmark a pipeline.\n",
+    "debug": "Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...\n\n  Suite of helpful commands for debugging and profiling. Includes commands to\n  check and validate your config files, training and evaluation data, and custom\n  model implementations.\n\nOptions:\n  --help  Show this message and exit.\n\nCommands:\n  data         Analyze, debug and validate your training and development data.\n  profile      Profile which functions take the most time in a spaCy pipeline.\n  config       Debug a config file and show validation errors.\n  diff-config  Show a diff of a config file with respect to spaCy's...\n  model        Analyze a Thinc model implementation.\n",
+    "init": "Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...\n\n  Commands for initializing configs and pipeline packages.\n\nOptions:\n  --help  Show this message and exit.\n\nCommands:\n  config       Generate a starter config file for training.\n  fill-config  Fill partial config file with default values.\n  vectors      Convert word vectors for use with spaCy.\n  labels       Generate JSON files for the labels in the data.\n",
+    "project": "Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...\n\n  Command-line interface for spaCy projects and templates. You'd typically start\n  by cloning a project template to a local directory and fetching its assets\n  like datasets etc. See the project's project.yml for the available commands.\n\nOptions:\n  --help  Show this message and exit.\n\nCommands:\n  assets    Fetch project assets like datasets and pretrained weights.\n  clone     Clone a project template from a repository.\n  document  Auto-generate a README.md for a project.\n  dvc       Auto-generate Data Version Control (DVC) config.\n  run       Run a named command or workflow defined in the project.yml.\n  pull      Retrieve available precomputed outputs from a remote storage.\n  push      Persist outputs to a remote storage.\n"
   },
   "hidden_group_commands": {
     "benchmark": [],
@@ -114,5 +114,5 @@
     "train",
     "validate"
   ],
-  "root_help": "                                                                                                    \n Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...                                                 \n                                                                                                    \n spaCy Command-line Interface                                                                       \n                                                                                                    \n DOCS: https://spacy.io/api/cli                                                                     \n                                                                                                    \n╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮\n│ --install-completion          Install completion for the current shell.                          │\n│ --show-completion             Show completion for the current shell, to copy it or customize the │\n│                               installation.                                                      │\n│ --help                        Show this message and exit.                                        │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────╮\n│ download         Download compatible trained pipeline from the default download path using       │\n│                  pip. If --direct flag is set, the command expects the full package name with    │\n│                  version. For direct downloads, the compatibility check will be skipped. All     │\n│                  additional arguments provided to this command will be passed to `pip install`   │\n│                  on package installation.                                                        
│\n│ info             Print info about spaCy installation. If a pipeline is specified as an argument, │\n│                  print its meta information. Flag --markdown prints details in Markdown for easy │\n│                  copy-pasting to GitHub issues.                                                  │\n│ apply            Apply a trained pipeline to documents to get predictions.                       │\n│                  Expects a loadable spaCy pipeline and path to the data, which                   │\n│                  can be a directory or a file.                                                   │\n│                  The data files can be provided in multiple formats:                             │\n│                      1. .spacy files                                                             │\n│                      2. .jsonl files with a specified \"field\" to read the text from.             │\n│                      3. Files with any other extension are assumed to be containing              │\n│                         a single document.                                                       │\n│                  DOCS: https://spacy.io/api/cli#apply                                            │\n│ assemble         Assemble a spaCy pipeline from a config file. The config file includes          │\n│                  all settings for initializing the pipeline. To override settings in the         │\n│                  config, e.g. settings that point to local paths or that you want to             │\n│                  experiment with, you can override them as command line options. The             │\n│                  --code argument lets you pass in a Python file that can be used to              │\n│                  register custom functions that are referenced in the config.                    │\n│ convert          Convert files into json or DocBin format for training. 
The resulting .spacy     │\n│                  file can be used with the train command and other experiment management         │\n│                  functions.                                                                      │\n│ evaluate         Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation   │\n│                  data in the binary .spacy format. The --gold-preproc option sets up the         │\n│                  evaluation examples with gold-standard sentences and tokens for the             │\n│                  predictions. Gold preprocessing helps the annotations align to the              │\n│                  tokenization, and may result in sequences of more consistent length. However,   │\n│                  it may reduce runtime accuracy due to train/test skew. To render a sample of    │\n│                  dependency parses in a HTML file, set as output directory as the                │\n│                  displacy_path argument.                                                         │\n│ find-function    Find the module, path and line number to the file the registered                │\n│                  function is defined in, if available.                                           │\n│ find-threshold   Runs prediction trials for a trained model with varying thresholds to maximize  │\n│                  the specified metric. The search space for the threshold is traversed linearly  │\n│                  from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`   │\n│                  (the corresponding API call to `spacy.cli.find_threshold.find_threshold()`      │\n│                  returns all results).                                                           │\n│ package          Generate an installable Python package for a pipeline. Includes binary data,    │\n│                  meta and required installation files. 
A new directory will be created in the    │\n│                  specified output directory, and the data will be copied over. If                │\n│                  --create-meta is set and a meta.json already exists in the output directory,    │\n│                  the existing values will be used as the defaults in the command-line prompt.    │\n│                  After packaging, \"python -m build --sdist\" is run in the package directory,     │\n│                  which will create a .tar.gz archive that can be installed via \"pip install\".    │\n│ pretrain         Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components,         │\n│                  using an approximate language-modelling objective. Two objective types          │\n│                  are available, vector-based and character-based.                                │\n│ train            Train or update a spaCy pipeline. Requires data in spaCy's binary format. To    │\n│                  convert data from other formats, use the `spacy convert` command. The           │\n│                  config file includes all settings and hyperparameters used during training.     │\n│                  To override settings in the config, e.g. settings that point to local           │\n│                  paths or that you want to experiment with, you can override them as             │\n│                  command line options. For instance, --training.batch_size 128 overrides         │\n│                  the value of \"batch_size\" in the block \"\". The --code argument                  │\n│                  lets you pass in a Python file that's imported before training. It can be       │\n│                  used to register custom functions and architectures that can then be            │\n│                  referenced in the config.                                                       │\n│ validate         Validate the currently installed pipeline packages and spaCy version. 
Checks    │\n│                  if the installed packages are compatible and shows upgrade instructions if      │\n│                  available. Should be run after `pip install -U spacy`.                          │\n│ debug            Suite of helpful commands for debugging and profiling. Includes                 │\n│                  commands to check and validate your config files, training and evaluation data, │\n│                  and custom model implementations.                                               │\n│ benchmark        Commands for benchmarking pipelines.                                            │\n│ init             Commands for initializing configs and pipeline packages.                        │\n│ project          Command-line interface for spaCy projects and templates.                        │\n│                  You'd typically start by cloning a project template to a local directory and    │\n│                  fetching its assets like datasets etc. See the project's project.yml for the    │\n│                  available commands.                                                             │\n╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n\n"
+  "root_help": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\n\n  spaCy Command-line Interface\n\n  DOCS: https://spacy.io/api/cli\n\nOptions:\n  --install-completion  Install completion for the current shell.\n  --show-completion     Show completion for the current shell, to copy it or\n                        customize the installation.\n  --help                Show this message and exit.\n\nCommands:\n  download        Download compatible trained pipeline from the default...\n  info            Print info about spaCy installation.\n  apply           Apply a trained pipeline to documents to get predictions.\n  assemble        Assemble a spaCy pipeline from a config file.\n  convert         Convert files into json or DocBin format for training.\n  evaluate        Evaluate a trained pipeline.\n  find-function   Find the module, path and line number to the file the...\n  find-threshold  Runs prediction trials for a trained model with varying...\n  package         Generate an installable Python package for a pipeline.\n  pretrain        Pre-train the 'token-to-vector' (tok2vec) layer of...\n  train           Train or update a spaCy pipeline.\n  validate        Validate the currently installed pipeline packages and...\n  debug           Suite of helpful commands for debugging and profiling.\n  benchmark       Commands for benchmarking pipelines.\n  init            Commands for initializing configs and pipeline packages.\n  project         Command-line interface for spaCy projects and templates.\n"
 }

From 1041f8b4269952e249863467754d4af9e01f771d Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Mon, 23 Mar 2026 15:53:13 +0100
Subject: [PATCH 41/42] Debug: dump manifest diff in test_manifest_is_current

---
 spacy/tests/test_cli_launcher.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/spacy/tests/test_cli_launcher.py b/spacy/tests/test_cli_launcher.py
index 1fd6e3a6fc6..931ec9100f7 100644
--- a/spacy/tests/test_cli_launcher.py
+++ b/spacy/tests/test_cli_launcher.py
@@ -52,9 +52,12 @@ def test_manifest_is_current():
         [
             sys.executable,
             "-c",
+            "import json; "
             "from spacy_cli.build_manifest import build_manifest; "
             "from spacy_cli.static import load_manifest; "
-            "assert build_manifest() == load_manifest()",
+            "b, l = build_manifest(), load_manifest(); "
+            "diffs = {k: (str(b[k])[:80], str(l[k])[:80]) for k in b if b[k] != l[k]}; "
+            "assert b == l, json.dumps(diffs, indent=2)",
         ],
         capture_output=True,
         text=True,

From 66b1691dc2e7dc35fc6ee800fb3438ce634375c9 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Mon, 23 Mar 2026 16:12:02 +0100
Subject: [PATCH 42/42] Debug: show per-key manifest diffs in
 test_manifest_is_current

---
 spacy/tests/test_cli_launcher.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/spacy/tests/test_cli_launcher.py b/spacy/tests/test_cli_launcher.py
index 931ec9100f7..c9af62a509d 100644
--- a/spacy/tests/test_cli_launcher.py
+++ b/spacy/tests/test_cli_launcher.py
@@ -56,7 +56,12 @@ def test_manifest_is_current():
             "from spacy_cli.build_manifest import build_manifest; "
             "from spacy_cli.static import load_manifest; "
             "b, l = build_manifest(), load_manifest(); "
-            "diffs = {k: (str(b[k])[:80], str(l[k])[:80]) for k in b if b[k] != l[k]}; "
+            "diffs = {}; "
+            "[diffs.update({f'{k}.{sk}': (repr(b[k][sk])[:120], repr(l[k][sk])[:120])}) "
+            "for k in b if isinstance(b[k], dict) and b[k] != l[k] "
+            "for sk in b[k] if b[k].get(sk) != l[k].get(sk)]; "
+            "[diffs.update({k: (repr(b[k])[:120], repr(l[k])[:120])}) "
+            "for k in b if not isinstance(b[k], dict) and b[k] != l[k]]; "
             "assert b == l, json.dumps(diffs, indent=2)",
         ],
         capture_output=True,