diff --git a/.github/workflows/build_and_release.yml b/.github/workflows/build_and_release.yml
index 96a8ea2f4a..8ae45539dc 100644
--- a/.github/workflows/build_and_release.yml
+++ b/.github/workflows/build_and_release.yml
@@ -136,6 +136,17 @@ jobs:
         with:
           attestations: false
           packages-dir: .github/.internal_dspyai/dist/
+      # Publish to dspy-runtime (minimal-dependency build from the same source tree)
+      - name: Update version in pyproject-runtime.toml
+        run: sed -i '/#replace_package_version_marker/{n;s/version *= *"[^"]*"/version="${{ needs.extract-tag.outputs.version }}"/;}' pyproject-runtime.toml
+      - name: Build dspy-runtime distribution
+        run: |
+          rm -rf dist
+          bash scripts/build_dspy_runtime.sh
+      - name: Publish distribution 📦 to PyPI (dspy-runtime)
+        uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # release/v1
+        with:
+          attestations: false
       - uses: stefanzweifel/git-auto-commit-action@04702edda442b2e678b25b537cec683a1493fcb9 # v5 # auto commit changes to release branch
         with:
           commit_message: Update versions
diff --git a/dspy/adapters/json_adapter.py b/dspy/adapters/json_adapter.py
index 59a3b6f563..d799876167 100644
--- a/dspy/adapters/json_adapter.py
+++ b/dspy/adapters/json_adapter.py
@@ -4,7 +4,6 @@
 
 import json_repair
 import pydantic
-import regex
 from pydantic.fields import FieldInfo
 
 from dspy.adapters.chat_adapter import ChatAdapter, FieldInfoWithName
@@ -146,6 +145,8 @@ def format_assistant_message_content(
         return self.format_field_with_value(fields_with_values, role="assistant")
 
     def parse(self, signature: type[Signature], completion: str) -> dict[str, Any]:
+        import regex
+
         fields = json_repair.loads(completion)
 
         if not isinstance(fields, dict):
diff --git a/dspy/clients/_litellm.py b/dspy/clients/_litellm.py
index 00955ec835..90eb9c715c 100644
--- a/dspy/clients/_litellm.py
+++ b/dspy/clients/_litellm.py
@@ -1,6 +1,8 @@
 import logging
 import os
 
+from dspy.utils.lazy_import import require
+
 _litellm = None
 
 
@@ -20,8 +22,8 @@ def get_litellm():
     if "LITELLM_LOCAL_MODEL_COST_MAP" not in os.environ:
         os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
 
-    import litellm
-    from litellm._logging import verbose_logger
+    litellm = require("litellm", extra="litellm", feature="dspy.LM")
+    verbose_logger = require("litellm._logging", extra="litellm", feature="dspy.LM").verbose_logger
 
     litellm.telemetry = False
     litellm.cache = None
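Reviewer note: a call site consumes the accessor like this, paying the import cost once on first use. A minimal sketch: `get_litellm()` comes from this diff, while the `ask` helper itself is hypothetical.

```python
from dspy.clients._litellm import get_litellm


def ask(model: str, prompt: str) -> str:
    # The first call performs the real `import litellm`; later calls are cheap,
    # since the module is already cached.
    litellm = get_litellm()
    response = litellm.completion(model=model, messages=[{"role": "user", "content": prompt}])
    return response.choices[0].message.content
```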
diff --git a/dspy/clients/openai.py b/dspy/clients/openai.py
index 1eda00d05a..ddba6f291c 100644
--- a/dspy/clients/openai.py
+++ b/dspy/clients/openai.py
@@ -2,10 +2,13 @@
 from datetime import datetime
 from typing import Any
 
-import openai
-
 from dspy.clients.provider import Provider, TrainingJob
 from dspy.clients.utils_finetune import TrainDataFormat, TrainingStatus, save_data
+from dspy.utils.lazy_import import require
+
+
+def _openai():
+    return require("openai", extra="full", feature="the OpenAI finetuning provider")
 
 
 class TrainingJobOpenAI(TrainingJob):
@@ -22,13 +25,13 @@ def cancel(self):
             err_msg = "Jobs that are complete cannot be canceled."
             err_msg += f" Job with ID {self.provider_job_id} is done."
             raise Exception(err_msg)
-        openai.fine_tuning.jobs.cancel(self.provider_job_id)
+        _openai().fine_tuning.jobs.cancel(self.provider_job_id)
         self.provider_job_id = None
 
         # Delete the provider file
         if self.provider_file_id is not None:
             if OpenAIProvider.does_file_exist(self.provider_file_id):
-                openai.files.delete(self.provider_file_id)
+                _openai().files.delete(self.provider_file_id)
             self.provider_file_id = None
 
         # Call the super's cancel method after the custom cancellation logic
@@ -104,7 +107,7 @@ def does_job_exist(job_id: str) -> bool:
         try:
             # TODO(nit): This call may fail for other reasons. We should check
            # the error message to ensure that the job does not exist.
-            openai.fine_tuning.jobs.retrieve(job_id)
+            _openai().fine_tuning.jobs.retrieve(job_id)
             return True
         except Exception:
             return False
@@ -114,7 +117,7 @@ def does_file_exist(file_id: str) -> bool:
         try:
             # TODO(nit): This call may fail for other reasons. We should check
             # the error message to ensure that the file does not exist.
-            openai.files.retrieve(file_id)
+            _openai().files.retrieve(file_id)
             return True
         except Exception:
             return False
@@ -147,7 +150,7 @@ def get_training_status(job_id: str) -> TrainingStatus:
         assert OpenAIProvider.does_job_exist(job_id), err_msg
 
         # Retrieve the provider's job and report the status
-        provider_job = openai.fine_tuning.jobs.retrieve(job_id)
+        provider_job = _openai().fine_tuning.jobs.retrieve(job_id)
         provider_status = provider_job.status
         status = provider_status_to_training_status[provider_status]
 
@@ -166,7 +169,7 @@ def validate_data_format(data_format: TrainDataFormat):
     @staticmethod
     def upload_data(data_path: str) -> str:
         # Upload the data to the provider
-        provider_file = openai.files.create(
+        provider_file = _openai().files.create(
             file=open(data_path, "rb"),
             purpose="fine-tune",
         )
@@ -175,7 +178,7 @@ def _start_remote_training(train_file_id: str, model: str, train_kwargs: dict[str, Any] | None = None) -> str:
         train_kwargs = train_kwargs or {}
-        provider_job = openai.fine_tuning.jobs.create(
+        provider_job = _openai().fine_tuning.jobs.create(
             model=model,
             training_file=train_file_id,
             hyperparameters=train_kwargs,
@@ -194,7 +197,7 @@ def wait_for_job(
         while not done:
             # Report estimated time if not already reported
             if not reported_estimated_time:
-                remote_job = openai.fine_tuning.jobs.retrieve(job.provider_job_id)
+                remote_job = _openai().fine_tuning.jobs.retrieve(job.provider_job_id)
                 timestamp = remote_job.estimated_finish
                 if timestamp:
                     estimated_finish_dt = datetime.fromtimestamp(timestamp)
@@ -203,7 +206,7 @@
                 reported_estimated_time = True
 
             # Get new events
-            page = openai.fine_tuning.jobs.list_events(fine_tuning_job_id=job.provider_job_id, limit=1)
+            page = _openai().fine_tuning.jobs.list_events(fine_tuning_job_id=job.provider_job_id, limit=1)
             new_event = page.data[0] if page.data else None
             if new_event and new_event.id != cur_event_id:
                 dt = datetime.fromtimestamp(new_event.created_at)
@@ -222,6 +225,6 @@ def get_trained_model(job):
             err_msg += f" Must be {TrainingStatus.succeeded} to retrieve model."
             raise Exception(err_msg)
 
-        provider_job = openai.fine_tuning.jobs.retrieve(job.provider_job_id)
+        provider_job = _openai().fine_tuning.jobs.retrieve(job.provider_job_id)
         finetuned_model = provider_job.fine_tuned_model
         return finetuned_model
diff --git a/dspy/dsp/utils/dpr.py b/dspy/dsp/utils/dpr.py
index e9fc27813e..8db2cc769f 100644
--- a/dspy/dsp/utils/dpr.py
+++ b/dspy/dsp/utils/dpr.py
@@ -8,8 +8,6 @@
 import logging
 import unicodedata
 
-import regex
-
 logger = logging.getLogger(__name__)
 
 
@@ -157,6 +155,8 @@ def __init__(self, **kwargs):
         Args:
             annotators: None or empty set (only tokenizes).
         """
+        import regex
+
         self._regexp = regex.compile(
             "(%s)|(%s)" % (self.ALPHA_NUM, self.NON_WS),
             flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE,
@@ -225,11 +225,18 @@ def locate_answers(tokenized_answers, text):
     return occurrences
 
 
-STokenizer = SimpleTokenizer()
+_STokenizer = None
+
+
+def _get_stokenizer():
+    global _STokenizer
+    if _STokenizer is None:
+        _STokenizer = SimpleTokenizer()
+    return _STokenizer
 
 
 def DPR_tokenize(text):  # noqa: N802
-    return STokenizer.tokenize(unicodedata.normalize("NFD", text))
+    return _get_stokenizer().tokenize(unicodedata.normalize("NFD", text))
 
 
 def DPR_normalize(text):  # noqa: N802
diff --git a/dspy/streaming/streaming_listener.py b/dspy/streaming/streaming_listener.py
index 3cfc6a2234..750b5d1f24 100644
--- a/dspy/streaming/streaming_listener.py
+++ b/dspy/streaming/streaming_listener.py
@@ -4,8 +4,6 @@
 from queue import Queue
 from typing import TYPE_CHECKING, Any
 
-import jiter
-
 from dspy.adapters.chat_adapter import ChatAdapter
 from dspy.adapters.json_adapter import JSONAdapter
 from dspy.adapters.types import Type
@@ -245,6 +243,8 @@ def _json_adapter_handle_stream_chunk(self, token: str, chunk_message: str) -> S
             # If the parse doesn't raise an error, that means the accumulated tokens is a valid json object. Because
             # we add an extra "{" to the beginning of the field_accumulated_messages, so we know the streaming is
             # finished.
+            import jiter
+
             jiter.from_json(self.json_adapter_state["field_accumulated_messages"].encode("utf-8"))
             self.stream_end = True
             last_token = self.flush()
@@ -259,6 +259,8 @@ def _json_adapter_handle_stream_chunk(self, token: str, chunk_message: str) -> S
             pass
 
         try:
+            import jiter
+
             parsed = jiter.from_json(
                 self.json_adapter_state["field_accumulated_messages"].encode("utf-8"),
                 partial_mode="trailing-strings",
diff --git a/dspy/teleprompt/gepa/gepa.py b/dspy/teleprompt/gepa/gepa.py
index 41fd6de584..a18bec14da 100644
--- a/dspy/teleprompt/gepa/gepa.py
+++ b/dspy/teleprompt/gepa/gepa.py
@@ -2,11 +2,12 @@
 import logging
 import random
 from dataclasses import dataclass
-from typing import Any, Literal, Optional, Protocol, Union
+from typing import TYPE_CHECKING, Any, Literal, Optional, Protocol, Union
 
-from gepa import GEPAResult
-from gepa.core.adapter import ProposalFn
-from gepa.proposer.reflective_mutation.base import ReflectionComponentSelector
+if TYPE_CHECKING:
+    from gepa import GEPAResult
+    from gepa.core.adapter import ProposalFn
+    from gepa.proposer.reflective_mutation.base import ReflectionComponentSelector
 
 from dspy.clients.lm import LM
 from dspy.primitives import Example, Module, Prediction
@@ -491,7 +492,11 @@ def compile(
         - trainset: The training set to use for reflective updates.
         - valset: The validation set to use for tracking Pareto scores. If not provided, GEPA will use the trainset for both.
         """
-        from gepa import GEPAResult, optimize
+        from dspy.utils.lazy_import import require
+
+        gepa = require("gepa", extra="gepa", feature="dspy.GEPA")
+        GEPAResult = gepa.GEPAResult
+        optimize = gepa.optimize
 
         from dspy.teleprompt.gepa.gepa_utils import DspyAdapter, LoggerAdapter
@@ -575,7 +580,7 @@ def feedback_fn(
         # Build the seed candidate: map each predictor name to its current instruction
         seed_candidate = {name: pred.signature.instructions for name, pred in student.named_predictors()}
 
-        gepa_result: GEPAResult = optimize(
+        gepa_result: "GEPAResult" = optimize(
             seed_candidate=seed_candidate,
             trainset=trainset,
             valset=valset,
diff --git a/dspy/teleprompt/gepa/gepa_utils.py b/dspy/teleprompt/gepa/gepa_utils.py
index dae7157feb..45e818ebdd 100644
--- a/dspy/teleprompt/gepa/gepa_utils.py
+++ b/dspy/teleprompt/gepa/gepa_utils.py
@@ -1,10 +1,6 @@
 import logging
 import random
-from typing import Any, Callable, Protocol, TypedDict
-
-from gepa import EvaluationBatch, GEPAAdapter
-from gepa.core.adapter import ProposalFn
-from gepa.strategies.instruction_proposal import InstructionProposalSignature
+from typing import TYPE_CHECKING, Any, Callable, Protocol, TypedDict
 
 import dspy
 from dspy.adapters.chat_adapter import ChatAdapter
@@ -13,10 +9,31 @@
 from dspy.evaluate import Evaluate
 from dspy.primitives import Example, Prediction
 from dspy.teleprompt.bootstrap_trace import FailedPrediction, TraceData
+from dspy.utils.lazy_import import optional, require
+
+if TYPE_CHECKING:
+    from gepa import EvaluationBatch, GEPAAdapter
+    from gepa.core.adapter import ProposalFn
 
 logger = logging.getLogger(__name__)
 
 
+def _require_gepa():
+    require("gepa", extra="gepa", feature="dspy.GEPA")
+
+
+def _get_gepa_adapter_base():
+    """Return the GEPAAdapter base class, or `object` if gepa is not installed.
+
+    Returning `object` lets ``DspyAdapter`` be defined at import time without gepa;
+    actual use is gated by ``_require_gepa()`` inside methods that touch gepa internals.
+    """
+    GEPAAdapter = optional("gepa", "GEPAAdapter")
+    if GEPAAdapter is None:
+        return object
+    return GEPAAdapter[Example, "TraceData", Prediction]
+
+
 class LoggerAdapter:
     def __init__(self, logger: logging.Logger):
         self.logger = logger
@@ -74,7 +91,7 @@ def __call__(
         ...
 
 
-class DspyAdapter(GEPAAdapter[Example, TraceData, Prediction]):
+class DspyAdapter(_get_gepa_adapter_base()):
     def __init__(
         self,
         student_module,
@@ -117,6 +134,8 @@ def propose_new_texts(
                 components_to_update=components_to_update,
             )
 
+        from gepa.strategies.instruction_proposal import InstructionProposalSignature
+
         results: dict[str, str] = {}
 
         with dspy.context(lm=reflection_lm):
@@ -143,6 +162,8 @@ def build_program(self, candidate: dict[str, str]):
         return new_prog
 
     def evaluate(self, batch, candidate, capture_traces=False):
+        from gepa import EvaluationBatch
+
         program = self.build_program(candidate)
         callback_metadata = (
             {"metric_key": "eval_full"}
diff --git a/dspy/teleprompt/gepa/instruction_proposal.py b/dspy/teleprompt/gepa/instruction_proposal.py
index 9a0e26d35f..5eb354f946 100644
--- a/dspy/teleprompt/gepa/instruction_proposal.py
+++ b/dspy/teleprompt/gepa/instruction_proposal.py
@@ -1,15 +1,22 @@
 import logging
-from typing import Any
-
-from gepa.core.adapter import ProposalFn
+from typing import TYPE_CHECKING, Any
 
 import dspy
 from dspy.adapters.types.base_type import Type
 from dspy.teleprompt.gepa.gepa_utils import ReflectiveExample
+from dspy.utils.lazy_import import optional
+
+if TYPE_CHECKING:
+    from gepa.core.adapter import ProposalFn
 
 logger = logging.getLogger(__name__)
 
 
+def _get_proposal_fn_base():
+    """Return ProposalFn base class, or `object` if gepa is not installed."""
+    return optional("gepa.core.adapter", "ProposalFn", default=object)
+
+
 class GenerateEnhancedMultimodalInstructionFromFeedback(dspy.Signature):
     """I provided an assistant with instructions to perform a task involving visual content,
     but the assistant's performance needs improvement based on the examples and feedback below.
@@ -269,7 +276,7 @@ def _create_multimodal_examples(self, formatted_text: str, image_map: dict[int,
         return multimodal_content
 
 
-class MultiModalInstructionProposer(ProposalFn):
+class MultiModalInstructionProposer(_get_proposal_fn_base()):
     """GEPA-compatible multimodal instruction proposer.
 
     This class handles multimodal inputs (like dspy.Image) during GEPA optimization by using
diff --git a/dspy/utils/lazy_import.py b/dspy/utils/lazy_import.py
new file mode 100644
index 0000000000..69f2a660b2
--- /dev/null
+++ b/dspy/utils/lazy_import.py
@@ -0,0 +1,54 @@
+"""Lazy-import helpers for optional dependencies.
+
+DSPy ships in two flavors with different hard-dependency sets (`dspy` and
+`dspy-runtime`). Optional deps must be importable lazily so that `import dspy`
+succeeds even when they are absent, and call sites must raise a clear,
+actionable ImportError when the dep really is needed.
+"""
+
+import importlib
+from typing import Any
+
+
+def require(module: str, *, extra: str | None = None, feature: str | None = None) -> Any:
+    """Import a module by dotted path; raise a friendly ImportError if missing.
+
+    Use at call sites where an optional dependency is needed to perform an action.
+
+    Args:
+        module: Dotted module path (e.g. ``"litellm"`` or ``"gepa.core.adapter"``).
+            The top-level segment is shown to the user.
+        extra: Name of the dspy extra that pulls in this dep. Defaults to the
+            top-level module name.
+        feature: Short feature label included in the error (e.g. ``"dspy.LM"``).
+            Defaults to ``"this feature"``.
+
+    Returns:
+        The imported module.
+    """
+    try:
+        return importlib.import_module(module)
+    except ImportError as e:
+        top = module.split(".", 1)[0]
+        feat = feature or "this feature"
+        ext = extra or top
+        raise ImportError(
+            f"{top} is required to use {feat}. "
+            f"Install with `pip install dspy[{ext}]` or `pip install {top}`."
+        ) from e
+
+
+def optional(module: str, attr: str | None = None, default: Any = None) -> Any:
+    """Try to import a module (and optionally one attribute). Return ``default`` if missing.
+
+    Use at module load time when a class needs to inherit from a base provided by
+    an optional dep: returning a sentinel (typically ``object``) lets the class be
+    defined even when the dep is absent. Gate actual use behind ``require()``.
+    """
+    try:
+        mod = importlib.import_module(module)
+    except ImportError:
+        return default
+    if attr is None:
+        return mod
+    return getattr(mod, attr, default)
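Reviewer note: the two helpers split along call-time vs. import-time, matching the call sites above. A minimal sketch of both (helper names and signatures are from this diff; the surrounding code is illustrative):

```python
from dspy.utils.lazy_import import optional, require

# Call-time: raises an actionable ImportError when litellm is absent, e.g.
#   "litellm is required to use dspy.LM. Install with `pip install dspy[litellm]`
#    or `pip install litellm`."
litellm = require("litellm", extra="litellm", feature="dspy.LM")

# Import-time: resolve an optional base class, falling back to `object` so the
# defining module still imports; real use stays gated behind require() in methods.
ProposalFn = optional("gepa.core.adapter", "ProposalFn", default=object)


class MyProposer(ProposalFn):
    ...
```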
diff --git a/pyproject-runtime.toml b/pyproject-runtime.toml
new file mode 100644
index 0000000000..629ae76bed
--- /dev/null
+++ b/pyproject-runtime.toml
@@ -0,0 +1,69 @@
+[build-system]
+requires = ["setuptools>=77.0.1"]
+build-backend = "setuptools.build_meta"
+
+[project]
+# Do not add spaces around the '=' sign for any of the fields
+# preceded by a marker comment as it affects the publish workflow.
+#replace_package_name_marker
+name="dspy-runtime"
+#replace_package_version_marker
+version="3.2.1"
+description = "DSPy runtime: minimal-dependency build of DSPy for production deployments. Same code as dspy, fewer hard dependencies."
+readme = "README.md"
+authors = [{ name = "Omar Khattab", email = "okhattab@stanford.edu" }]
+license = {file = "LICENSE"}
+requires-python = ">=3.10, <3.15"
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Science/Research",
+    "Operating System :: POSIX :: Linux",
+    "Programming Language :: Python :: 3"
+]
+# Minimal runtime dependencies only. Heavier deps (litellm, gepa, optuna, etc.) are optional extras.
+# numpy, cachetools, and requests are scheduled to become optional via separate work; they remain
+# hard deps here for now so `import dspy` works out of the box.
+dependencies = [
+    "pydantic>=2.0",
+    "orjson>=3.9.0",
+    "cloudpickle>=3.1.2",
+    "anyio",
+    "tqdm>=4.66.1",
+    "diskcache>=5.6.0",
+    "json-repair>=0.54.2",
+    "tenacity>=8.2.3",
+    "jsonschema>=4.0",
+    "jiter>=0.4.0",
+    # Transitional: scheduled to be made optional separately.
+    "numpy>=1.26.0",
+    "cachetools>=5.5.0",
+    "requests>=2.31.0",
+    "regex>=2023.10.3",
+]
+
+[project.optional-dependencies]
+litellm = ["litellm>=1.64.0"]
+openai = ["openai>=0.28.1"]
+gepa = ["gepa[dspy]==0.1.1"]
+optuna = ["optuna>=3.4.0"]
+mcp = ["mcp; python_version >= '3.10'"]
+langchain = ["langchain_core"]
+weaviate = ["weaviate-client~=4.5.4"]
+anthropic = ["anthropic>=0.18.0,<1.0.0"]
+# Aggregate matching the dspy hard-dep set.
+full = [
+    "litellm>=1.64.0",
+    "openai>=0.28.1",
+    "gepa[dspy]==0.1.1",
+]
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["dspy", "dspy.*"]
+exclude = ["tests", "tests.*"]
+
+[tool.setuptools.package-data]
+dspy = ["primitives/*.js"]
+
+[project.urls]
+homepage = "https://github.com/stanfordnlp/dspy"
diff --git a/scripts/build_dspy_runtime.sh b/scripts/build_dspy_runtime.sh
new file mode 100755
index 0000000000..d630aa39b8
--- /dev/null
+++ b/scripts/build_dspy_runtime.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+# Build the dspy-runtime distribution from the same source tree as dspy.
+#
+# Swaps `pyproject-runtime.toml` into place as `pyproject.toml` for the duration
+# of the build, then restores the original. Outputs land in `dist/`.
+
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "$ROOT_DIR"
+
+if [[ ! -f pyproject-runtime.toml ]]; then
+    echo "error: pyproject-runtime.toml not found at $ROOT_DIR" >&2
+    exit 1
+fi
+
+BACKUP_DIR="$(mktemp -d)"
+trap 'mv -f "$BACKUP_DIR/pyproject.toml" pyproject.toml 2>/dev/null || true; rm -rf "$BACKUP_DIR"' EXIT
+
+cp pyproject.toml "$BACKUP_DIR/pyproject.toml"
+cp pyproject-runtime.toml pyproject.toml
+
+"${PYTHON:-python3}" -m build "$@"
diff --git a/tests/clients/test_lazy_imports.py b/tests/clients/test_lazy_imports.py
new file mode 100644
index 0000000000..96157f99e1
--- /dev/null
+++ b/tests/clients/test_lazy_imports.py
@@ -0,0 +1,40 @@
+import subprocess
+import sys
+
+import pytest
+
+
+def _check_module_not_loaded(module_name: str) -> None:
+    result = subprocess.run(
+        [sys.executable, "-c", f"import dspy, sys; print('{module_name}' in sys.modules)"],
+        capture_output=True,
+        text=True,
+    )
+    assert result.returncode == 0, f"subprocess failed: {result.stderr}"
+    assert result.stdout.strip() == "False", (
+        f"{module_name!r} was imported during `import dspy`. "
+        "Heavy / optional dependencies must be imported lazily inside functions, "
+        "not at module top level."
+    )
+
+
+def test_import_dspy_does_not_load_litellm():
+    """Ensure `import dspy` does not eagerly import litellm.
+
+    litellm adds ~400-550ms to import time. It should only be loaded when
+    actually needed (e.g. on the first LM() call), not at `import dspy` time.
+    If this test fails, someone likely added a module-level `import litellm`
+    in a module that is transitively imported by dspy/__init__.py.
+    """
+    _check_module_not_loaded("litellm")
+
+
+@pytest.mark.parametrize("module_name", ["openai", "regex", "jiter"])
+def test_import_dspy_does_not_load_optional_extras(module_name):
+    """Ensure `import dspy` does not eagerly import dspy-runtime optional extras.
+
+    These deps are optional in dspy-runtime (or slated to become so); loading
+    them eagerly during `import dspy` would break dspy-runtime users. Use lazy
+    imports inside functions/methods instead.
+    """
+    _check_module_not_loaded(module_name)
diff --git a/tests/clients/test_litellm_lazy.py b/tests/clients/test_litellm_lazy.py
deleted file mode 100644
index a1530ff2d6..0000000000
--- a/tests/clients/test_litellm_lazy.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import subprocess
-import sys
-
-
-def test_import_dspy_does_not_load_litellm():
-    """Ensure `import dspy` does not eagerly import litellm.
-
-    litellm adds ~400-550ms to import time. It should only be loaded when
-    actually needed (e.g. on the first LM() call), not at `import dspy` time.
-    If this test fails, someone likely added a module-level `import litellm`
-    in a module that is transitively imported by dspy/__init__.py.
-    """
-    result = subprocess.run(
-        [sys.executable, "-c", "import dspy, sys; print('litellm' in sys.modules)"],
-        capture_output=True,
-        text=True,
-    )
-    assert result.returncode == 0, f"subprocess failed: {result.stderr}"
-    assert result.stdout.strip() == "False", (
-        "litellm was imported during `import dspy`. "
-        "Use `from dspy.clients._litellm import get_litellm` and call get_litellm() "
-        "inside functions instead of `import litellm` at module level."
-    )
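Reviewer note: if numpy, cachetools, and requests are later made optional (as the pyproject-runtime comments anticipate), the `_check_module_not_loaded` helper extends directly. A hypothetical follow-up test for tests/clients/test_lazy_imports.py, not part of this diff:

```python
@pytest.mark.parametrize("module_name", ["numpy", "cachetools", "requests"])
def test_import_dspy_does_not_load_transitional_deps(module_name):
    """Will hold only once the transitional hard deps become lazy imports."""
    _check_module_not_loaded(module_name)
```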