# Developer tasks for the bridgic-llms-litellm package.
#
# Every target below is a command name, not a file, so all of them are
# declared phony (including the internal _publish_* helpers that `publish`
# dispatches to via `$(MAKE) _publish_$(repo)`).
.PHONY: venv-collect test build publish _publish_btsk _publish_testpypi _publish_pypi

# The package name is taken from the directory that contains this Makefile.
package_name := $(notdir $(CURDIR))
# Target repository for `publish`; one of: btsk, testpypi, pypi.
repo ?= btsk

ROOT_DIR := $(shell git rev-parse --show-toplevel)
VERSION_CHECK := $(ROOT_DIR)/scripts/version_check.py
SET_CREDENTIALS := $(ROOT_DIR)/scripts/set_publish_credentials.sh

# Install this package in editable mode into the active environment.
venv-collect:
	@# printf instead of echo: whether echo expands "\n" differs between shells.
	@printf '\n==> Installing dependencies for [%s]...\n' "$(package_name)"
	@uv pip install -e .

test:
	@uv run -- pytest

# Build into a clean dist/ directory, using the project name from pyproject.toml.
build:
	@mkdir -p dist
	@rm -rf dist/*
	@package_name=$$(uv run python -c "import tomli; print(tomli.load(open('pyproject.toml', 'rb'))['project']['name'])") && \
	uv build --package "$$package_name" --out-dir dist

# Load credentials, check the version against the target repo, then publish.
# `.` is the POSIX spelling of `source`; recipes run under /bin/sh by default,
# where `source` is not guaranteed to exist.
publish:
	@. $(SET_CREDENTIALS) && \
	version=$$(uv run python -c "import tomli; print(tomli.load(open('pyproject.toml', 'rb'))['project']['version'])") && \
	uv run python $(VERSION_CHECK) --version "$$version" --repo "$(repo)" --package "$(package_name)" && \
	$(MAKE) _publish_$(repo)

_publish_btsk:
	@uv publish dist/* --index btsk-repo --config-file $(ROOT_DIR)/uv.toml

_publish_testpypi:
	@uv publish dist/* --index test-pypi --config-file $(ROOT_DIR)/uv.toml

_publish_pypi:
	@uv publish dist/* --config-file $(ROOT_DIR)/uv.toml
import warnings
from typing import List, Dict, Any, Optional

from typing_extensions import override
from pydantic import BaseModel

from bridgic.core.model import BaseLlm, RetryPolicyConfig, retryable_model_call
from bridgic.core.model.types import *
from bridgic.core.utils._collection import filter_dict, merge_dict, validate_required_params


class LiteLLMConfiguration(BaseModel):
    """
    Default request options for LiteLLM chat completions.

    Every field is optional and acts as a fallback: a value passed to
    ``chat`` / ``stream`` / ``achat`` / ``astream`` at call time takes
    precedence over the value stored here.
    """

    model: Optional[str] = None
    """Default model to use when a call-time ``model`` is not provided."""
    temperature: Optional[float] = None
    """Sampling temperature in [0, 2]. Higher is more random, lower is more deterministic."""
    top_p: Optional[float] = None
    """Nucleus sampling probability mass in (0, 1]. Alternative to temperature."""
    presence_penalty: Optional[float] = None
    """Penalize new tokens based on whether they appear so far. [-2.0, 2.0]."""
    frequency_penalty: Optional[float] = None
    """Penalize new tokens based on their frequency so far. [-2.0, 2.0]."""
    max_tokens: Optional[int] = None
    """Maximum number of tokens to generate for the completion."""
    stop: Optional[List[str]] = None
    """Up to 4 sequences where generation will stop."""


class LiteLLM(BaseLlm):
    """
    LiteLLM integration for Bridgic, providing access to 100+ LLM providers
    (OpenAI, Anthropic, Google, Groq, Together AI, AWS Bedrock, Azure, etc.)
    through a single unified interface.

    Uses provider-prefixed model names, e.g. ``openai/gpt-4o``,
    ``anthropic/claude-sonnet-4-6``, ``groq/llama-3.3-70b-versatile``.
    See https://docs.litellm.ai/docs/providers for the full provider list.

    API keys are read from environment variables automatically by LiteLLM
    (e.g. ``OPENAI_API_KEY``, ``ANTHROPIC_API_KEY``). You can also pass
    ``api_key`` explicitly to override.

    Parameters
    ----------
    api_key : str, optional
        API key for the underlying provider. When ``None``, no key is sent
        and LiteLLM reads the appropriate key from environment variables.
    api_base : str, optional
        Base URL for the API endpoint. Useful for LiteLLM proxy or custom
        endpoints. When ``None``, uses the provider's default endpoint.
    configuration : LiteLLMConfiguration, optional
        Default configuration. If ``None``, uses ``LiteLLMConfiguration()``.
    timeout : float, optional
        Request timeout in seconds. When ``None``, no ``timeout`` argument is
        sent, so LiteLLM falls back to its own default.

    Examples
    --------
    Basic usage for chat completion:

    ```python
    llm = LiteLLM()
    messages = [Message.from_text("Hello!", role=Role.USER)]
    response = llm.chat(messages=messages, model="openai/gpt-4o")
    ```

    Using a different provider:

    ```python
    llm = LiteLLM(api_key="sk-ant-...")
    response = llm.chat(
        messages=[Message.from_text("Hello!", role=Role.USER)],
        model="anthropic/claude-sonnet-4-6",
    )
    ```
    """

    api_key: Optional[str]
    api_base: Optional[str]
    configuration: LiteLLMConfiguration
    timeout: Optional[float]

    def __init__(
        self,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        configuration: Optional[LiteLLMConfiguration] = None,
        timeout: Optional[float] = None,
    ):
        self.api_key = api_key
        self.api_base = api_base
        self.configuration = configuration or LiteLLMConfiguration()
        self.timeout = timeout

    @retryable_model_call(RetryPolicyConfig())
    def chat(
        self,
        messages: List[Message],
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        stop: Optional[List[str]] = None,
        **kwargs,
    ) -> Response:
        """
        Send a synchronous chat completion request via LiteLLM.

        Parameters
        ----------
        messages : list[Message]
            Conversation messages.
        model : str, optional
            LiteLLM model string (e.g. ``openai/gpt-4o``). Required unless
            provided in ``configuration.model``.
        temperature : float, optional
            Sampling temperature in [0, 2].
        top_p : float, optional
            Nucleus sampling probability mass in (0, 1].
        presence_penalty : float, optional
            Penalize new tokens based on prior appearance. [-2.0, 2.0].
        frequency_penalty : float, optional
            Penalize new tokens based on frequency. [-2.0, 2.0].
        max_tokens : int, optional
            Maximum tokens to generate.
        stop : list[str], optional
            Up to 4 sequences where generation will stop.
        **kwargs
            Additional keyword arguments forwarded to ``litellm.completion``.

        Returns
        -------
        Response
            Bridgic response containing the generated message and raw API response.
        """
        # Resolved at call time rather than at module import.
        import litellm

        params = self._build_parameters(
            messages=messages,
            model=model,
            temperature=temperature,
            top_p=top_p,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            max_tokens=max_tokens,
            stop=stop,
            **kwargs,
        )
        validate_required_params(params, ["messages", "model"])

        response = litellm.completion(**params)
        return self._handle_response(response)

    def stream(
        self,
        messages: List[Message],
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        stop: Optional[List[str]] = None,
        **kwargs,
    ) -> StreamResponse:
        """
        Stream a chat completion response incrementally via LiteLLM.

        Parameters
        ----------
        messages : list[Message]
            Conversation messages.
        model : str, optional
            LiteLLM model string (e.g. ``openai/gpt-4o``).
        temperature, top_p, presence_penalty, frequency_penalty, max_tokens, stop
            See ``chat`` for details.
        **kwargs
            Additional keyword arguments forwarded to ``litellm.completion``.

        Yields
        ------
        MessageChunk
            Delta chunks as they arrive from the provider. Chunks that carry
            no text yield an empty ``delta``.
        """
        import litellm

        params = self._build_parameters(
            messages=messages,
            model=model,
            temperature=temperature,
            top_p=top_p,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            max_tokens=max_tokens,
            stop=stop,
            stream=True,
            **kwargs,
        )
        validate_required_params(params, ["messages", "model", "stream"])

        response = litellm.completion(**params)
        for chunk in response:
            yield MessageChunk(delta=self._delta_text(chunk), raw=chunk)

    @retryable_model_call(RetryPolicyConfig())
    async def achat(
        self,
        messages: List[Message],
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        stop: Optional[List[str]] = None,
        **kwargs,
    ) -> Response:
        """
        Send an asynchronous chat completion request via LiteLLM.

        Parameters
        ----------
        messages : list[Message]
            Conversation messages.
        model : str, optional
            LiteLLM model string (e.g. ``openai/gpt-4o``).
        temperature, top_p, presence_penalty, frequency_penalty, max_tokens, stop
            See ``chat`` for details.
        **kwargs
            Additional keyword arguments forwarded to ``litellm.acompletion``.

        Returns
        -------
        Response
            Bridgic response containing the generated message and raw API response.
        """
        import litellm

        params = self._build_parameters(
            messages=messages,
            model=model,
            temperature=temperature,
            top_p=top_p,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            max_tokens=max_tokens,
            stop=stop,
            **kwargs,
        )
        validate_required_params(params, ["messages", "model"])

        response = await litellm.acompletion(**params)
        return self._handle_response(response)

    async def astream(
        self,
        messages: List[Message],
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        stop: Optional[List[str]] = None,
        **kwargs,
    ) -> AsyncStreamResponse:
        """
        Stream a chat completion response asynchronously via LiteLLM.

        Parameters
        ----------
        messages : list[Message]
            Conversation messages.
        model : str, optional
            LiteLLM model string (e.g. ``openai/gpt-4o``).
        temperature, top_p, presence_penalty, frequency_penalty, max_tokens, stop
            See ``chat`` for details.
        **kwargs
            Additional keyword arguments forwarded to ``litellm.acompletion``.

        Yields
        ------
        MessageChunk
            Delta chunks as they arrive from the provider. Chunks that carry
            no text yield an empty ``delta``.
        """
        import litellm

        params = self._build_parameters(
            messages=messages,
            model=model,
            temperature=temperature,
            top_p=top_p,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            max_tokens=max_tokens,
            stop=stop,
            stream=True,
            **kwargs,
        )
        validate_required_params(params, ["messages", "model", "stream"])

        response = await litellm.acompletion(**params)
        async for chunk in response:
            yield MessageChunk(delta=self._delta_text(chunk), raw=chunk)

    @staticmethod
    def _delta_text(chunk) -> str:
        """Return the text delta of a streaming chunk, or ``""`` if it has none."""
        # A chunk may arrive with an empty ``choices`` list or a ``None`` delta.
        if not chunk.choices:
            return ""
        return chunk.choices[0].delta.content or ""

    def _build_parameters(
        self,
        messages: List[Message],
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        stop: Optional[List[str]] = None,
        stream: Optional[bool] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """
        Assemble the keyword arguments for ``litellm.completion`` / ``acompletion``.

        Configuration defaults are merged with the call-time values, and
        entries whose value is ``None`` are then filtered out. The instance's
        ``api_key``, ``api_base`` and ``timeout`` are added only when the
        caller did not already supply them through ``**kwargs``, so a
        call-time value is never silently replaced.

        Returns
        -------
        dict[str, Any]
            Parameters ready to be splatted into the LiteLLM call.
        """
        msgs = [self._convert_message(msg) for msg in messages]
        merge_params = merge_dict(self.configuration.model_dump(), {
            "messages": msgs,
            "model": model,
            "temperature": temperature,
            "top_p": top_p,
            "presence_penalty": presence_penalty,
            "frequency_penalty": frequency_penalty,
            "max_tokens": max_tokens,
            "stop": stop,
            "stream": stream,
            # Listed before **kwargs so a caller can still pass drop_params=False.
            "drop_params": True,
            **kwargs,
        })
        params = filter_dict(merge_params, exclude_none=True)

        # Instance-level connection settings are fallbacks, not overrides.
        if self.api_key:
            params.setdefault("api_key", self.api_key)
        if self.api_base:
            params.setdefault("api_base", self.api_base)
        if self.timeout is not None:
            params.setdefault("timeout", self.timeout)

        return params

    @staticmethod
    def _convert_message(message: Message) -> Dict[str, str]:
        """
        Convert a Bridgic ``Message`` into a ``{"role", "content"}`` dict.

        Text, tool-call and tool-result blocks are rendered as text and joined
        with blank lines; any other block type is ignored.

        Raises
        ------
        ValueError
            If the message role has no chat-completion equivalent.
        """
        # NOTE(review): tool calls/results are flattened to plain text, and a
        # Role.TOOL message is sent with role "tool" but no ``tool_call_id`` —
        # confirm the targeted providers accept that shape.
        content_list = []
        for block in message.blocks:
            if isinstance(block, TextBlock):
                content_list.append(block.text)
            elif isinstance(block, ToolCallBlock):
                content_list.append(
                    f"Tool call:\n"
                    f"- id: {block.id}\n"
                    f"- name: {block.name}\n"
                    f"- arguments: {block.arguments}"
                )
            elif isinstance(block, ToolResultBlock):
                content_list.append(f"Tool result: {block.content}")
        content = "\n\n".join(content_list)

        role_map = {
            Role.SYSTEM: "system",
            Role.USER: "user",
            Role.AI: "assistant",
            Role.TOOL: "tool",
        }
        role = role_map.get(message.role)
        if role is None:
            raise ValueError(f"Invalid role: {message.role}")

        return {"role": role, "content": content}

    def _handle_response(self, response) -> Response:
        """
        Wrap a LiteLLM completion response in a Bridgic ``Response``.

        Only the first choice is used. Missing content becomes ``""``, and a
        non-empty ``refusal`` on the message is surfaced as a ``RuntimeWarning``.
        """
        message = response.choices[0].message
        text = message.content or ""

        refusal = getattr(message, "refusal", None)
        if refusal:
            warnings.warn(refusal, RuntimeWarning)

        usage = self._extract_usage(response)
        return Response(
            message=Message.from_text(text, role=Role.AI),
            usage=usage,
            raw=response,
        )

    @staticmethod
    def _extract_usage(response) -> Optional[TokenUsage]:
        """Build a ``TokenUsage`` from ``response.usage``, or return ``None`` if absent."""
        usage_data = getattr(response, "usage", None)
        if usage_data is None:
            return None

        return TokenUsage(
            model=getattr(response, "model", ""),
            prompt_tokens=getattr(usage_data, "prompt_tokens", 0),
            completion_tokens=getattr(usage_data, "completion_tokens", 0),
            total_tokens=getattr(usage_data, "total_tokens", 0),
        )

    @override
    def dump_to_dict(self) -> Dict[str, Any]:
        """Serialize the connection settings and configuration to a plain dict."""
        # NOTE(review): ``api_key`` is written out as-is; treat the dumped
        # state as a secret wherever it is persisted.
        return {
            "api_key": self.api_key,
            "api_base": self.api_base,
            "timeout": self.timeout,
            "configuration": self.configuration.model_dump(),
        }

    @override
    def load_from_dict(self, state_dict: Dict[str, Any]) -> None:
        """Restore the state produced by ``dump_to_dict``; missing keys become defaults."""
        self.api_key = state_dict.get("api_key")
        self.api_base = state_dict.get("api_base")
        self.timeout = state_dict.get("timeout")
        # ``or {}`` also covers a "configuration" key that is present but None.
        self.configuration = LiteLLMConfiguration(
            **(state_dict.get("configuration") or {})
        )
import os
import sys
import types
from unittest import mock

import pytest

from bridgic.core.model.types import *


def _user(text="Hi"):
    """Build a single user-role message."""
    return Message.from_text(text, role=Role.USER)


def _sent(fake_litellm):
    """Return the keyword arguments of the latest ``litellm.completion`` call."""
    return fake_litellm.completion.call_args[1]


def _chunk(content):
    """Build a fake streaming chunk whose first choice carries ``content``."""
    chunk = mock.MagicMock()
    chunk.choices = [mock.MagicMock()]
    chunk.choices[0].delta.content = content
    return chunk


# ---------------------------------------------------------------------------
# Unit tests (no API key required — litellm is mocked)
# ---------------------------------------------------------------------------

@pytest.fixture
def mock_litellm(monkeypatch):
    """
    Install a fake ``litellm`` module for the duration of one test.

    ``monkeypatch.setitem`` puts back whatever was registered in
    ``sys.modules`` before the test (or removes the entry if there was none),
    so an already-imported real ``litellm`` is not discarded.
    """
    fake = types.ModuleType("litellm")

    fake_usage = mock.MagicMock(prompt_tokens=10, completion_tokens=5, total_tokens=15)
    fake_message = mock.MagicMock(content="Hello from LiteLLM!", refusal=None)
    fake_choice = mock.MagicMock(message=fake_message)
    fake_response = mock.MagicMock(
        choices=[fake_choice],
        usage=fake_usage,
        model="openai/gpt-4o-mini",
    )

    fake.completion = mock.MagicMock(return_value=fake_response)
    fake.acompletion = mock.AsyncMock(return_value=fake_response)

    monkeypatch.setitem(sys.modules, "litellm", fake)
    return fake


@pytest.fixture
def llm_instance(mock_litellm):
    """A ``LiteLLM`` whose configuration supplies a default model."""
    from bridgic.llms.litellm import LiteLLM, LiteLLMConfiguration
    config = LiteLLMConfiguration(model="openai/gpt-4o-mini")
    return LiteLLM(configuration=config)


def test_chat_basic(llm_instance, mock_litellm):
    response = llm_instance.chat(messages=[_user("Hello!")])
    assert response.message.role == Role.AI
    assert response.message.content == "Hello from LiteLLM!"
    assert response.usage is not None
    assert response.usage.prompt_tokens == 10
    assert response.usage.completion_tokens == 5
    assert response.usage.total_tokens == 15

    call_kwargs = _sent(mock_litellm)
    assert call_kwargs["model"] == "openai/gpt-4o-mini"
    assert call_kwargs["drop_params"] is True


def test_chat_model_override(llm_instance, mock_litellm):
    llm_instance.chat(messages=[_user()], model="anthropic/claude-haiku-4-5")
    assert _sent(mock_litellm)["model"] == "anthropic/claude-haiku-4-5"


def test_api_key_forwarded(mock_litellm):
    from bridgic.llms.litellm import LiteLLM
    llm = LiteLLM(api_key="sk-test-123")
    llm.chat(messages=[_user()], model="openai/gpt-4o")
    assert _sent(mock_litellm)["api_key"] == "sk-test-123"


def test_api_key_omitted_when_none(llm_instance, mock_litellm):
    llm_instance.chat(messages=[_user()])
    assert "api_key" not in _sent(mock_litellm)


def test_api_base_forwarded(mock_litellm):
    from bridgic.llms.litellm import LiteLLM
    llm = LiteLLM(api_base="http://localhost:4000")
    llm.chat(messages=[_user()], model="openai/gpt-4o")
    assert _sent(mock_litellm)["api_base"] == "http://localhost:4000"


def test_timeout_forwarded(mock_litellm):
    from bridgic.llms.litellm import LiteLLM
    llm = LiteLLM(timeout=30.0)
    llm.chat(messages=[_user()], model="openai/gpt-4o")
    assert _sent(mock_litellm)["timeout"] == 30.0


def test_drop_params_default_true(llm_instance, mock_litellm):
    llm_instance.chat(messages=[_user()])
    assert _sent(mock_litellm)["drop_params"] is True


def test_message_conversion_roles(llm_instance, mock_litellm):
    messages = [
        Message.from_text("System prompt", role=Role.SYSTEM),
        Message.from_text("User message", role=Role.USER),
        Message.from_text("AI response", role=Role.AI),
    ]
    llm_instance.chat(messages=messages)
    sent_roles = [m["role"] for m in _sent(mock_litellm)["messages"]]
    assert sent_roles == ["system", "user", "assistant"]


def test_message_conversion_tool_blocks(llm_instance, mock_litellm):
    messages = [
        Message(
            role=Role.AI,
            blocks=[
                TextBlock(text="Checking weather."),
                ToolCallBlock(id="call_1", name="get_weather", arguments={"city": "Tokyo"}),
            ],
        ),
        Message.from_tool_result(tool_id="call_1", content="22°C sunny"),
    ]
    llm_instance.chat(messages=messages)
    ai_msg, tool_msg = _sent(mock_litellm)["messages"]
    assert "Tool call:" in ai_msg["content"]
    assert "get_weather" in ai_msg["content"]
    assert "Tool result: 22°C sunny" in tool_msg["content"]


def test_configuration_defaults_merge(mock_litellm):
    from bridgic.llms.litellm import LiteLLM, LiteLLMConfiguration
    config = LiteLLMConfiguration(
        model="openai/gpt-4o-mini",
        temperature=0.5,
        max_tokens=100,
    )
    llm = LiteLLM(configuration=config)
    llm.chat(messages=[_user()])

    call_kwargs = _sent(mock_litellm)
    assert call_kwargs["temperature"] == 0.5
    assert call_kwargs["max_tokens"] == 100


def test_call_time_overrides_config(mock_litellm):
    from bridgic.llms.litellm import LiteLLM, LiteLLMConfiguration
    config = LiteLLMConfiguration(temperature=0.5)
    llm = LiteLLM(configuration=config)
    llm.chat(messages=[_user()], model="openai/gpt-4o", temperature=0.9)
    assert _sent(mock_litellm)["temperature"] == 0.9


@pytest.mark.asyncio
async def test_achat_basic(llm_instance, mock_litellm):
    response = await llm_instance.achat(messages=[_user("Hello!")])
    assert response.message.role == Role.AI
    assert response.message.content == "Hello from LiteLLM!"
    mock_litellm.acompletion.assert_called_once()


def test_serialization_roundtrip(mock_litellm):
    from bridgic.llms.litellm import LiteLLM, LiteLLMConfiguration
    config = LiteLLMConfiguration(model="openai/gpt-4o", temperature=0.7)
    llm = LiteLLM(api_key="sk-test", api_base="http://proxy:4000", timeout=60.0, configuration=config)

    state = llm.dump_to_dict()
    assert state["api_key"] == "sk-test"
    assert state["api_base"] == "http://proxy:4000"
    assert state["timeout"] == 60.0
    assert state["configuration"]["model"] == "openai/gpt-4o"
    assert state["configuration"]["temperature"] == 0.7

    new_llm = LiteLLM()
    new_llm.load_from_dict(state)
    assert new_llm.api_key == "sk-test"
    assert new_llm.api_base == "http://proxy:4000"
    assert new_llm.timeout == 60.0
    assert new_llm.configuration.model == "openai/gpt-4o"
    assert new_llm.configuration.temperature == 0.7


# ---------------------------------------------------------------------------
# Edge case tests — exception handling, malformed responses, streaming
# ---------------------------------------------------------------------------

def test_auth_error_raises_unrecoverable(mock_litellm, monkeypatch):
    """Invalid/expired API key → ModelUnrecoverableError (not retried)."""
    from bridgic.llms.litellm import LiteLLM
    from bridgic.core.model._model_error import ModelUnrecoverableError

    exc_mod = types.ModuleType("litellm.exceptions")
    exc_mod.AuthenticationError = type("AuthenticationError", (Exception,), {})
    # Registered through monkeypatch so it is removed even if an assertion fails.
    monkeypatch.setitem(sys.modules, "litellm.exceptions", exc_mod)

    mock_litellm.completion.side_effect = exc_mod.AuthenticationError(
        "Invalid API key"
    )
    llm = LiteLLM(api_key="sk-invalid")
    with pytest.raises(ModelUnrecoverableError):
        llm.chat(messages=[_user()], model="openai/gpt-4o")
    assert mock_litellm.completion.call_count == 1


def test_rate_limit_retried(mock_litellm):
    """429 rate limit → retried by retryable_model_call (has '429' in text)."""
    from bridgic.llms.litellm import LiteLLM
    from bridgic.core.model._model_error import ModelRetryLimitError

    mock_litellm.completion.side_effect = Exception("Rate limit exceeded (429)")
    llm = LiteLLM()
    with pytest.raises(ModelRetryLimitError):
        llm.chat(messages=[_user()], model="openai/gpt-4o")
    assert mock_litellm.completion.call_count == 3


def test_timeout_retried(mock_litellm):
    """Timeout → retried by retryable_model_call."""
    from bridgic.llms.litellm import LiteLLM
    from bridgic.core.model._model_error import ModelRetryLimitError

    mock_litellm.completion.side_effect = TimeoutError("Request timed out")
    llm = LiteLLM(timeout=5.0)
    with pytest.raises(ModelRetryLimitError):
        llm.chat(messages=[_user()], model="openai/gpt-4o")
    assert mock_litellm.completion.call_count == 3


def test_empty_response_content(mock_litellm):
    """Provider returns empty content → handled gracefully (empty string)."""
    from bridgic.llms.litellm import LiteLLM

    fake_message = mock_litellm.completion.return_value.choices[0].message
    fake_message.content = None
    fake_message.refusal = None
    response = LiteLLM().chat(messages=[_user()], model="openai/gpt-4o")
    assert response.message.content == ""


def test_no_usage_in_response(mock_litellm):
    """Provider returns no usage data → usage is None, not crash."""
    from bridgic.llms.litellm import LiteLLM

    fake_response = mock_litellm.completion.return_value
    fake_response.usage = None
    fake_response.choices[0].message.content = "OK"
    fake_response.choices[0].message.refusal = None
    response = LiteLLM().chat(messages=[_user()], model="openai/gpt-4o")
    assert response.usage is None
    assert response.message.content == "OK"


def test_refusal_emits_warning(mock_litellm):
    """Provider refusal → RuntimeWarning emitted."""
    from bridgic.llms.litellm import LiteLLM

    fake_message = mock_litellm.completion.return_value.choices[0].message
    fake_message.content = ""
    fake_message.refusal = "Content policy violation"
    # pytest.warns checks category and text without being tripped up by
    # unrelated warnings raised during the call.
    with pytest.warns(RuntimeWarning, match="Content policy violation"):
        LiteLLM().chat(messages=[_user()], model="openai/gpt-4o")


def test_stream_partial_chunks(mock_litellm):
    """Streaming: partial chunks, some with empty delta."""
    from bridgic.llms.litellm import LiteLLM

    no_choices = mock.MagicMock()
    no_choices.choices = []
    chunks = [_chunk("Hello"), _chunk(None), _chunk(" world"), no_choices]

    mock_litellm.completion.return_value = iter(chunks)
    deltas = [
        chunk.delta
        for chunk in LiteLLM().stream(messages=[_user()], model="openai/gpt-4o")
    ]
    assert "".join(deltas) == "Hello world"


@pytest.mark.asyncio
async def test_astream_partial_chunks(mock_litellm):
    """Async streaming: partial chunks, some with empty delta."""
    from bridgic.llms.litellm import LiteLLM

    async def async_iter():
        for c in [_chunk("Async"), _chunk(None), _chunk(" hello")]:
            yield c

    mock_litellm.acompletion = mock.AsyncMock(return_value=async_iter())
    result = ""
    async for chunk in LiteLLM().astream(messages=[_user()], model="openai/gpt-4o"):
        result += chunk.delta
    assert result == "Async hello"


def test_provider_prefixed_model_string(llm_instance, mock_litellm):
    """Model string uses provider-prefix format consistently."""
    llm_instance.chat(messages=[_user()], model="anthropic/claude-sonnet-4-6")
    sent_model = _sent(mock_litellm)["model"]
    assert sent_model == "anthropic/claude-sonnet-4-6"
    assert "/" in sent_model


def test_missing_model_raises(mock_litellm):
    """No model in config or call → validation error."""
    from bridgic.llms.litellm import LiteLLM

    # NOTE(review): the concrete exception type comes from bridgic-core's
    # validation/retry layer; narrow this once that type is confirmed.
    with pytest.raises(Exception):
        LiteLLM().chat(messages=[_user()])


def test_extra_kwargs_forwarded(llm_instance, mock_litellm):
    """Extra kwargs (seed, user, etc.) are forwarded to litellm.completion."""
    llm_instance.chat(messages=[_user()], seed=42, user="test-user")
    call_kwargs = _sent(mock_litellm)
    assert call_kwargs["seed"] == 42
    assert call_kwargs["user"] == "test-user"


# ---------------------------------------------------------------------------
# Integration tests (require API key — skipped in CI)
# ---------------------------------------------------------------------------

_api_key = os.environ.get("LITELLM_API_KEY") or os.environ.get("OPENAI_API_KEY")
_model_name = os.environ.get("LITELLM_MODEL") or "openai/gpt-4o-mini"

_LIVE_PROMPT = "Say 'OK' and nothing else."

requires_api_key = pytest.mark.skipif(
    _api_key is None,
    reason="LITELLM_API_KEY or OPENAI_API_KEY is not set",
)


@pytest.fixture
def live_llm():
    """A ``LiteLLM`` bound to the real provider selected through the environment."""
    from bridgic.llms.litellm import LiteLLM, LiteLLMConfiguration
    config = LiteLLMConfiguration(model=_model_name)
    return LiteLLM(api_key=_api_key, configuration=config)


@requires_api_key
def test_live_chat(live_llm):
    response = live_llm.chat(messages=[_user(_LIVE_PROMPT)])
    assert response.message.role == Role.AI
    assert response.message.content is not None
    assert response.usage is not None
    assert response.usage.prompt_tokens > 0


@requires_api_key
def test_live_stream(live_llm):
    result = ""
    for chunk in live_llm.stream(messages=[_user(_LIVE_PROMPT)]):
        result += chunk.delta
        assert chunk.raw is not None
    assert len(result) > 0


@requires_api_key
@pytest.mark.asyncio
async def test_live_achat(live_llm):
    response = await live_llm.achat(messages=[_user(_LIVE_PROMPT)])
    assert response.message.role == Role.AI
    assert response.message.content is not None


@requires_api_key
@pytest.mark.asyncio
async def test_live_astream(live_llm):
    result = ""
    async for chunk in live_llm.astream(messages=[_user(_LIVE_PROMPT)]):
        result += chunk.delta
        assert chunk.raw is not None
    assert len(result) > 0