diff --git a/README.md b/README.md index 49139148..464f08cb 100644 --- a/README.md +++ b/README.md @@ -229,6 +229,10 @@ cp env.template .env # Edit .env and set: # - LLM_API_KEY (for memory extraction) # - VECTORIZE_API_KEY (for embedding/rerank) +# +# Supported LLM backends: OpenAI, Anthropic Claude, Google Gemini, +# MiniMax, Azure OpenAI, Ollama, or any OpenAI-compatible API. +# See src/config/llm_backends.yaml for full configuration details. # 5. Start server uv run python src/run.py diff --git a/README.zh.md b/README.zh.md index 7f7d3bd3..0d094709 100644 --- a/README.zh.md +++ b/README.zh.md @@ -229,6 +229,10 @@ cp env.template .env # 编辑 .env 并设置: # - LLM_API_KEY(用于记忆提取) # - VECTORIZE_API_KEY(用于向量化 / rerank) +# +# 支持的 LLM 后端:OpenAI、Anthropic Claude、Google Gemini、 +# MiniMax、Azure OpenAI、Ollama 或任何 OpenAI 兼容 API。 +# 详细配置请参阅 src/config/llm_backends.yaml。 # 5. 启动服务 uv run python src/run.py diff --git a/env.template b/env.template index 72ca8855..aba512c3 100755 --- a/env.template +++ b/env.template @@ -30,6 +30,12 @@ LLM_MAX_TOKENS=32768 # When using Qwen3 via OpenRouter, consider setting to "cerebras" # LLM_OPENROUTER_PROVIDER=cerebras +# =================== +# MiniMax Configuration (optional, for using MiniMax as LLM backend) +# =================== + +# MINIMAX_API_KEY=your-minimax-api-key-here + # =================== # Vectorize (Embedding) Service Configuration # =================== diff --git a/src/config/llm_backends.yaml b/src/config/llm_backends.yaml index 1cc863ef..fadd5e58 100644 --- a/src/config/llm_backends.yaml +++ b/src/config/llm_backends.yaml @@ -59,6 +59,21 @@ llm_backends: timeout: 600 # Increase to 10 minutes, suitable for time-consuming tasks like paper information extraction max_retries: 3 + # MiniMax configuration + minimax: + name: "MiniMax" + provider: "minimax" + base_url: "https://api.minimax.io/v1" + api_key: "" + models: + - "MiniMax-M2.7" + - "MiniMax-M2.7-highspeed" + - "MiniMax-M2.5" + - "MiniMax-M2.5-highspeed" + model: 
"MiniMax-M2.7" + timeout: 600 + max_retries: 3 + # Local Ollama configuration ollama: name: "Ollama Local" diff --git a/src/core/component/llm/llm_adapter/minimax_adapter.py b/src/core/component/llm/llm_adapter/minimax_adapter.py new file mode 100644 index 00000000..a4e200d3 --- /dev/null +++ b/src/core/component/llm/llm_adapter/minimax_adapter.py @@ -0,0 +1,115 @@ +import re +from typing import Dict, Any, List, Union, AsyncGenerator +import os +import openai +from core.component.llm.llm_adapter.completion import ( + ChatCompletionRequest, + ChatCompletionResponse, +) +from core.component.llm.llm_adapter.llm_backend_adapter import LLMBackendAdapter +from core.constants.errors import ErrorMessage + + +class MiniMaxAdapter(LLMBackendAdapter): + """MiniMax API adapter using OpenAI-compatible interface. + + MiniMax provides an OpenAI-compatible API at https://api.minimax.io/v1. + This adapter handles MiniMax-specific behaviors: + - Temperature clamping to [0.01, 1.0] range + - Stripping ... tags from reasoning model responses + - Auto-detection of MINIMAX_API_KEY environment variable + """ + + # MiniMax API requires temperature in (0.0, 1.0] for most models, + # but temperature=0 is now accepted. We clamp to [0.01, 1.0] for safety + # with older model versions. 
+    MIN_TEMPERATURE = 0.01
+    MAX_TEMPERATURE = 1.0
+
+    # Pattern to strip <think>...</think> tags from reasoning model output
+    _THINK_TAG_PATTERN = re.compile(
+        r"<think>.*?</think>\s*", flags=re.DOTALL
+    )
+
+    def __init__(self, config: Dict[str, Any]):
+        self.config = config
+        self.api_key = config.get("api_key") or os.getenv("MINIMAX_API_KEY")
+        self.base_url = config.get(
+            "base_url", "https://api.minimax.io/v1"
+        )
+        self.timeout = config.get("timeout", 600)
+
+        if not self.api_key:
+            raise ValueError(ErrorMessage.INVALID_PARAMETER.value)
+
+        self.client = openai.AsyncOpenAI(
+            api_key=self.api_key,
+            base_url=self.base_url,
+            timeout=self.timeout,
+        )
+
+    @classmethod
+    def _clamp_temperature(cls, temperature: float | None) -> float | None:
+        """Clamp temperature to MiniMax's accepted range."""
+        if temperature is None:
+            return None
+        return max(cls.MIN_TEMPERATURE, min(cls.MAX_TEMPERATURE, temperature))
+
+    @classmethod
+    def _strip_think_tags(cls, text: str) -> str:
+        """Strip <think>...</think> blocks from model output."""
+        return cls._THINK_TAG_PATTERN.sub("", text).strip()
+
+    async def chat_completion(
+        self, request: ChatCompletionRequest
+    ) -> Union[ChatCompletionResponse, AsyncGenerator[str, None]]:
+        """Perform chat completion via MiniMax OpenAI-compatible API."""
+        if not request.model:
+            raise ValueError(ErrorMessage.INVALID_PARAMETER.value)
+
+        params = request.to_dict()
+        client_params = {
+            "model": params.get("model"),
+            "messages": params.get("messages"),
+            "temperature": self._clamp_temperature(params.get("temperature")),
+            "max_tokens": params.get("max_tokens"),
+            "top_p": params.get("top_p"),
+            "frequency_penalty": params.get("frequency_penalty"),
+            "presence_penalty": params.get("presence_penalty"),
+            "stream": params.get("stream", False),
+        }
+        final_params = {k: v for k, v in client_params.items() if v is not None}
+
+        try:
+            if final_params.get("stream"):
+                async def stream_gen():
+                    response_stream = await self.client.chat.completions.create(
+                        **final_params
+                    )
+
async for chunk in response_stream: + content = getattr( + chunk.choices[0].delta, "content", None + ) + if content: + yield self._strip_think_tags(content) + + return stream_gen() + else: + response = await self.client.chat.completions.create( + **final_params + ) + resp_dict = response.model_dump() + # Strip think tags from non-streaming response + for choice in resp_dict.get("choices", []): + msg = choice.get("message", {}) + if msg.get("content"): + msg["content"] = self._strip_think_tags(msg["content"]) + return ChatCompletionResponse.from_dict(resp_dict) + except Exception as e: + raise RuntimeError( + f"MiniMax chat completion request failed: {e}" + ) + + def get_available_models(self) -> List[str]: + """Get available MiniMax model list.""" + return self.config.get("models", []) diff --git a/src/core/component/openai_compatible_client.py b/src/core/component/openai_compatible_client.py index 98e0cdf4..30e41324 100644 --- a/src/core/component/openai_compatible_client.py +++ b/src/core/component/openai_compatible_client.py @@ -15,6 +15,7 @@ from core.component.llm.llm_adapter.openai_adapter import OpenAIAdapter from core.component.llm.llm_adapter.anthropic_adapter import AnthropicAdapter from core.component.llm.llm_adapter.gemini_adapter import GeminiAdapter +from core.component.llm.llm_adapter.minimax_adapter import MiniMaxAdapter logger = get_logger(__name__) @@ -75,6 +76,8 @@ async def _get_adapter(self, backend_name: str) -> LLMBackendAdapter: adapter = AnthropicAdapter(backend_config) elif provider == "gemini": adapter = GeminiAdapter(backend_config) + elif provider == "minimax": + adapter = MiniMaxAdapter(backend_config) else: raise ValueError(f"Unsupported provider type: {provider}") diff --git a/tests/test_minimax_adapter.py b/tests/test_minimax_adapter.py new file mode 100644 index 00000000..99ceb99b --- /dev/null +++ b/tests/test_minimax_adapter.py @@ -0,0 +1,518 @@ +""" +MiniMax Adapter Unit Tests + +Tests for the MiniMax LLM adapter, covering: +- 
Adapter initialization and configuration +- Temperature clamping to MiniMax range +- Think-tag stripping from responses +- Non-streaming and streaming chat completion +- Error handling + +Usage: + pytest tests/test_minimax_adapter.py -v +""" + +import os +import asyncio +from unittest.mock import AsyncMock, MagicMock, patch +import pytest + +# Ensure the src directory is on the import path +import sys +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src")) + +from core.component.llm.llm_adapter.minimax_adapter import MiniMaxAdapter +from core.component.llm.llm_adapter.completion import ( + ChatCompletionRequest, + ChatCompletionResponse, +) +from core.component.llm.llm_adapter.message import ChatMessage, MessageRole + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def minimax_config(): + """Minimal valid config for MiniMaxAdapter.""" + return { + "api_key": "test-minimax-api-key", + "base_url": "https://api.minimax.io/v1", + "timeout": 60, + "models": [ + "MiniMax-M2.7", + "MiniMax-M2.7-highspeed", + "MiniMax-M2.5", + "MiniMax-M2.5-highspeed", + ], + } + + +@pytest.fixture +def adapter(minimax_config): + """Create a MiniMaxAdapter with test config.""" + return MiniMaxAdapter(minimax_config) + + +@pytest.fixture +def sample_messages(): + """Create sample chat messages for testing.""" + return [ + ChatMessage(role=MessageRole.SYSTEM, content="You are a helpful assistant."), + ChatMessage(role=MessageRole.USER, content="Hello!"), + ] + + +@pytest.fixture +def sample_request(sample_messages): + """Create a sample ChatCompletionRequest.""" + return ChatCompletionRequest( + messages=sample_messages, + model="MiniMax-M2.7", + temperature=0.7, + max_tokens=1024, + ) + + +# --------------------------------------------------------------------------- +# Initialization tests +# 
--------------------------------------------------------------------------- + +class TestMiniMaxAdapterInit: + """Tests for adapter initialization.""" + + def test_init_with_config_api_key(self, minimax_config): + adapter = MiniMaxAdapter(minimax_config) + assert adapter.api_key == "test-minimax-api-key" + assert adapter.base_url == "https://api.minimax.io/v1" + assert adapter.timeout == 60 + + def test_init_with_env_api_key(self): + config = {"base_url": "https://api.minimax.io/v1"} + with patch.dict(os.environ, {"MINIMAX_API_KEY": "env-key"}): + adapter = MiniMaxAdapter(config) + assert adapter.api_key == "env-key" + + def test_init_without_api_key_raises(self): + config = {"base_url": "https://api.minimax.io/v1"} + with patch.dict(os.environ, {}, clear=True): + # Remove MINIMAX_API_KEY if present + os.environ.pop("MINIMAX_API_KEY", None) + with pytest.raises(ValueError): + MiniMaxAdapter(config) + + def test_init_default_base_url(self): + config = {"api_key": "test-key"} + adapter = MiniMaxAdapter(config) + assert adapter.base_url == "https://api.minimax.io/v1" + + def test_init_default_timeout(self): + config = {"api_key": "test-key"} + adapter = MiniMaxAdapter(config) + assert adapter.timeout == 600 + + def test_init_creates_async_openai_client(self, adapter): + import openai + assert isinstance(adapter.client, openai.AsyncOpenAI) + + +# --------------------------------------------------------------------------- +# Temperature clamping tests +# --------------------------------------------------------------------------- + +class TestTemperatureClamping: + """Tests for MiniMax temperature clamping.""" + + def test_clamp_normal_temperature(self): + assert MiniMaxAdapter._clamp_temperature(0.5) == 0.5 + + def test_clamp_zero_temperature(self): + """Temperature 0 should be clamped to MIN_TEMPERATURE.""" + result = MiniMaxAdapter._clamp_temperature(0.0) + assert result == MiniMaxAdapter.MIN_TEMPERATURE + + def test_clamp_negative_temperature(self): + result = 
MiniMaxAdapter._clamp_temperature(-0.5)
+        assert result == MiniMaxAdapter.MIN_TEMPERATURE
+
+    def test_clamp_high_temperature(self):
+        result = MiniMaxAdapter._clamp_temperature(1.5)
+        assert result == MiniMaxAdapter.MAX_TEMPERATURE
+
+    def test_clamp_max_temperature(self):
+        assert MiniMaxAdapter._clamp_temperature(1.0) == 1.0
+
+    def test_clamp_min_temperature(self):
+        assert MiniMaxAdapter._clamp_temperature(0.01) == 0.01
+
+    def test_clamp_none_temperature(self):
+        assert MiniMaxAdapter._clamp_temperature(None) is None
+
+
+# ---------------------------------------------------------------------------
+# Think-tag stripping tests
+# ---------------------------------------------------------------------------
+
+class TestThinkTagStripping:
+    """Tests for stripping <think>...</think> tags."""
+
+    def test_strip_single_think_tag(self):
+        text = "<think>reasoning here</think>Final answer"
+        assert MiniMaxAdapter._strip_think_tags(text) == "Final answer"
+
+    def test_strip_multiline_think_tag(self):
+        text = "<think>\nStep 1: analyze\nStep 2: compute\n</think>\nThe answer is 42."
+        assert MiniMaxAdapter._strip_think_tags(text) == "The answer is 42."
+
+    def test_no_think_tags(self):
+        text = "Just a plain response."
+        assert MiniMaxAdapter._strip_think_tags(text) == "Just a plain response."
+
+    def test_empty_think_tag(self):
+        text = "<think></think>Result"
+        assert MiniMaxAdapter._strip_think_tags(text) == "Result"
+
+    def test_multiple_think_tags(self):
+        text = "<think>first</think>A<think>second</think>B"
+        assert MiniMaxAdapter._strip_think_tags(text) == "AB"
+
+    def test_strip_with_surrounding_whitespace(self):
+        text = "<think>thinking</think> \n Hello world"
+        result = MiniMaxAdapter._strip_think_tags(text)
+        assert result == "Hello world"
+
+    def test_empty_string(self):
+        assert MiniMaxAdapter._strip_think_tags("") == ""
+
+
+# ---------------------------------------------------------------------------
+# Available models tests
+# ---------------------------------------------------------------------------
+
+class TestGetAvailableModels:
+    """Tests for model list retrieval."""
+
+    def test_get_available_models(self, adapter):
+        models = adapter.get_available_models()
+        assert "MiniMax-M2.7" in models
+        assert "MiniMax-M2.7-highspeed" in models
+        assert "MiniMax-M2.5" in models
+        assert "MiniMax-M2.5-highspeed" in models
+
+    def test_get_available_models_empty_config(self):
+        config = {"api_key": "test-key"}
+        adapter = MiniMaxAdapter(config)
+        assert adapter.get_available_models() == []
+
+
+# ---------------------------------------------------------------------------
+# Chat completion tests (non-streaming)
+# ---------------------------------------------------------------------------
+
+class TestChatCompletion:
+    """Tests for non-streaming chat completion."""
+
+    @pytest.mark.asyncio
+    async def test_chat_completion_success(self, adapter, sample_request):
+        mock_response = MagicMock()
+        mock_response.model_dump.return_value = {
+            "id": "chatcmpl-test",
+            "object": "chat.completion",
+            "created": 1700000000,
+            "model": "MiniMax-M2.7",
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {"role": "assistant", "content": "Hello!"},
+                    "finish_reason": "stop",
+                }
+            ],
+            "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
+        }
+
+        with patch.object(
+            adapter.client.chat.completions, "create", new_callable=AsyncMock
+        ) as mock_create:
+            mock_create.return_value = mock_response
+            result = await adapter.chat_completion(sample_request)
+
+        assert isinstance(result, ChatCompletionResponse)
+        assert result.id == "chatcmpl-test"
+        assert result.model == "MiniMax-M2.7"
+        assert result.choices[0]["message"]["content"] == "Hello!"
+
+    @pytest.mark.asyncio
+    async def test_chat_completion_strips_think_tags(self, adapter, sample_request):
+        mock_response = MagicMock()
+        mock_response.model_dump.return_value = {
+            "id": "chatcmpl-test",
+            "object": "chat.completion",
+            "created": 1700000000,
+            "model": "MiniMax-M2.7",
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": "<think>reasoning</think>The answer is 42.",
+                    },
+                    "finish_reason": "stop",
+                }
+            ],
+        }
+
+        with patch.object(
+            adapter.client.chat.completions, "create", new_callable=AsyncMock
+        ) as mock_create:
+            mock_create.return_value = mock_response
+            result = await adapter.chat_completion(sample_request)
+
+        assert result.choices[0]["message"]["content"] == "The answer is 42."
+ + @pytest.mark.asyncio + async def test_chat_completion_clamps_temperature(self, adapter, sample_messages): + request = ChatCompletionRequest( + messages=sample_messages, + model="MiniMax-M2.7", + temperature=0.0, + max_tokens=100, + ) + + mock_response = MagicMock() + mock_response.model_dump.return_value = { + "id": "test", + "object": "chat.completion", + "created": 1700000000, + "model": "MiniMax-M2.7", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": "ok"}, + "finish_reason": "stop", + } + ], + } + + with patch.object( + adapter.client.chat.completions, "create", new_callable=AsyncMock + ) as mock_create: + mock_create.return_value = mock_response + await adapter.chat_completion(request) + + call_kwargs = mock_create.call_args[1] + assert call_kwargs["temperature"] == MiniMaxAdapter.MIN_TEMPERATURE + + @pytest.mark.asyncio + async def test_chat_completion_no_model_raises(self, adapter, sample_messages): + request = ChatCompletionRequest( + messages=sample_messages, + model=None, + ) + with pytest.raises(ValueError): + await adapter.chat_completion(request) + + @pytest.mark.asyncio + async def test_chat_completion_api_error(self, adapter, sample_request): + with patch.object( + adapter.client.chat.completions, "create", new_callable=AsyncMock + ) as mock_create: + mock_create.side_effect = Exception("API rate limit exceeded") + with pytest.raises(RuntimeError, match="MiniMax chat completion request failed"): + await adapter.chat_completion(sample_request) + + +# --------------------------------------------------------------------------- +# Chat completion tests (streaming) +# --------------------------------------------------------------------------- + +class TestChatCompletionStreaming: + """Tests for streaming chat completion.""" + + @pytest.mark.asyncio + async def test_streaming_returns_generator(self, adapter, sample_messages): + request = ChatCompletionRequest( + messages=sample_messages, + model="MiniMax-M2.7", + 
stream=True,
+        )
+
+        # Create mock streaming chunks
+        chunks = []
+        for text in ["Hello", ", world", "!"]:
+            chunk = MagicMock()
+            chunk.choices = [MagicMock()]
+            chunk.choices[0].delta = MagicMock()
+            chunk.choices[0].delta.content = text
+            chunks.append(chunk)
+
+        async def mock_stream():
+            for c in chunks:
+                yield c
+
+        with patch.object(
+            adapter.client.chat.completions, "create", new_callable=AsyncMock
+        ) as mock_create:
+            mock_create.return_value = mock_stream()
+            result = await adapter.chat_completion(request)
+
+            # Consume the generator inside the mock context
+            collected = []
+            async for part in result:
+                collected.append(part)
+            assert "".join(collected) == "Hello, world!"
+
+    @pytest.mark.asyncio
+    async def test_streaming_strips_think_tags(self, adapter, sample_messages):
+        request = ChatCompletionRequest(
+            messages=sample_messages,
+            model="MiniMax-M2.7",
+            stream=True,
+        )
+
+        chunk = MagicMock()
+        chunk.choices = [MagicMock()]
+        chunk.choices[0].delta = MagicMock()
+        chunk.choices[0].delta.content = "<think>reasoning</think>Answer"
+
+        async def mock_stream():
+            yield chunk
+
+        with patch.object(
+            adapter.client.chat.completions, "create", new_callable=AsyncMock
+        ) as mock_create:
+            mock_create.return_value = mock_stream()
+            result = await adapter.chat_completion(request)
+
+            collected = []
+            async for part in result:
+                collected.append(part)
+            assert "".join(collected) == "Answer"
+
+
+# ---------------------------------------------------------------------------
+# OpenAICompatibleClient integration tests (minimax provider routing)
+# ---------------------------------------------------------------------------
+
+class TestOpenAICompatibleClientMiniMaxRouting:
+    """Tests that OpenAICompatibleClient correctly routes to MiniMaxAdapter."""
+
+    @pytest.mark.asyncio
+    async def test_minimax_provider_creates_minimax_adapter(self):
+        """Verify that provider='minimax' creates a MiniMaxAdapter instance."""
+        from core.component.llm.llm_adapter.minimax_adapter import 
MiniMaxAdapter + + config = { + "provider": "minimax", + "api_key": "test-key", + "base_url": "https://api.minimax.io/v1", + "models": ["MiniMax-M2.7"], + "model": "MiniMax-M2.7", + } + + # Simulate what OpenAICompatibleClient._get_adapter does + provider = config.get("provider", "openai") + assert provider == "minimax" + + adapter = MiniMaxAdapter(config) + assert isinstance(adapter, MiniMaxAdapter) + assert adapter.base_url == "https://api.minimax.io/v1" + + def test_minimax_in_llm_backends_yaml(self): + """Verify MiniMax is configured in llm_backends.yaml.""" + import yaml + + yaml_path = os.path.join( + os.path.dirname(__file__), "..", "src", "config", "llm_backends.yaml" + ) + with open(yaml_path) as f: + config = yaml.safe_load(f) + + backends = config.get("llm_backends", {}) + assert "minimax" in backends, "minimax backend should be in llm_backends.yaml" + + minimax_cfg = backends["minimax"] + assert minimax_cfg["provider"] == "minimax" + assert minimax_cfg["base_url"] == "https://api.minimax.io/v1" + assert "MiniMax-M2.7" in minimax_cfg["models"] + assert "MiniMax-M2.7-highspeed" in minimax_cfg["models"] + assert minimax_cfg["model"] == "MiniMax-M2.7" + + def test_openai_compatible_client_imports_minimax(self): + """Verify that MiniMaxAdapter is imported in openai_compatible_client.""" + from core.component.openai_compatible_client import OpenAICompatibleClient + + # If the import in openai_compatible_client.py works, this passes + assert OpenAICompatibleClient is not None + + +# --------------------------------------------------------------------------- +# Integration test (requires MINIMAX_API_KEY) +# --------------------------------------------------------------------------- + +class TestMiniMaxIntegration: + """Integration tests that call the real MiniMax API. + + These tests are skipped unless MINIMAX_API_KEY is set in the environment. 
+ """ + + @pytest.fixture + def live_adapter(self): + api_key = os.getenv("MINIMAX_API_KEY") + if not api_key: + pytest.skip("MINIMAX_API_KEY not set") + return MiniMaxAdapter({ + "api_key": api_key, + "base_url": "https://api.minimax.io/v1", + "models": ["MiniMax-M2.5-highspeed"], + }) + + @pytest.mark.asyncio + async def test_live_chat_completion(self, live_adapter): + """Test a real API call to MiniMax.""" + request = ChatCompletionRequest( + messages=[ + ChatMessage(role=MessageRole.USER, content="Say 'hello' and nothing else."), + ], + model="MiniMax-M2.5-highspeed", + temperature=0.01, + max_tokens=256, + ) + result = await live_adapter.chat_completion(request) + assert isinstance(result, ChatCompletionResponse) + assert len(result.choices) > 0 + content = result.choices[0]["message"]["content"] + assert len(content) > 0 + + @pytest.mark.asyncio + async def test_live_streaming(self, live_adapter): + """Test a real streaming API call to MiniMax.""" + request = ChatCompletionRequest( + messages=[ + ChatMessage(role=MessageRole.USER, content="Count from 1 to 3."), + ], + model="MiniMax-M2.5-highspeed", + temperature=0.01, + max_tokens=64, + stream=True, + ) + result = await live_adapter.chat_completion(request) + collected = [] + async for chunk in result: + collected.append(chunk) + full_text = "".join(collected) + assert len(full_text) > 0 + + @pytest.mark.asyncio + async def test_live_temperature_clamping(self, live_adapter): + """Test that temperature=0 doesn't cause API errors.""" + request = ChatCompletionRequest( + messages=[ + ChatMessage(role=MessageRole.USER, content="Say 'ok'."), + ], + model="MiniMax-M2.5-highspeed", + temperature=0.0, + max_tokens=16, + ) + result = await live_adapter.chat_completion(request) + assert isinstance(result, ChatCompletionResponse)