From de9c9e92caefd6989ccc35234a759ef70716bc71 Mon Sep 17 00:00:00 2001
From: JunghwanNA <70629228+shaun0927@users.noreply.github.com>
Date: Fri, 17 Apr 2026 16:41:57 +0900
Subject: [PATCH] fix(llmcore): include temperature when provider requires
 clamping

Commit f418963 introduced the optimization of omitting `temperature`
from the chat/completions payload when it equals the OpenAI default of
1, to avoid triggering errors on reasoning-only endpoints.

After that change, the MiniMax and Kimi/Moonshot branches (which force
temperature to a provider-required value) can produce a clamped value
of exactly 1.0, which then gets dropped from the payload again. For
providers where the request was explicitly adjusted, this silently
changes behavior from 'temperature=1.0 sent' to 'temperature omitted'.

Track whether the provider branch forced a value and always include
temperature in that case. For other OpenAI-compatible endpoints the
existing 'omit when default' behavior is preserved.

Restores 3 previously-failing unit tests in
tests/test_minimax.py::TestMiniMaxTemperatureClamping:

- test_minimax_temp_one_preserved
- test_minimax_temp_above_one_clamped
- test_kimi_temp_still_forced
---
 llmcore.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llmcore.py b/llmcore.py
index 395dbf5b..4e91d570 100644
--- a/llmcore.py
+++ b/llmcore.py
@@ -256,8 +256,9 @@ def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completion
                    max_retries=0, connect_timeout=10, read_timeout=300, proxies=None):
     """Shared OpenAI-compatible streaming request with retry. Yields text chunks, returns list[content_block]."""
     ml = model.lower()
-    if 'kimi' in ml or 'moonshot' in ml: temperature = 1
-    elif 'minimax' in ml: temperature = max(0.01, min(temperature, 1.0)) # MiniMax requires temp in (0, 1]
+    _force_temp = False
+    if 'kimi' in ml or 'moonshot' in ml: temperature, _force_temp = 1, True
+    elif 'minimax' in ml: temperature, _force_temp = max(0.01, min(temperature, 1.0)), True # MiniMax requires temp in (0, 1]
     headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json", "Accept": "text/event-stream"}
     if api_mode == "responses":
         url = auto_make_url(api_base, "responses")
@@ -267,7 +268,7 @@ def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completion
         url = auto_make_url(api_base, "chat/completions")
         _stamp_oai_cache_markers(messages, model)
         payload = {"model": model, "messages": messages, "stream": True, "stream_options": {"include_usage": True}}
-        if temperature != 1: payload["temperature"] = temperature
+        if _force_temp or temperature != 1: payload["temperature"] = temperature
         if max_tokens: payload["max_tokens"] = max_tokens
         if reasoning_effort: payload["reasoning_effort"] = reasoning_effort
         if tools:
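
For illustration, a minimal standalone sketch of the temperature handling
after this patch. `build_payload` is a hypothetical distillation for this
note only, not a function in llmcore.py; it reproduces just the branch the
patch changes, with the old condition noted in a comment.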
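
    # Hypothetical distillation of the patched branch; build_payload is
    # illustrative only and does not exist in llmcore.py.
    def build_payload(model, temperature):
        ml = model.lower()
        _force_temp = False
        if 'kimi' in ml or 'moonshot' in ml:
            temperature, _force_temp = 1, True
        elif 'minimax' in ml:
            # MiniMax requires temp in (0, 1]; the clamp can land exactly on 1.0.
            temperature, _force_temp = max(0.01, min(temperature, 1.0)), True
        payload = {"model": model, "stream": True}
        # Pre-patch this read `if temperature != 1:`, which dropped the
        # clamped 1.0 for MiniMax and the forced 1 for Kimi/Moonshot.
        if _force_temp or temperature != 1:
            payload["temperature"] = temperature
        return payload

    assert build_payload("minimax-m1", 1.5)["temperature"] == 1.0  # clamped, still sent
    assert build_payload("kimi-k2", 0.7)["temperature"] == 1       # forced, still sent
    assert "temperature" not in build_payload("gpt-4o", 1)         # default still omitted
    assert build_payload("gpt-4o", 0.3)["temperature"] == 0.3      # non-default still sent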
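
Keeping the flag local (`_force_temp`) means the omit-when-default check is
widened only for the providers whose branches explicitly adjust the value;
all other OpenAI-compatible endpoints keep the behavior from f418963.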