From de9c9e92caefd6989ccc35234a759ef70716bc71 Mon Sep 17 00:00:00 2001
From: JunghwanNA <70629228+shaun0927@users.noreply.github.com>
Date: Fri, 17 Apr 2026 16:41:57 +0900
Subject: [PATCH] fix(llmcore): include temperature when provider requires
 clamping

Commit f418963 introduced the optimization of omitting `temperature`
from the chat/completions payload when it equals the OpenAI default of
1, to avoid triggering errors on reasoning-only endpoints.

After that change, the MiniMax and Kimi/Moonshot branches (which force
temperature to a provider-required value) can produce a clamped value
of exactly 1.0, which then gets dropped from the payload again. For
providers where the request was explicitly adjusted, this silently
changes behavior from 'temperature=1.0 sent' to 'temperature omitted'.

Track whether the provider branch forced a value and always include
temperature in that case. For other OpenAI-compatible endpoints the
existing 'omit when default' behavior is preserved.

Restores 3 previously-failing unit tests in
tests/test_minimax.py::TestMiniMaxTemperatureClamping:

- test_minimax_temp_one_preserved
- test_minimax_temp_above_one_clamped
- test_kimi_temp_still_forced
---
 llmcore.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llmcore.py b/llmcore.py
index 395dbf5b..4e91d570 100644
--- a/llmcore.py
+++ b/llmcore.py
@@ -256,8 +256,9 @@ def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completion
                    max_retries=0, connect_timeout=10, read_timeout=300, proxies=None):
     """Shared OpenAI-compatible streaming request with retry. Yields text chunks, returns list[content_block]."""
     ml = model.lower()
-    if 'kimi' in ml or 'moonshot' in ml: temperature = 1
-    elif 'minimax' in ml: temperature = max(0.01, min(temperature, 1.0)) # MiniMax requires temp in (0, 1]
+    _force_temp = False
+    if 'kimi' in ml or 'moonshot' in ml: temperature, _force_temp = 1, True
+    elif 'minimax' in ml: temperature, _force_temp = max(0.01, min(temperature, 1.0)), True # MiniMax requires temp in (0, 1]
     headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json", "Accept": "text/event-stream"}
     if api_mode == "responses":
         url = auto_make_url(api_base, "responses")
@@ -267,7 +268,7 @@ def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completion
         url = auto_make_url(api_base, "chat/completions")
         _stamp_oai_cache_markers(messages, model)
         payload = {"model": model, "messages": messages, "stream": True, "stream_options": {"include_usage": True}}
-        if temperature != 1: payload["temperature"] = temperature
+        if _force_temp or temperature != 1: payload["temperature"] = temperature
         if max_tokens: payload["max_tokens"] = max_tokens
         if reasoning_effort: payload["reasoning_effort"] = reasoning_effort
         if tools:
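
For illustration, a minimal standalone sketch of the temperature handling
after this patch. `build_payload` is a hypothetical distillation for this
note only, not a function in llmcore.py; it reproduces just the branch the
patch changes, with the old condition noted in a comment.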
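
    # Hypothetical distillation of the patched branch; build_payload is
    # illustrative only and does not exist in llmcore.py.
    def build_payload(model, temperature):
        ml = model.lower()
        _force_temp = False
        if 'kimi' in ml or 'moonshot' in ml:
            temperature, _force_temp = 1, True
        elif 'minimax' in ml:
            # MiniMax requires temp in (0, 1]; the clamp can land exactly on 1.0.
            temperature, _force_temp = max(0.01, min(temperature, 1.0)), True
        payload = {"model": model, "stream": True}
        # Pre-patch this read `if temperature != 1:`, which dropped the
        # clamped 1.0 for MiniMax and the forced 1 for Kimi/Moonshot.
        if _force_temp or temperature != 1:
            payload["temperature"] = temperature
        return payload

    assert build_payload("minimax-m1", 1.5)["temperature"] == 1.0  # clamped, still sent
    assert build_payload("kimi-k2", 0.7)["temperature"] == 1       # forced, still sent
    assert "temperature" not in build_payload("gpt-4o", 1)         # default still omitted
    assert build_payload("gpt-4o", 0.3)["temperature"] == 0.3      # non-default still sent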
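
Keeping the flag local (`_force_temp`) means the omit-when-default check is
widened only for the providers whose branches explicitly adjust the value;
all other OpenAI-compatible endpoints keep the behavior from f418963.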