diff --git a/README.md b/README.md index 08a2d1d..cf3950b 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ http://localhost:3264/api ## Возможности fork - **Chat Completions API**: `POST /api/chat/completions`, совместимый с OpenAI SDK, Open WebUI, LiteLLM и агентами. +- **Thinking / reasoning для Qwen Chat**: текстовые запросы в этой копии по умолчанию уходят с `thinking_enabled`; ответ разделяет размышления (`reasoning_content`) и финальный текст (`content`). - **Актуальные модели Qwen Chat**: `qwen3.7-max`, `qwen3.7-plus`, `qwen3.6-plus` и другие модели из `src/AvailableModels.txt`. - **Генерация изображений через Qwen Chat**: `POST /api/images/generations` без `DASHSCOPE_API_KEY`. - **Генерация видео через Qwen Chat**: `POST /api/videos/generations` + polling задач через `GET /api/tasks/status/:taskId`. @@ -122,6 +123,7 @@ curl http://localhost:3264/api/chat/completions \ -H "Content-Type: application/json" \ -d '{ "model": "qwen3.7-max", + "enable_thinking": true, "messages": [ {"role": "user", "content": "Ответь коротко: что такое FreeQwenApi?"} ], @@ -129,6 +131,24 @@ curl http://localhost:3264/api/chat/completions \ }' ``` +В non-streaming ответе размышления возвращаются отдельно: + +```json +{ + "choices": [ + { + "message": { + "role": "assistant", + "reasoning_content": "...ход рассуждения...", + "content": "...финальный ответ..." + } + } + ] +} +``` + +В streaming-режиме reasoning приходит чанками как `delta.reasoning_content`, а финальный ответ как `delta.content`. Отключить thinking для конкретного запроса можно через `enable_thinking: false`, `thinking_enabled: false` или `reasoning_effort: "none"`. 
+ OpenAI SDK: ```js diff --git a/examples/openai-sdk/conversation.js b/examples/openai-sdk/conversation.js index 9caa8fa..1bf2660 100644 --- a/examples/openai-sdk/conversation.js +++ b/examples/openai-sdk/conversation.js @@ -4,30 +4,30 @@ import OpenAI from 'openai'; const openai = new OpenAI({ - baseURL: 'http://localhost:3264/api', + baseURL: 'http://localhost:3264/api', apiKey: 'dummy-key', // Ключ не используется, но требуется для SDK }); async function conversationExample() { try { console.log('Начинаем диалог с Qwen AI...\n'); - + // Первое сообщение пользователя console.log('Пользователь: Привет! Расскажи о квантовой физике простыми словами.'); - + let completion = await openai.chat.completions.create({ messages: [ { role: 'user', content: 'Привет! Расскажи о квантовой физике простыми словами.' } ], model: 'qwen-max-latest', }); - + const assistantResponse1 = completion.choices[0].message.content; console.log('\nQwen:', assistantResponse1); - + // Второе сообщение пользователя, включающее историю беседы console.log('\nПользователь: А как это связано с теорией относительности?'); - + completion = await openai.chat.completions.create({ messages: [ { role: 'user', content: 'Привет! Расскажи о квантовой физике простыми словами.' }, @@ -36,13 +36,13 @@ async function conversationExample() { ], model: 'qwen-max-latest', }); - + const assistantResponse2 = completion.choices[0].message.content; console.log('\nQwen:', assistantResponse2); - + // Третье сообщение пользователя console.log('\nПользователь: Спасибо! Кто из ученых внес наибольший вклад в развитие этих теорий?'); - + completion = await openai.chat.completions.create({ messages: [ { role: 'user', content: 'Привет! Расскажи о квантовой физике простыми словами.' 
}, @@ -53,7 +53,7 @@ async function conversationExample() { ], model: 'qwen-max-latest', }); - + console.log('\nQwen:', completion.choices[0].message.content); console.log('\nДиалог успешно завершен.'); @@ -63,4 +63,4 @@ async function conversationExample() { } // Запуск -conversationExample(); \ No newline at end of file +conversationExample(); \ No newline at end of file diff --git a/examples/openai-sdk/image-analysis.js b/examples/openai-sdk/image-analysis.js index c36e189..c5d1a83 100644 --- a/examples/openai-sdk/image-analysis.js +++ b/examples/openai-sdk/image-analysis.js @@ -4,7 +4,7 @@ import OpenAI from 'openai'; const openai = new OpenAI({ - baseURL: 'http://localhost:3264/api', + baseURL: 'http://localhost:3264/api', apiKey: 'dummy-key', // Ключ не используется, но требуется для SDK }); @@ -18,7 +18,7 @@ async function analyzeImage() { const completion = await openai.chat.completions.create({ messages: [ - { + { role: 'user', content: [ { @@ -45,4 +45,4 @@ async function analyzeImage() { } // Запуск -analyzeImage(); \ No newline at end of file +analyzeImage(); \ No newline at end of file diff --git a/examples/openai-sdk/openai-compatibility.js b/examples/openai-sdk/openai-compatibility.js index 851b365..636b230 100644 --- a/examples/openai-sdk/openai-compatibility.js +++ b/examples/openai-sdk/openai-compatibility.js @@ -5,17 +5,17 @@ import OpenAI from 'openai'; // Настройка клиента OpenAI с использованием нашего прокси как точки доступа const openai = new OpenAI({ - baseURL: 'http://localhost:3264/api', + baseURL: 'http://localhost:3264/api', apiKey: 'dummy-key', // Ключ не используется, но требуется для SDK }); async function openaiCompatibilityExample() { try { console.log('Демонстрация совместимости с OpenAI API\n'); - + // 1. Стандартный запрос в формате OpenAI console.log('1. 
Стандартный запрос в формате OpenAI...'); - + const completion = await openai.chat.completions.create({ model: 'qwen-max-latest', messages: [ @@ -24,13 +24,13 @@ async function openaiCompatibilityExample() { ], temperature: 0.7, }); - + console.log('Ответ:'); console.log(completion.choices[0].message.content); - + // 2. Потоковый запрос в формате OpenAI console.log('\n2. Потоковый запрос в формате OpenAI...'); - + console.log('Ответ (потоковый режим):'); const stream = await openai.chat.completions.create({ model: 'qwen-max-latest', @@ -40,7 +40,7 @@ async function openaiCompatibilityExample() { ], stream: true, }); - + let streamedContent = ''; for await (const chunk of stream) { const content = chunk.choices[0]?.delta?.content || ''; @@ -48,15 +48,15 @@ async function openaiCompatibilityExample() { process.stdout.write(content); } console.log('\n'); - + // 3. Демонстрация структуры ответа в формате OpenAI console.log('\n3. Структура ответа в формате OpenAI:'); - + const responseDemo = await openai.chat.completions.create({ model: 'qwen-max-latest', messages: [{ role: 'user', content: 'Привет!' 
}], }); - + // Выводим структуру ответа (без содержимого сообщения) const { choices, ...responseWithoutChoices } = responseDemo; console.log(JSON.stringify({ @@ -66,13 +66,13 @@ async function openaiCompatibilityExample() { message: { role: choices[0].message.role, content: '[содержимое сообщения скрыто для краткости]' } }] }, null, 2)); - - + + } catch (error) { console.error('Ошибка при выполнении примера:', error); } } // Запуск -openaiCompatibilityExample(); \ No newline at end of file +openaiCompatibilityExample(); \ No newline at end of file diff --git a/examples/openai-sdk/simple.js b/examples/openai-sdk/simple.js index e66ddf6..3e75ff3 100644 --- a/examples/openai-sdk/simple.js +++ b/examples/openai-sdk/simple.js @@ -4,7 +4,7 @@ import OpenAI from 'openai'; const openai = new OpenAI({ - baseURL: 'http://localhost:3264/api', + baseURL: 'http://localhost:3264/api', apiKey: 'dummy-key', // Ключ не используется, но требуется для SDK }); @@ -16,7 +16,7 @@ async function simpleRequest() { messages: [ { role: 'user', content: 'Напиши 5 интересных фактов о космосе' } ], - model: 'qwen-max-latest', + model: 'qwen-max-latest', }); console.log('Ответ от Qwen:\n'); @@ -29,4 +29,4 @@ async function simpleRequest() { } // Запуск -simpleRequest(); \ No newline at end of file +simpleRequest(); \ No newline at end of file diff --git a/examples/openai-sdk/streaming.js b/examples/openai-sdk/streaming.js index c9a6412..9edd8f5 100644 --- a/examples/openai-sdk/streaming.js +++ b/examples/openai-sdk/streaming.js @@ -4,8 +4,8 @@ import OpenAI from 'openai'; const openai = new OpenAI({ - baseURL: 'http://localhost:3264/api', - apiKey: 'dummy-key', + baseURL: 'http://localhost:3264/api', + apiKey: 'dummy-key', }); async function streamFromQwen() { @@ -17,8 +17,8 @@ async function streamFromQwen() { messages: [ { role: 'user', content: 'Напиши небольшую историю о космических путешествиях' } ], - model: 'qwen-max-latest', - stream: true, + model: 'qwen-max-latest', + stream: true, 
}); console.log('Ответ от Qwen (потоковый режим):\n'); @@ -36,4 +36,4 @@ async function streamFromQwen() { } // Запуск -streamFromQwen(); \ No newline at end of file +streamFromQwen(); \ No newline at end of file diff --git a/examples/openai-sdk/system-message.js b/examples/openai-sdk/system-message.js index acf2b6b..a0816fc 100644 --- a/examples/openai-sdk/system-message.js +++ b/examples/openai-sdk/system-message.js @@ -4,7 +4,7 @@ import OpenAI from 'openai'; const openai = new OpenAI({ - baseURL: 'http://localhost:3264/api', + baseURL: 'http://localhost:3264/api', apiKey: 'dummy-key', // Ключ не используется, но требуется для SDK }); @@ -14,16 +14,16 @@ async function systemMessageExample() { const completion = await openai.chat.completions.create({ messages: [ - { - role: 'system', - content: 'Ты опытный астроном, который специализируется на планетах Солнечной системы. Отвечай научно точно, но понятным языком.' + { + role: 'system', + content: 'Ты опытный астроном, который специализируется на планетах Солнечной системы. Отвечай научно точно, но понятным языком.' 
}, - { - role: 'user', - content: 'Расскажи мне о Марсе и его особенностях' + { + role: 'user', + content: 'Расскажи мне о Марсе и его особенностях' } ], - model: 'qwen-max-latest', + model: 'qwen-max-latest', }); console.log('Ответ от Qwen:\n'); @@ -36,4 +36,4 @@ async function systemMessageExample() { } // Запуск -systemMessageExample(); \ No newline at end of file +systemMessageExample(); \ No newline at end of file diff --git a/main.py b/main.py index f6a222b..b09b99a 100644 --- a/main.py +++ b/main.py @@ -33,6 +33,8 @@ TOKENS_FILE = os.path.join(SESSION_DIR, "tokens.json") DEFAULT_MODEL = "qwen-max-latest" AVAILABLE_MODELS_FILE = os.path.join("src", "AvailableModels.txt") +QWEN_THINKING_ENABLED = os.environ.get("QWEN_THINKING_ENABLED", "true").strip().lower() not in {"0", "false", "no", "off", "none"} +QWEN_THINKING_BUDGET = int(os.environ.get("QWEN_THINKING_BUDGET", "81920")) # ================================================================= # MODEL MAPPING (Embedded for standalone usage) @@ -277,7 +279,11 @@ def build_qwen_payload(message_content, model, chat_id, parent_id=None, system_m "files": files or [], "childrenIds": [assistant_msg_id], "extra": {"meta": {"subChatType": "t2t"}}, - "feature_config": {"thinking_enabled": False, "output_schema": "phase"} + "feature_config": { + "thinking_enabled": QWEN_THINKING_ENABLED, + "output_schema": "phase", + **({"thinking_budget": QWEN_THINKING_BUDGET} if QWEN_THINKING_ENABLED else {}) + } } payload = { @@ -342,7 +348,12 @@ def _extract_chat_ids(body: Dict[str, Any]): parent_id = body.get("parentId") or body.get("parent_id") or body.get("x_qwen_parent_id") return chat_id, parent_id -def _build_openai_completion(content: str, model: str, chat_id: Optional[str], parent_id: Optional[str], usage: Optional[Dict[str, Any]] = None): +def _build_openai_completion(content: str, model: str, chat_id: Optional[str], parent_id: Optional[str], usage: Optional[Dict[str, Any]] = None, reasoning_content: str = ""): + message = 
{"role": "assistant", "content": content} + if reasoning_content: + message["reasoning_content"] = reasoning_content + message["reasoning"] = reasoning_content + return { "id": f"chatcmpl-{uuid.uuid4()}", "object": "chat.completion", @@ -350,7 +361,7 @@ def _build_openai_completion(content: str, model: str, chat_id: Optional[str], p "model": model, "choices": [{ "index": 0, - "message": {"role": "assistant", "content": content}, + "message": message, "finish_reason": "stop" }], "usage": usage or {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}, @@ -442,25 +453,33 @@ async def execute_qwen_completion(token_obj, chat_id, payload, on_chunk=None): return structured_error content = "" + reasoning_content = "" choices = parsed.get("choices") if isinstance(choices, list) and choices: first_choice = choices[0] if isinstance(choices[0], dict) else {} msg = first_choice.get("message") if isinstance(first_choice.get("message"), dict) else {} content = str(msg.get("content") or "") + reasoning_content = str(msg.get("reasoning_content") or msg.get("reasoning") or "") elif parsed.get("success") is True and isinstance(parsed.get("data"), dict): content = str(parsed["data"].get("content") or "") + reasoning_content = str(parsed["data"].get("reasoning_content") or parsed["data"].get("reasoning") or "") - if content and callable(on_chunk): - on_chunk(content) + if (content or reasoning_content) and callable(on_chunk): + if reasoning_content: + on_chunk(reasoning_content, "reasoning") + if content: + on_chunk(content, "content") return { "success": True, "content": content, + "reasoning_content": reasoning_content, "response_id": parsed.get("response_id") or parsed.get("id"), "usage": parsed.get("usage") or {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} } full_content = "" + full_reasoning_content = "" response_id = None usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} finished = False @@ -502,20 +521,41 @@ async def 
execute_qwen_completion(token_obj, chat_id, payload, on_chunk=None): first_choice = choices[0] if isinstance(choices[0], dict) else {} delta = first_choice.get("delta") if isinstance(first_choice.get("delta"), dict) else {} + phase = str(delta.get("phase") or "").strip().lower() + is_thinking_phase = phase in {"think", "thinking", "reasoning"} + + reasoning_piece = ( + delta.get("reasoning_content") or + delta.get("reasoning") or + delta.get("thinking") or + delta.get("thinking_content") + ) + if reasoning_piece is not None: + reasoning_piece_str = str(reasoning_piece) + full_reasoning_content += reasoning_piece_str + if callable(on_chunk): + on_chunk(reasoning_piece_str, "reasoning") + piece = delta.get("content") if piece is not None: piece_str = str(piece) - full_content += piece_str - if callable(on_chunk): - on_chunk(piece_str) - - if delta.get("status") == "finished" or first_choice.get("finish_reason"): + if is_thinking_phase: + full_reasoning_content += piece_str + if callable(on_chunk): + on_chunk(piece_str, "reasoning") + else: + full_content += piece_str + if callable(on_chunk): + on_chunk(piece_str, "content") + + if first_choice.get("finish_reason") or (delta.get("status") == "finished" and not is_thinking_phase): finished = True break return { "success": True, "content": full_content, + "reasoning_content": full_reasoning_content, "response_id": response_id, "usage": usage, "finished": finished @@ -547,9 +587,9 @@ async def _stream_openai_response(token_info, chat_id: str, payload: Dict[str, A queue: asyncio.Queue = asyncio.Queue() has_streamed_chunks = False - def on_chunk(chunk_text: str): + def on_chunk(chunk_text: str, part_type: str = "content"): if chunk_text: - queue.put_nowait(chunk_text) + queue.put_nowait((part_type, chunk_text)) task = asyncio.create_task(execute_qwen_completion(token_info, chat_id, payload, on_chunk=on_chunk)) @@ -558,17 +598,18 @@ def on_chunk(chunk_text: str): if task.done() and queue.empty(): break try: - chunk = await 
asyncio.wait_for(queue.get(), timeout=0.2) + part_type, chunk = await asyncio.wait_for(queue.get(), timeout=0.2) except asyncio.TimeoutError: continue has_streamed_chunks = True + delta = {"reasoning_content": chunk} if part_type == "reasoning" else {"content": chunk} yield "data: " + json.dumps({ "id": "chatcmpl-stream", "object": "chat.completion.chunk", "created": int(time.time()), "model": model, - "choices": [{"index": 0, "delta": {"content": chunk}, "finish_reason": None}] + "choices": [{"index": 0, "delta": delta, "finish_reason": None}] }, ensure_ascii=False) + "\n\n" result = await task @@ -582,15 +623,19 @@ def on_chunk(chunk_text: str): "model": model, "choices": [{"index": 0, "delta": {"content": err_text}, "finish_reason": None}] }, ensure_ascii=False) + "\n\n" - elif not has_streamed_chunks and result.get("content"): + elif not has_streamed_chunks and (result.get("reasoning_content") or result.get("content")): # Qwen иногда отвечает обычным JSON вместо SSE. - yield "data: " + json.dumps({ - "id": "chatcmpl-stream", - "object": "chat.completion.chunk", - "created": int(time.time()), - "model": model, - "choices": [{"index": 0, "delta": {"content": result["content"]}, "finish_reason": None}] - }, ensure_ascii=False) + "\n\n" + for part_type, chunk in (("reasoning", result.get("reasoning_content")), ("content", result.get("content"))): + if not chunk: + continue + delta = {"reasoning_content": chunk} if part_type == "reasoning" else {"content": chunk} + yield "data: " + json.dumps({ + "id": "chatcmpl-stream", + "object": "chat.completion.chunk", + "created": int(time.time()), + "model": model, + "choices": [{"index": 0, "delta": delta, "finish_reason": None}] + }, ensure_ascii=False) + "\n\n" yield "data: " + json.dumps({ "id": "chatcmpl-stream", @@ -673,6 +718,7 @@ async def handle_chat_completions(body: Dict[str, Any]): chat_id, response_parent_id, usage=result.get("usage"), + reasoning_content=result.get("reasoning_content", ""), ) 
@app.get("/api/chat/completions") diff --git a/package.json b/package.json index 9a81c0a..8039535 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,7 @@ "type": "module", "scripts": { "start": "node index.js", - "test": "echo \"Ошибка: тест не задан\" && exit 1", + "test": "node --test \"tests/**/*.test.js\"", "example:stream": "node examples/openai-sdk/streaming.js", "example:simple": "node examples/openai-sdk/simple.js", "example:system": "node examples/openai-sdk/system-message.js", diff --git a/src/api/chat.js b/src/api/chat.js index 4d19743..6ba23e9 100644 --- a/src/api/chat.js +++ b/src/api/chat.js @@ -12,8 +12,15 @@ import { CHAT_API_URL, CREATE_CHAT_URL, CHAT_PAGE_URL, TASK_STATUS_URL, PAGE_TIMEOUT, RETRY_DELAY, PAGE_POOL_SIZE, DEFAULT_MODEL, MAX_RETRY_COUNT, - TASK_POLL_MAX_ATTEMPTS, TASK_POLL_INTERVAL + TASK_POLL_MAX_ATTEMPTS, TASK_POLL_INTERVAL, + QWEN_THINKING_ENABLED, QWEN_THINKING_BUDGET } from '../config.js'; +import { + appendQwenStreamChunk, + buildAssistantMessage, + createQwenAccumulator, + resolveThinkingOptions +} from './qwenReasoning.js'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); @@ -338,16 +345,21 @@ async function resolveAuthToken(browserContext) { return authToken ? 
{ id: 'browser', token: authToken } : null; } -function buildPayloadV2(messageContent, model, chatId, parentId, files, systemMessage, tools, toolChoice, chatType = 't2t', size = null) { +export function buildPayloadV2(messageContent, model, chatId, parentId, files, systemMessage, tools, toolChoice, chatType = 't2t', size = null, reasoningOptions = {}) { const userMessageId = crypto.randomUUID(); const assistantChildId = crypto.randomUUID(); const isVideo = chatType === 't2v'; + const isTextChat = chatType === 't2t'; + const thinking = resolveThinkingOptions(reasoningOptions, QWEN_THINKING_ENABLED, QWEN_THINKING_BUDGET); const featureConfig = { - thinking_enabled: isVideo, + thinking_enabled: isVideo || (isTextChat && thinking.enabled), output_schema: 'phase' }; + if (isTextChat && featureConfig.thinking_enabled) { + featureConfig.thinking_budget = thinking.budget; + } if (isVideo) { featureConfig.research_mode = 'normal'; featureConfig.auto_thinking = true; @@ -468,14 +480,10 @@ async function executeApiRequestWithNodeStreaming(apiUrl, payload, token, onChun const decoder = new TextDecoder(); let buffer = ''; - let fullContent = ''; - let responseId = null; - let usage = null; - let finished = false; + const accumulator = createQwenAccumulator(); let streamError = null; - let hasStreamedChunks = false; - while (!finished) { + while (!accumulator.finished) { const { done, value } = await reader.read(); if (done) break; @@ -485,12 +493,14 @@ async function executeApiRequestWithNodeStreaming(apiUrl, payload, token, onChun for (const rawLine of lines) { const line = rawLine.trim(); - if (!line || !line.startsWith('data:')) continue; + if (!line) continue; - const jsonStr = line.substring(5).trim(); + const jsonStr = line.startsWith('data:') + ? line.substring(5).trim() + : line.startsWith('{') ? 
line : ''; if (!jsonStr) continue; if (jsonStr === '[DONE]') { - finished = true; + accumulator.finished = true; break; } @@ -499,32 +509,16 @@ async function executeApiRequestWithNodeStreaming(apiUrl, payload, token, onChun if (chunk.code === 'RateLimited' || (chunk.code && chunk.detail)) { streamError = { status: 429, errorBody: JSON.stringify(chunk) }; - finished = true; + accumulator.finished = true; break; } if (chunk.error && !chunk.choices) { streamError = { status: 500, errorBody: JSON.stringify(chunk) }; - finished = true; + accumulator.finished = true; break; } - if (chunk['response.created']) responseId = chunk['response.created'].response_id; - if (chunk.response_id) responseId = chunk.response_id; - - if (chunk.choices && chunk.choices[0]) { - const delta = chunk.choices[0].delta; - if (delta && delta.content) { - fullContent += delta.content; - if (typeof onChunk === 'function') { - onChunk(delta.content); - hasStreamedChunks = true; - } - } - if (delta && delta.status === 'finished') finished = true; - if (chunk.choices[0].finish_reason) finished = true; - } - - if (chunk.usage) usage = chunk.usage; + appendQwenStreamChunk(accumulator, chunk, onChunk); } catch { // Ignore broken chunks, keep reading stream. 
} @@ -532,21 +526,21 @@ async function executeApiRequestWithNodeStreaming(apiUrl, payload, token, onChun } if (streamError) { - return { success: false, ...streamError, hasStreamedChunks }; + return { success: false, ...streamError, hasStreamedChunks: accumulator.hasStreamedChunks }; } return { success: true, isTask: false, - hasStreamedChunks, + hasStreamedChunks: accumulator.hasStreamedChunks, data: { - id: responseId || 'chatcmpl-' + Date.now(), + id: accumulator.responseId || 'chatcmpl-' + Date.now(), object: 'chat.completion', created: Math.floor(Date.now() / 1000), model: payload.model, - choices: [{ index: 0, message: { role: 'assistant', content: fullContent }, finish_reason: 'stop' }], - usage: usage || { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }, - response_id: responseId + choices: [{ index: 0, message: buildAssistantMessage(accumulator.content, accumulator.reasoningContent), finish_reason: 'stop' }], + usage: accumulator.usage, + response_id: accumulator.responseId } }; } catch (error) { @@ -555,7 +549,7 @@ async function executeApiRequestWithNodeStreaming(apiUrl, payload, token, onChun } async function executeApiRequest(page, apiUrl, payload, token, onChunk = null) { - if (payload?.stream !== false && typeof onChunk === 'function') { + if (payload?.stream !== false) { const streamedResponse = await executeApiRequestWithNodeStreaming(apiUrl, payload, token, onChunk); const canReturnDirectly = @@ -636,11 +630,18 @@ async function executeApiRequest(page, apiUrl, payload, token, onChunk = null) { const decoder = new TextDecoder(); let buffer = ''; let fullContent = ''; + let fullReasoningContent = ''; let responseId = null; let usage = null; let finished = false; let streamError = null; + const isThinkingPhase = (phase) => { + const normalized = typeof phase === 'string' ? 
phase.trim().toLowerCase() : ''; + return normalized === 'think' || normalized === 'thinking' || normalized === 'reasoning'; + }; + const appendValue = (value) => value === undefined || value === null ? '' : String(value); + while (!finished) { const { done, value } = await reader.read(); if (done) break; @@ -648,9 +649,12 @@ async function executeApiRequest(page, apiUrl, payload, token, onChunk = null) { const lines = buffer.split('\n'); buffer = lines.pop() || ''; - for (const line of lines) { - if (!line.trim() || !line.startsWith('data: ')) continue; - const jsonStr = line.substring(6).trim(); + for (const rawLine of lines) { + const line = rawLine.trim(); + if (!line) continue; + const jsonStr = line.startsWith('data:') + ? line.substring(5).trim() + : line.startsWith('{') ? line : ''; if (!jsonStr) continue; try { const chunk = JSON.parse(jsonStr); @@ -667,10 +671,27 @@ async function executeApiRequest(page, apiUrl, payload, token, onChunk = null) { } if (chunk['response.created']) responseId = chunk['response.created'].response_id; + if (chunk.response_id) responseId = chunk.response_id; if (chunk.choices && chunk.choices[0]) { const delta = chunk.choices[0].delta; - if (delta && delta.content) fullContent += delta.content; - if (delta && delta.status === 'finished') finished = true; + if (delta) { + const phase = delta.phase; + const explicitReasoning = + appendValue(delta.reasoning_content) + + appendValue(delta.reasoning) + + appendValue(delta.thinking) + + appendValue(delta.thinking_content); + if (explicitReasoning) fullReasoningContent += explicitReasoning; + if (delta.content) { + if (isThinkingPhase(phase)) { + fullReasoningContent += String(delta.content); + } else { + fullContent += String(delta.content); + } + } + if (delta.status === 'finished' && !isThinkingPhase(phase)) finished = true; + } + if (chunk.choices[0].finish_reason) finished = true; } if (chunk.usage) usage = chunk.usage; } catch { /* ignore parse errors for individual chunks */ } 
@@ -689,7 +710,18 @@ async function executeApiRequest(page, apiUrl, payload, token, onChunk = null) { object: 'chat.completion', created: Math.floor(Date.now() / 1000), model: data.payload.model, - choices: [{ index: 0, message: { role: 'assistant', content: fullContent }, finish_reason: 'stop' }], + choices: [{ + index: 0, + message: { + role: 'assistant', + content: fullContent, + ...(fullReasoningContent ? { + reasoning_content: fullReasoningContent, + reasoning: fullReasoningContent + } : {}) + }, + finish_reason: 'stop' + }], usage: usage || { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }, response_id: responseId } @@ -704,7 +736,7 @@ async function executeApiRequest(page, apiUrl, payload, token, onChunk = null) { }, requestBody); } -async function handleApiError(response, tokenObj, message, model, chatId, parentId, files, retryCount, chatType, size, waitForCompletion, onChunk = null) { +async function handleApiError(response, tokenObj, message, model, chatId, parentId, files, retryCount, chatType, size, waitForCompletion, onChunk = null, reasoningOptions = {}) { logRaw(JSON.stringify(response)); logError(`Ошибка при получении ответа: ${response.error || response.statusText}`); if (response.errorBody) logDebug(`Тело ответа с ошибкой: ${response.errorBody}`); @@ -729,7 +761,7 @@ async function handleApiError(response, tokenObj, message, model, chatId, parent } const { hasValidTokens } = await import('./tokenManager.js'); if (hasValidTokens() && retryCount < MAX_RETRY_COUNT) { - return sendMessage(message, model, chatId, parentId, files, null, null, null, chatType, size, waitForCompletion, retryCount + 1, onChunk); + return sendMessage(message, model, chatId, parentId, files, null, null, null, chatType, size, waitForCompletion, retryCount + 1, onChunk, reasoningOptions); } logError('Не осталось валидных токенов или исчерпаны попытки.'); return { error: 'Все токены недействительны (401). 
Требуется повторная авторизация.', chatId }; @@ -753,7 +785,7 @@ async function handleApiError(response, tokenObj, message, model, chatId, parent authToken = null; const { hasValidTokens } = await import('./tokenManager.js'); if (hasValidTokens() && retryCount < MAX_RETRY_COUNT) { - return sendMessage(message, model, chatId, parentId, files, null, null, null, chatType, size, waitForCompletion, retryCount + 1, onChunk); + return sendMessage(message, model, chatId, parentId, files, null, null, null, chatType, size, waitForCompletion, retryCount + 1, onChunk, reasoningOptions); } return { error: `Все токены заблокированы по лимиту (${hours}ч)`, chatId }; } @@ -763,7 +795,7 @@ async function handleApiError(response, tokenObj, message, model, chatId, parent // ─── Main public API ───────────────────────────────────────────────────────── -export async function sendMessage(message, model = DEFAULT_MODEL, chatId = null, parentId = null, files = null, tools = null, toolChoice = null, systemMessage = null, chatType = 't2t', size = null, waitForCompletion = true, retryCount = 0, onChunk = null) { +export async function sendMessage(message, model = DEFAULT_MODEL, chatId = null, parentId = null, files = null, tools = null, toolChoice = null, systemMessage = null, chatType = 't2t', size = null, waitForCompletion = true, retryCount = 0, onChunk = null, reasoningOptions = {}) { if (!availableModels) availableModels = getAvailableModelsFromFile(); if (!chatId) { @@ -816,7 +848,7 @@ export async function sendMessage(message, model = DEFAULT_MODEL, chatId = null, logInfo('Отправка запроса к API v2...'); - const payload = buildPayloadV2(messageContent, model, chatId, parentId, files, systemMessage, tools, toolChoice, chatType, size); + const payload = buildPayloadV2(messageContent, model, chatId, parentId, files, systemMessage, tools, toolChoice, chatType, size, reasoningOptions); logDebug('=== PAYLOAD V2 ===\n' + JSON.stringify(payload, null, 2)); logDebug(`Отправка сообщения в чат 
/**
 * Helpers for Qwen "thinking" (reasoning) support:
 *  - reading reasoning controls from an incoming request body,
 *  - resolving them against server defaults,
 *  - splitting upstream stream deltas into reasoning text and answer text.
 */

// Spellings accepted for "on" / "off" in string-valued request fields.
const TRUE_VALUES = new Set(['1', 'true', 'yes', 'on', 'enabled', 'enable']);
const FALSE_VALUES = new Set(['0', 'false', 'no', 'off', 'disabled', 'disable', 'none']);
// Delta fields whose text is treated as reasoning regardless of the reported phase.
const REASONING_DELTA_KEYS = ['reasoning_content', 'reasoning', 'thinking', 'thinking_content'];
// Phase names under which `delta.content` is treated as reasoning text.
const THINKING_PHASES = new Set(['think', 'thinking', 'reasoning']);

/** True for non-null, non-array objects. */
function isPlainObject(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}

/** Returns `value` when it is a plain object, otherwise an empty object. */
function asObject(value) {
  return isPlainObject(value) ? value : {};
}

/** True for the primitive kinds that can express an on/off switch. */
function isScalar(value) {
  const kind = typeof value;
  return kind === 'boolean' || kind === 'number' || kind === 'string';
}

/** Returns the first argument that is neither `undefined` nor `null`. */
function firstDefined(...values) {
  return values.find((value) => value !== undefined && value !== null);
}

/** Lower-cases and trims a phase name; non-strings become ''. */
function normalizePhase(phase) {
  return typeof phase === 'string' ? phase.trim().toLowerCase() : '';
}

/**
 * Converts a delta field to text.
 * Only strings and finite numbers are text; booleans and objects (for example a
 * `thinking: true` flag) yield '' so they are never concatenated into the output
 * as "true" or "[object Object]".
 */
function valueToText(value) {
  if (typeof value === 'string') return value;
  if (typeof value === 'number' && Number.isFinite(value)) return String(value);
  return '';
}

/**
 * Reduces a request field to a scalar on/off candidate.
 * Scalars pass through. Plain objects contribute their `enabled`, `effort` or
 * `type` member (first one present). Anything else yields `undefined`, so it
 * cannot shadow a later, explicit flag.
 */
function toSwitchValue(value) {
  if (isScalar(value)) return value;
  if (!isPlainObject(value)) return undefined;
  const nested = firstDefined(value.enabled, value.effort, value.type);
  return isScalar(nested) ? nested : undefined;
}

/**
 * Parses a budget into a positive integer.
 * Accepts numbers and non-empty numeric strings; fractions are truncated.
 * Every other input (booleans, objects, non-positive or non-finite values)
 * yields `fallback`.
 */
function toPositiveInteger(value, fallback) {
  const isNumeric = typeof value === 'number'
    || (typeof value === 'string' && value.trim() !== '');
  if (!isNumeric) return fallback;

  const parsed = Math.trunc(Number(value));
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}

/**
 * Interprets a loosely typed flag.
 *
 * @param {*} value - boolean, 0/1, or one of the TRUE_VALUES / FALSE_VALUES spellings.
 * @param {boolean} [fallback=false] - returned when `value` is not recognised.
 * @returns {boolean}
 */
export function parseBooleanLike(value, fallback = false) {
  if (typeof value === 'boolean') return value;

  if (typeof value === 'number') {
    // Only 0 and 1 are meaningful; other numbers are "unrecognised", same as
    // unknown strings, instead of silently meaning "off".
    if (value === 1) return true;
    if (value === 0) return false;
    return fallback;
  }

  if (typeof value !== 'string') return fallback;

  const normalized = value.trim().toLowerCase();
  if (TRUE_VALUES.has(normalized)) return true;
  if (FALSE_VALUES.has(normalized)) return false;
  return fallback;
}

/**
 * Collects reasoning controls from a request body.
 * Top-level fields win over `extra_body` / `extraBody`, which win over
 * `chat_template_kwargs`.
 *
 * @param {object} [body] - request body; `null` and non-objects are treated as empty.
 * @returns {{enableThinking: (boolean|number|string|undefined), thinkingBudget: *}}
 *   Raw (unresolved) values; pass the result to `resolveThinkingOptions`.
 */
export function getReasoningOptionsFromRequest(body = {}) {
  const request = asObject(body);
  const extraBody = asObject(request.extra_body || request.extraBody);
  const chatTemplateKwargs = asObject(
    request.chat_template_kwargs || extraBody.chat_template_kwargs
  );

  const switchCandidates = [
    request.enable_thinking,
    request.enableThinking,
    request.thinking_enabled,
    request.thinkingEnabled,
    request.thinking,
    request.reasoning,
    request.reasoning_effort,
    extraBody.enable_thinking,
    extraBody.enableThinking,
    extraBody.thinking_enabled,
    extraBody.thinkingEnabled,
    extraBody.thinking,
    extraBody.reasoning,
    extraBody.reasoning_effort,
    chatTemplateKwargs.enable_thinking,
    chatTemplateKwargs.enableThinking
  ].map(toSwitchValue);

  return {
    enableThinking: firstDefined(...switchCandidates),
    thinkingBudget: firstDefined(
      request.thinking_budget,
      request.thinkingBudget,
      extraBody.thinking_budget,
      extraBody.thinkingBudget,
      chatTemplateKwargs.thinking_budget,
      chatTemplateKwargs.thinkingBudget
    )
  };
}

/**
 * Same as `getReasoningOptionsFromRequest`, but forces thinking off when the
 * request carries tools.
 *
 * @param {object} [body] - request body.
 * @param {boolean} [hasTools=false] - whether the request declares tools.
 * @returns {{enableThinking: *, thinkingBudget: *}}
 */
export function getToolAwareReasoningOptionsFromRequest(body = {}, hasTools = false) {
  const options = getReasoningOptionsFromRequest(body);

  // Tool calls are emulated through a strict JSON-in-content adapter. Qwen thinking
  // can prepend reasoning phases and make that JSON contract unreliable.
  if (hasTools) {
    return { ...options, enableThinking: false };
  }

  return options;
}

/**
 * Resolves raw reasoning options against defaults.
 *
 * @param {{enableThinking?: *, thinkingBudget?: *}} [options] - raw options; `null` is treated as empty.
 * @param {boolean} [defaultEnabled=true] - used when `enableThinking` is absent or unrecognised.
 * @param {number} [defaultBudget=81920] - used when `thinkingBudget` is absent or invalid.
 * @returns {{enabled: boolean, budget: number}}
 */
export function resolveThinkingOptions(options = {}, defaultEnabled = true, defaultBudget = 81920) {
  const { enableThinking, thinkingBudget } = asObject(options);

  return {
    // 'none' (e.g. reasoning_effort: "none") is part of FALSE_VALUES.
    enabled: parseBooleanLike(enableThinking, defaultEnabled),
    budget: toPositiveInteger(thinkingBudget, defaultBudget)
  };
}

/**
 * @param {*} phase - phase name from a stream delta.
 * @returns {boolean} true for 'think', 'thinking' or 'reasoning' (case-insensitive).
 */
export function isThinkingPhase(phase) {
  return THINKING_PHASES.has(normalizePhase(phase));
}

/**
 * Splits one upstream delta into reasoning text and answer text.
 * Text found under REASONING_DELTA_KEYS is always reasoning; `delta.content`
 * is reasoning only while the delta's phase is a thinking phase.
 *
 * @param {object} [delta] - upstream delta; `null` and non-objects are treated as empty.
 * @returns {{phase: string, reasoningContent: string, answerContent: string, isThinking: boolean}}
 */
export function splitQwenDelta(delta = {}) {
  const source = asObject(delta);
  const phase = normalizePhase(source.phase);
  const isThinking = isThinkingPhase(phase);
  const content = valueToText(source.content);

  let reasoningContent = REASONING_DELTA_KEYS
    .map((key) => valueToText(source[key]))
    .join('');
  let answerContent = '';

  if (isThinking) {
    reasoningContent += content;
  } else {
    answerContent = content;
  }

  return { phase, reasoningContent, answerContent, isThinking };
}

/**
 * @param {object} [chunk] - parsed stream chunk.
 * @returns {object} `chunk.choices[0]` when it is an object, otherwise `{}`.
 */
export function getQwenChunkChoice(chunk = {}) {
  const { choices } = asObject(chunk);
  return Array.isArray(choices) ? asObject(choices[0]) : {};
}

/**
 * @param {object} [chunk] - parsed stream chunk.
 * @returns {string|null} `chunk['response.created'].response_id`, else
 *   `chunk.response_id`, else null.
 */
export function getQwenChunkResponseId(chunk = {}) {
  const source = asObject(chunk);
  const created = asObject(source['response.created']);
  return created.response_id || source.response_id || null;
}

/**
 * Decides whether a chunk ends the stream.
 * A `status: 'finished'` delta only ends the stream outside a thinking phase,
 * because the thinking phase reports its own completion before the answer starts.
 *
 * @param {object} [chunk] - parsed stream chunk.
 * @returns {boolean}
 */
export function shouldFinishQwenStreamChunk(chunk = {}) {
  const choice = getQwenChunkChoice(chunk);
  if (choice.finish_reason) return true;

  const delta = asObject(choice.delta);
  return delta.status === 'finished' && !splitQwenDelta(delta).isThinking;
}

/**
 * @returns {object} a fresh accumulator for `appendQwenStreamChunk`.
 */
export function createQwenAccumulator() {
  return {
    content: '',
    reasoningContent: '',
    responseId: null,
    usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
    finished: false,
    hasStreamedChunks: false
  };
}

/**
 * Folds one stream chunk into the accumulator and forwards new text to `onChunk`.
 *
 * @param {object} accumulator - object created by `createQwenAccumulator`; mutated in place.
 * @param {object} chunk - parsed stream chunk; `null` and non-objects are ignored.
 * @param {?function(string, string): void} [onChunk] - called as
 *   `onChunk(text, 'reasoning' | 'content')` for each non-empty part.
 * @returns {object} the same accumulator.
 */
export function appendQwenStreamChunk(accumulator, chunk, onChunk = null) {
  const source = asObject(chunk);
  const emit = typeof onChunk === 'function' ? onChunk : null;

  const responseId = getQwenChunkResponseId(source);
  if (responseId) accumulator.responseId = responseId;

  if (source.usage && typeof source.usage === 'object') {
    accumulator.usage = source.usage;
  }

  const delta = asObject(getQwenChunkChoice(source).delta);
  const { reasoningContent, answerContent } = splitQwenDelta(delta);

  if (reasoningContent) {
    accumulator.reasoningContent += reasoningContent;
    if (emit) {
      emit(reasoningContent, 'reasoning');
      accumulator.hasStreamedChunks = true;
    }
  }

  if (answerContent) {
    accumulator.content += answerContent;
    if (emit) {
      emit(answerContent, 'content');
      accumulator.hasStreamedChunks = true;
    }
  }

  if (shouldFinishQwenStreamChunk(source)) {
    accumulator.finished = true;
  }

  return accumulator;
}

/**
 * Builds an assistant message. When reasoning text is present it is exposed
 * under both `reasoning_content` and `reasoning`.
 *
 * @param {string} [content=''] - final answer text.
 * @param {string} [reasoningContent=''] - reasoning text; omitted from the message when empty.
 * @returns {{role: string, content: string, reasoning_content?: string, reasoning?: string}}
 */
export function buildAssistantMessage(content = '', reasoningContent = '') {
  const message = { role: 'assistant', content };
  if (reasoningContent) {
    message.reasoning_content = reasoningContent;
    message.reasoning = reasoningContent;
  }
  return message;
}
/**
 * Wraps one streamed text fragment in the `delta` object of a chat-completion chunk.
 *
 * @param {string} chunk - text fragment to send.
 * @param {string} [partType='content'] - 'reasoning' puts the text under
 *   `reasoning_content`; any other value puts it under `content`.
 * @returns {{reasoning_content: string}|{content: string}}
 */
function buildOpenAIStreamDelta(chunk, partType = 'content') {
  if (partType === 'reasoning') {
    return { reasoning_content: chunk };
  }

  return { content: chunk };
}
@@ router.post('/chat', async (req, res) => { } } - const result = await sendMessage(messageContent, mappedModel, isMeta ? null : chatId, isMeta ? null : parentId, null, null, null, systemMessage, chatType || 't2t', size || null, waitForCompletion ?? true); + const result = await sendMessage(messageContent, mappedModel, isMeta ? null : chatId, isMeta ? null : parentId, null, null, null, systemMessage, chatType || 't2t', size || null, waitForCompletion ?? true, 0, null, reasoningOptions); if (result.choices && result.choices[0] && result.choices[0].message) { const responseLength = result.choices[0].message.content ? result.choices[0].message.content.length : 0; @@ -1081,6 +1090,8 @@ router.post('/chat/completions', async (req, res) => { const systemMsg = messages.find(msg => msg.role === 'system'); const systemMessage = systemMsg ? systemMsg.content : null; const { combinedTools } = buildCombinedTools(tools, functions, tool_choice); + const hasTools = Array.isArray(combinedTools) && combinedTools.length > 0; + const reasoningOptions = getToolAwareReasoningOptionsFromRequest(req.body, hasTools); const preparedInput = prepareOpenAIMessageInput(messages, combinedTools, effectiveChatId); if (preparedInput.missingUser) { @@ -1158,7 +1169,7 @@ router.post('/chat/completions', async (req, res) => { let hasStreamedChunks = false; const captureToolCalls = Array.isArray(combinedTools) && combinedTools.length > 0; if (stream && !captureToolCalls) { - streamingCallback = (chunk) => { + streamingCallback = (chunk, partType = 'content') => { hasStreamedChunks = true; writeSse({ id: 'chatcmpl-stream', @@ -1166,7 +1177,7 @@ router.post('/chat/completions', async (req, res) => { created: Math.floor(Date.now() / 1000), model: mappedModel || 'qwen-max-latest', choices: [ - { index: 0, delta: { content: chunk }, finish_reason: null } + { index: 0, delta: buildOpenAIStreamDelta(chunk, partType), finish_reason: null } ] }); }; @@ -1185,7 +1196,8 @@ router.post('/chat/completions', async 
(req, res) => { null, true, 0, - streamingCallback + streamingCallback, + reasoningOptions ); if (captureToolCalls) { @@ -1268,7 +1280,7 @@ router.post('/chat/completions', async (req, res) => { } } else { const qwenChatId = await resolveQwenChatId(effectiveChatId, mappedModel); - const result = await sendMessage(messageContent, mappedModel, qwenChatId, effectiveParentId, null, qwenTools, tool_choice, toolAwareSystemMessage); + const result = await sendMessage(messageContent, mappedModel, qwenChatId, effectiveParentId, null, qwenTools, tool_choice, toolAwareSystemMessage, 't2t', null, true, 0, null, reasoningOptions); // Сохраняем chatId в сессию для следующих запросов if (!isMeta && result.chatId) { @@ -1406,6 +1418,8 @@ router.post('/v1/chat/completions', async (req, res) => { const systemMsg = messages.find(msg => msg.role === 'system'); const systemMessage = systemMsg ? systemMsg.content : null; const { combinedTools } = buildCombinedTools(tools, functions, tool_choice); + const hasTools = Array.isArray(combinedTools) && combinedTools.length > 0; + const reasoningOptions = getToolAwareReasoningOptionsFromRequest(req.body, hasTools); const preparedInput = prepareOpenAIMessageInput(messages, combinedTools, effectiveChatId); if (preparedInput.missingUser) { @@ -1485,7 +1499,7 @@ router.post('/v1/chat/completions', async (req, res) => { let hasStreamedChunks = false; const captureToolCalls = Array.isArray(combinedTools) && combinedTools.length > 0; if (stream && !captureToolCalls) { - streamingCallback = (chunk) => { + streamingCallback = (chunk, partType = 'content') => { hasStreamedChunks = true; // OpenWebUI не нуждается в role в чанках - только контент writeSse({ @@ -1494,7 +1508,7 @@ router.post('/v1/chat/completions', async (req, res) => { created: Math.floor(Date.now() / 1000), model: mappedModel || 'qwen-max-latest', choices: [ - { index: 0, delta: { content: chunk }, finish_reason: null } + { index: 0, delta: buildOpenAIStreamDelta(chunk, partType), 
finish_reason: null } ] }); }; @@ -1513,7 +1527,8 @@ router.post('/v1/chat/completions', async (req, res) => { null, true, 0, - streamingCallback + streamingCallback, + reasoningOptions ); if (captureToolCalls) { @@ -1592,7 +1607,7 @@ router.post('/v1/chat/completions', async (req, res) => { } else { const qwenChatId = await resolveQwenChatId(effectiveChatId, mappedModel); - const result = await sendMessage(messageContent, mappedModel, qwenChatId, effectiveParentId, files, qwenTools, tool_choice, toolAwareSystemMessage); + const result = await sendMessage(messageContent, mappedModel, qwenChatId, effectiveParentId, files, qwenTools, tool_choice, toolAwareSystemMessage, 't2t', null, true, 0, null, reasoningOptions); // Сохраняем chatId в сессии для следующих запросов if (!isMeta && result.chatId) { @@ -1614,8 +1629,11 @@ router.post('/v1/chat/completions', async (req, res) => { // Извлекаем контент сообщения let messageText = ''; + let reasoningText = ''; if (result.choices && result.choices[0] && result.choices[0].message) { - messageText = result.choices[0].message.content || ''; + const assistantMessage = result.choices[0].message; + messageText = assistantMessage.content || ''; + reasoningText = assistantMessage.reasoning_content || assistantMessage.reasoning || ''; } else if (result.response && result.response.text) { messageText = result.response.text; } @@ -1634,7 +1652,11 @@ router.post('/v1/chat/completions', async (req, res) => { index: 0, message: { role: "assistant", - content: messageText + content: messageText, + ...(reasoningText ? 
// Tests for Qwen thinking/reasoning support: payload flags, request parsing,
// and splitting of streamed deltas into reasoning and answer parts.
import { deepEqual, equal } from 'node:assert/strict';
import test from 'node:test';

import { buildPayloadV2 } from '../src/api/chat.js';
import {
  appendQwenStreamChunk,
  createQwenAccumulator,
  getReasoningOptionsFromRequest,
  getToolAwareReasoningOptionsFromRequest,
  resolveThinkingOptions,
  splitQwenDelta
} from '../src/api/qwenReasoning.js';

const MODEL = 'qwen3.7-max';
const CHAT_ID = 'chat-1';
const DEFAULT_BUDGET = 81920;

// feature_config expected whenever thinking is switched off.
const THINKING_OFF_CONFIG = {
  thinking_enabled: false,
  output_schema: 'phase'
};

// Reads the feature_config of the first message in a built payload.
const featureConfigOf = (payload) => payload.messages[0].feature_config;

test('buildPayloadV2 enables text thinking by default', () => {
  const config = featureConfigOf(
    buildPayloadV2('hello', MODEL, CHAT_ID, null, [], null, null, null)
  );

  equal(config.thinking_enabled, true);
  equal(config.output_schema, 'phase');
  equal(config.thinking_budget, DEFAULT_BUDGET);
});

test('buildPayloadV2 allows request-level thinking disable', () => {
  const reasoningOptions = { enableThinking: false };
  const payload = buildPayloadV2(
    'hello', MODEL, CHAT_ID, null, [], null, null, null, 't2t', null, reasoningOptions
  );

  deepEqual(featureConfigOf(payload), THINKING_OFF_CONFIG);
});

test('request parser accepts common reasoning controls', () => {
  const requestBody = {
    reasoning_effort: 'none',
    extra_body: { thinking_budget: 1234 }
  };
  const { enabled, budget } = resolveThinkingOptions(
    getReasoningOptionsFromRequest(requestBody),
    true,
    DEFAULT_BUDGET
  );

  equal(enabled, false);
  equal(budget, 1234);
});

test('tool-call requests disable thinking to preserve JSON tool adapter', () => {
  const requestBody = { enable_thinking: true, thinking_budget: 1234 };
  const options = getToolAwareReasoningOptionsFromRequest(requestBody, true);

  equal(resolveThinkingOptions(options, true, DEFAULT_BUDGET).enabled, false);

  const payload = buildPayloadV2(
    'use a tool', MODEL, CHAT_ID, null, [], null, null, null, 't2t', null, options
  );
  deepEqual(featureConfigOf(payload), THINKING_OFF_CONFIG);
});

test('splitQwenDelta treats think phase content as reasoning', () => {
  const thinkingPart = splitQwenDelta({ phase: 'think', content: 'plan' });
  deepEqual(thinkingPart, {
    phase: 'think',
    reasoningContent: 'plan',
    answerContent: '',
    isThinking: true
  });

  const answerPart = splitQwenDelta({ phase: 'answer', content: 'result' });
  deepEqual(answerPart, {
    phase: 'answer',
    reasoningContent: '',
    answerContent: 'result',
    isThinking: false
  });
});

test('appendQwenStreamChunk separates reasoning and answer without finishing on think phase', () => {
  const accumulator = createQwenAccumulator();
  const streamed = [];
  const record = (chunk, partType) => streamed.push([partType, chunk]);
  const chunkOf = (delta) => ({ choices: [{ delta }] });

  // A "finished" status inside the think phase must not end the stream.
  appendQwenStreamChunk(
    accumulator,
    chunkOf({ phase: 'think', content: 'first ', status: 'finished' }),
    record
  );
  equal(accumulator.finished, false);

  appendQwenStreamChunk(
    accumulator,
    chunkOf({ phase: 'answer', content: 'second', status: 'finished' }),
    record
  );

  equal(accumulator.reasoningContent, 'first ');
  equal(accumulator.content, 'second');
  equal(accumulator.finished, true);
  deepEqual(streamed, [['reasoning', 'first '], ['content', 'second']]);
});