From 53621336b497793ba2b6b0ebff068136d13b6201 Mon Sep 17 00:00:00 2001 From: Roj234 <82699138+roj234@users.noreply.github.com> Date: Mon, 20 Apr 2026 19:31:26 +0800 Subject: [PATCH 1/2] Allow thinking prefill --- tools/server/server-common.cpp | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index f66b1f2557c..1e4c398e45c 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1068,13 +1068,6 @@ json oaicompat_chat_params_parse( throw std::invalid_argument("Cannot have 2 or more assistant messages at the end of the list."); } - /* TODO: test this properly */ - inputs.reasoning_format = COMMON_REASONING_FORMAT_NONE; - - if ( inputs.enable_thinking ) { - throw std::invalid_argument("Assistant response prefill is incompatible with enable_thinking."); - } - inputs.add_generation_prompt = true; } inputs.force_pure_content = opt.force_pure_content; @@ -1084,11 +1077,27 @@ json oaicompat_chat_params_parse( /* Append assistant prefilled message */ if (prefill_assistant_message) { + auto append_thinking_end = inputs.enable_thinking; + if ( append_thinking_end ) { + chat_params.prompt += last_message.reasoning_content; + } + // TODO may there need \n\n + std::string thinking_end = chat_params.thinking_end_tag; + if (!last_message.content_parts.empty()) { for (auto & p : last_message.content_parts) { + if (p.text.length() > 0 && append_thinking_end) { + chat_params.prompt += thinking_end; + append_thinking_end = false; + } + chat_params.prompt += p.text; } } else { + if (last_message.content.length() > 0 && append_thinking_end) { + chat_params.prompt += thinking_end; + append_thinking_end = false; + } chat_params.prompt += last_message.content; } } From 623ea9e178331f8f9f165a044abadbdd4853737f Mon Sep 17 00:00:00 2001 From: Roj234 <82699138+roj234@users.noreply.github.com> Date: Tue, 21 Apr 2026 03:59:00 +0800 Subject: [PATCH 2/2] make tag 
include \n this also makes reasoning-budget work better without a prompt --- common/chat-diff-analyzer.cpp | 2 +- tools/server/server-common.cpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp index 2f0bd14afa3..54a998dec05 100644 --- a/common/chat-diff-analyzer.cpp +++ b/common/chat-diff-analyzer.cpp @@ -296,7 +296,7 @@ void analyze_reasoning::compare_reasoning_presence() { return p.literal(reasoning_content) + p.space() + p.optional(p.tag("post", (p.marker() + p.space())) + p.rest()); }); auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) { - return p.tag("pre", p.marker() + p.space()) + p.literal(reasoning_content) + p.space() + p.tag("post", (p.marker() + p.space())) + p.rest(); + return p.tag("pre", p.marker() + p.space()) + p.literal(reasoning_content) + p.tag("post", (p.space() + p.marker() + p.space())) + p.rest(); }); // try the more aggressive parse first, if it fails, fall back to the delimiter one auto result = parser_wrapped.parse_anywhere_and_extract(comparison->output_B); diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 1e4c398e45c..8539e215bb3 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1081,7 +1081,6 @@ json oaicompat_chat_params_parse( if ( append_thinking_end ) { chat_params.prompt += last_message.reasoning_content; } - // TODO may there need \n\n std::string thinking_end = chat_params.thinking_end_tag; if (!last_message.content_parts.empty()) {