From 2e63c428407b46c8cd711d83919f513fef2e3633 Mon Sep 17 00:00:00 2001 From: H-Chris233 Date: Thu, 30 Apr 2026 11:50:59 +0800 Subject: [PATCH 1/2] Keep model reasoning out of inserted polish text Thinking-capable OpenAI-compatible models can return tagged reasoning before the final polished answer. The cleanup layer now strips only explicit think-tag blocks after response parsing so existing provider requests, UI, and fallback behavior stay unchanged. Constraint: Issue #25 asks to adapt thinking output rather than disabling model thinking. Rejected: Add a provider-specific thinking toggle | broader UI and settings change than needed for the bug. Rejected: Strip localized heading text | too likely to remove normal user-facing content without a stable provider contract. Confidence: medium Scope-risk: narrow Tested: cargo test polish::tests -- --nocapture Tested: cargo check --- openless-all/app/src-tauri/src/polish.rs | 36 +++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/openless-all/app/src-tauri/src/polish.rs b/openless-all/app/src-tauri/src/polish.rs index 6f01d8f4..2511869f 100644 --- a/openless-all/app/src-tauri/src/polish.rs +++ b/openless-all/app/src-tauri/src/polish.rs @@ -223,7 +223,8 @@ fn extract_assistant_content(body: &str) -> Result { /// an iterative trim — if the model stacks two boilerplate sentences we'll still /// strip both. fn clean_polish_output(content: &str) -> String { - let trimmed = content.trim(); + let without_thinking = strip_thinking_blocks(content); + let trimmed = without_thinking.trim(); let stripped = strip_markdown_fence(trimmed); let mut output = stripped.to_string(); @@ -239,6 +240,26 @@ fn clean_polish_output(content: &str) -> String { output.trim().to_string() } +/// Strip model reasoning blocks so only the final polished text is inserted. 
+/// +/// Thinking-capable OpenAI-compatible models commonly return their reasoning in +/// `<think>...</think>` before the final answer, so keep this as a +/// conservative cleanup layer after parsing `message.content` instead of +/// provider-specific handling. +fn strip_thinking_blocks(text: &str) -> String { + let mut output = text.to_string(); + + while let Some(start) = output.find("<think>") { + let Some(end_from_start) = output[start..].find("</think>") else { + break; + }; + let end = start + end_from_start + "</think>".len(); + output.replace_range(start..end, ""); + } + + output +} + fn strip_markdown_fence(text: &str) -> &str { if !(text.starts_with("```") && text.ends_with("```")) { return text; @@ -348,3 +369,16 @@ pub mod prompts { ) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn clean_polish_output_strips_think_tag_block() { + let content = + "<think>先分析用户意图。\n这里可能很长。\n</think>\n请明天上午十点提醒我开会。"; + + assert_eq!(clean_polish_output(content), "请明天上午十点提醒我开会。"); + } +} From 32eb406d0bca8e6fa457f8146dcaf45ec2d713d4 Mon Sep 17 00:00:00 2001 From: H-Chris233 Date: Thu, 30 Apr 2026 13:10:21 +0800 Subject: [PATCH 2/2] Keep reasoning cleanup linear and conservative Review feedback pointed out that repeated replace_range passes could become quadratic and that provider tags may vary by casing or attributes. The parser now scans the response once, allocates only when a complete think block is removed, and recognizes explicit think tags with optional attributes and ASCII casing variants. Constraint: Keep cleanup conservative and avoid localized heading stripping. Rejected: Regex dependency | unnecessary for one small tag parser. Rejected: Strip broad reasoning headings | can remove valid user-facing prose. 
Confidence: medium Scope-risk: narrow Tested: cargo test polish::tests -- --nocapture Tested: cargo check Tested: npm run build --- openless-all/app/src-tauri/src/polish.rs | 134 +++++++++++++++++++++-- 1 file changed, 123 insertions(+), 11 deletions(-) diff --git a/openless-all/app/src-tauri/src/polish.rs b/openless-all/app/src-tauri/src/polish.rs index 2511869f..edea6b79 100644 --- a/openless-all/app/src-tauri/src/polish.rs +++ b/openless-all/app/src-tauri/src/polish.rs @@ -4,6 +4,7 @@ //! and `PolishPrompts.swift`. The system prompt strings are copied verbatim //! from Swift to keep behaviour identical. +use std::borrow::Cow; use std::collections::HashMap; use std::time::Duration; @@ -243,21 +244,101 @@ fn clean_polish_output(content: &str) -> String { /// Strip model reasoning blocks so only the final polished text is inserted. /// /// Thinking-capable OpenAI-compatible models commonly return their reasoning in -/// `<think>...</think>` before the final answer, so keep this as a -/// conservative cleanup layer after parsing `message.content` instead of -/// provider-specific handling. -fn strip_thinking_blocks(text: &str) -> String { - let mut output = text.to_string(); - - while let Some(start) = output.find("<think>") { - let Some(end_from_start) = output[start..].find("</think>") else { +/// `<think>...</think>` before the final answer. Match only explicit `think` +/// tags, with optional attributes and ASCII casing variants, so normal prose is +/// left untouched. 
+fn strip_thinking_blocks(text: &str) -> Cow<'_, str> { + let mut cursor = 0; + let mut output: Option<String> = None; + + while let Some((open_start, open_end)) = find_think_open(&text[cursor..]) { + let open_start = cursor + open_start; + let open_end = cursor + open_end; + let Some((_, close_end)) = find_think_close(&text[open_end..]) else { break; }; - let end = start + end_from_start + "</think>".len(); - output.replace_range(start..end, ""); + let close_end = open_end + close_end; + + output + .get_or_insert_with(|| String::with_capacity(text.len())) + .push_str(&text[cursor..open_start]); + cursor = close_end; + } + + match output { + Some(mut output) => { + output.push_str(&text[cursor..]); + Cow::Owned(output) + } + None => Cow::Borrowed(text), + } +} + +fn find_think_open(text: &str) -> Option<(usize, usize)> { + let mut cursor = 0; + while let Some(offset) = text[cursor..].find('<') { + let start = cursor + offset; + if let Some(end) = parse_think_open_at(text, start) { + return Some((start, end)); + } + cursor = start + '<'.len_utf8(); + } + None +} + +fn find_think_close(text: &str) -> Option<(usize, usize)> { + let mut cursor = 0; + while let Some(offset) = text[cursor..].find('<') { + let start = cursor + offset; + if let Some(end) = parse_think_close_at(text, start) { + return Some((start, end)); + } + cursor = start + '<'.len_utf8(); + } + None +} + +fn parse_think_open_at(text: &str, start: usize) -> Option<usize> { + let tag_start = start + '<'.len_utf8(); + if text.as_bytes().get(tag_start) == Some(&b'/') { + return None; + } + parse_think_tag_end(text, tag_start, true) +} + +fn parse_think_close_at(text: &str, start: usize) -> Option<usize> { + let slash = start + '<'.len_utf8(); + if text.as_bytes().get(slash) != Some(&b'/') { + return None; + } + parse_think_tag_end(text, slash + '/'.len_utf8(), false) +} + +fn parse_think_tag_end(text: &str, tag_start: usize, allow_attributes: bool) -> Option<usize> { + let tag_end = tag_start.checked_add("think".len())?; + if tag_end > 
text.len() || !text[tag_start..tag_end].eq_ignore_ascii_case("think") { + return None; + } + + let next = text.as_bytes().get(tag_end).copied()?; + if next == b'>' { + return Some(tag_end + 1); + } + if !next.is_ascii_whitespace() { + return None; + } + + if allow_attributes { + return text[tag_end..].find('>').map(|offset| tag_end + offset + 1); } - output + let suffix = &text[tag_end..]; + let trimmed = suffix.trim_start_matches(|c: char| c.is_ascii_whitespace()); + if trimmed.starts_with('>') { + Some(text.len() - trimmed.len() + 1) + } else { + None + } } fn strip_markdown_fence(text: &str) -> &str { @@ -381,4 +462,35 @@ mod tests { assert_eq!(clean_polish_output(content), "请明天上午十点提醒我开会。"); } + + #[test] + fn clean_polish_output_strips_think_tag_with_attributes_and_case() { + let content = r#"hidden +最终文本。"#; + + assert_eq!(clean_polish_output(content), "最终文本。"); + } + + #[test] + fn clean_polish_output_strips_multiple_think_blocks() { + let content = "one第一句。two第二句。"; + + assert_eq!(clean_polish_output(content), "第一句。第二句。"); + } + + #[test] + fn strip_thinking_blocks_ignores_non_think_and_unclosed_tags() { + assert!(matches!( + strip_thinking_blocks("普通文本"), + Cow::Borrowed(_) + )); + assert_eq!( + strip_thinking_blocks("保留正文"), + "保留正文" + ); + assert_eq!( + strip_thinking_blocks("未闭合正文"), + "未闭合正文" + ); + } }