From 2e63c428407b46c8cd711d83919f513fef2e3633 Mon Sep 17 00:00:00 2001
From: H-Chris233 <h-chris233@outlook.com>
Date: Thu, 30 Apr 2026 11:50:59 +0800
Subject: [PATCH 1/2] Keep model reasoning out of inserted polish text

Thinking-capable OpenAI-compatible models can return tagged reasoning before the final polished answer. The cleanup layer now strips only explicit think-tag blocks after response parsing so existing provider requests, UI, and fallback behavior stay unchanged.

Constraint: Issue #25 asks to adapt thinking output rather than disabling model thinking.

Rejected: Add a provider-specific thinking toggle | broader UI and settings change than needed for the bug.

Rejected: Strip localized heading text | too likely to remove normal user-facing content without a stable provider contract.

Confidence: medium

Scope-risk: narrow

Tested: cargo test polish::tests -- --nocapture

Tested: cargo check
---
 openless-all/app/src-tauri/src/polish.rs | 36 +++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)
diff --git a/openless-all/app/src-tauri/src/polish.rs b/openless-all/app/src-tauri/src/polish.rs
index 6f01d8f4..2511869f 100644
--- a/openless-all/app/src-tauri/src/polish.rs
+++ b/openless-all/app/src-tauri/src/polish.rs
@@ -223,7 +223,8 @@ fn extract_assistant_content(body: &str) -> Result<String, LLMError> {
 /// an iterative trim — if the model stacks two boilerplate sentences we'll still
 /// strip both.
 fn clean_polish_output(content: &str) -> String {
-    let trimmed = content.trim();
+    let without_thinking = strip_thinking_blocks(content);
+    let trimmed = without_thinking.trim();
     let stripped = strip_markdown_fence(trimmed);
     let mut output = stripped.to_string();
 
@@ -239,6 +240,26 @@ fn clean_polish_output(content: &str) -> String {
     output.trim().to_string()
 }
 
+/// Strip model reasoning blocks so only the final polished text is inserted.
+///
+/// Thinking-capable OpenAI-compatible models commonly return their reasoning in
+/// `<think>...</think>` before the final answer, so keep this as a
+/// conservative cleanup layer after parsing `message.content` instead of
+/// provider-specific handling.
+fn strip_thinking_blocks(text: &str) -> String {
+    let mut output = text.to_string();
+
+    while let Some(start) = output.find("<think>") {
+        let Some(end_from_start) = output[start..].find("</think>") else {
+            break;
+        };
+        let end = start + end_from_start + "</think>".len();
+        output.replace_range(start..end, "");
+    }
+
+    output
+}
+
 fn strip_markdown_fence(text: &str) -> &str {
     if !(text.starts_with("```") && text.ends_with("```")) {
         return text;
@@ -348,3 +369,16 @@ pub mod prompts {
         )
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn clean_polish_output_strips_think_tag_block() {
+        let content =
+            "<think>先分析用户意图。\n这里可能很长。</think>\n\n请明天上午十点提醒我开会。";
+
+        assert_eq!(clean_polish_output(content), "请明天上午十点提醒我开会。");
+    }
+}

From 32eb406d0bca8e6fa457f8146dcaf45ec2d713d4 Mon Sep 17 00:00:00 2001
From: H-Chris233 <h-chris233@outlook.com>
Date: Thu, 30 Apr 2026 13:10:21 +0800
Subject: [PATCH 2/2] Keep reasoning cleanup linear and conservative

Review feedback pointed out that repeated replace_range passes could become quadratic and that provider tags may vary by casing or attributes. The parser now scans the response once, allocates only when a complete think block is removed, and recognizes explicit think tags with optional attributes and ASCII casing variants.

Constraint: Keep cleanup conservative and avoid localized heading stripping.

Rejected: Regex dependency | unnecessary for one small tag parser.

Rejected: Strip broad reasoning headings | can remove valid user-facing prose.

Confidence: medium

Scope-risk: narrow

Tested: cargo test polish::tests -- --nocapture

Tested: cargo check

Tested: npm run build
---
 openless-all/app/src-tauri/src/polish.rs | 134 +++++++++++++++++++++--
 1 file changed, 123 insertions(+), 11 deletions(-)

diff --git a/openless-all/app/src-tauri/src/polish.rs b/openless-all/app/src-tauri/src/polish.rs
index 2511869f..edea6b79 100644
--- a/openless-all/app/src-tauri/src/polish.rs
+++ b/openless-all/app/src-tauri/src/polish.rs
@@ -4,6 +4,7 @@
 //! and `PolishPrompts.swift`. The system prompt strings are copied verbatim
 //! from Swift to keep behaviour identical.
 
+use std::borrow::Cow;
 use std::collections::HashMap;
 use std::time::Duration;
 
@@ -243,21 +244,101 @@ fn clean_polish_output(content: &str) -> String {
 /// Strip model reasoning blocks so only the final polished text is inserted.
 ///
 /// Thinking-capable OpenAI-compatible models commonly return their reasoning in
-/// `<think>...</think>` before the final answer, so keep this as a
-/// conservative cleanup layer after parsing `message.content` instead of
-/// provider-specific handling.
-fn strip_thinking_blocks(text: &str) -> String {
-    let mut output = text.to_string();
-
-    while let Some(start) = output.find("<think>") {
-        let Some(end_from_start) = output[start..].find("</think>") else {
+/// `<think>...</think>` before the final answer. Match only explicit `think`
+/// tags, with optional attributes and ASCII casing variants, so normal prose is
+/// left untouched.
+fn strip_thinking_blocks(text: &str) -> Cow<'_, str> {
+    let mut cursor = 0;
+    let mut output: Option<String> = None;
+
+    while let Some((open_start, open_end)) = find_think_open(&text[cursor..]) {
+        let open_start = cursor + open_start;
+        let open_end = cursor + open_end;
+        let Some((_, close_end)) = find_think_close(&text[open_end..]) else {
             break;
         };
-        let end = start + end_from_start + "</think>".len();
-        output.replace_range(start..end, "");
+        let close_end = open_end + close_end;
+
+        output
+            .get_or_insert_with(|| String::with_capacity(text.len()))
+            .push_str(&text[cursor..open_start]);
+        cursor = close_end;
+    }
+
+    match output {
+        Some(mut output) => {
+            output.push_str(&text[cursor..]);
+            Cow::Owned(output)
+        }
+        None => Cow::Borrowed(text),
+    }
+}
+
+fn find_think_open(text: &str) -> Option<(usize, usize)> {
+    let mut cursor = 0;
+    while let Some(offset) = text[cursor..].find('<') {
+        let start = cursor + offset;
+        if let Some(end) = parse_think_open_at(text, start) {
+            return Some((start, end));
+        }
+        cursor = start + '<'.len_utf8();
+    }
+    None
+}
+
+fn find_think_close(text: &str) -> Option<(usize, usize)> {
+    let mut cursor = 0;
+    while let Some(offset) = text[cursor..].find('<') {
+        let start = cursor + offset;
+        if let Some(end) = parse_think_close_at(text, start) {
+            return Some((start, end));
+        }
+        cursor = start + '<'.len_utf8();
+    }
+    None
+}
+
+fn parse_think_open_at(text: &str, start: usize) -> Option<usize> {
+    let tag_start = start + '<'.len_utf8();
+    if text.as_bytes().get(tag_start) == Some(&b'/') {
+        return None;
+    }
+    parse_think_tag_end(text, tag_start, true)
+}
+
+fn parse_think_close_at(text: &str, start: usize) -> Option<usize> {
+    let slash = start + '<'.len_utf8();
+    if text.as_bytes().get(slash) != Some(&b'/') {
+        return None;
+    }
+    parse_think_tag_end(text, slash + '/'.len_utf8(), false)
+}
+
+fn parse_think_tag_end(text: &str, tag_start: usize, allow_attributes: bool) -> Option<usize> {
+    let tag_end = tag_start.checked_add("think".len())?;
+    if tag_end > text.len() || !text[tag_start..tag_end].eq_ignore_ascii_case("think") {
+        return None;
+    }
+
+    let next = text.as_bytes().get(tag_end).copied()?;
+    if next == b'>' {
+        return Some(tag_end + 1);
+    }
+    if !next.is_ascii_whitespace() {
+        return None;
+    }
+
+    if allow_attributes {
+        return text[tag_end..].find('>').map(|offset| tag_end + offset + 1);
     }
 
-    output
+    let suffix = &text[tag_end..];
+    let trimmed = suffix.trim_start_matches(|c: char| c.is_ascii_whitespace());
+    if trimmed.starts_with('>') {
+        Some(text.len() - trimmed.len() + 1)
+    } else {
+        None
+    }
 }
 
 fn strip_markdown_fence(text: &str) -> &str {
@@ -381,4 +462,35 @@ mod tests {
 
         assert_eq!(clean_polish_output(content), "请明天上午十点提醒我开会。");
     }
+
+    #[test]
+    fn clean_polish_output_strips_think_tag_with_attributes_and_case() {
+        let content = r#"<THINK reason="true">hidden</THINK>
+最终文本。"#;
+
+        assert_eq!(clean_polish_output(content), "最终文本。");
+    }
+
+    #[test]
+    fn clean_polish_output_strips_multiple_think_blocks() {
+        let content = "<think>one</think>第一句。<think>two</think>第二句。";
+
+        assert_eq!(clean_polish_output(content), "第一句。第二句。");
+    }
+
+    #[test]
+    fn strip_thinking_blocks_ignores_non_think_and_unclosed_tags() {
+        assert!(matches!(
+            strip_thinking_blocks("普通文本"),
+            Cow::Borrowed(_)
+        ));
+        assert_eq!(
+            strip_thinking_blocks("<thinking>保留</thinking>正文"),
+            "<thinking>保留</thinking>正文"
+        );
+        assert_eq!(
+            strip_thinking_blocks("<think>未闭合正文"),
+            "<think>未闭合正文"
+        );
+    }
 }