diff --git a/openless-all/app/src-tauri/src/polish.rs b/openless-all/app/src-tauri/src/polish.rs index 6f01d8f4..edea6b79 100644 --- a/openless-all/app/src-tauri/src/polish.rs +++ b/openless-all/app/src-tauri/src/polish.rs @@ -4,6 +4,7 @@ //! and `PolishPrompts.swift`. The system prompt strings are copied verbatim //! from Swift to keep behaviour identical. +use std::borrow::Cow; use std::collections::HashMap; use std::time::Duration; @@ -223,7 +224,8 @@ fn extract_assistant_content(body: &str) -> Result { /// an iterative trim — if the model stacks two boilerplate sentences we'll still /// strip both. fn clean_polish_output(content: &str) -> String { - let trimmed = content.trim(); + let without_thinking = strip_thinking_blocks(content); + let trimmed = without_thinking.trim(); let stripped = strip_markdown_fence(trimmed); let mut output = stripped.to_string(); @@ -239,6 +241,106 @@ fn clean_polish_output(content: &str) -> String { output.trim().to_string() } +/// Strip model reasoning blocks so only the final polished text is inserted. +/// +/// Thinking-capable OpenAI-compatible models commonly return their reasoning in +/// `...` before the final answer. Match only explicit `think` +/// tags, with optional attributes and ASCII casing variants, so normal prose is +/// left untouched. +fn strip_thinking_blocks(text: &str) -> Cow<'_, str> { + let mut cursor = 0; + let mut output: Option = None; + + while let Some((open_start, open_end)) = find_think_open(&text[cursor..]) { + let open_start = cursor + open_start; + let open_end = cursor + open_end; + let Some((_, close_end)) = find_think_close(&text[open_end..]) else { + break; + }; + let close_end = open_end + close_end; + + output + .get_or_insert_with(|| String::with_capacity(text.len())) + .push_str(&text[cursor..open_start]); + cursor = close_end; + } + + match output { + Some(mut output) => { + output.push_str(&text[cursor..]); + Cow::Owned(output) + } + None => Cow::Borrowed(text), + } +} + +fn find_think_open(text: &str) -> Option<(usize, usize)> { + let mut cursor = 0; + while let Some(offset) = text[cursor..].find('<') { + let start = cursor + offset; + if let Some(end) = parse_think_open_at(text, start) { + return Some((start, end)); + } + cursor = start + '<'.len_utf8(); + } + None +} + +fn find_think_close(text: &str) -> Option<(usize, usize)> { + let mut cursor = 0; + while let Some(offset) = text[cursor..].find('<') { + let start = cursor + offset; + if let Some(end) = parse_think_close_at(text, start) { + return Some((start, end)); + } + cursor = start + '<'.len_utf8(); + } + None +} + +fn parse_think_open_at(text: &str, start: usize) -> Option { + let tag_start = start + '<'.len_utf8(); + if text.as_bytes().get(tag_start) == Some(&b'/') { + return None; + } + parse_think_tag_end(text, tag_start, true) +} + +fn parse_think_close_at(text: &str, start: usize) -> Option { + let slash = start + '<'.len_utf8(); + if text.as_bytes().get(slash) != Some(&b'/') { + return None; + } + parse_think_tag_end(text, slash + '/'.len_utf8(), false) +} + +fn parse_think_tag_end(text: &str, tag_start: usize, allow_attributes: bool) -> Option { + let tag_end = tag_start.checked_add("think".len())?; + if tag_end > text.len() || !text[tag_start..tag_end].eq_ignore_ascii_case("think") { + return None; + } + + let next = text.as_bytes().get(tag_end).copied()?; + if next == b'>' { + return Some(tag_end + 1); + } + if !next.is_ascii_whitespace() { + return None; + } + + if allow_attributes { + return text[tag_end..].find('>').map(|offset| tag_end + offset + 1); + } + + let suffix = &text[tag_end..]; + let trimmed = suffix.trim_start_matches(|c: char| c.is_ascii_whitespace()); + if trimmed.starts_with('>') { + Some(text.len() - trimmed.len() + 1) + } else { + None + } +} + fn strip_markdown_fence(text: &str) -> &str { if !(text.starts_with("```") && text.ends_with("```")) { return text; @@ -348,3 +450,47 @@ pub mod prompts { ) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn clean_polish_output_strips_think_tag_block() { + let content = + "先分析用户意图。\n这里可能很长。\n\n请明天上午十点提醒我开会。"; + + assert_eq!(clean_polish_output(content), "请明天上午十点提醒我开会。"); + } + + #[test] + fn clean_polish_output_strips_think_tag_with_attributes_and_case() { + let content = r#"hidden +最终文本。"#; + + assert_eq!(clean_polish_output(content), "最终文本。"); + } + + #[test] + fn clean_polish_output_strips_multiple_think_blocks() { + let content = "one第一句。two第二句。"; + + assert_eq!(clean_polish_output(content), "第一句。第二句。"); + } + + #[test] + fn strip_thinking_blocks_ignores_non_think_and_unclosed_tags() { + assert!(matches!( + strip_thinking_blocks("普通文本"), + Cow::Borrowed(_) + )); + assert_eq!( + strip_thinking_blocks("保留正文"), + "保留正文" + ); + assert_eq!( + strip_thinking_blocks("未闭合正文"), + "未闭合正文" + ); + } +}