Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 147 additions & 1 deletion openless-all/app/src-tauri/src/polish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
//! and `PolishPrompts.swift`. The system prompt strings are copied verbatim
//! from Swift to keep behaviour identical.

use std::borrow::Cow;
use std::collections::HashMap;
use std::time::Duration;

Expand Down Expand Up @@ -223,7 +224,8 @@ fn extract_assistant_content(body: &str) -> Result<String, LLMError> {
/// an iterative trim — if the model stacks two boilerplate sentences we'll still
/// strip both.
fn clean_polish_output(content: &str) -> String {
let trimmed = content.trim();
let without_thinking = strip_thinking_blocks(content);
let trimmed = without_thinking.trim();
let stripped = strip_markdown_fence(trimmed);
let mut output = stripped.to_string();

Expand All @@ -239,6 +241,106 @@ fn clean_polish_output(content: &str) -> String {
output.trim().to_string()
}

/// Strip model reasoning blocks so only the final polished text is inserted.
///
/// Thinking-capable OpenAI-compatible models commonly return their reasoning in
/// `<think>...</think>` before the final answer. Match only explicit `think`
/// tags, with optional attributes and ASCII casing variants, so normal prose is
/// left untouched.
fn strip_thinking_blocks(text: &str) -> Cow<'_, str> {
let mut cursor = 0;
let mut output: Option<String> = None;

while let Some((open_start, open_end)) = find_think_open(&text[cursor..]) {
let open_start = cursor + open_start;
let open_end = cursor + open_end;
let Some((_, close_end)) = find_think_close(&text[open_end..]) else {
break;
};
let close_end = open_end + close_end;

output
.get_or_insert_with(|| String::with_capacity(text.len()))
.push_str(&text[cursor..open_start]);
cursor = close_end;
}

match output {
Some(mut output) => {
output.push_str(&text[cursor..]);
Cow::Owned(output)
}
None => Cow::Borrowed(text),
}
}

fn find_think_open(text: &str) -> Option<(usize, usize)> {
let mut cursor = 0;
while let Some(offset) = text[cursor..].find('<') {
let start = cursor + offset;
if let Some(end) = parse_think_open_at(text, start) {
return Some((start, end));
}
cursor = start + '<'.len_utf8();
}
None
}

fn find_think_close(text: &str) -> Option<(usize, usize)> {
let mut cursor = 0;
while let Some(offset) = text[cursor..].find('<') {
let start = cursor + offset;
if let Some(end) = parse_think_close_at(text, start) {
return Some((start, end));
}
cursor = start + '<'.len_utf8();
}
None
}

fn parse_think_open_at(text: &str, start: usize) -> Option<usize> {
let tag_start = start + '<'.len_utf8();
if text.as_bytes().get(tag_start) == Some(&b'/') {
return None;
}
parse_think_tag_end(text, tag_start, true)
}

fn parse_think_close_at(text: &str, start: usize) -> Option<usize> {
let slash = start + '<'.len_utf8();
if text.as_bytes().get(slash) != Some(&b'/') {
return None;
}
parse_think_tag_end(text, slash + '/'.len_utf8(), false)
}

fn parse_think_tag_end(text: &str, tag_start: usize, allow_attributes: bool) -> Option<usize> {
let tag_end = tag_start.checked_add("think".len())?;
if tag_end > text.len() || !text[tag_start..tag_end].eq_ignore_ascii_case("think") {
return None;
}

let next = text.as_bytes().get(tag_end).copied()?;
if next == b'>' {
return Some(tag_end + 1);
}
if !next.is_ascii_whitespace() {
return None;
}

if allow_attributes {
return text[tag_end..].find('>').map(|offset| tag_end + offset + 1);
}

let suffix = &text[tag_end..];
let trimmed = suffix.trim_start_matches(|c: char| c.is_ascii_whitespace());
if trimmed.starts_with('>') {
Some(text.len() - trimmed.len() + 1)
} else {
None
}
}

fn strip_markdown_fence(text: &str) -> &str {
if !(text.starts_with("```") && text.ends_with("```")) {
return text;
Expand Down Expand Up @@ -348,3 +450,47 @@ pub mod prompts {
)
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn clean_polish_output_strips_think_tag_block() {
let content =
"<think>先分析用户意图。\n这里可能很长。</think>\n\n请明天上午十点提醒我开会。";

assert_eq!(clean_polish_output(content), "请明天上午十点提醒我开会。");
}

#[test]
fn clean_polish_output_strips_think_tag_with_attributes_and_case() {
let content = r#"<THINK reason="true">hidden</THINK>
最终文本。"#;

assert_eq!(clean_polish_output(content), "最终文本。");
}

#[test]
fn clean_polish_output_strips_multiple_think_blocks() {
let content = "<think>one</think>第一句。<think>two</think>第二句。";

assert_eq!(clean_polish_output(content), "第一句。第二句。");
}

#[test]
fn strip_thinking_blocks_ignores_non_think_and_unclosed_tags() {
assert!(matches!(
strip_thinking_blocks("普通文本"),
Cow::Borrowed(_)
));
assert_eq!(
strip_thinking_blocks("<thinking>保留</thinking>正文"),
"<thinking>保留</thinking>正文"
);
assert_eq!(
strip_thinking_blocks("<think>未闭合正文"),
"<think>未闭合正文"
);
}
}
Loading