From 2fb365de49e21d08b850dbf4c23b6ed3c248d903 Mon Sep 17 00:00:00 2001 From: Minkyu Kim Date: Tue, 7 Apr 2026 20:44:42 +0900 Subject: [PATCH 1/4] fix(markdown): preserve korean spacing when wrapping text --- Cargo.lock | 1 + crates/forge_markdown_stream/Cargo.toml | 1 + crates/forge_markdown_stream/src/heading.rs | 17 +- crates/forge_markdown_stream/src/lib.rs | 82 ++++++- crates/forge_markdown_stream/src/list.rs | 23 +- crates/forge_markdown_stream/src/renderer.rs | 7 +- crates/forge_markdown_stream/src/utils.rs | 219 +++++++++++++++++++ 7 files changed, 331 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f77d1be63d..845eeb1058 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2219,6 +2219,7 @@ version = "0.1.0" dependencies = [ "colored", "insta", + "pretty_assertions", "streamdown-ansi", "streamdown-core", "streamdown-parser", diff --git a/crates/forge_markdown_stream/Cargo.toml b/crates/forge_markdown_stream/Cargo.toml index 539a247568..858cb28704 100644 --- a/crates/forge_markdown_stream/Cargo.toml +++ b/crates/forge_markdown_stream/Cargo.toml @@ -21,3 +21,4 @@ terminal-colorsaurus = "1.0.3" [dev-dependencies] insta.workspace = true strip-ansi-escapes.workspace = true +pretty_assertions.workspace = true diff --git a/crates/forge_markdown_stream/src/heading.rs b/crates/forge_markdown_stream/src/heading.rs index 10d9a77e57..66fac74088 100644 --- a/crates/forge_markdown_stream/src/heading.rs +++ b/crates/forge_markdown_stream/src/heading.rs @@ -1,9 +1,8 @@ //! Heading rendering with theme-based styling. -use streamdown_render::simple_wrap; - use crate::inline::render_inline_content; use crate::style::{HeadingStyler, InlineStyler}; +use crate::utils::simple_wrap_preserving_spaces; /// Render a heading with appropriate styling. pub fn render_heading( @@ -30,7 +29,7 @@ pub fn render_heading( // chars, etc.) let prefix_display_width = level as usize + 1; let content_width = width.saturating_sub(prefix_display_width); - let lines = simple_wrap(&rendered_content, content_width); + let lines = simple_wrap_preserving_spaces(&rendered_content, content_width); let mut result = Vec::new(); for line in lines { @@ -201,6 +200,18 @@ mod tests { "); } + #[test] + fn test_h3_wrapping_preserves_korean_word_spaces() { + let actual = render_with_width(3, "한글 공백 보존 확인", 12); + + insta::assert_snapshot!(actual, @r" +

###

한글

+

###

공백

+

###

보존

+

###

확인

+ "); + } + #[test] fn test_special_characters() { insta::assert_snapshot!(render(2, "Hello & Goodbye < World >"), @"

##

Hello & Goodbye < World >

"); diff --git a/crates/forge_markdown_stream/src/lib.rs b/crates/forge_markdown_stream/src/lib.rs index 86af890e11..f700997923 100644 --- a/crates/forge_markdown_stream/src/lib.rs +++ b/crates/forge_markdown_stream/src/lib.rs @@ -12,11 +12,11 @@ //! //! fn main() -> io::Result<()> { //! let mut renderer = StreamdownRenderer::new(io::stdout(), 80); -//! +//! //! // Push tokens as they arrive from LLM //! renderer.push("Hello ")?; //! renderer.push("**world**!\n")?; -//! +//! //! // Finish rendering //! let _ = renderer.finish()?; //! Ok(()) @@ -109,3 +109,81 @@ impl StreamdownRenderer { Ok(()) } } + +#[cfg(test)] +mod tests { + use pretty_assertions::assert_eq; + + use super::StreamdownRenderer; + + fn fixture_rendered_output(markdown: &str, width: usize) -> String { + let mut output = Vec::new(); + let mut fixture = StreamdownRenderer::new(&mut output, width); + fixture.push(markdown).unwrap(); + fixture.finish().unwrap(); + + let actual = strip_ansi_escapes::strip(output); + String::from_utf8(actual).unwrap().trim_matches('\n').to_string() + } + + fn fixture_rendered_output_from_chunks(chunks: &[&str], width: usize) -> String { + let mut output = Vec::new(); + let mut fixture = StreamdownRenderer::new(&mut output, width); + for chunk in chunks { + fixture.push(chunk).unwrap(); + } + fixture.finish().unwrap(); + + let actual = strip_ansi_escapes::strip(output); + String::from_utf8(actual).unwrap().trim_matches('\n').to_string() + } + + #[test] + fn test_streaming_renderer_preserves_korean_spacing_in_structured_markdown() { + let fixture = concat!( + "## 구현 요약\n", + "- 각 서비스에서 metadata key를 개별 수정하지 않고, object storage 공통 레이어에서 일괄 정규화하도록 반영했습니다.\n", + "## 검토 사항\n", + "- 본 수정은 업로드 시 metadata header 이름 문제를 해결합니다.\n", + "- 추가적인 권한 정책, bucket policy, reverse proxy 제한이 있으면 별도 오류가 발생할 수 있습니다.\n", + ); + let actual = fixture_rendered_output(fixture, 200); + let expected = concat!( + "## 구현 요약\n", + "• 각 서비스에서 metadata key를 개별 수정하지 않고, object storage 공통 레이어에서 일괄 정규화하도록 반영했습니다.\n", + "\n", + "## 검토 사항\n", + "• 본 수정은 업로드 시 metadata header 이름 문제를 해결합니다.\n", + "• 추가적인 권한 정책, bucket policy, reverse proxy 제한이 있으면 별도 오류가 발생할 수 있습니다.", + ); + + assert_eq!(actual, expected); + } + + #[test] + fn test_streaming_renderer_preserves_korean_spacing_when_structured_tail_arrives_in_chunks() { + let fixture = [ + "## 검토 결과\n", + "- 본 사례는 스트리밍 마크다운 렌더링의 공백 재조합 문제와 관련이 있습니다.\n", + "- 핵심 구현은 공백 보존 래퍼에 위치합니다.\n", + "- 회귀 테스트는 스트리밍 렌더러 검증 항목에 추가되어 있습니다.\n\n", + "후속 작업은 다음과 같습니다.\n", + "1. 변경 사항을 검토 가능한 형식으로 정리합니다.\n", + "2. 실제 대화 출력과 유사한 통합 테스트 범위를 ", + "확장합니다.", + ]; + let actual = fixture_rendered_output_from_chunks(&fixture, 200); + let expected = concat!( + "## 검토 결과\n", + "• 본 사례는 스트리밍 마크다운 렌더링의 공백 재조합 문제와 관련이 있습니다.\n", + "• 핵심 구현은 공백 보존 래퍼에 위치합니다.\n", + "• 회귀 테스트는 스트리밍 렌더러 검증 항목에 추가되어 있습니다.\n", + "\n", + "후속 작업은 다음과 같습니다.\n", + "1. 변경 사항을 검토 가능한 형식으로 정리합니다.\n", + "2. 실제 대화 출력과 유사한 통합 테스트 범위를 확장합니다.", + ); + + assert_eq!(actual, expected); + } +} diff --git a/crates/forge_markdown_stream/src/list.rs b/crates/forge_markdown_stream/src/list.rs index 273e13886b..7785a3886b 100644 --- a/crates/forge_markdown_stream/src/list.rs +++ b/crates/forge_markdown_stream/src/list.rs @@ -2,10 +2,10 @@ use streamdown_ansi::utils::visible_length; use streamdown_parser::ListBullet; -use streamdown_render::text::text_wrap; use crate::inline::render_inline_content; use crate::style::{InlineStyler, ListStyler}; +use crate::utils::wrap_text_preserving_spaces; /// Bullet characters for dash lists at different nesting levels. const BULLETS_DASH: [&str; 4] = ["•", "◦", "▪", "‣"]; @@ -183,20 +183,13 @@ pub fn render_list_item( let next_prefix = format!("{}{}", margin, " ".repeat(content_indent)); // Wrap the content - let wrapped = text_wrap( - &rendered_content, - width, - 0, - &first_prefix, - &next_prefix, - false, - true, - ); + let wrapped = + wrap_text_preserving_spaces(&rendered_content, width, &first_prefix, &next_prefix); if wrapped.is_empty() { vec![first_prefix] } else { - wrapped.lines + wrapped } } @@ -338,6 +331,14 @@ mod tests { "); } + #[test] + fn test_wrapping_preserves_korean_word_spaces() { + let actual = render_with_width(0, ListBullet::Dash, "한글 공백 보존 확인", 8); + let expected = " 한글\n 공백\n 보존\n 확인"; + + pretty_assertions::assert_eq!(actual, expected); + } + #[test] fn test_list_state_reset() { let mut state = ListState::default(); diff --git a/crates/forge_markdown_stream/src/renderer.rs b/crates/forge_markdown_stream/src/renderer.rs index 00404c1882..a6c03cba05 100644 --- a/crates/forge_markdown_stream/src/renderer.rs +++ b/crates/forge_markdown_stream/src/renderer.rs @@ -3,7 +3,6 @@ use std::io::{self, Write}; use streamdown_parser::ParseEvent; -use streamdown_render::text::text_wrap; use crate::code::CodeHighlighter; use crate::heading::render_heading; @@ -12,6 +11,7 @@ use crate::list::{ListState, render_list_item}; use crate::style::InlineStyler; use crate::table::render_table; use crate::theme::Theme; +use crate::utils::wrap_text_preserving_spaces; /// Main renderer for markdown events. pub struct Renderer { @@ -257,11 +257,12 @@ impl Renderer { let width = self.current_width(); // Parse inline formatting (bold, italic, etc.) in blockquote content let rendered_content = render_inline_content(text, &self.theme); - let wrapped = text_wrap(&rendered_content, width, 0, &margin, &margin, false, true); + let wrapped = + wrap_text_preserving_spaces(&rendered_content, width, &margin, &margin); if wrapped.is_empty() { self.writeln(&margin)?; } else { - for line in wrapped.lines { + for line in wrapped { self.writeln(&line)?; } } diff --git a/crates/forge_markdown_stream/src/utils.rs b/crates/forge_markdown_stream/src/utils.rs index 85412158c2..555b60ee67 100644 --- a/crates/forge_markdown_stream/src/utils.rs +++ b/crates/forge_markdown_stream/src/utils.rs @@ -1,5 +1,7 @@ //! Utility functions for the markdown renderer. +use streamdown_ansi::utils::{ansi_collapse, extract_ansi_codes, visible, visible_length}; + /// Terminal theme mode (dark or light). #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ThemeMode { @@ -18,3 +20,220 @@ pub fn detect_theme_mode() -> ThemeMode { Ok(ColorsaurusThemeMode::Dark) | Err(_) => ThemeMode::Dark, } } + +#[derive(Debug, Clone, PartialEq, Eq)] +struct WrapChunk { + content: String, + is_whitespace: bool, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct WrapSegment { + separator: String, + word: String, +} + +/// Wraps ANSI-styled text while preserving explicit whitespace between words. +/// +/// Unlike the upstream streamdown wrapper, this keeps the original separator +/// string between tokens instead of reconstructing it from CJK heuristics. +pub(crate) fn wrap_text_preserving_spaces( + text: &str, + width: usize, + first_prefix: &str, + next_prefix: &str, +) -> Vec { + if width == 0 { + return Vec::new(); + } + + let segments = wrap_segments(text); + if segments.is_empty() { + return Vec::new(); + } + + let mut lines = Vec::new(); + let mut current_line = String::new(); + let mut current_style: Vec = Vec::new(); + + for segment in segments { + let separator = if current_line.is_empty() { + "" + } else { + segment.separator.as_str() + }; + let separator_width = visible_length(separator); + let word_width = visible_length(&segment.word); + let line_width = visible_length(¤t_line); + + if current_line.is_empty() || line_width + separator_width + word_width <= width { + current_line.push_str(separator); + apply_style_transition(&mut current_style, separator); + current_line.push_str(&segment.word); + apply_style_transition(&mut current_style, &segment.word); + continue; + } + + push_wrapped_line(&mut lines, ¤t_line, first_prefix, next_prefix); + + current_line = current_style.join(""); + current_line.push_str(&segment.word); + apply_style_transition(&mut current_style, &segment.word); + } + + push_wrapped_line(&mut lines, ¤t_line, first_prefix, next_prefix); + lines +} + +/// Wraps ANSI-styled inline text without prefixes while preserving explicit +/// spaces. +pub(crate) fn simple_wrap_preserving_spaces(text: &str, width: usize) -> Vec { + if width == 0 || text.is_empty() { + return vec![text.to_string()]; + } + + let lines = wrap_text_preserving_spaces(text, width, "", ""); + if lines.is_empty() { + vec![String::new()] + } else { + lines + } +} + +fn wrap_segments(text: &str) -> Vec { + let chunks = wrap_chunks(text); + let mut segments = Vec::new(); + let mut separator = String::new(); + + for chunk in chunks { + if chunk.is_whitespace { + separator.push_str(&chunk.content); + } else { + segments.push(WrapSegment { + separator: std::mem::take(&mut separator), + word: chunk.content, + }); + } + } + + segments +} + +fn wrap_chunks(text: &str) -> Vec { + let mut chunks = Vec::new(); + let mut current = String::new(); + let mut current_is_whitespace = None; + let mut in_escape = false; + let mut escape_buf = String::new(); + + for ch in text.chars() { + if in_escape { + escape_buf.push(ch); + if ch == 'm' { + current.push_str(&escape_buf); + escape_buf.clear(); + in_escape = false; + } + continue; + } + + if ch == '\x1b' { + in_escape = true; + escape_buf.push(ch); + continue; + } + + let is_whitespace = ch.is_whitespace(); + match current_is_whitespace { + Some(kind) if kind != is_whitespace => { + chunks + .push(WrapChunk { content: std::mem::take(&mut current), is_whitespace: kind }); + current_is_whitespace = Some(is_whitespace); + } + None => { + current_is_whitespace = Some(is_whitespace); + } + _ => {} + } + + current.push(ch); + } + + if !escape_buf.is_empty() { + current.push_str(&escape_buf); + } + + if let Some(is_whitespace) = current_is_whitespace + && !current.is_empty() + { + chunks.push(WrapChunk { content: current, is_whitespace }); + } + + chunks +} + +fn apply_style_transition(current_style: &mut Vec, text: &str) { + current_style.extend(extract_ansi_codes(text)); + *current_style = ansi_collapse(current_style, ""); +} + +fn push_wrapped_line( + lines: &mut Vec, + current_line: &str, + first_prefix: &str, + next_prefix: &str, +) { + if visible(current_line).trim().is_empty() { + return; + } + + let prefix = if lines.is_empty() { + first_prefix + } else { + next_prefix + }; + lines.push(format!("{prefix}{current_line}")); +} + +#[cfg(test)] +mod tests { + use pretty_assertions::assert_eq; + + use super::{simple_wrap_preserving_spaces, wrap_text_preserving_spaces}; + + #[test] + fn test_simple_wrap_preserving_spaces_keeps_korean_word_boundaries() { + let fixture = "한글 공백 보존 문장"; + let actual = simple_wrap_preserving_spaces(fixture, 8); + let expected = vec![ + "한글".to_string(), + "공백".to_string(), + "보존".to_string(), + "문장".to_string(), + ]; + + assert_eq!(actual, expected); + } + + #[test] + fn test_wrap_text_preserving_spaces_keeps_multiple_spaces_on_same_line() { + let fixture = "한글 공백 보존"; + let actual = wrap_text_preserving_spaces(fixture, 40, "", ""); + let expected = vec!["한글 공백 보존".to_string()]; + + assert_eq!(actual, expected); + } + + #[test] + fn test_wrap_text_preserving_spaces_applies_prefixes_after_wrap() { + let fixture = "한글 공백 검증"; + let actual = wrap_text_preserving_spaces(fixture, 8, "> ", " "); + let expected = vec![ + "> 한글".to_string(), + " 공백".to_string(), + " 검증".to_string(), + ]; + + assert_eq!(actual, expected); + } +} From 018bbf9e5ab46812f193ce8de0906ca32871e3e7 Mon Sep 17 00:00:00 2001 From: Minkyu Kim Date: Thu, 9 Apr 2026 03:25:33 +0900 Subject: [PATCH 2/4] fix(markdown): harden cjk wrapping edge cases --- crates/forge_markdown_stream/src/heading.rs | 11 + crates/forge_markdown_stream/src/lib.rs | 30 ++ crates/forge_markdown_stream/src/list.rs | 111 +++++++- crates/forge_markdown_stream/src/renderer.rs | 10 +- crates/forge_markdown_stream/src/utils.rs | 272 +++++++++++++++---- 5 files changed, 378 insertions(+), 56 deletions(-) diff --git a/crates/forge_markdown_stream/src/heading.rs b/crates/forge_markdown_stream/src/heading.rs index 66fac74088..42279a03cf 100644 --- a/crates/forge_markdown_stream/src/heading.rs +++ b/crates/forge_markdown_stream/src/heading.rs @@ -212,6 +212,17 @@ mod tests { "); } + #[test] + fn test_h3_wrapping_splits_long_tokens() { + let actual = render_with_width(3, "supercalifragilistic", 12); + + insta::assert_snapshot!(actual, @r" +

###

supercal

+

###

ifragili

+

###

stic

+ "); + } + #[test] fn test_special_characters() { insta::assert_snapshot!(render(2, "Hello & Goodbye < World >"), @"

##

Hello & Goodbye < World >

"); diff --git a/crates/forge_markdown_stream/src/lib.rs b/crates/forge_markdown_stream/src/lib.rs index f700997923..58dd1259af 100644 --- a/crates/forge_markdown_stream/src/lib.rs +++ b/crates/forge_markdown_stream/src/lib.rs @@ -186,4 +186,34 @@ mod tests { assert_eq!(actual, expected); } + + #[test] + fn test_streaming_renderer_wraps_blockquotes_with_prefix_width_and_long_tokens() { + let fixture = "> supercalifragilistic\n> 한글 공백\n"; + let actual = fixture_rendered_output(fixture, 10); + let expected = concat!( + "│ super\n", + "│ calif\n", + "│ ragil\n", + "│ istic\n", + "│ 한글\n", + "│ 공백" + ); + + assert_eq!(actual, expected); + } + + #[test] + fn test_streaming_renderer_wraps_blockquote_links_without_losing_separator() { + let fixture = "> [링크](https://example.com/very/long/path) 설명\n"; + let actual = fixture_rendered_output(fixture, 20); + let expected = concat!( + "│ 링크\n", + "│ (https://exampl\n", + "│ e.com/very/long\n", + "│ /path) 설명" + ); + + assert_eq!(actual, expected); + } } diff --git a/crates/forge_markdown_stream/src/list.rs b/crates/forge_markdown_stream/src/list.rs index 7785a3886b..beed14c74e 100644 --- a/crates/forge_markdown_stream/src/list.rs +++ b/crates/forge_markdown_stream/src/list.rs @@ -183,8 +183,13 @@ pub fn render_list_item( let next_prefix = format!("{}{}", margin, " ".repeat(content_indent)); // Wrap the content - let wrapped = - wrap_text_preserving_spaces(&rendered_content, width, &first_prefix, &next_prefix); + let wrapped = wrap_text_preserving_spaces( + &rendered_content, + width.saturating_sub(visible_length(&first_prefix)), + width.saturating_sub(visible_length(&next_prefix)), + &first_prefix, + &next_prefix, + ); if wrapped.is_empty() { vec![first_prefix] @@ -196,7 +201,7 @@ pub fn render_list_item( #[cfg(test)] mod tests { use super::*; - use crate::theme::TagStyler; + use crate::theme::{TagStyler, Theme}; fn render(indent: usize, bullet: ListBullet, content: &str) -> String { let mut state = ListState::default(); @@ -220,6 +225,28 @@ mod tests { .join("\n") } + fn render_visible_with_width( + indent: usize, + bullet: ListBullet, + content: &str, + width: usize, + ) -> String { + let mut state = ListState::default(); + let actual = render_list_item( + indent, + &bullet, + content, + width, + " ", + &Theme::default(), + &mut state, + ) + .join("\n"); + let stripped = strip_ansi_escapes::strip(actual.as_bytes()); + + String::from_utf8(stripped).unwrap() + } + #[test] fn test_unordered_dash() { insta::assert_snapshot!(render(0, ListBullet::Dash, "Item one"), @" Item one"); @@ -326,8 +353,9 @@ mod tests { 40, ); insta::assert_snapshot!(result, @r" - This is a very long list item that - should wrap to multiple lines + This is a very long + list item that should wrap to + multiple lines "); } @@ -339,6 +367,79 @@ mod tests { pretty_assertions::assert_eq!(actual, expected); } + #[test] + fn test_wrapping_respects_bullet_prefix_width() { + let actual = render_with_width(0, ListBullet::Dash, "한글 공백", 6); + let expected = " 한\n 글\n 공\n 백"; + + pretty_assertions::assert_eq!(actual, expected); + } + + #[test] + fn test_wrapping_respects_checkbox_prefix_width() { + let actual = render_with_width(0, ListBullet::Dash, "[ ] 한글 공백", 8); + let expected = " 한\n 글\n 공\n 백"; + + pretty_assertions::assert_eq!(actual, expected); + } + + #[test] + fn test_wrapping_respects_multidigit_ordered_prefix_width() { + let mut fixture = ListState::default(); + for index in 1..10 { + let _ = render_list_item( + 0, + &ListBullet::Ordered(1), + &format!("예시 {index}"), + 8, + " ", + &TagStyler, + &mut fixture, + ); + } + let actual = render_list_item( + 0, + &ListBullet::Ordered(1), + "한글 공백", + 8, + " ", + &TagStyler, + &mut fixture, + ) + .join("\n"); + let expected = " 10. 한\n 글\n 공\n 백"; + + pretty_assertions::assert_eq!(actual, expected); + } + + #[test] + fn test_wrapping_splits_long_tokens() { + let actual = render_with_width(0, ListBullet::Dash, "supercalifragilistic", 10); + let expected = " superc\n alifra\n gilist\n ic"; + + pretty_assertions::assert_eq!(actual, expected); + } + + #[test] + fn test_wrapping_preserves_link_breaks() { + let actual = render_visible_with_width( + 0, + ListBullet::Dash, + "[링크](https://example.com/very/long/path) 설명", + 14, + ); + let expected = concat!( + " • 링크\n", + " (https://e\n", + " xample.com\n", + " /very/long\n", + " /path)\n", + " 설명" + ); + + pretty_assertions::assert_eq!(actual, expected); + } + #[test] fn test_list_state_reset() { let mut state = ListState::default(); diff --git a/crates/forge_markdown_stream/src/renderer.rs b/crates/forge_markdown_stream/src/renderer.rs index a6c03cba05..6c3d682543 100644 --- a/crates/forge_markdown_stream/src/renderer.rs +++ b/crates/forge_markdown_stream/src/renderer.rs @@ -2,6 +2,7 @@ use std::io::{self, Write}; +use streamdown_ansi::utils::visible_length; use streamdown_parser::ParseEvent; use crate::code::CodeHighlighter; @@ -257,8 +258,13 @@ impl Renderer { let width = self.current_width(); // Parse inline formatting (bold, italic, etc.) in blockquote content let rendered_content = render_inline_content(text, &self.theme); - let wrapped = - wrap_text_preserving_spaces(&rendered_content, width, &margin, &margin); + let wrapped = wrap_text_preserving_spaces( + &rendered_content, + width.saturating_sub(visible_length(&margin)), + width.saturating_sub(visible_length(&margin)), + &margin, + &margin, + ); if wrapped.is_empty() { self.writeln(&margin)?; } else { diff --git a/crates/forge_markdown_stream/src/utils.rs b/crates/forge_markdown_stream/src/utils.rs index 555b60ee67..8a52e8a251 100644 --- a/crates/forge_markdown_stream/src/utils.rs +++ b/crates/forge_markdown_stream/src/utils.rs @@ -1,6 +1,7 @@ //! Utility functions for the markdown renderer. use streamdown_ansi::utils::{ansi_collapse, extract_ansi_codes, visible, visible_length}; +use unicode_width::UnicodeWidthChar; /// Terminal theme mode (dark or light). #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -33,17 +34,24 @@ struct WrapSegment { word: String, } +#[derive(Debug, Clone, PartialEq, Eq)] +enum WrapAtom { + Escape(String), + Char(char), +} + /// Wraps ANSI-styled text while preserving explicit whitespace between words. /// /// Unlike the upstream streamdown wrapper, this keeps the original separator /// string between tokens instead of reconstructing it from CJK heuristics. pub(crate) fn wrap_text_preserving_spaces( text: &str, - width: usize, + first_width: usize, + next_width: usize, first_prefix: &str, next_prefix: &str, ) -> Vec { - if width == 0 { + if first_width == 0 && next_width == 0 { return Vec::new(); } @@ -55,18 +63,18 @@ pub(crate) fn wrap_text_preserving_spaces( let mut lines = Vec::new(); let mut current_line = String::new(); let mut current_style: Vec = Vec::new(); + let mut current_width = first_width; for segment in segments { + let line_width = visible_length(¤t_line); let separator = if current_line.is_empty() { "" } else { segment.separator.as_str() }; - let separator_width = visible_length(separator); - let word_width = visible_length(&segment.word); - let line_width = visible_length(¤t_line); + let combined_width = visible_length(separator) + visible_length(&segment.word); - if current_line.is_empty() || line_width + separator_width + word_width <= width { + if !current_line.is_empty() && line_width + combined_width <= current_width { current_line.push_str(separator); apply_style_transition(&mut current_style, separator); current_line.push_str(&segment.word); @@ -74,11 +82,28 @@ pub(crate) fn wrap_text_preserving_spaces( continue; } - push_wrapped_line(&mut lines, ¤t_line, first_prefix, next_prefix); + if current_line.is_empty() && visible_length(&segment.word) <= current_width { + current_line.push_str(&segment.word); + apply_style_transition(&mut current_style, &segment.word); + continue; + } - current_line = current_style.join(""); - current_line.push_str(&segment.word); - apply_style_transition(&mut current_style, &segment.word); + if !current_line.is_empty() { + push_wrapped_line(&mut lines, ¤t_line, first_prefix, next_prefix); + current_line = current_style.join(""); + current_width = next_width; + } + + append_wrapped_word( + &mut lines, + &mut current_line, + &mut current_style, + &segment.word, + &mut current_width, + next_width, + first_prefix, + next_prefix, + ); } push_wrapped_line(&mut lines, ¤t_line, first_prefix, next_prefix); @@ -92,7 +117,7 @@ pub(crate) fn simple_wrap_preserving_spaces(text: &str, width: usize) -> Vec Vec, + current_line: &mut String, + current_style: &mut Vec, + word: &str, + current_width: &mut usize, + next_width: usize, + first_prefix: &str, + next_prefix: &str, +) { + let mut remainder = word.to_string(); + + while !remainder.is_empty() { + let line_width = visible_length(current_line); + let mut available = current_width.saturating_sub(line_width); + + if available == 0 { + push_wrapped_line(lines, current_line, first_prefix, next_prefix); + *current_line = current_style.join(""); + *current_width = next_width; + available = (*current_width).max(1); + } + + if visible_length(&remainder) <= available { + current_line.push_str(&remainder); + apply_style_transition(current_style, &remainder); + break; + } + + let prefix = take_prefix_fitting(&remainder, available) + .or_else(|| take_prefix_fitting(&remainder, 1)) + .unwrap_or_else(|| remainder.clone()); + + current_line.push_str(&prefix); + apply_style_transition(current_style, &prefix); + remainder = remainder[prefix.len()..].to_string(); + + if !remainder.is_empty() { + push_wrapped_line(lines, current_line, first_prefix, next_prefix); + *current_line = current_style.join(""); + *current_width = next_width; + } + } +} + +fn take_prefix_fitting(text: &str, max_width: usize) -> Option { + if text.is_empty() { + return None; + } + + let mut width = 0; + let mut result = String::new(); + let mut consumed_visible = false; + + for atom in parse_atoms(text) { + match atom { + WrapAtom::Escape(sequence) => result.push_str(&sequence), + WrapAtom::Char(ch) => { + let char_width = UnicodeWidthChar::width(ch).unwrap_or(0); + if consumed_visible && width + char_width > max_width { + break; + } + if !consumed_visible && char_width > max_width { + result.push(ch); + break; + } + + result.push(ch); + width += char_width; + consumed_visible = true; + } + } + } + + if result.is_empty() { None } else { Some(result) } +} + fn wrap_segments(text: &str) -> Vec { let chunks = wrap_chunks(text); let mut segments = Vec::new(); @@ -123,55 +225,94 @@ fn wrap_chunks(text: &str) -> Vec { let mut chunks = Vec::new(); let mut current = String::new(); let mut current_is_whitespace = None; - let mut in_escape = false; - let mut escape_buf = String::new(); - - for ch in text.chars() { - if in_escape { - escape_buf.push(ch); - if ch == 'm' { - current.push_str(&escape_buf); - escape_buf.clear(); - in_escape = false; - } - continue; - } - if ch == '\x1b' { - in_escape = true; - escape_buf.push(ch); - continue; - } - - let is_whitespace = ch.is_whitespace(); - match current_is_whitespace { - Some(kind) if kind != is_whitespace => { - chunks - .push(WrapChunk { content: std::mem::take(&mut current), is_whitespace: kind }); - current_is_whitespace = Some(is_whitespace); + for atom in parse_atoms(text) { + match atom { + WrapAtom::Escape(sequence) => current.push_str(&sequence), + WrapAtom::Char(ch) => { + let is_whitespace = ch.is_whitespace(); + match current_is_whitespace { + Some(kind) if kind != is_whitespace => { + chunks.push(WrapChunk { + content: std::mem::take(&mut current), + is_whitespace: kind, + }); + current_is_whitespace = Some(is_whitespace); + } + None => { + current_is_whitespace = Some(is_whitespace); + } + _ => {} + } + + current.push(ch); } - None => { - current_is_whitespace = Some(is_whitespace); - } - _ => {} } - - current.push(ch); - } - - if !escape_buf.is_empty() { - current.push_str(&escape_buf); } if let Some(is_whitespace) = current_is_whitespace && !current.is_empty() { - chunks.push(WrapChunk { content: current, is_whitespace }); + chunks.push(WrapChunk { + content: current, + is_whitespace, + }); } chunks } +fn parse_atoms(text: &str) -> Vec { + let mut atoms = Vec::new(); + let bytes = text.as_bytes(); + let mut index = 0; + + while index < bytes.len() { + if bytes[index] != 0x1b { + let ch = text[index..].chars().next().expect("slice should start at char boundary"); + atoms.push(WrapAtom::Char(ch)); + index += ch.len_utf8(); + continue; + } + + let end = match bytes.get(index + 1) { + Some(b'[') => parse_csi_escape(bytes, index), + Some(b']') => parse_osc_escape(bytes, index), + Some(_) => (index + 2).min(bytes.len()), + None => bytes.len(), + }; + atoms.push(WrapAtom::Escape(text[index..end].to_string())); + index = end; + } + + atoms +} + +fn parse_csi_escape(bytes: &[u8], start: usize) -> usize { + let mut index = start + 2; + while index < bytes.len() { + if (0x40..=0x7e).contains(&bytes[index]) { + return index + 1; + } + index += 1; + } + bytes.len() +} + +fn parse_osc_escape(bytes: &[u8], start: usize) -> usize { + let mut index = start + 2; + while index < bytes.len() { + if bytes[index] == 0x07 { + return index + 1; + } + if bytes[index] == 0x1b && bytes.get(index + 1) == Some(&b'\\') { + return index + 2; + } + index += 1; + } + bytes.len() +} + fn apply_style_transition(current_style: &mut Vec, text: &str) { current_style.extend(extract_ansi_codes(text)); *current_style = ansi_collapse(current_style, ""); @@ -198,6 +339,7 @@ fn push_wrapped_line( #[cfg(test)] mod tests { use pretty_assertions::assert_eq; + use streamdown_ansi::utils::visible; use super::{simple_wrap_preserving_spaces, wrap_text_preserving_spaces}; @@ -215,10 +357,24 @@ mod tests { assert_eq!(actual, expected); } + #[test] + fn test_simple_wrap_preserving_spaces_splits_long_tokens() { + let fixture = "supercalifragilistic"; + let actual = simple_wrap_preserving_spaces(fixture, 5); + let expected = vec![ + "super".to_string(), + "calif".to_string(), + "ragil".to_string(), + "istic".to_string(), + ]; + + assert_eq!(actual, expected); + } + #[test] fn test_wrap_text_preserving_spaces_keeps_multiple_spaces_on_same_line() { let fixture = "한글 공백 보존"; - let actual = wrap_text_preserving_spaces(fixture, 40, "", ""); + let actual = wrap_text_preserving_spaces(fixture, 40, 40, "", ""); let expected = vec!["한글 공백 보존".to_string()]; assert_eq!(actual, expected); @@ -227,7 +383,7 @@ mod tests { #[test] fn test_wrap_text_preserving_spaces_applies_prefixes_after_wrap() { let fixture = "한글 공백 검증"; - let actual = wrap_text_preserving_spaces(fixture, 8, "> ", " "); + let actual = wrap_text_preserving_spaces(fixture, 4, 4, "> ", " "); let expected = vec![ "> 한글".to_string(), " 공백".to_string(), @@ -236,4 +392,22 @@ mod tests { assert_eq!(actual, expected); } + + #[test] + fn test_wrap_text_preserving_spaces_preserves_link_separator_after_osc_escape() { + let fixture = concat!( + "\x1b]8;;https://example.com\x1b\\", + "link", + "\x1b]8;;\x1b\\", + " ", + "\x1b[34m(https://x.co)\x1b[39m" + ); + let actual = wrap_text_preserving_spaces(fixture, 4, 14, "", "") + .into_iter() + .map(|line| visible(&line)) + .collect::>(); + let expected = vec!["link".to_string(), "(https://x.co)".to_string()]; + + assert_eq!(actual, expected); + } } From 322d975eb80fb8250b6a0f2b556106bc682da8b3 Mon Sep 17 00:00:00 2001 From: Minkyu Kim Date: Thu, 9 Apr 2026 03:57:41 +0900 Subject: [PATCH 3/4] fix(markdown): preserve grapheme and ansi state during wrapping --- Cargo.lock | 1 + crates/forge_markdown_stream/Cargo.toml | 1 + crates/forge_markdown_stream/src/utils.rs | 171 +++++++++++++++++++--- 3 files changed, 156 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 845eeb1058..cd71eab880 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2227,6 +2227,7 @@ dependencies = [ "strip-ansi-escapes", "syntect", "terminal-colorsaurus", + "unicode-segmentation", "unicode-width 0.2.2", ] diff --git a/crates/forge_markdown_stream/Cargo.toml b/crates/forge_markdown_stream/Cargo.toml index 858cb28704..5449822e33 100644 --- a/crates/forge_markdown_stream/Cargo.toml +++ b/crates/forge_markdown_stream/Cargo.toml @@ -16,6 +16,7 @@ streamdown-render = "0.1.4" syntect.workspace = true colored.workspace = true unicode-width = "0.2" +unicode-segmentation = "1.12" terminal-colorsaurus = "1.0.3" [dev-dependencies] diff --git a/crates/forge_markdown_stream/src/utils.rs b/crates/forge_markdown_stream/src/utils.rs index 8a52e8a251..ad683915f4 100644 --- a/crates/forge_markdown_stream/src/utils.rs +++ b/crates/forge_markdown_stream/src/utils.rs @@ -1,7 +1,8 @@ //! Utility functions for the markdown renderer. -use streamdown_ansi::utils::{ansi_collapse, extract_ansi_codes, visible, visible_length}; -use unicode_width::UnicodeWidthChar; +use streamdown_ansi::utils::{extract_ansi_codes, parse_sgr_params, visible, visible_length}; +use unicode_segmentation::UnicodeSegmentation; +use unicode_width::UnicodeWidthStr; /// Terminal theme mode (dark or light). #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -37,7 +38,7 @@ struct WrapSegment { #[derive(Debug, Clone, PartialEq, Eq)] enum WrapAtom { Escape(String), - Char(char), + Grapheme(String), } /// Wraps ANSI-styled text while preserving explicit whitespace between words. @@ -182,18 +183,18 @@ fn take_prefix_fitting(text: &str, max_width: usize) -> Option { for atom in parse_atoms(text) { match atom { WrapAtom::Escape(sequence) => result.push_str(&sequence), - WrapAtom::Char(ch) => { - let char_width = UnicodeWidthChar::width(ch).unwrap_or(0); - if consumed_visible && width + char_width > max_width { + WrapAtom::Grapheme(grapheme) => { + let grapheme_width = UnicodeWidthStr::width(grapheme.as_str()); + if consumed_visible && width + grapheme_width > max_width { break; } - if !consumed_visible && char_width > max_width { - result.push(ch); + if !consumed_visible && grapheme_width > max_width { + result.push_str(&grapheme); break; } - result.push(ch); - width += char_width; + result.push_str(&grapheme); + width += grapheme_width; consumed_visible = true; } } @@ -229,8 +230,8 @@ fn wrap_chunks(text: &str) -> Vec { for atom in parse_atoms(text) { match atom { WrapAtom::Escape(sequence) => current.push_str(&sequence), - WrapAtom::Char(ch) => { - let is_whitespace = ch.is_whitespace(); + WrapAtom::Grapheme(grapheme) => { + let is_whitespace = grapheme.chars().all(char::is_whitespace); match current_is_whitespace { Some(kind) if kind != is_whitespace => { chunks.push(WrapChunk { @@ -245,7 +246,7 @@ fn wrap_chunks(text: &str) -> Vec { _ => {} } - current.push(ch); + current.push_str(&grapheme); } } } @@ -269,9 +270,17 @@ fn parse_atoms(text: &str) -> Vec { while index < bytes.len() { if bytes[index] != 0x1b { - let ch = text[index..].chars().next().expect("slice should start at char boundary"); - atoms.push(WrapAtom::Char(ch)); - index += ch.len_utf8(); + let next_escape = bytes[index..] + .iter() + .position(|byte| *byte == 0x1b) + .map(|offset| index + offset) + .unwrap_or(bytes.len()); + + for grapheme in text[index..next_escape].graphemes(true) { + atoms.push(WrapAtom::Grapheme(grapheme.to_string())); + } + + index = next_escape; continue; } @@ -315,7 +324,108 @@ fn parse_osc_escape(bytes: &[u8], start: usize) -> usize { fn apply_style_transition(current_style: &mut Vec, text: &str) { current_style.extend(extract_ansi_codes(text)); - *current_style = ansi_collapse(current_style, ""); + *current_style = collapse_ansi_codes(current_style); +} + +fn collapse_ansi_codes(code_list: &[String]) -> Vec { + let mut bold = false; + let mut italic = false; + let mut underline = false; + let mut strikeout = false; + let mut dim = false; + let mut fg_color: Option = None; + let mut bg_color: Option = None; + + for code in code_list { + let params = parse_sgr_params(code); + let mut index = 0; + + while index < params.len() { + match params[index] { + 0 => { + bold = false; + italic = false; + underline = false; + strikeout = false; + dim = false; + fg_color = None; + bg_color = None; + } + 1 => bold = true, + 2 => dim = true, + 3 => italic = true, + 4 => underline = true, + 9 => strikeout = true, + 22 => { + bold = false; + dim = false; + } + 23 => italic = false, + 24 => underline = false, + 29 => strikeout = false, + 30..=37 | 90..=97 => fg_color = Some(format!("\x1b[{}m", params[index])), + 39 => fg_color = None, + 40..=47 | 100..=107 => bg_color = Some(format!("\x1b[{}m", params[index])), + 49 => bg_color = None, + 38 => { + if index + 4 < params.len() && params[index + 1] == 2 { + fg_color = Some(format!( + "\x1b[38;2;{};{};{}m", + params[index + 2], + params[index + 3], + params[index + 4] + )); + index += 4; + } + } + 48 => { + if index + 4 < params.len() && params[index + 1] == 2 { + bg_color = Some(format!( + "\x1b[48;2;{};{};{}m", + params[index + 2], + params[index + 3], + params[index + 4] + )); + index += 4; + } + } + _ => {} + } + + index += 1; + } + } + + let mut result = Vec::new(); + let mut sgr_parts = Vec::new(); + + if bold { + sgr_parts.push("1"); + } + if dim { + sgr_parts.push("2"); + } + if italic { + sgr_parts.push("3"); + } + if underline { + sgr_parts.push("4"); + } + if strikeout { + sgr_parts.push("9"); + } + + if !sgr_parts.is_empty() { + result.push(format!("\x1b[{}m", sgr_parts.join(";"))); + } + if let Some(fg_color) = fg_color { + result.push(fg_color); + } + if let Some(bg_color) = bg_color { + result.push(bg_color); + } + + result } fn push_wrapped_line( @@ -410,4 +520,31 @@ mod tests { assert_eq!(actual, expected); } + + #[test] + fn test_simple_wrap_preserving_spaces_keeps_grapheme_clusters_intact() { + let fixture = "👨‍👩‍👧‍👦 a\u{0301} 한글"; + let actual = simple_wrap_preserving_spaces(fixture, 2); + let expected = vec![ + "👨‍👩‍👧‍👦".to_string(), + "a\u{0301}".to_string(), + "한".to_string(), + "글".to_string(), + ]; + + assert_eq!(actual, expected); + } + + #[test] + fn test_wrap_text_preserving_spaces_reapplies_ansi_style_after_wrap() { + let fixture = "\x1b[31mabcdef\x1b[39m"; + let actual = wrap_text_preserving_spaces(fixture, 3, 3, "", ""); + let expected_visible = vec!["abc".to_string(), "def".to_string()]; + let actual_visible = actual.iter().map(|line| visible(line)).collect::>(); + + assert_eq!(actual_visible, expected_visible); + assert!(actual[0].contains("\x1b[31m")); + assert!(actual[1].contains("\x1b[31m")); + assert!(actual[1].ends_with("\x1b[39m")); + } } From fb9836689224f79f9424613942abbb35f5073ac5 Mon Sep 17 00:00:00 2001 From: Minkyu Kim Date: Thu, 9 Apr 2026 03:57:52 +0900 Subject: [PATCH 4/4] fix(markdown): correct blockquote width regression coverage --- crates/forge_markdown_stream/src/lib.rs | 26 ++++++++++++++------ crates/forge_markdown_stream/src/renderer.rs | 8 +++--- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/crates/forge_markdown_stream/src/lib.rs b/crates/forge_markdown_stream/src/lib.rs index 58dd1259af..e48bfcad8a 100644 --- a/crates/forge_markdown_stream/src/lib.rs +++ b/crates/forge_markdown_stream/src/lib.rs @@ -192,10 +192,9 @@ mod tests { let fixture = "> supercalifragilistic\n> 한글 공백\n"; let actual = fixture_rendered_output(fixture, 10); let expected = concat!( - "│ super\n", - "│ calif\n", - "│ ragil\n", - "│ istic\n", + "│ supercal\n", + "│ ifragili\n", + "│ stic\n", "│ 한글\n", "│ 공백" ); @@ -209,9 +208,22 @@ mod tests { let actual = fixture_rendered_output(fixture, 20); let expected = concat!( "│ 링크\n", - "│ (https://exampl\n", - "│ e.com/very/long\n", - "│ /path) 설명" + "│ (https://example.c\n", + "│ om/very/long/path)\n", + "│ 설명" + ); + + assert_eq!(actual, expected); + } + + #[test] + fn test_streaming_renderer_wraps_nested_blockquotes_with_correct_prefix_width() { + let fixture = ">> supercalifragilistic\n"; + let actual = fixture_rendered_output(fixture, 12); + let expected = concat!( + "│ │ supercal\n", + "│ │ ifragili\n", + "│ │ stic" ); assert_eq!(actual, expected); diff --git a/crates/forge_markdown_stream/src/renderer.rs b/crates/forge_markdown_stream/src/renderer.rs index 6c3d682543..23948028bc 100644 --- a/crates/forge_markdown_stream/src/renderer.rs +++ b/crates/forge_markdown_stream/src/renderer.rs @@ -8,7 +8,7 @@ use streamdown_parser::ParseEvent; use crate::code::CodeHighlighter; use crate::heading::render_heading; use crate::inline::{render_inline_content, render_inline_elements}; -use crate::list::{ListState, render_list_item}; +use crate::list::{render_list_item, ListState}; use crate::style::InlineStyler; use crate::table::render_table; use crate::theme::Theme; @@ -255,13 +255,13 @@ impl Renderer { ParseEvent::BlockquoteLine(text) => { let margin = self.left_margin(); - let width = self.current_width(); + let content_width = self.width.saturating_sub(visible_length(&margin)); // Parse inline formatting (bold, italic, etc.) in blockquote content let rendered_content = render_inline_content(text, &self.theme); let wrapped = wrap_text_preserving_spaces( &rendered_content, - width.saturating_sub(visible_length(&margin)), - width.saturating_sub(visible_length(&margin)), + content_width, + content_width, &margin, &margin, );