diff --git a/CHANGELOG.md b/CHANGELOG.md index f0fb15a..ec32620 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.11.1] - 2026-06-08 + +**Fix: `pack` panicked on multibyte UTF-8.** Pack truncation sliced the +rendered text at a raw byte index, panicking ("byte index N is not a char +boundary") whenever the budget cutoff landed inside a multibyte character — +i.e. on Cyrillic/CJK/emoji-heavy journals that exceed the pack budget. +ASCII-only content was unaffected, so it stayed latent. Truncation now cuts +at a UTF-8 char boundary. + +### Fixed +- `tj_core::pack` truncation is now char-boundary-safe (`truncate_to_budget`); + packs with non-ASCII text exceeding the budget no longer panic. + ## [0.11.0] - 2026-06-08 **Live `session_id` on emitted events (additive, opt-in).** The journal now diff --git a/Cargo.lock b/Cargo.lock index 5493d34..f3bf04c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2166,7 +2166,7 @@ dependencies = [ [[package]] name = "task-journal-cli" -version = "0.11.0" +version = "0.11.1" dependencies = [ "anyhow", "assert_cmd", @@ -2189,7 +2189,7 @@ dependencies = [ [[package]] name = "task-journal-core" -version = "0.11.0" +version = "0.11.1" dependencies = [ "anyhow", "chrono", @@ -2213,7 +2213,7 @@ dependencies = [ [[package]] name = "task-journal-mcp" -version = "0.11.0" +version = "0.11.1" dependencies = [ "anyhow", "clap", diff --git a/Cargo.toml b/Cargo.toml index f635ce9..d52906a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [ ] [workspace.package] -version = "0.11.0" +version = "0.11.1" edition = "2021" rust-version = "1.88" license = "MIT" diff --git a/crates/tj-cli/Cargo.toml b/crates/tj-cli/Cargo.toml index 35031cc..bde4f94 100644 --- a/crates/tj-cli/Cargo.toml +++ b/crates/tj-cli/Cargo.toml @@ -16,7 +16,7 @@ name = "task-journal" path = "src/main.rs" [dependencies] -tj-core = { package = 
/// Shrinks `text` in place so that the retained prefix is at most `budget`
/// bytes, then appends `marker`.
///
/// * No-op when `text.len() <= budget`.
/// * Otherwise the cut point backs off from `budget` to the nearest UTF-8 char
///   boundary at or below it. A raw `text[..budget]` slice panics when
///   `budget` lands inside a multibyte char (Cyrillic/CJK/emoji).
/// * If the kept prefix contains a `'\n'`, the cut moves back to the last one
///   so the text ends on a whole line; that newline itself is dropped.
///
/// `marker` is appended *after* the cut, so the final length may be up to
/// `budget + marker.len()` bytes.
fn truncate_to_budget(text: &mut String, budget: usize, marker: &str) {
    if text.len() <= budget {
        return;
    }
    // Walk back to a char boundary. Index 0 always is one, so `end` is a valid
    // slice/truncate position when the loop exits.
    // NOTE(review): newer std offers `str::floor_char_boundary` for this
    // back-off; the workspace pins `rust-version = "1.88"` — confirm it is
    // available on that toolchain before switching.
    let mut end = budget;
    while end > 0 && !text.is_char_boundary(end) {
        end -= 1;
    }
    // `rfind` yields the byte index of the newline, which is itself a boundary.
    let cutoff = text[..end].rfind('\n').unwrap_or(end);
    text.truncate(cutoff);
    text.push_str(marker);
}

// Unit tests for `truncate_to_budget` (in the file these sit inside `mod tests`).

#[test]
fn truncate_to_budget_handles_multibyte_boundary() {
    // One ASCII byte shifts every 2-byte 'я' to an odd offset, so an even
    // budget lands inside a char — a raw `text[..budget]` slice would panic.
    let marker = "\n[cut]";
    let mut s = String::from("x");
    s.push_str(&"я".repeat(2000)); // total = 1 + 4000 = 4001 bytes
    let budget = 100usize; // even → mid-char given the odd char starts
    assert!(!s.is_char_boundary(budget), "precondition: budget must be mid-char");
    truncate_to_budget(&mut s, budget, marker); // must NOT panic
    // The cut backs off to byte 99: "x" plus 49 whole 'я', then the marker.
    // (A `String` is always valid UTF-8, so checking validity would prove
    // nothing; pin the exact content instead.)
    assert_eq!(s, format!("x{}{marker}", "я".repeat(49)));
    assert!(s.len() <= budget + marker.len());
}

#[test]
fn truncate_to_budget_prefers_last_newline() {
    // Budget 20 falls inside "line three"; the cut moves back to the newline
    // at byte 17 and that newline is dropped before the marker is appended.
    let mut s = String::from("line one\nline two\nline three");
    truncate_to_budget(&mut s, 20, "\n[cut]");
    assert_eq!(s, "line one\nline two\n[cut]");
}

#[test]
fn truncate_to_budget_noop_under_budget() {
    let mut s = String::from("маленький текст");
    let before = s.clone();
    truncate_to_budget(&mut s, 10_000, "\n[cut]");
    assert_eq!(s, before);
}

#[test]
fn truncate_to_budget_noop_at_exact_budget() {
    // `len == budget` is within budget: nothing is cut, no marker is added.
    let mut s = String::from("маленький текст");
    let before = s.clone();
    let budget = s.len();
    truncate_to_budget(&mut s, budget, "\n[cut]");
    assert_eq!(s, before);
}