From 8ec6f1b6038e5a6e3c56be33f7abea017ad7c312 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 8 Jun 2026 11:12:27 +0000 Subject: [PATCH 1/2] fix: make preview truncation utf8 safe Co-authored-by: EXboy --- .../skilllite-agent/src/agent_loop/helpers.rs | 20 +++++- crates/skilllite-agent/src/llm/mod.rs | 16 +++-- crates/skilllite-agent/src/llm/tests.rs | 20 ++++++ .../src/evolution_status.rs | 35 +++++++-- .../CONTEXT.md | 43 +++++++++++ .../PRD.md | 44 ++++++++++++ .../REVIEW.md | 29 ++++++++ .../STATUS.md | 17 +++++ .../TASK.md | 71 +++++++++++++++++++ tasks/board.md | 4 +- 10 files changed, 284 insertions(+), 15 deletions(-) create mode 100644 tasks/TASK-2026-068-utf8-preview-crash-fixes/CONTEXT.md create mode 100644 tasks/TASK-2026-068-utf8-preview-crash-fixes/PRD.md create mode 100644 tasks/TASK-2026-068-utf8-preview-crash-fixes/REVIEW.md create mode 100644 tasks/TASK-2026-068-utf8-preview-crash-fixes/STATUS.md create mode 100644 tasks/TASK-2026-068-utf8-preview-crash-fixes/TASK.md diff --git a/crates/skilllite-agent/src/agent_loop/helpers.rs b/crates/skilllite-agent/src/agent_loop/helpers.rs index 15ecee94..00f0be2a 100644 --- a/crates/skilllite-agent/src/agent_loop/helpers.rs +++ b/crates/skilllite-agent/src/agent_loop/helpers.rs @@ -72,7 +72,7 @@ pub(super) fn handle_update_task_plan( "'tasks' must be a JSON array, got a string that is not valid JSON array. \ Pass tasks as a real array: {{\"tasks\": [{{...}}]}} not {{\"tasks\": \"[...]\"}}. \ Received string preview: {:?}", - &s[..s.len().min(120)] + safe_truncate(s, 120) ), is_error: true, counts_as_failure: true, @@ -1104,6 +1104,24 @@ mod tests { assert!(r.content.contains("must be a JSON array"), "{}", r.content); } + #[test] + fn update_task_plan_rejects_multibyte_non_array_string_without_panic() { + let mut planner = TaskPlanner::new(None, None, None); + let mut sink = SilentEventSink; + let malformed = format!("{}{}", "a".repeat(40), "界".repeat(40)); + let args = serde_json::json!({ "tasks": malformed }).to_string(); + + let r = handle_update_task_plan(&args, &mut planner, &[], &mut sink); + + assert!(r.is_error); + assert!(r.content.contains("must be a JSON array"), "{}", r.content); + assert!( + r.content.contains("Received string preview"), + "{}", + r.content + ); + } + #[test] fn update_task_plan_rejects_missing_tasks_field() { let mut planner = TaskPlanner::new(None, None, None); diff --git a/crates/skilllite-agent/src/llm/mod.rs b/crates/skilllite-agent/src/llm/mod.rs index 31a21d25..fcaf76b1 100644 --- a/crates/skilllite-agent/src/llm/mod.rs +++ b/crates/skilllite-agent/src/llm/mod.rs @@ -206,13 +206,7 @@ impl LlmClient { return Self::extract_embeddings_from_items(items); } - // Log the unexpected response shape for debugging - let preview = serde_json::to_string(&json).unwrap_or_default(); - let preview = &preview[..preview.len().min(500)]; - bail!( - "Unexpected embedding response format (no 'data' or 'output.embeddings'): {}", - preview - ) + bail!("{}", format_unexpected_embedding_response_error(&json)) } /// Extract embedding vectors from a JSON array of items, each containing an "embedding" field. @@ -237,6 +231,14 @@ impl LlmClient { // ═══════════════════════════════════════════════════════════════════════════ } +fn format_unexpected_embedding_response_error(json: &Value) -> String { + let preview = serde_json::to_string(json).unwrap_or_default(); + format!( + "Unexpected embedding response format (no 'data' or 'output.embeddings'): {}", + safe_truncate(&preview, 500) + ) +} + // ─── Response types ───────────────────────────────────────────────────────── // Fields id/model/usage/index/finish_reason/role are required for API deserialization // but not read by our code. diff --git a/crates/skilllite-agent/src/llm/tests.rs b/crates/skilllite-agent/src/llm/tests.rs index da110d66..e4a94204 100644 --- a/crates/skilllite-agent/src/llm/tests.rs +++ b/crates/skilllite-agent/src/llm/tests.rs @@ -408,3 +408,23 @@ fn test_format_api_error_truncates_non_json_body_on_utf8_boundary() { "should truncate the long raw body: {result}" ); } + +#[test] +fn unexpected_embedding_response_error_truncates_on_utf8_boundary() { + let json = json!({ + "error": format!("{}{}", "a".repeat(41), "界".repeat(200)) + }); + + let result = format_unexpected_embedding_response_error(&json); + + assert!( + result.contains("Unexpected embedding response format"), + "{result}" + ); + assert!(result.is_char_boundary(result.len())); + assert!( + result.len() < 600, + "should keep preview bounded: {}", + result.len() + ); +} diff --git a/crates/skilllite-commands/src/evolution_status.rs b/crates/skilllite-commands/src/evolution_status.rs index 41a2e25c..2ffb6c78 100644 --- a/crates/skilllite-commands/src/evolution_status.rs +++ b/crates/skilllite-commands/src/evolution_status.rs @@ -295,6 +295,25 @@ pub fn cmd_status(json: bool, workspace: &str, periodic_anchor_unix: Option cmd_status_human(workspace) } +fn truncate_utf8_prefix(s: &str, max: usize) -> &str { + if s.len() <= max { + return s; + } + let mut end = max; + while end > 0 && !s.is_char_boundary(end) { + end -= 1; + } + &s[..end] +} + +fn shorten_event_reason(reason: &str) -> String { + if reason.len() > 50 { + format!("{}...", truncate_utf8_prefix(reason, 47)) + } else { + reason.to_string() + } +} + fn cmd_status_human(workspace: &str) -> Result<()> { let workspace_root = resolve_workspace_root(workspace); skilllite_core::config::load_dotenv_from_dir(&workspace_root); @@ -401,11 +420,7 @@ fn cmd_status_human(workspace: &str) -> Result<()> { t if t.contains("rolled_back") => "🔙", _ => " ", }; - let reason_short = if reason.len() > 50 { - format!("{}...", &reason[..47]) - } else { - reason - }; + let reason_short = shorten_event_reason(&reason); println!(" {} {} {} {}", icon, date, etype, reason_short); if !target.is_empty() { println!(" └─ target: {}", target); @@ -454,4 +469,14 @@ mod tests { assert!(v.get("would_have_evolution_proposals").is_some()); assert!(v.get("recent_events").is_some()); } + + #[test] + fn shorten_event_reason_preserves_utf8_boundaries() { + let reason = format!("{}{}", "a".repeat(40), "界".repeat(10)); + let shortened = shorten_event_reason(&reason); + + assert!(shortened.ends_with("...")); + assert!(shortened.len() <= 50); + assert!(shortened.is_char_boundary(shortened.len())); + } } diff --git a/tasks/TASK-2026-068-utf8-preview-crash-fixes/CONTEXT.md b/tasks/TASK-2026-068-utf8-preview-crash-fixes/CONTEXT.md new file mode 100644 index 00000000..5883de81 --- /dev/null +++ b/tasks/TASK-2026-068-utf8-preview-crash-fixes/CONTEXT.md @@ -0,0 +1,43 @@ +# Technical Context + +## Current State + +- Relevant crates/files: + - `crates/skilllite-commands/src/evolution_status.rs` + - `crates/skilllite-agent/src/agent_loop/helpers.rs` + - `crates/skilllite-agent/src/llm/mod.rs` +- Current behavior: + - Human evolution status slices `reason` at byte 47 when shortening long event reasons. + - `update_task_plan` slices invalid string payload previews at byte 120. + - `LlmClient::embed` slices unexpected response JSON previews at byte 500. + - Each can panic if the byte boundary lands inside a multibyte UTF-8 character. + +## Architecture Fit + +- Layer boundaries involved: + - `skilllite-commands` owns CLI presentation and should avoid panics while formatting persisted data. + - `skilllite-agent` owns LLM/tool orchestration and should return structured errors on invalid inputs. +- Interfaces to preserve: + - `skilllite evolution status` CLI behavior and JSON status output. + - `ToolResult` error shape for planning-control validation. + - `LlmClient::embed` return type and provider format support. + +## Dependency and Compatibility + +- New dependencies: None. +- Backward compatibility notes: Preview text may be clipped at a safe boundary but remains diagnostic-only. + +## Design Decisions + +- Decision: Use existing UTF-8-safe helpers (`safe_truncate` or local char-boundary helper) instead + of direct byte slices. + - Rationale: Matches recent fixes and keeps changes local. + - Alternatives considered: Convert all truncation sites in the repo. + - Why rejected: The task is a critical bug fix and should avoid unrelated refactors. + +## Open Questions + +- [x] Does the change require docs sync? No, because flags, output schema, defaults, and documented + command semantics are unchanged. +- [x] Are lower-severity skills-list issues included? No, they do not meet the crash/data-loss bar for + this automation run. diff --git a/tasks/TASK-2026-068-utf8-preview-crash-fixes/PRD.md b/tasks/TASK-2026-068-utf8-preview-crash-fixes/PRD.md new file mode 100644 index 00000000..5fe170f7 --- /dev/null +++ b/tasks/TASK-2026-068-utf8-preview-crash-fixes/PRD.md @@ -0,0 +1,44 @@ +# PRD + +## Background + +The latest critical-bug audits fixed UTF-8 unsafe truncation in selected evolution and LLM error +paths. Follow-up inspection found adjacent code that still slices user/model/provider text by byte +index. These paths can crash on valid UTF-8 when long CJK or emoji content reaches a preview cap. + +## Objective + +Prevent crash-class UTF-8 boundary panics in the identified evolution status and agent preview +paths while preserving existing command/error behavior. + +## Functional Requirements + +- FR-1: Evolution status human output must display shortened event reasons without byte-slicing + arbitrary UTF-8. +- FR-2: Planning-control validation must return a structured tool error for malformed multibyte + `tasks` strings. +- FR-3: Embedding response validation must return an error for unexpected multibyte JSON response + bodies instead of panicking while building the preview. + +## Non-Functional Requirements + +- Security: No new sandbox, auth, or policy surface. +- Performance: Truncation remains bounded and allocation impact is negligible on error/display paths. +- Compatibility: Existing CLI flags, JSON schemas, and error semantics are preserved. + +## Constraints + +- Technical: Reuse existing Unicode-safe truncation helpers where available; avoid broad utility + migrations unrelated to the concrete crash paths. +- Timeline: N/A for autonomous execution; keep the change small and reviewable. + +## Success Metrics + +- Metric: Non-ASCII regression tests for the fixed paths. +- Baseline: Byte slicing can panic at fixed byte caps. +- Target: Tests and manual CLI reproduction complete without panic. + +## Rollout + +- Rollout plan: Ship as a small bug-fix PR on the assigned branch. +- Rollback plan: Revert the commit if regressions appear; no data migration is involved. diff --git a/tasks/TASK-2026-068-utf8-preview-crash-fixes/REVIEW.md b/tasks/TASK-2026-068-utf8-preview-crash-fixes/REVIEW.md new file mode 100644 index 00000000..e5fd29ef --- /dev/null +++ b/tasks/TASK-2026-068-utf8-preview-crash-fixes/REVIEW.md @@ -0,0 +1,29 @@ +# Review Report + +## Scope Reviewed + +- Files/modules: +- Commits/changes: + +## Findings + +- Critical: +- Major: +- Minor: + +## Quality Gates + +- Architecture boundary checks: `pass | fail` +- Security invariants: `pass | fail` +- Required tests executed: `pass | fail` +- Docs sync (EN/ZH): `pass | fail` + +## Test Evidence + +- Commands run: +- Key outputs: + +## Decision + +- Merge readiness: `ready | not ready` +- Follow-up actions: diff --git a/tasks/TASK-2026-068-utf8-preview-crash-fixes/STATUS.md b/tasks/TASK-2026-068-utf8-preview-crash-fixes/STATUS.md new file mode 100644 index 00000000..90215463 --- /dev/null +++ b/tasks/TASK-2026-068-utf8-preview-crash-fixes/STATUS.md @@ -0,0 +1,17 @@ +# Status Journal + +## Timeline + +- 2026-06-08: + - Progress: Created task artifacts; scoped three concrete UTF-8 byte-slice crash paths from recent audit. + - Blockers: None. + - Next step: Implement minimal safe truncation fixes and non-ASCII regression tests. + +## Checkpoints + +- [x] PRD drafted before implementation (or `N/A` recorded) +- [x] Context drafted before implementation (or `N/A` recorded) +- [ ] Implementation complete +- [ ] Tests passed +- [ ] Review complete +- [ ] Board updated diff --git a/tasks/TASK-2026-068-utf8-preview-crash-fixes/TASK.md b/tasks/TASK-2026-068-utf8-preview-crash-fixes/TASK.md new file mode 100644 index 00000000..bb596bd6 --- /dev/null +++ b/tasks/TASK-2026-068-utf8-preview-crash-fixes/TASK.md @@ -0,0 +1,71 @@ +# TASK Card + +## Metadata + +- Task ID: `TASK-2026-068` +- Title: Fix UTF-8 preview truncation crashes +- Status: `in_progress` +- Priority: `P1` +- Owner: `agent` +- Contributors: +- Created: `2026-06-08` +- Target milestone: + +## Problem + +Recent UTF-8 truncation fixes covered several write/error paths, but adjacent preview/display +paths still byte-slice arbitrary strings. Multibyte CJK/emoji content can panic the CLI or agent +instead of returning a human-readable status or structured error. + +## Scope + +- In scope: + - Make `skilllite evolution status` human output safe when `evolution_log.reason` contains long multibyte text. + - Make `update_task_plan` invalid string previews safe for multibyte task payloads. + - Make unexpected embedding response previews safe for multibyte JSON bodies. + - Add focused non-ASCII regression tests for each fixed behavior. +- Out of scope: + - Broader refactors of truncation utilities. + - Lower-severity desktop skills-list error handling issues found during audit. + - Packaging or UI behavior changes. + +## Acceptance Criteria + +- [ ] Human `evolution status` formatting cannot panic on long CJK/emoji event reasons. +- [ ] Agent planning-control errors cannot panic when previewing malformed multibyte task strings. +- [ ] Embedding unexpected-response errors cannot panic when previewing multibyte JSON. +- [ ] Focused tests cover the above non-ASCII cases. +- [ ] Required Rust formatting, linting, tests, and task validation pass. + +## Risks + +- Risk: Changing truncation unit could slightly alter visible preview length. + - Impact: Low; previews remain diagnostic-only and preserve existing error semantics. + - Mitigation: Use existing UTF-8-safe helpers and keep limits close to the original byte caps. + +## Validation Plan + +- Required tests: + - `cargo test -p skilllite-agent` + - `cargo test -p skilllite` + - `cargo test` +- Commands to run: + - `cargo fmt --check` + - `cargo clippy --all-targets -- -D warnings` + - `python3 scripts/validate_tasks.py` +- Manual checks: + - Build and run a CLI reproduction for `skilllite evolution status` with a long CJK reason. + +## Regression Scope + +- Areas likely affected: + - `skilllite-commands` evolution status display. + - `skilllite-agent` planning-control and embedding error paths. +- Explicit non-goals: + - No schema, CLI flag, API contract, or docs semantics changes. + +## Links + +- Source TODO section: N/A +- Related PRs/issues: Recent critical-bug investigations around `TASK-2026-066` and `TASK-2026-067` +- Related docs: N/A diff --git a/tasks/board.md b/tasks/board.md index 4b63f2b2..4b18325d 100644 --- a/tasks/board.md +++ b/tasks/board.md @@ -1,10 +1,10 @@ # Task Board -Last updated: 2026-06-06 (TASK-2026-067 utf8 llm error truncate done) +Last updated: 2026-06-08 (TASK-2026-068 utf8 preview crash fixes in progress) ## In Progress -- None. +- `TASK-2026-068-utf8-preview-crash-fixes` - Status: `in_progress` - Owner: `agent` ## Ready From 4e5cac7b94143b7e3800c8fa76cd24b56697e819 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 8 Jun 2026 11:16:26 +0000 Subject: [PATCH 2/2] docs(task): record utf8 preview validation Co-authored-by: EXboy --- .../REVIEW.md | 40 +++++++++++++++---- .../STATUS.md | 16 ++++++-- .../TASK.md | 15 +++---- tasks/board.md | 5 ++- 4 files changed, 55 insertions(+), 21 deletions(-) diff --git a/tasks/TASK-2026-068-utf8-preview-crash-fixes/REVIEW.md b/tasks/TASK-2026-068-utf8-preview-crash-fixes/REVIEW.md index e5fd29ef..aca4297b 100644 --- a/tasks/TASK-2026-068-utf8-preview-crash-fixes/REVIEW.md +++ b/tasks/TASK-2026-068-utf8-preview-crash-fixes/REVIEW.md @@ -3,27 +3,51 @@ ## Scope Reviewed - Files/modules: + - `crates/skilllite-commands/src/evolution_status.rs` + - `crates/skilllite-agent/src/agent_loop/helpers.rs` + - `crates/skilllite-agent/src/llm/mod.rs` + - `crates/skilllite-agent/src/llm/tests.rs` - Commits/changes: + - Recent UTF-8 truncation fixes in `TASK-2026-066` and `TASK-2026-067` + - This task's safe-preview follow-up changes ## Findings -- Critical: +- Critical: None remaining in the fixed scope. - Major: -- Minor: + - Fixed human `evolution status` panic when persisted `evolution_log.reason` has long CJK/emoji text. + - Fixed `update_task_plan` invalid string preview panic for multibyte task payloads. + - Fixed embedding unexpected-response preview panic for multibyte provider JSON. +- Minor: Lower-severity desktop skills-list UI error handling issues were identified but left out of scope because they do not meet the crash/data-loss/security bar for this run. ## Quality Gates -- Architecture boundary checks: `pass | fail` -- Security invariants: `pass | fail` -- Required tests executed: `pass | fail` -- Docs sync (EN/ZH): `pass | fail` +- Architecture boundary checks: `pass` +- Security invariants: `pass` +- Required tests executed: `pass` +- Docs sync (EN/ZH): `pass` (not required; no CLI flags, output schema, defaults, or documented semantics changed) ## Test Evidence - Commands run: + - `cargo fmt --check` + - `cargo test -p skilllite-agent update_task_plan_rejects_multibyte_non_array_string_without_panic` + - `cargo test -p skilllite-agent unexpected_embedding_response_error_truncates_on_utf8_boundary` + - `cargo test -p skilllite-commands --features agent shorten_event_reason_preserves_utf8_boundaries` + - `cargo test -p skilllite-agent` + - `cargo test -p skilllite` + - `cargo test` + - `cargo clippy --all-targets -- -D warnings` + - `python3 scripts/validate_tasks.py` + - Manual CLI reproduction with `feedback.sqlite` containing a long CJK `evolution_log.reason` - Key outputs: + - Pre-fix CLI reproduction: `exit=101`, panic at `evolution_status.rs` because byte index `47` was inside `界`. + - Post-fix CLI reproduction: `exit=0`, event printed with safe shortened Chinese reason and empty stderr. + - Full `cargo test`: all workspace test binaries reported `test result: ok`. + - Clippy: `Finished dev profile` with no warnings under `-D warnings`. + - Task validation: `Task validation passed (68 task directories checked).` ## Decision -- Merge readiness: `ready | not ready` -- Follow-up actions: +- Merge readiness: `ready` +- Follow-up actions: Consider a separate lower-priority task for desktop skills-list error surfacing if desired. diff --git a/tasks/TASK-2026-068-utf8-preview-crash-fixes/STATUS.md b/tasks/TASK-2026-068-utf8-preview-crash-fixes/STATUS.md index 90215463..d56a34e9 100644 --- a/tasks/TASK-2026-068-utf8-preview-crash-fixes/STATUS.md +++ b/tasks/TASK-2026-068-utf8-preview-crash-fixes/STATUS.md @@ -6,12 +6,20 @@ - Progress: Created task artifacts; scoped three concrete UTF-8 byte-slice crash paths from recent audit. - Blockers: None. - Next step: Implement minimal safe truncation fixes and non-ASCII regression tests. +- 2026-06-08: + - Progress: Implemented safe truncation in evolution status, update-task-plan previews, and embedding response previews; added non-ASCII regression tests. + - Blockers: None. + - Next step: Record final review and update board. +- 2026-06-08: + - Progress: Validation passed: `cargo fmt --check`, focused regression tests, `cargo test -p skilllite-agent`, `cargo test -p skilllite`, `cargo test`, `cargo clippy --all-targets -- -D warnings`, `python3 scripts/validate_tasks.py`, and manual CLI reproduction. + - Blockers: None. + - Next step: Open PR and report results. ## Checkpoints - [x] PRD drafted before implementation (or `N/A` recorded) - [x] Context drafted before implementation (or `N/A` recorded) -- [ ] Implementation complete -- [ ] Tests passed -- [ ] Review complete -- [ ] Board updated +- [x] Implementation complete +- [x] Tests passed +- [x] Review complete +- [x] Board updated diff --git a/tasks/TASK-2026-068-utf8-preview-crash-fixes/TASK.md b/tasks/TASK-2026-068-utf8-preview-crash-fixes/TASK.md index bb596bd6..b2f0e2c6 100644 --- a/tasks/TASK-2026-068-utf8-preview-crash-fixes/TASK.md +++ b/tasks/TASK-2026-068-utf8-preview-crash-fixes/TASK.md @@ -4,7 +4,7 @@ - Task ID: `TASK-2026-068` - Title: Fix UTF-8 preview truncation crashes -- Status: `in_progress` +- Status: `done` - Priority: `P1` - Owner: `agent` - Contributors: @@ -31,11 +31,11 @@ instead of returning a human-readable status or structured error. ## Acceptance Criteria -- [ ] Human `evolution status` formatting cannot panic on long CJK/emoji event reasons. -- [ ] Agent planning-control errors cannot panic when previewing malformed multibyte task strings. -- [ ] Embedding unexpected-response errors cannot panic when previewing multibyte JSON. -- [ ] Focused tests cover the above non-ASCII cases. -- [ ] Required Rust formatting, linting, tests, and task validation pass. +- [x] Human `evolution status` formatting cannot panic on long CJK/emoji event reasons. +- [x] Agent planning-control errors cannot panic when previewing malformed multibyte task strings. +- [x] Embedding unexpected-response errors cannot panic when previewing multibyte JSON. +- [x] Focused tests cover the above non-ASCII cases. +- [x] Required Rust formatting, linting, tests, and task validation pass. ## Risks @@ -48,13 +48,14 @@ instead of returning a human-readable status or structured error. - Required tests: - `cargo test -p skilllite-agent` - `cargo test -p skilllite` + - `cargo test -p skilllite-commands --features agent shorten_event_reason_preserves_utf8_boundaries` - `cargo test` - Commands to run: - `cargo fmt --check` - `cargo clippy --all-targets -- -D warnings` - `python3 scripts/validate_tasks.py` - Manual checks: - - Build and run a CLI reproduction for `skilllite evolution status` with a long CJK reason. + - Build and run a CLI reproduction for `skilllite evolution status` with a long CJK reason: passed, exit `0`. ## Regression Scope diff --git a/tasks/board.md b/tasks/board.md index 4b18325d..5213b971 100644 --- a/tasks/board.md +++ b/tasks/board.md @@ -1,10 +1,10 @@ # Task Board -Last updated: 2026-06-08 (TASK-2026-068 utf8 preview crash fixes in progress) +Last updated: 2026-06-08 (TASK-2026-068 utf8 preview crash fixes done) ## In Progress -- `TASK-2026-068-utf8-preview-crash-fixes` - Status: `in_progress` - Owner: `agent` +- None. ## Ready @@ -17,6 +17,7 @@ Last updated: 2026-06-08 (TASK-2026-068 utf8 preview crash fixes in progress) ## Done +- `TASK-2026-068-utf8-preview-crash-fixes` - Status: `done` - Owner: `agent` - `TASK-2026-067-utf8-llm-error-truncate` - Status: `done` - Owner: `agent` - `TASK-2026-066-utf8-evolution-log-truncate` - Status: `done` - Owner: `agent` - `TASK-2026-064-env-keys-single-source` - Status: `done` - Owner: `agent`