diff --git a/crates/skilllite-agent/src/agent_loop/helpers.rs b/crates/skilllite-agent/src/agent_loop/helpers.rs index 15ecee9..2f1fa9d 100644 --- a/crates/skilllite-agent/src/agent_loop/helpers.rs +++ b/crates/skilllite-agent/src/agent_loop/helpers.rs @@ -72,7 +72,7 @@ pub(super) fn handle_update_task_plan( "'tasks' must be a JSON array, got a string that is not valid JSON array. \ Pass tasks as a real array: {{\"tasks\": [{{...}}]}} not {{\"tasks\": \"[...]\"}}. \ Received string preview: {:?}", - &s[..s.len().min(120)] + safe_truncate(s, 120) ), is_error: true, counts_as_failure: true, @@ -1104,6 +1104,24 @@ mod tests { assert!(r.content.contains("must be a JSON array"), "{}", r.content); } + #[test] + fn update_task_plan_rejects_non_array_string_preview_utf8_boundary_safe() { + let mut planner = TaskPlanner::new(None, None, None); + let mut sink = SilentEventSink; + let tasks = format!("{}界{}", "a".repeat(119), "tail"); + let args = serde_json::json!({ "tasks": tasks }).to_string(); + + let r = handle_update_task_plan(&args, &mut planner, &[], &mut sink); + + assert!(r.is_error); + assert!(r.content.contains("must be a JSON array"), "{}", r.content); + assert!( + !r.content.contains("tail"), + "preview should be truncated before the suffix: {}", + r.content + ); + } + #[test] fn update_task_plan_rejects_missing_tasks_field() { let mut planner = TaskPlanner::new(None, None, None); diff --git a/crates/skilllite-agent/src/llm/mod.rs b/crates/skilllite-agent/src/llm/mod.rs index 31a21d2..e954be5 100644 --- a/crates/skilllite-agent/src/llm/mod.rs +++ b/crates/skilllite-agent/src/llm/mod.rs @@ -206,13 +206,7 @@ impl LlmClient { return Self::extract_embeddings_from_items(items); } - // Log the unexpected response shape for debugging - let preview = serde_json::to_string(&json).unwrap_or_default(); - let preview = &preview[..preview.len().min(500)]; - bail!( - "Unexpected embedding response format (no 'data' or 'output.embeddings'): {}", - preview - ) + bail!("{}", format_unexpected_embedding_response(&json)) } /// Extract embedding vectors from a JSON array of items, each containing an "embedding" field. @@ -310,6 +304,14 @@ fn record_llm_usage_totals(out: Option<&mut LlmUsageTotals>, usage: &Option String { + let preview = serde_json::to_string(json).unwrap_or_default(); + format!( + "Unexpected embedding response format (no 'data' or 'output.embeddings'): {}", + safe_truncate(&preview, 500) + ) +} + /// Format a user-friendly error from an LLM API HTTP failure. /// /// Extracts a concise error detail from the response body (tries JSON `error.message` diff --git a/crates/skilllite-agent/src/llm/tests.rs b/crates/skilllite-agent/src/llm/tests.rs index da110d6..873c9fc 100644 --- a/crates/skilllite-agent/src/llm/tests.rs +++ b/crates/skilllite-agent/src/llm/tests.rs @@ -408,3 +408,21 @@ fn test_format_api_error_truncates_non_json_body_on_utf8_boundary() { "should truncate the long raw body: {result}" ); } + +#[test] +fn unexpected_embedding_response_preview_is_utf8_boundary_safe() { + let body = format!("{}界{}", "a".repeat(487), "tail"); + let response = json!({ "message": body }); + + let result = format_unexpected_embedding_response(&response); + + assert!( + result.contains("Unexpected embedding response format"), + "{result}" + ); + assert!(result.contains(&"a".repeat(487)), "{result}"); + assert!( + !result.contains("tail"), + "preview should be truncated before the suffix: {result}" + ); +} diff --git a/crates/skilllite-agent/src/task_planner.rs b/crates/skilllite-agent/src/task_planner.rs index ef577c3..2c97e0e 100644 --- a/crates/skilllite-agent/src/task_planner.rs +++ b/crates/skilllite-agent/src/task_planner.rs @@ -394,7 +394,7 @@ impl TaskPlanner { tracing::debug!( "parse_task_list raw (first 500 chars): {}", - &raw[..raw.len().min(500)] + safe_truncate(raw, 500) ); bail!("No valid JSON task array found in LLM response") } @@ -736,6 +736,28 @@ mod tests { use super::*; use crate::extensions::ExtensionRegistry; + struct AlwaysEnabledSubscriber; + + impl tracing::Subscriber for AlwaysEnabledSubscriber { + fn enabled(&self, _metadata: &tracing::Metadata<'_>) -> bool { + true + } + + fn new_span(&self, _span: &tracing::span::Attributes<'_>) -> tracing::span::Id { + tracing::span::Id::from_u64(1) + } + + fn record(&self, _span: &tracing::span::Id, _values: &tracing::span::Record<'_>) {} + + fn record_follows_from(&self, _span: &tracing::span::Id, _follows: &tracing::span::Id) {} + + fn event(&self, _event: &tracing::Event<'_>) {} + + fn enter(&self, _span: &tracing::span::Id) {} + + fn exit(&self, _span: &tracing::span::Id) {} + } + #[test] fn test_planning_prompt_phase1_balance() { let planner = TaskPlanner::new(None, None, None); @@ -777,6 +799,22 @@ mod tests { assert!(empty.is_empty()); } + #[test] + fn parse_task_list_error_preview_is_utf8_boundary_safe() { + let planner = TaskPlanner::new(None, None, None); + let raw = format!("{}界{}", "a".repeat(499), "tail"); + let dispatch = tracing::Dispatch::new(AlwaysEnabledSubscriber); + + let result = tracing::dispatcher::with_default(&dispatch, || planner.parse_task_list(&raw)); + + let err = result.expect_err("invalid planner output should be rejected"); + assert!( + err.to_string() + .contains("No valid JSON task array found in LLM response"), + "{err}" + ); + } + #[test] fn test_planning_prompt_contains_placeholders_resolved() { let planner = TaskPlanner::new(None, None, None); diff --git a/tasks/TASK-2026-068-agent-utf8-preview-panics/CONTEXT.md b/tasks/TASK-2026-068-agent-utf8-preview-panics/CONTEXT.md new file mode 100644 index 0000000..26c2255 --- /dev/null +++ b/tasks/TASK-2026-068-agent-utf8-preview-panics/CONTEXT.md @@ -0,0 +1,40 @@ +# Technical Context + +## Current State + +- Relevant crates/files: + - `crates/skilllite-agent/src/llm/mod.rs` + - `crates/skilllite-agent/src/task_planner.rs` + - `crates/skilllite-agent/src/agent_loop/helpers.rs` +- Current behavior: + - Some error previews use expressions like `&s[..s.len().min(N)]`. + - These expressions panic if the byte index lands inside a multi-byte UTF-8 character. + +## Architecture Fit + +- Layer boundaries involved: + - `skilllite-agent` internal error and tool-argument handling. +- Interfaces to preserve: + - Existing `LlmClient` embedding API error behavior. + - Existing `TaskPlanner::parse_task_list` result shape. + - Existing `update_task_plan` tool result schema. + +## Dependency and Compatibility + +- New dependencies: + - None. +- Backward compatibility notes: + - Error previews keep the same byte limits but may end earlier to preserve UTF-8 validity. + - Successful runtime paths are unchanged. + +## Design Decisions + +- Decision: Reuse `crate::types::safe_truncate` for all affected previews. + - Rationale: It is the existing crate-level UTF-8 boundary helper and is already used in nearby truncation paths. + - Alternatives considered: Add local helper functions. + - Why rejected: Local helpers would duplicate established behavior and increase drift risk. + +## Open Questions + +- [x] Are the affected slices in user-controlled or upstream-controlled text paths? Yes. +- [x] Can this be fixed without changing public command behavior? Yes. diff --git a/tasks/TASK-2026-068-agent-utf8-preview-panics/PRD.md b/tasks/TASK-2026-068-agent-utf8-preview-panics/PRD.md new file mode 100644 index 0000000..6eef00e --- /dev/null +++ b/tasks/TASK-2026-068-agent-utf8-preview-panics/PRD.md @@ -0,0 +1,37 @@ +# PRD + +## Background + +The latest critical bug investigations fixed UTF-8 boundary panics in several truncation paths. Follow-up review found the same byte-slicing class in nearby agent error paths. These paths should never crash the agent while reporting malformed upstream responses or malformed tool arguments. + +## Objective + +Prevent recoverable agent error paths from panicking on long CJK/emoji text by using UTF-8-safe preview truncation. + +## Functional Requirements + +- FR-1: Embedding unexpected-format errors must include a bounded preview without slicing through a UTF-8 code point. +- FR-2: Task planner parse-failure diagnostics must log a bounded preview without slicing through a UTF-8 code point. +- FR-3: `update_task_plan` invalid string previews must return a tool error without slicing through a UTF-8 code point. + +## Non-Functional Requirements + +- Security: No new trust boundary or permission behavior changes. +- Performance: Preview truncation remains O(limit) and only runs in error/debug paths. +- Compatibility: Preserve existing error categories and byte budgets, allowing previews to be a few bytes shorter at character boundaries. + +## Constraints + +- Technical: Reuse existing `safe_truncate` helper instead of adding new truncation utilities. +- Timeline: `N/A` for autonomous execution. + +## Success Metrics + +- Metric: Non-ASCII boundary regression tests for all affected paths. +- Baseline: Existing byte slices can panic when the limit lands inside a multi-byte character. +- Target: Tests exercise those inputs and receive structured errors instead of panics. + +## Rollout + +- Rollout plan: Ship as a small Rust fix with regression tests. +- Rollback plan: Revert the commit if unexpected behavior appears; no data migration is involved. diff --git a/tasks/TASK-2026-068-agent-utf8-preview-panics/REVIEW.md b/tasks/TASK-2026-068-agent-utf8-preview-panics/REVIEW.md new file mode 100644 index 0000000..207b07c --- /dev/null +++ b/tasks/TASK-2026-068-agent-utf8-preview-panics/REVIEW.md @@ -0,0 +1,40 @@ +# Review Report + +## Scope Reviewed + +- Files/modules: + - `crates/skilllite-agent/src/llm/mod.rs` + - `crates/skilllite-agent/src/llm/tests.rs` + - `crates/skilllite-agent/src/task_planner.rs` + - `crates/skilllite-agent/src/agent_loop/helpers.rs` +- Commits/changes: + - Replaced byte-sliced UTF-8 previews with `safe_truncate`. + - Added regression tests for non-ASCII boundary inputs. + +## Findings + +- Critical: Fixed recoverable agent error paths that could panic on long non-ASCII previews. +- Major: None remaining in scope. +- Minor: None. + +## Quality Gates + +- Architecture boundary checks: `pass` +- Security invariants: `pass` +- Required tests executed: `pass` +- Docs sync (EN/ZH): `pass` + +## Test Evidence + +- Commands run: + - `cargo test -p skilllite-agent` + - `cargo fmt --check && cargo clippy --all-targets -- -D warnings && cargo test && python3 scripts/validate_tasks.py` +- Key outputs: + - `cargo test -p skilllite-agent`: `test result: ok. 248 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out` + - Full validation: `Finished 'dev' profile`, `Finished 'test' profile`, all test groups reported `test result: ok`, and `Task validation passed (68 task directories checked).` + +## Decision + +- Merge readiness: `ready` +- Follow-up actions: + - None. diff --git a/tasks/TASK-2026-068-agent-utf8-preview-panics/STATUS.md b/tasks/TASK-2026-068-agent-utf8-preview-panics/STATUS.md new file mode 100644 index 0000000..f5f400d --- /dev/null +++ b/tasks/TASK-2026-068-agent-utf8-preview-panics/STATUS.md @@ -0,0 +1,21 @@ +# Status Journal + +## Timeline + +- 2026-06-09: + - Progress: Fixed three adjacent UTF-8 byte-slice preview panics in agent error paths and added non-ASCII regression tests. + - Blockers: None. + - Next step: Open PR and report validation evidence. +- 2026-06-09 validation: + - Progress: `cargo test -p skilllite-agent` passed with 248 tests; full validation command `cargo fmt --check && cargo clippy --all-targets -- -D warnings && cargo test && python3 scripts/validate_tasks.py` passed. + - Blockers: None. + - Next step: None. + +## Checkpoints + +- [x] PRD drafted before implementation (or `N/A` recorded) +- [x] Context drafted before implementation (or `N/A` recorded) +- [x] Implementation complete +- [x] Tests passed +- [x] Review complete +- [x] Board updated diff --git a/tasks/TASK-2026-068-agent-utf8-preview-panics/TASK.md b/tasks/TASK-2026-068-agent-utf8-preview-panics/TASK.md new file mode 100644 index 0000000..598da46 --- /dev/null +++ b/tasks/TASK-2026-068-agent-utf8-preview-panics/TASK.md @@ -0,0 +1,68 @@ +# TASK Card + +## Metadata + +- Task ID: `TASK-2026-068` +- Title: Fix UTF-8 preview panics in agent error paths +- Status: `in_progress` +- Priority: `P0` +- Owner: `agent` +- Contributors: `GPT-5.5` +- Created: `2026-06-09` +- Target milestone: `N/A` + +## Problem + +Recent UTF-8 truncation fixes removed several byte-slicing panics, but adjacent agent error paths still slice UTF-8 strings by byte offsets when building previews. A long non-ASCII response or malformed tool argument can panic while the agent is trying to report a recoverable error. + +## Scope + +- In scope: + - Replace unsafe byte-sliced previews in `skilllite-agent` error/debug paths with `safe_truncate`. + - Add non-ASCII regression tests for the affected failure paths. + - Validate formatting, linting, task artifacts, and Rust tests. +- Out of scope: + - Broad refactors of LLM response parsing or planning behavior. + - Unrelated byte slices that are ASCII/index-safe by construction. + +## Acceptance Criteria + +- [x] Unexpected embedding JSON responses with non-ASCII content around the 500-byte preview boundary return an error instead of panicking. +- [x] Invalid task-planner LLM output with non-ASCII content around the 500-byte debug preview boundary returns the existing parse error instead of panicking. +- [x] Invalid `update_task_plan.tasks` string arguments with non-ASCII content around the 120-byte preview boundary return a tool error instead of panicking. +- [x] Required Rust and task validation commands pass. + +## Risks + +- Risk: Error preview content could become slightly shorter at UTF-8 boundaries. + - Impact: Debug output may omit a few bytes near the limit. + - Mitigation: Preserve existing byte budgets and only move the cut point back to a valid boundary. + +## Validation Plan + +- Required tests: + - `cargo test -p skilllite-agent` + - `cargo test` + - `cargo fmt --check` + - `cargo clippy --all-targets -- -D warnings` + - `python3 scripts/validate_tasks.py` +- Commands to run: + - See required tests. +- Manual checks: + - Re-read modified files and task board after edits. + +## Regression Scope + +- Areas likely affected: + - Agent LLM embedding response error formatting. + - Task planner parse failure diagnostics. + - `update_task_plan` tool argument validation errors. +- Explicit non-goals: + - Changing successful execution behavior. + - Changing public CLI flags or configuration. + +## Links + +- Source TODO section: `N/A` +- Related PRs/issues: Recent UTF-8 truncation fixes in `TASK-2026-066` and `TASK-2026-067`. +- Related docs: `spec/rust-conventions.md`, `spec/testing-policy.md`, `spec/verification-integrity.md` diff --git a/tasks/board.md b/tasks/board.md index 4b63f2b..7e3fcc6 100644 --- a/tasks/board.md +++ b/tasks/board.md @@ -1,6 +1,6 @@ # Task Board -Last updated: 2026-06-06 (TASK-2026-067 utf8 llm error truncate done) +Last updated: 2026-06-09 (TASK-2026-068 agent utf8 preview panics done) ## In Progress @@ -17,6 +17,7 @@ Last updated: 2026-06-06 (TASK-2026-067 utf8 llm error truncate done) ## Done +- `TASK-2026-068-agent-utf8-preview-panics` - Status: `done` - Owner: `agent` - `TASK-2026-067-utf8-llm-error-truncate` - Status: `done` - Owner: `agent` - `TASK-2026-066-utf8-evolution-log-truncate` - Status: `done` - Owner: `agent` - `TASK-2026-064-env-keys-single-source` - Status: `done` - Owner: `agent`