Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion crates/skilllite-agent/src/agent_loop/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ pub(super) fn handle_update_task_plan(
"'tasks' must be a JSON array, got a string that is not valid JSON array. \
Pass tasks as a real array: {{\"tasks\": [{{...}}]}} not {{\"tasks\": \"[...]\"}}. \
Received string preview: {:?}",
&s[..s.len().min(120)]
safe_truncate(s, 120)
),
is_error: true,
counts_as_failure: true,
Expand Down Expand Up @@ -1104,6 +1104,24 @@ mod tests {
assert!(r.content.contains("must be a JSON array"), "{}", r.content);
}

#[test]
fn update_task_plan_rejects_non_array_string_preview_utf8_boundary_safe() {
let mut planner = TaskPlanner::new(None, None, None);
let mut sink = SilentEventSink;
let tasks = format!("{}界{}", "a".repeat(119), "tail");
let args = serde_json::json!({ "tasks": tasks }).to_string();

let r = handle_update_task_plan(&args, &mut planner, &[], &mut sink);

assert!(r.is_error);
assert!(r.content.contains("must be a JSON array"), "{}", r.content);
assert!(
!r.content.contains("tail"),
"preview should be truncated before the suffix: {}",
r.content
);
}

#[test]
fn update_task_plan_rejects_missing_tasks_field() {
let mut planner = TaskPlanner::new(None, None, None);
Expand Down
16 changes: 9 additions & 7 deletions crates/skilllite-agent/src/llm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,13 +206,7 @@ impl LlmClient {
return Self::extract_embeddings_from_items(items);
}

// Log the unexpected response shape for debugging
let preview = serde_json::to_string(&json).unwrap_or_default();
let preview = &preview[..preview.len().min(500)];
bail!(
"Unexpected embedding response format (no 'data' or 'output.embeddings'): {}",
preview
)
bail!("{}", format_unexpected_embedding_response(&json))
}

/// Extract embedding vectors from a JSON array of items, each containing an "embedding" field.
Expand Down Expand Up @@ -310,6 +304,14 @@ fn record_llm_usage_totals(out: Option<&mut LlmUsageTotals>, usage: &Option<Usag

// ─── Helpers ────────────────────────────────────────────────────────────────

fn format_unexpected_embedding_response(json: &Value) -> String {
let preview = serde_json::to_string(json).unwrap_or_default();
format!(
"Unexpected embedding response format (no 'data' or 'output.embeddings'): {}",
safe_truncate(&preview, 500)
)
}

/// Format a user-friendly error from an LLM API HTTP failure.
///
/// Extracts a concise error detail from the response body (tries JSON `error.message`
Expand Down
18 changes: 18 additions & 0 deletions crates/skilllite-agent/src/llm/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -408,3 +408,21 @@ fn test_format_api_error_truncates_non_json_body_on_utf8_boundary() {
"should truncate the long raw body: {result}"
);
}

#[test]
fn unexpected_embedding_response_preview_is_utf8_boundary_safe() {
let body = format!("{}界{}", "a".repeat(487), "tail");
let response = json!({ "message": body });

let result = format_unexpected_embedding_response(&response);

assert!(
result.contains("Unexpected embedding response format"),
"{result}"
);
assert!(result.contains(&"a".repeat(487)), "{result}");
assert!(
!result.contains("tail"),
"preview should be truncated before the suffix: {result}"
);
}
40 changes: 39 additions & 1 deletion crates/skilllite-agent/src/task_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ impl TaskPlanner {

tracing::debug!(
"parse_task_list raw (first 500 chars): {}",
&raw[..raw.len().min(500)]
safe_truncate(raw, 500)
);
bail!("No valid JSON task array found in LLM response")
}
Expand Down Expand Up @@ -736,6 +736,28 @@ mod tests {
use super::*;
use crate::extensions::ExtensionRegistry;

struct AlwaysEnabledSubscriber;

impl tracing::Subscriber for AlwaysEnabledSubscriber {
fn enabled(&self, _metadata: &tracing::Metadata<'_>) -> bool {
true
}

fn new_span(&self, _span: &tracing::span::Attributes<'_>) -> tracing::span::Id {
tracing::span::Id::from_u64(1)
}

fn record(&self, _span: &tracing::span::Id, _values: &tracing::span::Record<'_>) {}

fn record_follows_from(&self, _span: &tracing::span::Id, _follows: &tracing::span::Id) {}

fn event(&self, _event: &tracing::Event<'_>) {}

fn enter(&self, _span: &tracing::span::Id) {}

fn exit(&self, _span: &tracing::span::Id) {}
}

#[test]
fn test_planning_prompt_phase1_balance() {
let planner = TaskPlanner::new(None, None, None);
Expand Down Expand Up @@ -777,6 +799,22 @@ mod tests {
assert!(empty.is_empty());
}

#[test]
fn parse_task_list_error_preview_is_utf8_boundary_safe() {
let planner = TaskPlanner::new(None, None, None);
let raw = format!("{}界{}", "a".repeat(499), "tail");
let dispatch = tracing::Dispatch::new(AlwaysEnabledSubscriber);

let result = tracing::dispatcher::with_default(&dispatch, || planner.parse_task_list(&raw));

let err = result.expect_err("invalid planner output should be rejected");
assert!(
err.to_string()
.contains("No valid JSON task array found in LLM response"),
"{err}"
);
}

#[test]
fn test_planning_prompt_contains_placeholders_resolved() {
let planner = TaskPlanner::new(None, None, None);
Expand Down
40 changes: 40 additions & 0 deletions tasks/TASK-2026-068-agent-utf8-preview-panics/CONTEXT.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Technical Context

## Current State

- Relevant crates/files:
- `crates/skilllite-agent/src/llm/mod.rs`
- `crates/skilllite-agent/src/task_planner.rs`
- `crates/skilllite-agent/src/agent_loop/helpers.rs`
- Current behavior:
- Some error previews use expressions like `&s[..s.len().min(N)]`.
- These expressions panic if the byte index lands inside a multi-byte UTF-8 character.

## Architecture Fit

- Layer boundaries involved:
- `skilllite-agent` internal error and tool-argument handling.
- Interfaces to preserve:
- Existing `LlmClient` embedding API error behavior.
- Existing `TaskPlanner::parse_task_list` result shape.
- Existing `update_task_plan` tool result schema.

## Dependency and Compatibility

- New dependencies:
- None.
- Backward compatibility notes:
- Error previews keep the same byte limits but may end earlier to preserve UTF-8 validity.
- Successful runtime paths are unchanged.

## Design Decisions

- Decision: Reuse `crate::types::safe_truncate` for all affected previews.
- Rationale: It is the existing crate-level UTF-8 boundary helper and is already used in nearby truncation paths.
- Alternatives considered: Add local helper functions.
- Why rejected: Local helpers would duplicate established behavior and increase drift risk.

## Open Questions

- [x] Are the affected slices in user-controlled or upstream-controlled text paths? Yes.
- [x] Can this be fixed without changing public command behavior? Yes.
37 changes: 37 additions & 0 deletions tasks/TASK-2026-068-agent-utf8-preview-panics/PRD.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# PRD

## Background

The latest critical bug investigations fixed UTF-8 boundary panics in several truncation paths. Follow-up review found the same byte-slicing class in nearby agent error paths. These paths should never crash the agent while reporting malformed upstream responses or malformed tool arguments.

## Objective

Prevent recoverable agent error paths from panicking on long CJK/emoji text by using UTF-8-safe preview truncation.

## Functional Requirements

- FR-1: Embedding unexpected-format errors must include a bounded preview without slicing through a UTF-8 code point.
- FR-2: Task planner parse-failure diagnostics must log a bounded preview without slicing through a UTF-8 code point.
- FR-3: `update_task_plan` invalid string previews must return a tool error without slicing through a UTF-8 code point.

## Non-Functional Requirements

- Security: No new trust boundary or permission behavior changes.
- Performance: Preview truncation remains O(limit) and only runs in error/debug paths.
- Compatibility: Preserve existing error categories and byte budgets, allowing previews to be a few bytes shorter at character boundaries.

## Constraints

- Technical: Reuse existing `safe_truncate` helper instead of adding new truncation utilities.
- Timeline: `N/A` for autonomous execution.

## Success Metrics

- Metric: Non-ASCII boundary regression tests for all affected paths.
- Baseline: Existing byte slices can panic when the limit lands inside a multi-byte character.
- Target: Tests exercise those inputs and receive structured errors instead of panics.

## Rollout

- Rollout plan: Ship as a small Rust fix with regression tests.
- Rollback plan: Revert the commit if unexpected behavior appears; no data migration is involved.
40 changes: 40 additions & 0 deletions tasks/TASK-2026-068-agent-utf8-preview-panics/REVIEW.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Review Report

## Scope Reviewed

- Files/modules:
- `crates/skilllite-agent/src/llm/mod.rs`
- `crates/skilllite-agent/src/llm/tests.rs`
- `crates/skilllite-agent/src/task_planner.rs`
- `crates/skilllite-agent/src/agent_loop/helpers.rs`
- Commits/changes:
- Replaced byte-sliced UTF-8 previews with `safe_truncate`.
- Added regression tests for non-ASCII boundary inputs.

## Findings

- Critical: Fixed recoverable agent error paths that could panic on long non-ASCII previews.
- Major: None remaining in scope.
- Minor: None.

## Quality Gates

- Architecture boundary checks: `pass`
- Security invariants: `pass`
- Required tests executed: `pass`
- Docs sync (EN/ZH): `pass`

## Test Evidence

- Commands run:
- `cargo test -p skilllite-agent`
- `cargo fmt --check && cargo clippy --all-targets -- -D warnings && cargo test && python3 scripts/validate_tasks.py`
- Key outputs:
- `cargo test -p skilllite-agent`: `test result: ok. 248 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out`
- Full validation: `Finished 'dev' profile`, `Finished 'test' profile`, all test groups reported `test result: ok`, and `Task validation passed (68 task directories checked).`

## Decision

- Merge readiness: `ready`
- Follow-up actions:
- None.
21 changes: 21 additions & 0 deletions tasks/TASK-2026-068-agent-utf8-preview-panics/STATUS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Status Journal

## Timeline

- 2026-06-09:
- Progress: Fixed three adjacent UTF-8 byte-slice preview panics in agent error paths and added non-ASCII regression tests.
- Blockers: None.
- Next step: Open PR and report validation evidence.
- 2026-06-09 validation:
- Progress: `cargo test -p skilllite-agent` passed with 248 tests; full validation command `cargo fmt --check && cargo clippy --all-targets -- -D warnings && cargo test && python3 scripts/validate_tasks.py` passed.
- Blockers: None.
- Next step: None.

## Checkpoints

- [x] PRD drafted before implementation (or `N/A` recorded)
- [x] Context drafted before implementation (or `N/A` recorded)
- [x] Implementation complete
- [x] Tests passed
- [x] Review complete
- [x] Board updated
68 changes: 68 additions & 0 deletions tasks/TASK-2026-068-agent-utf8-preview-panics/TASK.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# TASK Card

## Metadata

- Task ID: `TASK-2026-068`
- Title: Fix UTF-8 preview panics in agent error paths
- Status: `in_progress`
- Priority: `P0`
- Owner: `agent`
- Contributors: `GPT-5.5`
- Created: `2026-06-09`
- Target milestone: `N/A`

## Problem

Recent UTF-8 truncation fixes removed several byte-slicing panics, but adjacent agent error paths still slice UTF-8 strings by byte offsets when building previews. A long non-ASCII response or malformed tool argument can panic while the agent is trying to report a recoverable error.

## Scope

- In scope:
- Replace unsafe byte-sliced previews in `skilllite-agent` error/debug paths with `safe_truncate`.
- Add non-ASCII regression tests for the affected failure paths.
- Validate formatting, linting, task artifacts, and Rust tests.
- Out of scope:
- Broad refactors of LLM response parsing or planning behavior.
- Unrelated byte slices that are ASCII/index-safe by construction.

## Acceptance Criteria

- [x] Unexpected embedding JSON responses with non-ASCII content around the 500-byte preview boundary return an error instead of panicking.
- [x] Invalid task-planner LLM output with non-ASCII content around the 500-byte debug preview boundary returns the existing parse error instead of panicking.
- [x] Invalid `update_task_plan.tasks` string arguments with non-ASCII content around the 120-byte preview boundary return a tool error instead of panicking.
- [x] Required Rust and task validation commands pass.

## Risks

- Risk: Error preview content could become slightly shorter at UTF-8 boundaries.
- Impact: Debug output may omit a few bytes near the limit.
- Mitigation: Preserve existing byte budgets and only move the cut point back to a valid boundary.

## Validation Plan

- Required tests:
- `cargo test -p skilllite-agent`
- `cargo test`
- `cargo fmt --check`
- `cargo clippy --all-targets -- -D warnings`
- `python3 scripts/validate_tasks.py`
- Commands to run:
- See required tests.
- Manual checks:
- Re-read modified files and task board after edits.

## Regression Scope

- Areas likely affected:
- Agent LLM embedding response error formatting.
- Task planner parse failure diagnostics.
- `update_task_plan` tool argument validation errors.
- Explicit non-goals:
- Changing successful execution behavior.
- Changing public CLI flags or configuration.

## Links

- Source TODO section: `N/A`
- Related PRs/issues: Recent UTF-8 truncation fixes in `TASK-2026-066` and `TASK-2026-067`.
- Related docs: `spec/rust-conventions.md`, `spec/testing-policy.md`, `spec/verification-integrity.md`
3 changes: 2 additions & 1 deletion tasks/board.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Task Board

Last updated: 2026-06-06 (TASK-2026-067 utf8 llm error truncate done)
Last updated: 2026-06-09 (TASK-2026-068 agent utf8 preview panics done)

## In Progress

Expand All @@ -17,6 +17,7 @@ Last updated: 2026-06-06 (TASK-2026-067 utf8 llm error truncate done)

## Done

- `TASK-2026-068-agent-utf8-preview-panics` - Status: `done` - Owner: `agent`
- `TASK-2026-067-utf8-llm-error-truncate` - Status: `done` - Owner: `agent`
- `TASK-2026-066-utf8-evolution-log-truncate` - Status: `done` - Owner: `agent`
- `TASK-2026-064-env-keys-single-source` - Status: `done` - Owner: `agent`
Expand Down
Loading