EXboys · cursor · Jun 7, 2026 · Jun 7, 2026
diff --git a/crates/skilllite-agent/src/agent_loop/helpers.rs b/crates/skilllite-agent/src/agent_loop/helpers.rs
@@ -72,7 +72,7 @@ pub(super) fn handle_update_task_plan(
                                 "'tasks' must be a JSON array, got a string that is not valid JSON array. \
                                  Pass tasks as a real array: {{\"tasks\": [{{...}}]}} not {{\"tasks\": \"[...]\"}}. \
                                  Received string preview: {:?}",
-                                &s[..s.len().min(120)]
+                                safe_truncate(s, 120)
                             ),
                             is_error: true,
                             counts_as_failure: true,
@@ -1104,6 +1104,19 @@ mod tests {
         assert!(r.content.contains("must be a JSON array"), "{}", r.content);
     }
 
+    #[test]
+    fn update_task_plan_rejects_non_array_string_without_utf8_boundary_panic() {
+        let mut planner = TaskPlanner::new(None, None, None);
+        let mut sink = SilentEventSink;
+        let invalid_tasks = format!("{}汉 trailing invalid json", "a".repeat(119));
+        let args = serde_json::json!({ "tasks": invalid_tasks }).to_string();
+
+        let r = handle_update_task_plan(&args, &mut planner, &[], &mut sink);
+
+        assert!(r.is_error);
+        assert!(r.content.contains("must be a JSON array"), "{}", r.content);
+    }
+
     #[test]
     fn update_task_plan_rejects_missing_tasks_field() {
         let mut planner = TaskPlanner::new(None, None, None);

diff --git a/crates/skilllite-agent/src/task_planner.rs b/crates/skilllite-agent/src/task_planner.rs
@@ -394,7 +394,7 @@ impl TaskPlanner {
 
         tracing::debug!(
             "parse_task_list raw (first 500 chars): {}",
-            &raw[..raw.len().min(500)]
+            safe_truncate(raw, 500)
         );
         bail!("No valid JSON task array found in LLM response")
     }
@@ -777,6 +777,20 @@ mod tests {
         assert!(empty.is_empty());
     }
 
+    #[test]
+    fn parse_task_list_returns_error_without_utf8_boundary_panic() {
+        let planner = TaskPlanner::new(None, None, None);
+        let raw = format!("{}汉 trailing invalid planning response", "a".repeat(499));
+
+        let err = planner.parse_task_list(&raw).unwrap_err();
+
+        assert!(
+            err.to_string().contains("No valid JSON task array found"),
+            "{}",
+            err
+        );
+    }
+
     #[test]
     fn test_planning_prompt_contains_placeholders_resolved() {
         let planner = TaskPlanner::new(None, None, None);

diff --git a/tasks/TASK-2026-068-critical-bug-audit/CONTEXT.md b/tasks/TASK-2026-068-critical-bug-audit/CONTEXT.md
@@ -0,0 +1,28 @@
+# Technical Context
+
+## Current State
+
+- Relevant crates/files: `crates/skilllite-agent/src/agent_loop/helpers.rs`, `crates/skilllite-agent/src/task_planner.rs`, and recent UTF-8 truncation commits on `main`.
+- Current behavior: Agent planning error paths now use `safe_truncate` for previews instead of raw byte slicing.
+
+## Architecture Fit
+
+- Layer boundaries involved: `skilllite-agent` planning and agent-loop error handling.
+- Interfaces to preserve: Tool-result error reporting, planner parse/fallback behavior, and existing `safe_truncate` utility semantics.
+
+## Dependency and Compatibility
+
+- New dependencies: None.
+- Backward compatibility notes: No API, CLI, schema, or persisted-format changes; invalid non-ASCII planner inputs now return errors instead of panicking.
+
+## Design Decisions
+
+- Decision: Fix only the confirmed agent crash paths in this PR.
+  - Rationale: `update_task_plan` and `parse_task_list` are active agent/planning paths where LLM-produced non-ASCII invalid data can panic before returning a recoverable error.
+  - Alternatives considered: Sweep every remaining byte slice across CLI/admin formatting paths.
+  - Why rejected: Several remaining paths are lower-blast-radius or ASCII/index-derived; broad cleanup would exceed the critical-bug scope.
+
+## Open Questions
+
+- [x] Which recent commits have the largest behavioral blast radius?
+- [x] Do any suspicious changes have a concrete critical trigger scenario?
diff --git a/tasks/TASK-2026-068-critical-bug-audit/PRD.md b/tasks/TASK-2026-068-critical-bug-audit/PRD.md
@@ -0,0 +1,40 @@
+# PRD
+
+## Background
+
+This scheduled automation performs a focused audit of recent commits to catch high-severity correctness regressions before they reach users.
+
+## Objective
+
+Identify and, when highly confident, minimally fix critical bugs with concrete trigger scenarios. If no such bug is found, produce a concise no-critical-findings report.
+
+## Functional Requirements
+
+- FR-1: Inspect recent behavioral changes and trace affected code paths beyond surface diffs.
+- FR-2: Only treat findings as actionable when they can cause data loss, crash, security bypass, silent corruption/truncation, or significant user-facing breakage.
+- FR-3: If a fix is made, include focused validation evidence.
+
+## Non-Functional Requirements
+
+- Security: Preserve sandbox and approval invariants; do not relax policy while auditing.
+- Performance: Avoid broad test or build work unless needed to validate a concrete finding.
+- Compatibility: Do not alter shipped behavior unless fixing a confirmed critical bug.
+
+## Constraints
+
+- Technical: Work on branch `cursor/critical-bug-investigation-b158`; avoid broad refactors.
+- Timeline: Scheduled automation run; complete autonomously with evidence.
+
+## Success Metrics
+
+- Metric: Number of surfaced critical findings without concrete trigger scenarios.
+- Baseline: Unknown.
+- Target: Zero speculative surfaced findings.
+- Metric: Validation evidence for any repository changes.
+- Baseline: Required by repository workflow.
+- Target: Commands actually run and recorded.
+
+## Rollout
+
+- Rollout plan: If no code fix is needed, report results only. If a critical fix is made, commit, push, and open a PR.
+- Rollback plan: Revert the minimal fix commit if validation or review disproves the finding.
diff --git a/tasks/TASK-2026-068-critical-bug-audit/REVIEW.md b/tasks/TASK-2026-068-critical-bug-audit/REVIEW.md
@@ -0,0 +1,38 @@
+# Review Report
+
+## Scope Reviewed
+
+- Files/modules: Recent commits on current branch/main, `crates/skilllite-agent/src/agent_loop/helpers.rs`, `crates/skilllite-agent/src/task_planner.rs`, related UTF-8 truncation helpers and tests.
+- Commits/changes: Latest UTF-8 truncation fixes, recent agent/CLI/runtime bridge commits, and the new fix commit `ea54799`.
+
+## Findings
+
+- Critical: `update_task_plan` error preview used raw byte slicing on model-provided `tasks` strings. A non-array string with a multibyte character crossing byte 120 panicked before returning a tool error, crashing the active agent turn.
+- Critical: `parse_task_list` debug preview used raw byte slicing on malformed LLM planning output. A multibyte character crossing byte 500 panicked before the intended structured parse error/fallback path.
+- Major: Other lower-blast-radius byte-slice truncation sites remain in CLI/admin formatting paths, but were not fixed because they did not meet the requested critical-confidence bar for this PR.
+- Minor: None.
+
+## Quality Gates
+
+- Architecture boundary checks: `pass`
+- Security invariants: `pass`
+- Required tests executed: `pass`
+- Docs sync (EN/ZH): `pass` - no user-facing command/env/doc semantics changed.
+
+## Test Evidence
+
+- Commands run:
+  - `cargo test -p skilllite-agent update_task_plan_rejects_non_array_string_without_utf8_boundary_panic`
+  - `cargo test -p skilllite-agent parse_task_list_returns_error_without_utf8_boundary_panic`
+  - `cargo fmt --check && cargo clippy --all-targets -- -D warnings && cargo test -p skilllite-agent && cargo test && python3 scripts/validate_tasks.py`
+- Key outputs:
+  - Targeted tests: both passed with 1 passed, 0 failed.
+  - `cargo clippy --all-targets -- -D warnings`: completed successfully.
+  - `cargo test -p skilllite-agent`: 247 passed, 0 failed.
+  - `cargo test`: full workspace test suite completed successfully.
+  - `python3 scripts/validate_tasks.py`: `Task validation passed (68 task directories checked).`
+
+## Decision
+
+- Merge readiness: `ready`
+- Follow-up actions: Consider a separate non-critical cleanup sweep for remaining CLI/admin byte-slice previews.
diff --git a/tasks/TASK-2026-068-critical-bug-audit/STATUS.md b/tasks/TASK-2026-068-critical-bug-audit/STATUS.md
@@ -0,0 +1,24 @@
+# Status Journal
+
+## Timeline
+
+- 2026-06-07:
+  - Progress: Created audit task, loaded injected specs, reviewed recent commits, and implemented a minimal UTF-8-safe fix for two agent planning error paths.
+  - Blockers: None.
+  - Next step: Open PR and report results.
+
+## Checkpoints
+
+- [x] PRD drafted before implementation (or `N/A` recorded)
+- [x] Context drafted before implementation (or `N/A` recorded)
+- [x] Implementation complete
+- [x] Tests passed
+- [x] Review complete
+- [x] Board updated
+
+## Validation Evidence
+
+- `cargo test -p skilllite-agent update_task_plan_rejects_non_array_string_without_utf8_boundary_panic` passed: 1 test passed, 0 failed.
+- `cargo test -p skilllite-agent parse_task_list_returns_error_without_utf8_boundary_panic` passed: 1 test passed, 0 failed.
+- `cargo fmt --check && cargo clippy --all-targets -- -D warnings && cargo test -p skilllite-agent && cargo test && python3 scripts/validate_tasks.py` exited 0.
+- Key output: `skilllite-agent` tests passed with 247 passed, 0 failed; full workspace tests completed successfully; task validation passed for 68 task directories.
diff --git a/tasks/TASK-2026-068-critical-bug-audit/TASK.md b/tasks/TASK-2026-068-critical-bug-audit/TASK.md
@@ -0,0 +1,54 @@
+# TASK Card
+
+## Metadata
+
+- Task ID: `TASK-2026-068`
+- Title: Critical Bug Audit
+- Status: `done`
+- Priority: `P0`
+- Owner: `agent`
+- Contributors:
+- Created: `2026-06-07`
+- Target milestone:
+
+## Problem
+
+Inspect recent SkillLite commits for high-severity correctness regressions that escaped review. Only actionable findings with a concrete trigger scenario should result in a code change or PR.
+
+## Scope
+
+- In scope: Recent commits on `main` / current branch, with emphasis on behavioral changes affecting agent loops, CLI/runtime bridges, sandbox/security behavior, persistence, and UTF-8/error handling paths.
+- Out of scope: Style-only issues, theoretical concerns without a plausible trigger, broad refactors, and low-severity UX degradation.
+
+## Acceptance Criteria
+
+- [x] Recent behavioral commits are reviewed beyond the diff by tracing relevant caller and downstream paths.
+- [x] Any reported bug has a concrete trigger scenario and critical impact.
+- [x] If a critical bug is fixed, the fix is minimal and covered by targeted validation.
+- [x] Validation commands pass and evidence is recorded.
+
+## Risks
+
+- Risk: Over-reporting speculative issues.
+  - Impact: Unnecessary PR noise and reviewer distraction.
+  - Mitigation: Require a concrete trigger scenario and critical user impact before opening a PR.
+- Risk: Missing a regression outside the highest-risk recent changes.
+  - Impact: Critical bug remains undetected.
+  - Mitigation: Review commit metadata first, then inspect broad-blast-radius code paths.
+
+## Validation Plan
+
+- Required tests: `cargo fmt --check`, `cargo clippy --all-targets -- -D warnings`, `cargo test -p skilllite-agent`, `cargo test`, and `python3 scripts/validate_tasks.py`.
+- Commands to run: `git log`, targeted `git show` / `git diff`, targeted agent regression tests, full workspace validation.
+- Manual checks: Trace caller/downstream paths and confirm whether each candidate has a plausible critical trigger.
+
+## Regression Scope
+
+- Areas likely affected: Agent behavior, command routing, sandbox execution, persistence/logging, UTF-8 truncation and user-facing error summaries.
+- Explicit non-goals: Cosmetic cleanup, unrelated TODO execution, documentation rewrite.
+
+## Links
+
+- Source TODO section: N/A - scheduled critical bug-finding automation.
+- Related PRs/issues: Fix commit `ea54799`; recent commits on current branch and `main`.
+- Related docs: `spec/verification-integrity.md`, `spec/README.md`.
diff --git a/tasks/board.md b/tasks/board.md
@@ -1,6 +1,6 @@
 # Task Board
 
-Last updated: 2026-06-06 (TASK-2026-067 utf8 llm error truncate done)
+Last updated: 2026-06-07 (TASK-2026-068 critical bug audit done)
 
 ## In Progress
 
@@ -17,6 +17,7 @@ Last updated: 2026-06-06 (TASK-2026-067 utf8 llm error truncate done)
 
 ## Done
 
+- `TASK-2026-068-critical-bug-audit` - Status: `done` - Owner: `agent`
 - `TASK-2026-067-utf8-llm-error-truncate` - Status: `done` - Owner: `agent`
 - `TASK-2026-066-utf8-evolution-log-truncate` - Status: `done` - Owner: `agent`
 - `TASK-2026-064-env-keys-single-source` - Status: `done` - Owner: `agent`