From 13fef42b8085a791aa436f8e23682bbe79d6d6a2 Mon Sep 17 00:00:00 2001 From: Mher Shahinyan Date: Sat, 13 Jun 2026 20:07:42 +0400 Subject: [PATCH] fix(complete): chunk enrich transcript + surface claude -p stdout errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The enrich pass fed an entire session transcript to the model in one call; a large multi-session task exceeded the ~200k-token context limit and claude -p returned HTTP 400 ("Prompt is too long · ~220310 tokens"). Split the transcript into line-aligned chunks under a safe byte budget and merge the recovered events (run_dream dedups), so finalize works on any session size. --quick was unaffected (it skips enrich). Also surface the JSON error claude -p prints on stdout under --output-format json (the real cause goes there, not stderr), so a failure is legible instead of a bare "exit status 1" — which is exactly what let us diagnose the context overflow. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 15 +++ Cargo.lock | 6 +- Cargo.toml | 2 +- crates/tj-cli/Cargo.toml | 2 +- crates/tj-core/src/classifier/agent_sdk.rs | 43 +++++++-- crates/tj-core/src/dream/llm_backend.rs | 102 ++++++++++++++++++++- crates/tj-mcp/Cargo.toml | 2 +- plugin/.claude-plugin/plugin.json | 2 +- 8 files changed, 156 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2239cde..0816c5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.22.1] - 2026-06-13 + +### Fixed +- **`complete` no longer fails on large sessions.** The enrich pass fed a whole + session transcript to the model in one call; a big multi-session task blew the + ~200k-token context limit and `claude -p` returned HTTP 400 ("Prompt is too + long"). 
The transcript is now split into line-aligned chunks under a safe + budget and the recovered events are merged (and deduped), so finalize works on + any session size. (`--quick` was unaffected — it skips enrich.) +- **Legible `claude -p` errors.** A non-zero `claude -p` exit now surfaces the + JSON error it prints on **stdout** (with `--output-format json` the real cause + — invalid model, usage limit, context overflow — goes there, not stderr), so a + failure reads as "Prompt is too long · ~220310 tokens" instead of a bare + "exit status 1". + ## [0.22.0] - 2026-06-13 ### Added diff --git a/Cargo.lock b/Cargo.lock index 3fd5b0c..1bfc571 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2572,7 +2572,7 @@ dependencies = [ [[package]] name = "task-journal-cli" -version = "0.22.0" +version = "0.22.1" dependencies = [ "anyhow", "assert_cmd", @@ -2596,7 +2596,7 @@ dependencies = [ [[package]] name = "task-journal-core" -version = "0.22.0" +version = "0.22.1" dependencies = [ "anyhow", "chrono", @@ -2621,7 +2621,7 @@ dependencies = [ [[package]] name = "task-journal-mcp" -version = "0.22.0" +version = "0.22.1" dependencies = [ "anyhow", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 73d035f..f9c65b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [ ] [workspace.package] -version = "0.22.0" +version = "0.22.1" edition = "2021" rust-version = "1.88" license = "MIT" diff --git a/crates/tj-cli/Cargo.toml b/crates/tj-cli/Cargo.toml index 78e29ad..1ef884f 100644 --- a/crates/tj-cli/Cargo.toml +++ b/crates/tj-cli/Cargo.toml @@ -23,7 +23,7 @@ default = ["embed"] embed = ["tj-core/embed"] [dependencies] -tj-core = { package = "task-journal-core", version = "0.22.0", path = "../tj-core", default-features = false } +tj-core = { package = "task-journal-core", version = "0.22.1", path = "../tj-core", default-features = false } anyhow = { workspace = true } clap = { workspace = true } tracing = { workspace = true } diff --git 
a/crates/tj-core/src/classifier/agent_sdk.rs b/crates/tj-core/src/classifier/agent_sdk.rs index 2ea41ce..69a5b43 100644 --- a/crates/tj-core/src/classifier/agent_sdk.rs +++ b/crates/tj-core/src/classifier/agent_sdk.rs @@ -64,6 +64,35 @@ fn base_claude_command(model: &str) -> Command { /// very journal — out of the classification subprocess). pub struct ClaudeBinaryRunner; +/// Build the error for a non-zero `claude -p` exit. With `--output-format +/// json` claude reports the real cause (invalid model, usage limit, auth) as +/// JSON on **stdout**, not stderr — so surface both, capped, or the user just +/// sees a bare "exit status 1". +fn claude_exit_error( + status: std::process::ExitStatus, + stdout: &[u8], + stderr: &[u8], +) -> anyhow::Error { + let cap = |b: &[u8]| { + let s = String::from_utf8_lossy(b); + let s = s.trim().to_string(); + if s.chars().count() > 600 { + format!("{}…", s.chars().take(600).collect::<String>()) + } else { + s + } + }; + let out = cap(stdout); + let err = cap(stderr); + let detail = match (out.is_empty(), err.is_empty()) { + (true, true) => "(no output)".to_string(), + (false, true) => out, + (true, false) => err, + (false, false) => format!("{err} | stdout: {out}"), + }; + anyhow!("`claude -p` exited with {status}: {detail}") +} + impl CommandRunner for ClaudeBinaryRunner { fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String> { let output = base_claude_command(model) @@ -71,11 +100,10 @@ impl CommandRunner for ClaudeBinaryRunner { .output() .context("failed to spawn `claude` (is Claude Code installed and on PATH?)")?; if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - return Err(anyhow!( - "`claude -p` exited with {}: {}", + return Err(claude_exit_error( output.status, - stderr.trim() + &output.stdout, + &output.stderr, )); } Ok(String::from_utf8_lossy(&output.stdout).into_owned()) @@ -111,11 +139,10 @@ impl CommandRunner for ClaudeBinaryStdinRunner { .wait_with_output() .context("failed to wait 
for `claude`")?; if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - return Err(anyhow!( - "`claude -p` exited with {}: {}", + return Err(claude_exit_error( output.status, - stderr.trim() + &output.stdout, + &output.stderr, )); } Ok(String::from_utf8_lossy(&output.stdout).into_owned()) diff --git a/crates/tj-core/src/dream/llm_backend.rs b/crates/tj-core/src/dream/llm_backend.rs index a31b55c..7ad63fc 100644 --- a/crates/tj-core/src/dream/llm_backend.rs +++ b/crates/tj-core/src/dream/llm_backend.rs @@ -24,12 +24,57 @@ impl LlmDreamBackend { } } +/// Max transcript bytes fed to the model in one call. The hard wall is +/// the ~200k-token context limit (a real session hit ~220k tokens and `claude +/// -p` returned HTTP 400). We stay well under it and split oversized +/// transcripts across several calls, merging the events (run_dream dedups). +const TRANSCRIPT_CHAR_BUDGET: usize = 360_000; + impl DreamBackend for LlmDreamBackend { fn backfill(&self, input: &BackfillInput) -> anyhow::Result<Vec<BackfillEvent>> { - let prompt = crate::dream::prompt::build_prompt(input); - let text = self.llm.complete(&prompt, 1024)?; - parse_backfill_json(&text) + let mut out = Vec::new(); + for chunk in chunk_transcript(&input.transcript, TRANSCRIPT_CHAR_BUDGET) { + let chunk_input = BackfillInput { + tasks: input.tasks.clone(), + transcript: chunk, + }; + let prompt = crate::dream::prompt::build_prompt(&chunk_input); + let text = self.llm.complete(&prompt, 1024)?; + out.extend(parse_backfill_json(&text)?); + } + Ok(out) + } +} + +/// Split a transcript into chunks of at most `budget` bytes, breaking on line +/// boundaries where possible (a lone oversized line is hard-split on char +/// boundaries). Always returns at least one chunk so an empty transcript still +/// yields a single call. 
+fn chunk_transcript(transcript: &str, budget: usize) -> Vec<String> { + if transcript.len() <= budget { + return vec![transcript.to_string()]; + } + let mut chunks = Vec::new(); + let mut cur = String::new(); + for line in transcript.split_inclusive('\n') { + if !cur.is_empty() && cur.len() + line.len() > budget { + chunks.push(std::mem::take(&mut cur)); + } + if line.len() > budget { + for ch in line.chars() { + if !cur.is_empty() && cur.len() + ch.len_utf8() > budget { + chunks.push(std::mem::take(&mut cur)); + } + cur.push(ch); + } + } else { + cur.push_str(line); + } + } + if !cur.is_empty() { + chunks.push(cur); } + chunks } /// Parse the model's reply (a JSON array of `BackfillEvent`, possibly wrapped in @@ -66,6 +111,57 @@ mod tests { assert!(parse_backfill_json("[]").unwrap().is_empty()); } + #[test] + fn small_transcript_is_one_chunk() { + let c = chunk_transcript("a\nb\nc\n", 100); + assert_eq!(c.len(), 1); + assert_eq!(c[0], "a\nb\nc\n"); + } + + #[test] + fn big_transcript_splits_on_lines_and_preserves_content() { + // 10 lines of 20 chars; budget 50 → multiple chunks, no loss. 
+ let transcript: String = (0..10).map(|i| format!("line{i:015}\n")).collect(); + let chunks = chunk_transcript(&transcript, 50); + assert!(chunks.len() > 1, "must split"); + assert!(chunks.iter().all(|c| c.len() <= 50)); + assert_eq!(chunks.concat(), transcript, "no content lost"); + } + + #[test] + fn oversized_single_line_is_hard_split() { + let line = "x".repeat(250); + let chunks = chunk_transcript(&line, 100); + assert!(chunks.len() >= 3); + assert!(chunks.iter().all(|c| c.len() <= 100)); + assert_eq!(chunks.concat(), line); + } + + #[test] + fn backfill_chunks_large_transcript_into_multiple_calls() { + use std::sync::atomic::{AtomicUsize, Ordering}; + struct CountingLlm(AtomicUsize); + impl LlmBackend for CountingLlm { + fn complete(&self, _prompt: &str, _max: u32) -> anyhow::Result<String> { + self.0.fetch_add(1, Ordering::SeqCst); + Ok("[]".to_string()) + } + fn name(&self) -> &'static str { + "counting" + } + } + let llm = Box::new(CountingLlm(AtomicUsize::new(0))); + // Build a transcript larger than the budget so it must split. + let transcript = "y\n".repeat(TRANSCRIPT_CHAR_BUDGET); + let b = LlmDreamBackend::new(llm); + let input = BackfillInput { + tasks: vec![], + transcript, + }; + let evs = b.backfill(&input).unwrap(); + assert!(evs.is_empty()); + } + #[test] fn llm_dream_backend_runs_and_parses() { struct FakeLlm; diff --git a/crates/tj-mcp/Cargo.toml b/crates/tj-mcp/Cargo.toml index 42fe43e..906afaf 100644 --- a/crates/tj-mcp/Cargo.toml +++ b/crates/tj-mcp/Cargo.toml @@ -17,7 +17,7 @@ path = "src/main.rs" [dependencies] # Lean: the MCP server doesn't embed yet, so it skips the model2vec backend. 
-tj-core = { package = "task-journal-core", version = "0.22.0", path = "../tj-core", default-features = false } +tj-core = { package = "task-journal-core", version = "0.22.1", path = "../tj-core", default-features = false } anyhow = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } diff --git a/plugin/.claude-plugin/plugin.json b/plugin/.claude-plugin/plugin.json index 5ee8eb8..bd04d5c 100644 --- a/plugin/.claude-plugin/plugin.json +++ b/plugin/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "task-journal", - "version": "0.22.0", + "version": "0.22.1", "description": "Append-only journal of AI-coding task reasoning chains: hypotheses, decisions, rejections, evidence. Renders compact resume packs so an agent can pick up a 2-week-old task with full context.", "author": { "name": "Mher Shahinyan"