From 13fef42b8085a791aa436f8e23682bbe79d6d6a2 Mon Sep 17 00:00:00 2001
From: Mher Shahinyan <shahinyanm@gmail.com>
Date: Sat, 13 Jun 2026 20:07:42 +0400
Subject: [PATCH] fix(complete): chunk enrich transcript + surface claude -p
 stdout errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The enrich pass fed an entire session transcript to the model in one
call; a large multi-session task exceeded the ~200k-token context limit
and claude -p returned HTTP 400 ("Prompt is too long · ~220310 tokens").
Split the transcript into line-aligned chunks under a safe byte budget
and merge the recovered events (run_dream dedups), so finalize works on
any session size. --quick was unaffected (it skips enrich).

Also surface the JSON error claude -p prints on stdout under
--output-format json (the real cause goes there, not stderr), so a
failure is legible instead of a bare "exit status 1". Reading that
stdout error is exactly what let us diagnose the context overflow.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                               |  15 +++
 Cargo.lock                                 |   6 +-
 Cargo.toml                                 |   2 +-
 crates/tj-cli/Cargo.toml                   |   2 +-
 crates/tj-core/src/classifier/agent_sdk.rs |  43 +++++++--
 crates/tj-core/src/dream/llm_backend.rs    | 102 ++++++++++++++++++++-
 crates/tj-mcp/Cargo.toml                   |   2 +-
 plugin/.claude-plugin/plugin.json          |   2 +-
 8 files changed, 156 insertions(+), 18 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2239cde..0816c5e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.22.1] - 2026-06-13
+
+### Fixed
+- **`complete` no longer fails on large sessions.** The enrich pass fed a whole
+  session transcript to the model in one call; a big multi-session task blew the
+  ~200k-token context limit and `claude -p` returned HTTP 400 ("Prompt is too
+  long"). The transcript is now split into line-aligned chunks under a safe
+  budget and the recovered events are merged (and deduped), so finalize works on
+  any session size. (`--quick` was unaffected — it skips enrich.)
+- **Legible `claude -p` errors.** A non-zero `claude -p` exit now surfaces the
+  JSON error it prints on **stdout** (with `--output-format json` the real cause
+  — invalid model, usage limit, context overflow — goes there, not stderr), so a
+  failure reads as "Prompt is too long · ~220310 tokens" instead of a bare
+  "exit status 1".
+
 ## [0.22.0] - 2026-06-13
 
 ### Added
diff --git a/Cargo.lock b/Cargo.lock
index 3fd5b0c..1bfc571 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2572,7 +2572,7 @@ dependencies = [
 
 [[package]]
 name = "task-journal-cli"
-version = "0.22.0"
+version = "0.22.1"
 dependencies = [
  "anyhow",
  "assert_cmd",
@@ -2596,7 +2596,7 @@ dependencies = [
 
 [[package]]
 name = "task-journal-core"
-version = "0.22.0"
+version = "0.22.1"
 dependencies = [
  "anyhow",
  "chrono",
@@ -2621,7 +2621,7 @@ dependencies = [
 
 [[package]]
 name = "task-journal-mcp"
-version = "0.22.0"
+version = "0.22.1"
 dependencies = [
  "anyhow",
  "chrono",
diff --git a/Cargo.toml b/Cargo.toml
index 73d035f..f9c65b5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,7 +7,7 @@ members = [
 ]
 
 [workspace.package]
-version = "0.22.0"
+version = "0.22.1"
 edition = "2021"
 rust-version = "1.88"
 license = "MIT"
diff --git a/crates/tj-cli/Cargo.toml b/crates/tj-cli/Cargo.toml
index 78e29ad..1ef884f 100644
--- a/crates/tj-cli/Cargo.toml
+++ b/crates/tj-cli/Cargo.toml
@@ -23,7 +23,7 @@ default = ["embed"]
 embed = ["tj-core/embed"]
 
 [dependencies]
-tj-core = { package = "task-journal-core", version = "0.22.0", path = "../tj-core", default-features = false }
+tj-core = { package = "task-journal-core", version = "0.22.1", path = "../tj-core", default-features = false }
 anyhow = { workspace = true }
 clap = { workspace = true }
 tracing = { workspace = true }
diff --git a/crates/tj-core/src/classifier/agent_sdk.rs b/crates/tj-core/src/classifier/agent_sdk.rs
index 2ea41ce..69a5b43 100644
--- a/crates/tj-core/src/classifier/agent_sdk.rs
+++ b/crates/tj-core/src/classifier/agent_sdk.rs
@@ -64,6 +64,35 @@ fn base_claude_command(model: &str) -> Command {
 /// very journal — out of the classification subprocess).
 pub struct ClaudeBinaryRunner;
 
+/// Build the error for a non-zero `claude -p` exit. With `--output-format
+/// json` claude reports the real cause (invalid model, usage limit, auth,
+/// context overflow) as JSON on **stdout**, not stderr — so surface both,
+/// each capped at 600 chars, or the user just sees a bare "exit status 1".
+fn claude_exit_error(
+    status: std::process::ExitStatus,
+    stdout: &[u8],
+    stderr: &[u8],
+) -> anyhow::Error {
+    let cap = |b: &[u8]| {
+        let s = String::from_utf8_lossy(b);
+        let s = s.trim().to_string();
+        if s.chars().count() > 600 {
+            format!("{}…", s.chars().take(600).collect::<String>())
+        } else {
+            s
+        }
+    };
+    let out = cap(stdout);
+    let err = cap(stderr);
+    let detail = match (out.is_empty(), err.is_empty()) {
+        (true, true) => "(no output)".to_string(),
+        (false, true) => out,
+        (true, false) => err,
+        (false, false) => format!("{err} | stdout: {out}"),
+    };
+    anyhow!("`claude -p` exited with {status}: {detail}")
+}
+
 impl CommandRunner for ClaudeBinaryRunner {
     fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String> {
         let output = base_claude_command(model)
@@ -71,11 +100,10 @@ impl CommandRunner for ClaudeBinaryRunner {
             .output()
             .context("failed to spawn `claude` (is Claude Code installed and on PATH?)")?;
         if !output.status.success() {
-            let stderr = String::from_utf8_lossy(&output.stderr);
-            return Err(anyhow!(
-                "`claude -p` exited with {}: {}",
+            return Err(claude_exit_error(
                 output.status,
-                stderr.trim()
+                &output.stdout,
+                &output.stderr,
             ));
         }
         Ok(String::from_utf8_lossy(&output.stdout).into_owned())
@@ -111,11 +139,10 @@ impl CommandRunner for ClaudeBinaryStdinRunner {
             .wait_with_output()
             .context("failed to wait for `claude`")?;
         if !output.status.success() {
-            let stderr = String::from_utf8_lossy(&output.stderr);
-            return Err(anyhow!(
-                "`claude -p` exited with {}: {}",
+            return Err(claude_exit_error(
                 output.status,
-                stderr.trim()
+                &output.stdout,
+                &output.stderr,
             ));
         }
         Ok(String::from_utf8_lossy(&output.stdout).into_owned())
diff --git a/crates/tj-core/src/dream/llm_backend.rs b/crates/tj-core/src/dream/llm_backend.rs
index a31b55c..7ad63fc 100644
--- a/crates/tj-core/src/dream/llm_backend.rs
+++ b/crates/tj-core/src/dream/llm_backend.rs
@@ -24,12 +24,57 @@ impl LlmDreamBackend {
     }
 }
 
+/// Max transcript bytes (`str::len`, despite the name) fed to the model in one
+/// call. The hard wall is the ~200k-token context limit (a real session hit
+/// ~220k tokens and `claude -p` returned HTTP 400). We stay well under it and
+/// split oversized transcripts across calls, merging events (run_dream dedups).
+const TRANSCRIPT_CHAR_BUDGET: usize = 360_000;
+
 impl DreamBackend for LlmDreamBackend {
     fn backfill(&self, input: &BackfillInput) -> anyhow::Result<Vec<BackfillEvent>> {
-        let prompt = crate::dream::prompt::build_prompt(input);
-        let text = self.llm.complete(&prompt, 1024)?;
-        parse_backfill_json(&text)
+        let mut out = Vec::new();
+        for chunk in chunk_transcript(&input.transcript, TRANSCRIPT_CHAR_BUDGET) {
+            let chunk_input = BackfillInput {
+                tasks: input.tasks.clone(),
+                transcript: chunk,
+            };
+            let prompt = crate::dream::prompt::build_prompt(&chunk_input);
+            let text = self.llm.complete(&prompt, 1024)?;
+            out.extend(parse_backfill_json(&text)?);
+        }
+        Ok(out)
+    }
+}
+
+/// Split a transcript into chunks of at most `budget` bytes, breaking on line
+/// boundaries where possible (a lone oversized line is hard-split on char
+/// boundaries). Always returns at least one chunk so an empty transcript still
+/// yields a single call.
+fn chunk_transcript(transcript: &str, budget: usize) -> Vec<String> {
+    if transcript.len() <= budget {
+        return vec![transcript.to_string()];
+    }
+    let mut chunks = Vec::new();
+    let mut cur = String::new();
+    for line in transcript.split_inclusive('\n') {
+        if !cur.is_empty() && cur.len() + line.len() > budget {
+            chunks.push(std::mem::take(&mut cur));
+        }
+        if line.len() > budget {
+            for ch in line.chars() {
+                if !cur.is_empty() && cur.len() + ch.len_utf8() > budget {
+                    chunks.push(std::mem::take(&mut cur));
+                }
+                cur.push(ch);
+            }
+        } else {
+            cur.push_str(line);
+        }
+    }
+    if !cur.is_empty() {
+        chunks.push(cur);
     }
+    chunks
 }
 
 /// Parse the model's reply (a JSON array of `BackfillEvent`, possibly wrapped in
@@ -66,6 +111,57 @@ mod tests {
         assert!(parse_backfill_json("[]").unwrap().is_empty());
     }
 
+    #[test]
+    fn small_transcript_is_one_chunk() {
+        let c = chunk_transcript("a\nb\nc\n", 100);
+        assert_eq!(c.len(), 1);
+        assert_eq!(c[0], "a\nb\nc\n");
+    }
+
+    #[test]
+    fn big_transcript_splits_on_lines_and_preserves_content() {
+        // 10 lines of 20 chars; budget 50 → multiple chunks, no loss.
+        let transcript: String = (0..10).map(|i| format!("line{i:015}\n")).collect();
+        let chunks = chunk_transcript(&transcript, 50);
+        assert!(chunks.len() > 1, "must split");
+        assert!(chunks.iter().all(|c| c.len() <= 50));
+        assert_eq!(chunks.concat(), transcript, "no content lost");
+    }
+
+    #[test]
+    fn oversized_single_line_is_hard_split() {
+        let line = "x".repeat(250);
+        let chunks = chunk_transcript(&line, 100);
+        assert!(chunks.len() >= 3);
+        assert!(chunks.iter().all(|c| c.len() <= 100));
+        assert_eq!(chunks.concat(), line);
+    }
+
+    #[test]
+    fn backfill_chunks_large_transcript_into_multiple_calls() {
+        use std::sync::atomic::{AtomicUsize, Ordering};
+        static CALLS: AtomicUsize = AtomicUsize::new(0);
+        struct CountingLlm;
+        impl LlmBackend for CountingLlm {
+            fn complete(&self, _prompt: &str, _max: u32) -> anyhow::Result<String> {
+                CALLS.fetch_add(1, Ordering::SeqCst);
+                Ok("[]".to_string())
+            }
+            fn name(&self) -> &'static str {
+                "counting"
+            }
+        }
+        // 2 × budget bytes of 2-byte lines → exactly two full chunks, so two calls.
+        let transcript = "y\n".repeat(TRANSCRIPT_CHAR_BUDGET);
+        let b = LlmDreamBackend::new(Box::new(CountingLlm));
+        let input = BackfillInput {
+            tasks: vec![],
+            transcript,
+        };
+        assert!(b.backfill(&input).unwrap().is_empty());
+        assert_eq!(CALLS.load(Ordering::SeqCst), 2, "one model call per chunk");
+    }
+
     #[test]
     fn llm_dream_backend_runs_and_parses() {
         struct FakeLlm;
diff --git a/crates/tj-mcp/Cargo.toml b/crates/tj-mcp/Cargo.toml
index 42fe43e..906afaf 100644
--- a/crates/tj-mcp/Cargo.toml
+++ b/crates/tj-mcp/Cargo.toml
@@ -17,7 +17,7 @@ path = "src/main.rs"
 
 [dependencies]
 # Lean: the MCP server doesn't embed yet, so it skips the model2vec backend.
-tj-core = { package = "task-journal-core", version = "0.22.0", path = "../tj-core", default-features = false }
+tj-core = { package = "task-journal-core", version = "0.22.1", path = "../tj-core", default-features = false }
 anyhow = { workspace = true }
 tokio = { workspace = true }
 tracing = { workspace = true }
diff --git a/plugin/.claude-plugin/plugin.json b/plugin/.claude-plugin/plugin.json
index 5ee8eb8..bd04d5c 100644
--- a/plugin/.claude-plugin/plugin.json
+++ b/plugin/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "task-journal",
-  "version": "0.22.0",
+  "version": "0.22.1",
   "description": "Append-only journal of AI-coding task reasoning chains: hypotheses, decisions, rejections, evidence. Renders compact resume packs so an agent can pick up a 2-week-old task with full context.",
   "author": {
     "name": "Mher Shahinyan"