Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.22.1] - 2026-06-13

### Fixed
- **`complete` no longer fails on large sessions.** The enrich pass fed a whole
session transcript to the model in one call; a big multi-session task blew the
~200k-token context limit and `claude -p` returned HTTP 400 ("Prompt is too
long"). The transcript is now split into line-aligned chunks under a safe
budget and the recovered events are merged (and deduped), so finalize works on
any session size. (`--quick` was unaffected — it skips enrich.)
- **Legible `claude -p` errors.** A non-zero `claude -p` exit now surfaces the
JSON error it prints on **stdout** (with `--output-format json` the real cause
— invalid model, usage limit, context overflow — goes there, not stderr), so a
failure reads as "Prompt is too long · ~220310 tokens" instead of a bare
"exit status 1".

## [0.22.0] - 2026-06-13

### Added
Expand Down
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ members = [
]

[workspace.package]
version = "0.22.0"
version = "0.22.1"
edition = "2021"
rust-version = "1.88"
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion crates/tj-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ default = ["embed"]
embed = ["tj-core/embed"]

[dependencies]
tj-core = { package = "task-journal-core", version = "0.22.0", path = "../tj-core", default-features = false }
tj-core = { package = "task-journal-core", version = "0.22.1", path = "../tj-core", default-features = false }
anyhow = { workspace = true }
clap = { workspace = true }
tracing = { workspace = true }
Expand Down
43 changes: 35 additions & 8 deletions crates/tj-core/src/classifier/agent_sdk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,18 +64,46 @@ fn base_claude_command(model: &str) -> Command {
/// very journal — out of the classification subprocess).
pub struct ClaudeBinaryRunner;

/// Turn a failed `claude -p` invocation into a readable `anyhow::Error`.
///
/// Both captured streams are decoded lossily as UTF-8, trimmed, and limited
/// to 600 characters each (an ellipsis marks a truncated stream). With
/// `--output-format json` the actual cause is printed as JSON on **stdout**
/// rather than stderr, so stdout is included in the message instead of being
/// dropped — otherwise all the user sees is a bare "exit status 1".
///
/// The detail part of the message is:
/// - `(no output)` when both streams are empty,
/// - the single non-empty stream when only one has content,
/// - `<stderr> | stdout: <stdout>` when both have content.
fn claude_exit_error(
    status: std::process::ExitStatus,
    stdout: &[u8],
    stderr: &[u8],
) -> anyhow::Error {
    // Upper bound, in characters, on how much of each stream is quoted.
    const MAX_CHARS: usize = 600;

    // Decode, trim and truncate one captured stream.
    fn clip(raw: &[u8]) -> String {
        let decoded = String::from_utf8_lossy(raw);
        let text = decoded.trim();
        // `nth(MAX_CHARS)` yields the first character past the limit, if any;
        // its byte offset is exactly where the kept prefix ends.
        match text.char_indices().nth(MAX_CHARS) {
            Some((cut, _)) => format!("{}…", &text[..cut]),
            None => text.to_owned(),
        }
    }

    let out = clip(stdout);
    let err = clip(stderr);

    let detail = if out.is_empty() && err.is_empty() {
        "(no output)".to_string()
    } else if err.is_empty() {
        out
    } else if out.is_empty() {
        err
    } else {
        format!("{err} | stdout: {out}")
    };

    anyhow!("`claude -p` exited with {status}: {detail}")
}

impl CommandRunner for ClaudeBinaryRunner {
fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String> {
let output = base_claude_command(model)
.arg(prompt)
.output()
.context("failed to spawn `claude` (is Claude Code installed and on PATH?)")?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(anyhow!(
"`claude -p` exited with {}: {}",
return Err(claude_exit_error(
output.status,
stderr.trim()
&output.stdout,
&output.stderr,
));
}
Ok(String::from_utf8_lossy(&output.stdout).into_owned())
Expand Down Expand Up @@ -111,11 +139,10 @@ impl CommandRunner for ClaudeBinaryStdinRunner {
.wait_with_output()
.context("failed to wait for `claude`")?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(anyhow!(
"`claude -p` exited with {}: {}",
return Err(claude_exit_error(
output.status,
stderr.trim()
&output.stdout,
&output.stderr,
));
}
Ok(String::from_utf8_lossy(&output.stdout).into_owned())
Expand Down
102 changes: 99 additions & 3 deletions crates/tj-core/src/dream/llm_backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,57 @@ impl LlmDreamBackend {
}
}

/// Max transcript size fed to the model in one call. The hard wall is
/// the ~200k-token context limit (a real session hit ~220k tokens and `claude
/// -p` returned HTTP 400). We stay well under it and split oversized
/// transcripts across several calls, merging the events (run_dream dedups).
///
/// Despite the `CHAR` in the name, `chunk_transcript` compares this value
/// against `str::len()`, i.e. UTF-8 **bytes**, so a transcript with non-ASCII
/// text is split somewhat earlier than a pure character count would split it.
///
/// NOTE(review): the budget is applied to the transcript only; whatever
/// `build_prompt` adds around it (task list, instructions) is not counted —
/// confirm the remaining headroom is sufficient.
const TRANSCRIPT_CHAR_BUDGET: usize = 360_000;

impl DreamBackend for LlmDreamBackend {
fn backfill(&self, input: &BackfillInput) -> anyhow::Result<Vec<BackfillEvent>> {
let prompt = crate::dream::prompt::build_prompt(input);
let text = self.llm.complete(&prompt, 1024)?;
parse_backfill_json(&text)
let mut out = Vec::new();
for chunk in chunk_transcript(&input.transcript, TRANSCRIPT_CHAR_BUDGET) {
let chunk_input = BackfillInput {
tasks: input.tasks.clone(),
transcript: chunk,
};
let prompt = crate::dream::prompt::build_prompt(&chunk_input);
let text = self.llm.complete(&prompt, 1024)?;
out.extend(parse_backfill_json(&text)?);
}
Ok(out)
}
}

/// Cut `transcript` into consecutive pieces of at most `budget` bytes each.
///
/// Pieces end on a newline whenever the lines themselves fit the budget. A
/// single line longer than `budget` is broken up character by character, so a
/// piece never ends in the middle of a UTF-8 sequence. Concatenating the
/// returned pieces reproduces `transcript` exactly.
///
/// A transcript that already fits (including the empty string) is returned as
/// exactly one piece, so callers always get at least one chunk to process.
fn chunk_transcript(transcript: &str, budget: usize) -> Vec<String> {
    // Close `pending` off as a finished chunk if appending `incoming` more
    // bytes would push it past `budget`. An empty `pending` is never flushed,
    // which guarantees forward progress even when one unit alone is too big.
    fn flush_if_full(
        done: &mut Vec<String>,
        pending: &mut String,
        incoming: usize,
        budget: usize,
    ) {
        if pending.is_empty() {
            return;
        }
        if pending.len() + incoming > budget {
            done.push(std::mem::take(pending));
        }
    }

    // Fast path: nothing to split.
    if transcript.len() <= budget {
        return vec![transcript.to_owned()];
    }

    let mut done: Vec<String> = Vec::new();
    let mut pending = String::new();

    for segment in transcript.split_inclusive('\n') {
        flush_if_full(&mut done, &mut pending, segment.len(), budget);

        if segment.len() <= budget {
            // Whole line fits: keep it intact.
            pending.push_str(segment);
            continue;
        }

        // Oversized line: fall back to a hard split on character boundaries.
        for c in segment.chars() {
            flush_if_full(&mut done, &mut pending, c.len_utf8(), budget);
            pending.push(c);
        }
    }

    if !pending.is_empty() {
        done.push(pending);
    }
    done
}

/// Parse the model's reply (a JSON array of `BackfillEvent`, possibly wrapped in
Expand Down Expand Up @@ -66,6 +111,57 @@ mod tests {
assert!(parse_backfill_json("[]").unwrap().is_empty());
}

#[test]
fn small_transcript_is_one_chunk() {
    // A transcript that already fits the budget must come back untouched,
    // as exactly one chunk.
    let chunks = chunk_transcript("a\nb\nc\n", 100);
    assert_eq!(chunks, vec!["a\nb\nc\n".to_string()]);
}

#[test]
fn big_transcript_splits_on_lines_and_preserves_content() {
    // 10 lines of 20 bytes each; a 50-byte budget holds two whole lines per
    // chunk → multiple chunks, no loss.
    let transcript: String = (0..10).map(|i| format!("line{i:015}\n")).collect();
    let chunks = chunk_transcript(&transcript, 50);
    assert!(chunks.len() > 1, "must split");
    assert!(chunks.iter().all(|c| c.len() <= 50));
    // Every line fits the budget on its own, so no chunk may end mid-line.
    // Without this check a plain character-wise split would also pass.
    assert!(
        chunks.iter().all(|c| c.ends_with('\n')),
        "chunks must break on line boundaries"
    );
    assert_eq!(chunks.concat(), transcript, "no content lost");
}

#[test]
fn oversized_single_line_is_hard_split() {
    // One 250-byte line with no newline at all: the only way to honour a
    // 100-byte budget is to cut inside the line.
    let long_line: String = std::iter::repeat('x').take(250).collect();
    let pieces = chunk_transcript(&long_line, 100);
    assert!(pieces.len() >= 3);
    assert!(pieces.iter().all(|piece| piece.len() <= 100));
    assert_eq!(pieces.concat(), long_line);
}

#[test]
fn backfill_chunks_large_transcript_into_multiple_calls() {
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;

    // Fake LLM that records how many times `complete` is invoked. The counter
    // lives behind an `Arc` so the test can still read it after the fake has
    // been boxed and moved into the backend.
    struct CountingLlm(Arc<AtomicUsize>);
    impl LlmBackend for CountingLlm {
        fn complete(&self, _prompt: &str, _max: u32) -> anyhow::Result<String> {
            self.0.fetch_add(1, Ordering::SeqCst);
            Ok("[]".to_string())
        }
        fn name(&self) -> &'static str {
            "counting"
        }
    }

    let calls = Arc::new(AtomicUsize::new(0));
    let llm = Box::new(CountingLlm(Arc::clone(&calls)));

    // Build a transcript larger than the budget so it must split.
    let transcript = "y\n".repeat(TRANSCRIPT_CHAR_BUDGET);
    let expected_calls = chunk_transcript(&transcript, TRANSCRIPT_CHAR_BUDGET).len();
    assert!(expected_calls > 1, "fixture must exceed the budget");

    let b = LlmDreamBackend::new(llm);
    let input = BackfillInput {
        tasks: vec![],
        transcript,
    };
    let evs = b.backfill(&input).unwrap();
    assert!(evs.is_empty());
    // The point of the test: one model call per chunk, not a single call.
    assert_eq!(
        calls.load(Ordering::SeqCst),
        expected_calls,
        "backfill must call the model once per transcript chunk"
    );
}

#[test]
fn llm_dream_backend_runs_and_parses() {
struct FakeLlm;
Expand Down
2 changes: 1 addition & 1 deletion crates/tj-mcp/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ path = "src/main.rs"

[dependencies]
# Lean: the MCP server doesn't embed yet, so it skips the model2vec backend.
tj-core = { package = "task-journal-core", version = "0.22.0", path = "../tj-core", default-features = false }
tj-core = { package = "task-journal-core", version = "0.22.1", path = "../tj-core", default-features = false }
anyhow = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion plugin/.claude-plugin/plugin.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "task-journal",
"version": "0.22.0",
"version": "0.22.1",
"description": "Append-only journal of AI-coding task reasoning chains: hypotheses, decisions, rejections, evidence. Renders compact resume packs so an agent can pick up a 2-week-old task with full context.",
"author": {
"name": "Mher Shahinyan"
Expand Down
Loading