From 4feb6215f08e7fd84e404ee4a2de7f3611432b66 Mon Sep 17 00:00:00 2001 From: Mher Shahinyan Date: Sat, 13 Jun 2026 20:23:56 +0400 Subject: [PATCH 1/5] fix(complete): tolerate non-JSON enrich replies; never abort finalize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The backfill model sometimes answers with prose instead of the JSON array — e.g. continuing the transcript's own dialogue ('Контекст в норме... Что дальше?'). The parse error aborted the whole `complete`, losing the retitle and close. Backfill is best-effort: skip an unparseable chunk reply (warn), extract a JSON array even when wrapped in prose, and re-assert 'output ONLY the JSON array, do not continue the transcript' after the transcript. Retitle/close run regardless of what enrich recovers. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 12 ++++++ Cargo.lock | 6 +-- Cargo.toml | 2 +- crates/tj-cli/Cargo.toml | 2 +- crates/tj-core/src/dream/llm_backend.rs | 56 +++++++++++++++++++++++-- crates/tj-core/src/dream/prompt.rs | 5 ++- crates/tj-mcp/Cargo.toml | 2 +- plugin/.claude-plugin/plugin.json | 2 +- 8 files changed, 76 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0816c5e..0058eb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.22.2] - 2026-06-13 + +### Fixed +- **`complete` survives a non-JSON enrich reply.** When the backfill model + answered with prose instead of the requested JSON array — e.g. continuing the + transcript's own dialogue ("Контекст в норме… Что дальше?") — the parse error + aborted the whole `complete`, losing the retitle and close. 
Backfill is now + best-effort: an unparseable chunk reply is skipped (with a warning), the parser + extracts a JSON array even when wrapped in prose, and the prompt re-asserts + "output ONLY the JSON array, do not continue the transcript" after the + transcript. Retitle/close always run regardless of what enrich recovers. + ## [0.22.1] - 2026-06-13 ### Fixed diff --git a/Cargo.lock b/Cargo.lock index 1bfc571..0213ed3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2572,7 +2572,7 @@ dependencies = [ [[package]] name = "task-journal-cli" -version = "0.22.1" +version = "0.22.2" dependencies = [ "anyhow", "assert_cmd", @@ -2596,7 +2596,7 @@ dependencies = [ [[package]] name = "task-journal-core" -version = "0.22.1" +version = "0.22.2" dependencies = [ "anyhow", "chrono", @@ -2621,7 +2621,7 @@ dependencies = [ [[package]] name = "task-journal-mcp" -version = "0.22.1" +version = "0.22.2" dependencies = [ "anyhow", "chrono", diff --git a/Cargo.toml b/Cargo.toml index f9c65b5..3fe8092 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [ ] [workspace.package] -version = "0.22.1" +version = "0.22.2" edition = "2021" rust-version = "1.88" license = "MIT" diff --git a/crates/tj-cli/Cargo.toml b/crates/tj-cli/Cargo.toml index 1ef884f..4ba290c 100644 --- a/crates/tj-cli/Cargo.toml +++ b/crates/tj-cli/Cargo.toml @@ -23,7 +23,7 @@ default = ["embed"] embed = ["tj-core/embed"] [dependencies] -tj-core = { package = "task-journal-core", version = "0.22.1", path = "../tj-core", default-features = false } +tj-core = { package = "task-journal-core", version = "0.22.2", path = "../tj-core", default-features = false } anyhow = { workspace = true } clap = { workspace = true } tracing = { workspace = true } diff --git a/crates/tj-core/src/dream/llm_backend.rs b/crates/tj-core/src/dream/llm_backend.rs index 7ad63fc..79e87c1 100644 --- a/crates/tj-core/src/dream/llm_backend.rs +++ b/crates/tj-core/src/dream/llm_backend.rs @@ -40,7 +40,16 @@ impl DreamBackend for LlmDreamBackend { 
}; let prompt = crate::dream::prompt::build_prompt(&chunk_input); let text = self.llm.complete(&prompt, 1024)?; - out.extend(parse_backfill_json(&text)?); + // Backfill is best-effort: a model that replied with prose instead + // of the JSON array (e.g. continued the transcript dialogue) yields + // nothing for this chunk, but must NOT abort the whole finalize — + // the retitle/close still need to run. + match parse_backfill_json(&text) { + Ok(evs) => out.extend(evs), + Err(e) => { + tracing::warn!(error = %e, "dream backfill: skipping unparseable chunk reply") + } + } } Ok(out) } @@ -86,8 +95,12 @@ pub fn parse_backfill_json(text: &str) -> anyhow::Result> { .trim_start_matches("```") .trim_end_matches("```") .trim(); - serde_json::from_str(json_str) - .with_context(|| format!("dream JSON parse failed; got: {json_str}")) + // Tolerate a JSON array wrapped in prose by slicing to the outer brackets. + let slice = match (json_str.find('['), json_str.rfind(']')) { + (Some(a), Some(b)) if b > a => &json_str[a..=b], + _ => json_str, + }; + serde_json::from_str(slice).with_context(|| format!("dream JSON parse failed; got: {json_str}")) } #[cfg(test)] @@ -111,6 +124,43 @@ mod tests { assert!(parse_backfill_json("[]").unwrap().is_empty()); } + #[test] + fn parse_extracts_array_wrapped_in_prose() { + let reply = "Here are the missed events:\n[{\"event_type\":\"finding\",\ +\"task_id\":\"tj-1\",\"text\":\"found\",\"timestamp\":\"2026-06-13T00:00:00Z\"}]\nHope that helps!"; + let evs = parse_backfill_json(reply).unwrap(); + assert_eq!(evs.len(), 1); + } + + #[test] + fn parse_errors_on_pure_prose() { + // A conversational reply with no array at all must be an Err so the + // backfill loop can skip the chunk instead of inventing events. + assert!(parse_backfill_json("Контекст в норме. 
Что дальше?").is_err()); + } + + #[test] + fn backfill_skips_unparseable_chunk_reply() { + // Model replies with prose, not JSON → backfill yields nothing but does + // NOT error, so the surrounding finalize (retitle/close) still runs. + struct ChattyLlm; + impl LlmBackend for ChattyLlm { + fn complete(&self, _prompt: &str, _max: u32) -> anyhow::Result { + Ok("Контекст в норме. 566.5k/1M использовано. Что дальше?".to_string()) + } + fn name(&self) -> &'static str { + "chatty" + } + } + let b = LlmDreamBackend::new(Box::new(ChattyLlm)); + let input = BackfillInput { + tasks: vec![], + transcript: "user: hi\nassistant: hello".into(), + }; + let evs = b.backfill(&input).unwrap(); + assert!(evs.is_empty()); + } + #[test] fn small_transcript_is_one_chunk() { let c = chunk_transcript("a\nb\nc\n", 100); diff --git a/crates/tj-core/src/dream/prompt.rs b/crates/tj-core/src/dream/prompt.rs index 210cfc4..9c1f6c1 100644 --- a/crates/tj-core/src/dream/prompt.rs +++ b/crates/tj-core/src/dream/prompt.rs @@ -34,7 +34,10 @@ pub fn build_prompt(input: &BackfillInput) -> String { - Respond with ONLY a JSON array of objects: \ {{\"event_type\",\"task_id\",\"text\",\"timestamp\"}}. Empty array if nothing missed.\n\n\ # Candidate tasks and their existing events\n{tasks}\n\ - # Transcript\n{transcript}\n", + # Transcript\n{transcript}\n\n\ + Remember: output ONLY the JSON array of missed events described above. \ + Do NOT reply to, summarise, or continue the transcript; if nothing was \ + missed, output [].\n", types = ALLOWED_TYPES, tasks = tasks_block, transcript = input.transcript, diff --git a/crates/tj-mcp/Cargo.toml b/crates/tj-mcp/Cargo.toml index 906afaf..8d5995c 100644 --- a/crates/tj-mcp/Cargo.toml +++ b/crates/tj-mcp/Cargo.toml @@ -17,7 +17,7 @@ path = "src/main.rs" [dependencies] # Lean: the MCP server doesn't embed yet, so it skips the model2vec backend. 
-tj-core = { package = "task-journal-core", version = "0.22.1", path = "../tj-core", default-features = false } +tj-core = { package = "task-journal-core", version = "0.22.2", path = "../tj-core", default-features = false } anyhow = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } diff --git a/plugin/.claude-plugin/plugin.json b/plugin/.claude-plugin/plugin.json index bd04d5c..b0e406c 100644 --- a/plugin/.claude-plugin/plugin.json +++ b/plugin/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "task-journal", - "version": "0.22.1", + "version": "0.22.2", "description": "Append-only journal of AI-coding task reasoning chains: hypotheses, decisions, rejections, evidence. Renders compact resume packs so an agent can pick up a 2-week-old task with full context.", "author": { "name": "Mher Shahinyan" From 664fd077c6594d5b8687e2cb8641ad8e1ce372ba Mon Sep 17 00:00:00 2001 From: Mher Shahinyan Date: Sat, 13 Jun 2026 20:30:04 +0400 Subject: [PATCH 2/5] fix(complete): size enrich chunks for claude -p overhead; never abort MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit claude -p is a full Claude Code instance: its system prompt + tool definitions cost ~113k tokens before our content, so a 360k-char chunk (~91k tokens) still 400'd at ~204k total. Drop TRANSCRIPT_CHAR_BUDGET to 150k chars (~37k tokens) and make backfill swallow ANY per-chunk error (over-budget 400, transient failure, non-JSON reply) — enrich is strictly best-effort and never sinks the retitle/close. A truly broken backend still surfaces at the judge step. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/tj-core/src/dream/llm_backend.rs | 55 +++++++++++++++++++------ 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/crates/tj-core/src/dream/llm_backend.rs b/crates/tj-core/src/dream/llm_backend.rs index 79e87c1..11e1042 100644 --- a/crates/tj-core/src/dream/llm_backend.rs +++ b/crates/tj-core/src/dream/llm_backend.rs @@ -25,10 +25,13 @@ impl LlmDreamBackend { } /// Max transcript characters fed to the model in one call. The hard wall is -/// the ~200k-token context limit (a real session hit ~220k tokens and `claude -/// -p` returned HTTP 400). We stay well under it and split oversized -/// transcripts across several calls, merging the events (run_dream dedups). -const TRANSCRIPT_CHAR_BUDGET: usize = 360_000; +/// the ~200k-token context window, but `claude -p` is a full Claude Code +/// instance: its system prompt + tool definitions alone cost ~113k tokens +/// before our content (measured: a 360k-char chunk was ~91k tokens, yet the +/// request totalled ~204k and 400'd). So the usable budget is far below the +/// nominal limit — keep each chunk well under it (~37k tokens) and split the +/// rest across calls, merging the events (run_dream dedups). +const TRANSCRIPT_CHAR_BUDGET: usize = 150_000; impl DreamBackend for LlmDreamBackend { fn backfill(&self, input: &BackfillInput) -> anyhow::Result> { @@ -39,16 +42,18 @@ impl DreamBackend for LlmDreamBackend { transcript: chunk, }; let prompt = crate::dream::prompt::build_prompt(&chunk_input); - let text = self.llm.complete(&prompt, 1024)?; - // Backfill is best-effort: a model that replied with prose instead - // of the JSON array (e.g. continued the transcript dialogue) yields - // nothing for this chunk, but must NOT abort the whole finalize — - // the retitle/close still need to run. 
- match parse_backfill_json(&text) { + // Backfill is strictly best-effort: ANY per-chunk failure — an + // over-budget 400, a transient backend error, or a non-JSON reply + // (model continued the transcript dialogue) — is skipped, never + // aborting the finalize. A genuinely broken backend still surfaces + // at the judge step, which has its own (small, always-sized) call. + match self + .llm + .complete(&prompt, 1024) + .and_then(|text| parse_backfill_json(&text)) + { Ok(evs) => out.extend(evs), - Err(e) => { - tracing::warn!(error = %e, "dream backfill: skipping unparseable chunk reply") - } + Err(e) => tracing::warn!(error = %e, "dream backfill: skipping chunk"), } } Ok(out) @@ -161,6 +166,30 @@ mod tests { assert!(evs.is_empty()); } + #[test] + fn backfill_skips_chunk_whose_call_errors() { + // An over-budget 400 / transient backend error on a chunk must be + // swallowed so the surrounding finalize (retitle/close) still runs. + struct FailingLlm; + impl LlmBackend for FailingLlm { + fn complete(&self, _prompt: &str, _max: u32) -> anyhow::Result { + Err(anyhow::anyhow!( + "`claude -p` exited with status 1: Prompt is too long" + )) + } + fn name(&self) -> &'static str { + "failing" + } + } + let b = LlmDreamBackend::new(Box::new(FailingLlm)); + let input = BackfillInput { + tasks: vec![], + transcript: "user: hi\nassistant: hello".into(), + }; + let evs = b.backfill(&input).unwrap(); + assert!(evs.is_empty()); + } + #[test] fn small_transcript_is_one_chunk() { let c = chunk_transcript("a\nb\nc\n", 100); From 8b8a4514372639ec9b19bfadad68f5f869975589 Mon Sep 17 00:00:00 2001 From: Mher Shahinyan Date: Sat, 13 Jun 2026 20:44:00 +0400 Subject: [PATCH 3/5] fix(complete): timeout claude -p calls + enrich progress (no more hang) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A big task makes many sequential claude -p calls (one+ per session); without a timeout one wedged call hung the whole complete, with no output so it 
looked dead. Add a per-call wall-clock timeout (90s, TJ_CLAUDE_TIMEOUT_SECS) that kills a stuck claude and drains pipes in threads to avoid buffer deadlock; a timed-out chunk is skipped (enrich is best-effort). Print an 'enriching N session(s)…' progress line so a multi-minute run is legible, and point at --quick. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/tj-cli/src/main.rs | 8 +++ crates/tj-core/src/classifier/agent_sdk.rs | 67 ++++++++++++++++++++-- 2 files changed, 70 insertions(+), 5 deletions(-) diff --git a/crates/tj-cli/src/main.rs b/crates/tj-cli/src/main.rs index 3894399..6b03703 100644 --- a/crates/tj-cli/src/main.rs +++ b/crates/tj-cli/src/main.rs @@ -4153,6 +4153,14 @@ fn enrich_task( if sessions.is_empty() { return Ok(0); } + // Enrich is the slow part — one (or more, for big transcripts) `claude -p` + // call per session. Announce it so a multi-minute run doesn't look hung; + // `--quick` skips this entirely. + eprintln!( + "complete: enriching {} session(s) via {} — can take a few minutes (or use --quick to skip)…", + sessions.len(), + llm.name() + ); let run_id = ulid::Ulid::new().to_string(); let dream_backend = tj_core::dream::llm_backend::LlmDreamBackend::new(llm); let opts = tj_core::dream::DreamOptions { diff --git a/crates/tj-core/src/classifier/agent_sdk.rs b/crates/tj-core/src/classifier/agent_sdk.rs index 69a5b43..239489d 100644 --- a/crates/tj-core/src/classifier/agent_sdk.rs +++ b/crates/tj-core/src/classifier/agent_sdk.rs @@ -93,12 +93,71 @@ fn claude_exit_error( anyhow!("`claude -p` exited with {status}: {detail}") } +/// Per-call wall-clock ceiling for a `claude -p` invocation. A spawned full +/// Claude Code instance normally answers in seconds; this kills a wedged one so +/// a multi-chunk enrich can't hang the whole `complete`. Override with +/// `TJ_CLAUDE_TIMEOUT_SECS`. 
+fn claude_timeout() -> std::time::Duration { + let secs = std::env::var("TJ_CLAUDE_TIMEOUT_SECS") + .ok() + .and_then(|s| s.parse::().ok()) + .filter(|n| *n > 0) + .unwrap_or(90); + std::time::Duration::from_secs(secs) +} + +/// Wait for `child` up to `timeout`, draining stdout/stderr concurrently so a +/// full pipe can't deadlock the wait. On timeout the child is killed and an +/// error returned; otherwise the captured output is handed back. +fn wait_with_timeout( + mut child: std::process::Child, + timeout: std::time::Duration, +) -> anyhow::Result { + use std::io::Read; + let mut out_pipe = child.stdout.take(); + let mut err_pipe = child.stderr.take(); + let so = std::thread::spawn(move || { + let mut b = Vec::new(); + if let Some(p) = out_pipe.as_mut() { + let _ = p.read_to_end(&mut b); + } + b + }); + let se = std::thread::spawn(move || { + let mut b = Vec::new(); + if let Some(p) = err_pipe.as_mut() { + let _ = p.read_to_end(&mut b); + } + b + }); + let start = std::time::Instant::now(); + let status = loop { + if let Some(status) = child.try_wait()? 
{ + break status; + } + if start.elapsed() >= timeout { + let _ = child.kill(); + let _ = child.wait(); + anyhow::bail!("`claude -p` timed out after {}s", timeout.as_secs()); + } + std::thread::sleep(std::time::Duration::from_millis(150)); + }; + Ok(std::process::Output { + status, + stdout: so.join().unwrap_or_default(), + stderr: se.join().unwrap_or_default(), + }) +} + impl CommandRunner for ClaudeBinaryRunner { fn run(&self, model: &str, prompt: &str) -> anyhow::Result { - let output = base_claude_command(model) + let child = base_claude_command(model) .arg(prompt) - .output() + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .spawn() .context("failed to spawn `claude` (is Claude Code installed and on PATH?)")?; + let output = wait_with_timeout(child, claude_timeout())?; if !output.status.success() { return Err(claude_exit_error( output.status, @@ -135,9 +194,7 @@ impl CommandRunner for ClaudeBinaryStdinRunner { .context("claude stdin was not captured")? 
.write_all(prompt.as_bytes()) .context("failed to write prompt to claude stdin")?; - let output = child - .wait_with_output() - .context("failed to wait for `claude`")?; + let output = wait_with_timeout(child, claude_timeout())?; if !output.status.success() { return Err(claude_exit_error( output.status, From 8a8bc38369f6214dbed2aeb51583e2d129cfd5ce Mon Sep 17 00:00:00 2001 From: Mher Shahinyan Date: Sat, 13 Jun 2026 20:44:30 +0400 Subject: [PATCH 4/5] docs(changelog): expand 0.22.2 with enrich hardening (sizing, timeout, progress) Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0058eb7..ae4feee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,15 +9,33 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.22.2] - 2026-06-13 +Hardening pass on `complete`'s enrich step, found by running it on a real +12-session task. Enrich is now strictly best-effort and can never hang or sink +the retitle/close; `--quick` skips it entirely for an instant judge-only finalize. + ### Fixed - **`complete` survives a non-JSON enrich reply.** When the backfill model answered with prose instead of the requested JSON array — e.g. continuing the transcript's own dialogue ("Контекст в норме… Что дальше?") — the parse error - aborted the whole `complete`, losing the retitle and close. Backfill is now - best-effort: an unparseable chunk reply is skipped (with a warning), the parser - extracts a JSON array even when wrapped in prose, and the prompt re-asserts - "output ONLY the JSON array, do not continue the transcript" after the - transcript. Retitle/close always run regardless of what enrich recovers. + aborted the whole `complete`, losing the retitle and close. 
Backfill now skips + an unparseable chunk reply (with a warning), the parser extracts a JSON array + even when wrapped in prose, and the prompt re-asserts "output ONLY the JSON + array, do not continue the transcript" after the transcript. +- **Enrich chunks are sized for `claude -p`'s overhead.** `claude -p` is a full + Claude Code instance whose system prompt + tool definitions cost ~113k tokens + before our content, so the earlier 360k-char chunk still 400'd at ~204k total. + The per-call transcript budget drops to 150k chars (~37k tokens), and **any** + per-chunk failure (over-budget 400, transient error, non-JSON) is skipped + rather than aborting — a genuinely broken backend still surfaces at the judge + step. +- **No more apparent hang.** A big task makes many sequential `claude -p` calls; + without a timeout one wedged call hung the whole command with no output. Each + call now has a wall-clock timeout (90s, `TJ_CLAUDE_TIMEOUT_SECS`) that kills a + stuck `claude` (pipes drained in threads to avoid buffer deadlock), and enrich + prints an "enriching N session(s)…" progress line pointing at `--quick`. +- **Legible `claude -p` errors** (carried from the same investigation): a + non-zero exit now surfaces the JSON error claude prints on stdout, so failures + read as "Prompt is too long · ~204261 tokens" instead of a bare "exit 1". ## [0.22.1] - 2026-06-13 From 6c5cd64d47f9f4663266ebdf5b090a7ba08b1fb8 Mon Sep 17 00:00:00 2001 From: Mher Shahinyan Date: Sat, 13 Jun 2026 20:57:59 +0400 Subject: [PATCH 5/5] feat(complete)!: judge-only by default, enrich opt-in via --enrich MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Running complete on real 12-session tasks showed the session-backfill pass takes 10-15 min (dozens of sequential claude -p spawns, ~113k-token overhead each) — too slow to be the default. The judge-only path (retitle + close + outcome) is seconds and delivers ~90% of the value. 
Flip the default: complete now finalizes via judgment only; add --enrich to also backfill missed events from sessions. The old --quick flag is removed (its behaviour is the default). Behaviour change → 0.23.0. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 18 +++++++++--- Cargo.lock | 6 ++-- Cargo.toml | 2 +- crates/tj-cli/Cargo.toml | 2 +- crates/tj-cli/src/main.rs | 46 +++++++++++++++++-------------- crates/tj-cli/tests/cli.rs | 8 +++--- crates/tj-mcp/Cargo.toml | 2 +- plugin/.claude-plugin/plugin.json | 2 +- 8 files changed, 51 insertions(+), 35 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae4feee..75eceee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,11 +7,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -## [0.22.2] - 2026-06-13 +## [0.23.0] - 2026-06-13 -Hardening pass on `complete`'s enrich step, found by running it on a real -12-session task. Enrich is now strictly best-effort and can never hang or sink -the retitle/close; `--quick` skips it entirely for an instant judge-only finalize. +Finalize, retuned after running `complete` on real 12-session tasks: the fast, +reliable judge-only path is now the default, and the slow session-enrich pass is +opt-in. + +### Changed +- **`complete` is judge-only by default; enrich is opt-in via `--enrich`.** + Finalizing through the model's judgment (retitle + close + outcome) takes + seconds and is what gives ~90% of the value. The session-backfill pass — one + `claude -p` call per session, minutes on a big multi-session task — proved too + slow to be the default, so it now runs only with `--enrich`. (The old `--quick` + flag is gone: its behaviour is the default. Replace `complete <id> --quick` + with `complete <id>`, and `complete <id>` with `complete <id> --enrich` if you + want the old full behaviour.)
### Fixed - **`complete` survives a non-JSON enrich reply.** When the backfill model diff --git a/Cargo.lock b/Cargo.lock index 0213ed3..bec2953 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2572,7 +2572,7 @@ dependencies = [ [[package]] name = "task-journal-cli" -version = "0.22.2" +version = "0.23.0" dependencies = [ "anyhow", "assert_cmd", @@ -2596,7 +2596,7 @@ dependencies = [ [[package]] name = "task-journal-core" -version = "0.22.2" +version = "0.23.0" dependencies = [ "anyhow", "chrono", @@ -2621,7 +2621,7 @@ dependencies = [ [[package]] name = "task-journal-mcp" -version = "0.22.2" +version = "0.23.0" dependencies = [ "anyhow", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 3fe8092..acf8fae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [ ] [workspace.package] -version = "0.22.2" +version = "0.23.0" edition = "2021" rust-version = "1.88" license = "MIT" diff --git a/crates/tj-cli/Cargo.toml b/crates/tj-cli/Cargo.toml index 4ba290c..6f3ce8b 100644 --- a/crates/tj-cli/Cargo.toml +++ b/crates/tj-cli/Cargo.toml @@ -23,7 +23,7 @@ default = ["embed"] embed = ["tj-core/embed"] [dependencies] -tj-core = { package = "task-journal-core", version = "0.22.2", path = "../tj-core", default-features = false } +tj-core = { package = "task-journal-core", version = "0.23.0", path = "../tj-core", default-features = false } anyhow = { workspace = true } clap = { workspace = true } tracing = { workspace = true } diff --git a/crates/tj-cli/src/main.rs b/crates/tj-cli/src/main.rs index 6b03703..28d49a0 100644 --- a/crates/tj-cli/src/main.rs +++ b/crates/tj-cli/src/main.rs @@ -871,21 +871,22 @@ enum Commands { #[arg(long)] backend: Option, }, - /// Finalize a task: enrich its memory from the sessions it touched, fix a - /// junk auto-title, and close it IF the events clearly show it is done — - /// the model decides from the content. Omit the id to finalize every open - /// task in the project (batch, with a reviewable list). 
One LLM call per - /// session for enrich + one judge call per task, via the chosen backend - /// (free with `--backend ollama`). + /// Finalize a task: fix a junk auto-title and close it IF the events + /// clearly show it is done — the model decides from the content, in + /// seconds. Omit the id to finalize every open task (batch, with a + /// reviewable list). Add `--enrich` to also re-read the task's sessions and + /// backfill missed events first — thorough but slow (one `claude -p` call + /// per session; minutes on a big multi-session task). Complete { /// The task id to finalize. Omit to finalize all open tasks (batch). task: Option, /// Show scope and planned actions without calling the model or writing. #[arg(long)] dry_run: bool, - /// Skip the (heavy) enrich pass; judge/retitle/close from stored events only. + /// Also backfill missed events from the task's sessions before judging. + /// Thorough but slow (one `claude -p` call per session). #[arg(long)] - quick: bool, + enrich: bool, /// Required for batch finalize when stdin is not an interactive terminal. #[arg(long)] yes: bool, @@ -2784,12 +2785,12 @@ fn main() -> Result<()> { Commands::Complete { task, dry_run, - quick, + enrich, yes, backend, } => match task { - Some(id) => run_complete_single(&id, dry_run, quick, backend.as_deref())?, - None => run_complete_batch(dry_run, quick, yes, backend.as_deref())?, + Some(id) => run_complete_single(&id, dry_run, enrich, backend.as_deref())?, + None => run_complete_batch(dry_run, enrich, yes, backend.as_deref())?, }, Commands::Export { format, @@ -4214,7 +4215,7 @@ fn task_event_lines(conn: &rusqlite::Connection, task_id: &str) -> anyhow::Resul fn finalize_one_task( ctx: &ProjectCtx<'_>, task_id: &str, - quick: bool, + enrich: bool, dry_run: bool, backend: Option<&str>, ) -> anyhow::Result { @@ -4223,8 +4224,9 @@ fn finalize_one_task( let events_path = ctx.events_path; let project_hash = ctx.project_hash; - // 1. 
Enrich (unless quick / dry-run) — needs sessions and a backend. - if !quick && !dry_run { + // 1. Enrich (only when asked, and not on a dry-run) — needs sessions and a + // backend. Off by default because it is slow (one claude -p per session). + if enrich && !dry_run { if let Some(dir) = ctx.project_dir { if let Some(llm) = tj_core::llm::backend_from_env(backend)? { out.enriched = enrich_task(conn, events_path, project_hash, dir, task_id, llm)?; @@ -4339,7 +4341,7 @@ PATH; or pick one via --backend / TJ_BACKEND: anthropic, openai, ollama (free, l fn run_complete_single( task_id: &str, dry_run: bool, - quick: bool, + enrich: bool, backend: Option<&str>, ) -> anyhow::Result<()> { let cwd = std::env::current_dir()?; @@ -4360,7 +4362,7 @@ fn run_complete_single( project_hash: &project_hash, project_dir: project_dir.as_deref(), }; - let out = finalize_one_task(&ctx, task_id, quick, dry_run, backend)?; + let out = finalize_one_task(&ctx, task_id, enrich, dry_run, backend)?; print_finalize_outcome(task_id, &out); Ok(()) } @@ -4369,7 +4371,7 @@ fn run_complete_single( /// user can prune before confirming. Refuses without a TTY unless `--yes`. fn run_complete_batch( dry_run: bool, - quick: bool, + enrich: bool, yes: bool, backend: Option<&str>, ) -> anyhow::Result<()> { @@ -4425,7 +4427,7 @@ fn run_complete_batch( if dry_run { println!(); for (id, _) in &open { - finalize_one_task(&ctx, id, quick, true, backend)?; + finalize_one_task(&ctx, id, enrich, true, backend)?; } return Ok(()); } @@ -4465,7 +4467,11 @@ fn run_complete_batch( println!( "\nWill finalize {} task(s){}. Proceed? 
[y/N]", targets.len(), - if quick { " (quick: no enrich)" } else { "" } + if enrich { + " (with --enrich: slow, reads sessions)" + } else { + "" + } ); let mut buf = String::new(); std::io::stdin().read_line(&mut buf)?; @@ -4477,7 +4483,7 @@ fn run_complete_batch( let mut left_open: Vec<(String, String)> = Vec::new(); for (id, _) in &targets { - let out = finalize_one_task(&ctx, id, quick, false, backend)?; + let out = finalize_one_task(&ctx, id, enrich, false, backend)?; print_finalize_outcome(id, &out); if out.skipped_no_backend { println!("complete: stopping batch — no LLM backend available."); diff --git a/crates/tj-cli/tests/cli.rs b/crates/tj-cli/tests/cli.rs index c90699c..645b804 100644 --- a/crates/tj-cli/tests/cli.rs +++ b/crates/tj-cli/tests/cli.rs @@ -5549,10 +5549,10 @@ fn complete_batch_dry_run_lists_open_tasks() { /// `claude` on PATH returning a canned judgment. Proves the wiring: junk /// title → Rename, done verdict → Close with a persisted outcome. Unix-only /// (shell-script stub); the logic itself is covered cross-platform by the -/// finalize.rs unit tests. +/// finalize.rs unit tests. Default mode (judge-only, no `--enrich`). #[cfg(unix)] #[test] -fn complete_quick_retitles_and_closes_via_fake_backend() { +fn complete_retitles_and_closes_via_fake_backend() { use std::os::unix::fs::PermissionsExt; let dir = assert_fs::TempDir::new().unwrap(); @@ -5609,14 +5609,14 @@ fn complete_quick_retitles_and_closes_via_fake_backend() { .trim() .to_string(); - // --quick: skip enrich (no sessions), exercise judge → retitle → close. + // Default mode (judge-only): exercise judge → retitle → close. 
Command::cargo_bin("task-journal") .unwrap() .current_dir(proj.path()) .env("XDG_DATA_HOME", dir.path()) .env("PATH", &path_env) .env_remove("ANTHROPIC_API_KEY") - .args(["complete", &task_id, "--quick"]) + .args(["complete", &task_id]) .assert() .success() .stdout(contains("retitled")) diff --git a/crates/tj-mcp/Cargo.toml b/crates/tj-mcp/Cargo.toml index 8d5995c..ac459ec 100644 --- a/crates/tj-mcp/Cargo.toml +++ b/crates/tj-mcp/Cargo.toml @@ -17,7 +17,7 @@ path = "src/main.rs" [dependencies] # Lean: the MCP server doesn't embed yet, so it skips the model2vec backend. -tj-core = { package = "task-journal-core", version = "0.22.2", path = "../tj-core", default-features = false } +tj-core = { package = "task-journal-core", version = "0.23.0", path = "../tj-core", default-features = false } anyhow = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } diff --git a/plugin/.claude-plugin/plugin.json b/plugin/.claude-plugin/plugin.json index b0e406c..79f4891 100644 --- a/plugin/.claude-plugin/plugin.json +++ b/plugin/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "task-journal", - "version": "0.22.2", + "version": "0.23.0", "description": "Append-only journal of AI-coding task reasoning chains: hypotheses, decisions, rejections, evidence. Renders compact resume packs so an agent can pick up a 2-week-old task with full context.", "author": { "name": "Mher Shahinyan"