From 4feb6215f08e7fd84e404ee4a2de7f3611432b66 Mon Sep 17 00:00:00 2001
From: Mher Shahinyan <shahinyanm@gmail.com>
Date: Sat, 13 Jun 2026 20:23:56 +0400
Subject: [PATCH 1/5] fix(complete): tolerate non-JSON enrich replies; never
 abort finalize
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The backfill model sometimes answers with prose instead of the JSON
array — e.g. continuing the transcript's own dialogue ('Контекст в
норме... Что дальше?'). The parse error aborted the whole `complete`,
losing the retitle and close. Backfill is best-effort: skip an
unparseable chunk reply (warn), extract a JSON array even when wrapped
in prose, and re-assert 'output ONLY the JSON array, do not continue the
transcript' after the transcript. Retitle/close run regardless of what
enrich recovers.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                            | 12 ++++++
 Cargo.lock                              |  6 +--
 Cargo.toml                              |  2 +-
 crates/tj-cli/Cargo.toml                |  2 +-
 crates/tj-core/src/dream/llm_backend.rs | 56 +++++++++++++++++++++++--
 crates/tj-core/src/dream/prompt.rs      |  5 ++-
 crates/tj-mcp/Cargo.toml                |  2 +-
 plugin/.claude-plugin/plugin.json       |  2 +-
 8 files changed, 76 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0816c5e..0058eb7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.22.2] - 2026-06-13
+
+### Fixed
+- **`complete` survives a non-JSON enrich reply.** When the backfill model
+  answered with prose instead of the requested JSON array — e.g. continuing the
+  transcript's own dialogue ("Контекст в норме… Что дальше?") — the parse error
+  aborted the whole `complete`, losing the retitle and close. Backfill is now
+  best-effort: an unparseable chunk reply is skipped (with a warning), the parser
+  extracts a JSON array even when wrapped in prose, and the prompt re-asserts
+  "output ONLY the JSON array, do not continue the transcript" after the
+  transcript. Retitle/close always run regardless of what enrich recovers.
+
 ## [0.22.1] - 2026-06-13
 
 ### Fixed
diff --git a/Cargo.lock b/Cargo.lock
index 1bfc571..0213ed3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2572,7 +2572,7 @@ dependencies = [
 
 [[package]]
 name = "task-journal-cli"
-version = "0.22.1"
+version = "0.22.2"
 dependencies = [
  "anyhow",
  "assert_cmd",
@@ -2596,7 +2596,7 @@ dependencies = [
 
 [[package]]
 name = "task-journal-core"
-version = "0.22.1"
+version = "0.22.2"
 dependencies = [
  "anyhow",
  "chrono",
@@ -2621,7 +2621,7 @@ dependencies = [
 
 [[package]]
 name = "task-journal-mcp"
-version = "0.22.1"
+version = "0.22.2"
 dependencies = [
  "anyhow",
  "chrono",
diff --git a/Cargo.toml b/Cargo.toml
index f9c65b5..3fe8092 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,7 +7,7 @@ members = [
 ]
 
 [workspace.package]
-version = "0.22.1"
+version = "0.22.2"
 edition = "2021"
 rust-version = "1.88"
 license = "MIT"
diff --git a/crates/tj-cli/Cargo.toml b/crates/tj-cli/Cargo.toml
index 1ef884f..4ba290c 100644
--- a/crates/tj-cli/Cargo.toml
+++ b/crates/tj-cli/Cargo.toml
@@ -23,7 +23,7 @@ default = ["embed"]
 embed = ["tj-core/embed"]
 
 [dependencies]
-tj-core = { package = "task-journal-core", version = "0.22.1", path = "../tj-core", default-features = false }
+tj-core = { package = "task-journal-core", version = "0.22.2", path = "../tj-core", default-features = false }
 anyhow = { workspace = true }
 clap = { workspace = true }
 tracing = { workspace = true }
diff --git a/crates/tj-core/src/dream/llm_backend.rs b/crates/tj-core/src/dream/llm_backend.rs
index 7ad63fc..79e87c1 100644
--- a/crates/tj-core/src/dream/llm_backend.rs
+++ b/crates/tj-core/src/dream/llm_backend.rs
@@ -40,7 +40,16 @@ impl DreamBackend for LlmDreamBackend {
             };
             let prompt = crate::dream::prompt::build_prompt(&chunk_input);
             let text = self.llm.complete(&prompt, 1024)?;
-            out.extend(parse_backfill_json(&text)?);
+            // Backfill is best-effort: a model that replied with prose instead
+            // of the JSON array (e.g. continued the transcript dialogue) yields
+            // nothing for this chunk, but must NOT abort the whole finalize —
+            // the retitle/close still need to run.
+            match parse_backfill_json(&text) {
+                Ok(evs) => out.extend(evs),
+                Err(e) => {
+                    tracing::warn!(error = %e, "dream backfill: skipping unparseable chunk reply")
+                }
+            }
         }
         Ok(out)
     }
@@ -86,8 +95,12 @@ pub fn parse_backfill_json(text: &str) -> anyhow::Result<Vec<BackfillEvent>> {
         .trim_start_matches("```")
         .trim_end_matches("```")
         .trim();
-    serde_json::from_str(json_str)
-        .with_context(|| format!("dream JSON parse failed; got: {json_str}"))
+    // Tolerate a JSON array wrapped in prose by slicing to the outer brackets.
+    let slice = match (json_str.find('['), json_str.rfind(']')) {
+        (Some(a), Some(b)) if b > a => &json_str[a..=b],
+        _ => json_str,
+    };
+    serde_json::from_str(slice).with_context(|| format!("dream JSON parse failed; got: {json_str}"))
 }
 
 #[cfg(test)]
@@ -111,6 +124,43 @@ mod tests {
         assert!(parse_backfill_json("[]").unwrap().is_empty());
     }
 
+    #[test]
+    fn parse_extracts_array_wrapped_in_prose() {
+        let reply = "Here are the missed events:\n[{\"event_type\":\"finding\",\
+\"task_id\":\"tj-1\",\"text\":\"found\",\"timestamp\":\"2026-06-13T00:00:00Z\"}]\nHope that helps!";
+        let evs = parse_backfill_json(reply).unwrap();
+        assert_eq!(evs.len(), 1);
+    }
+
+    #[test]
+    fn parse_errors_on_pure_prose() {
+        // A conversational reply with no array at all must be an Err so the
+        // backfill loop can skip the chunk instead of inventing events.
+        assert!(parse_backfill_json("Контекст в норме. Что дальше?").is_err());
+    }
+
+    #[test]
+    fn backfill_skips_unparseable_chunk_reply() {
+        // Model replies with prose, not JSON → backfill yields nothing but does
+        // NOT error, so the surrounding finalize (retitle/close) still runs.
+        struct ChattyLlm;
+        impl LlmBackend for ChattyLlm {
+            fn complete(&self, _prompt: &str, _max: u32) -> anyhow::Result<String> {
+                Ok("Контекст в норме. 566.5k/1M использовано. Что дальше?".to_string())
+            }
+            fn name(&self) -> &'static str {
+                "chatty"
+            }
+        }
+        let b = LlmDreamBackend::new(Box::new(ChattyLlm));
+        let input = BackfillInput {
+            tasks: vec![],
+            transcript: "user: hi\nassistant: hello".into(),
+        };
+        let evs = b.backfill(&input).unwrap();
+        assert!(evs.is_empty());
+    }
+
     #[test]
     fn small_transcript_is_one_chunk() {
         let c = chunk_transcript("a\nb\nc\n", 100);
diff --git a/crates/tj-core/src/dream/prompt.rs b/crates/tj-core/src/dream/prompt.rs
index 210cfc4..9c1f6c1 100644
--- a/crates/tj-core/src/dream/prompt.rs
+++ b/crates/tj-core/src/dream/prompt.rs
@@ -34,7 +34,10 @@ pub fn build_prompt(input: &BackfillInput) -> String {
          - Respond with ONLY a JSON array of objects: \
          {{\"event_type\",\"task_id\",\"text\",\"timestamp\"}}. Empty array if nothing missed.\n\n\
          # Candidate tasks and their existing events\n{tasks}\n\
-         # Transcript\n{transcript}\n",
+         # Transcript\n{transcript}\n\n\
+         Remember: output ONLY the JSON array of missed events described above. \
+         Do NOT reply to, summarise, or continue the transcript; if nothing was \
+         missed, output [].\n",
         types = ALLOWED_TYPES,
         tasks = tasks_block,
         transcript = input.transcript,
diff --git a/crates/tj-mcp/Cargo.toml b/crates/tj-mcp/Cargo.toml
index 906afaf..8d5995c 100644
--- a/crates/tj-mcp/Cargo.toml
+++ b/crates/tj-mcp/Cargo.toml
@@ -17,7 +17,7 @@ path = "src/main.rs"
 
 [dependencies]
 # Lean: the MCP server doesn't embed yet, so it skips the model2vec backend.
-tj-core = { package = "task-journal-core", version = "0.22.1", path = "../tj-core", default-features = false }
+tj-core = { package = "task-journal-core", version = "0.22.2", path = "../tj-core", default-features = false }
 anyhow = { workspace = true }
 tokio = { workspace = true }
 tracing = { workspace = true }
diff --git a/plugin/.claude-plugin/plugin.json b/plugin/.claude-plugin/plugin.json
index bd04d5c..b0e406c 100644
--- a/plugin/.claude-plugin/plugin.json
+++ b/plugin/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "task-journal",
-  "version": "0.22.1",
+  "version": "0.22.2",
   "description": "Append-only journal of AI-coding task reasoning chains: hypotheses, decisions, rejections, evidence. Renders compact resume packs so an agent can pick up a 2-week-old task with full context.",
   "author": {
     "name": "Mher Shahinyan"

From 664fd077c6594d5b8687e2cb8641ad8e1ce372ba Mon Sep 17 00:00:00 2001
From: Mher Shahinyan <shahinyanm@gmail.com>
Date: Sat, 13 Jun 2026 20:30:04 +0400
Subject: [PATCH 2/5] fix(complete): size enrich chunks for claude -p overhead;
 never abort
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

claude -p is a full Claude Code instance: its system prompt + tool
definitions cost ~113k tokens before our content, so a 360k-char chunk
(~91k tokens) still 400'd at ~204k total. Drop TRANSCRIPT_CHAR_BUDGET to
150k chars (~37k tokens) and make backfill swallow ANY per-chunk error
(over-budget 400, transient failure, non-JSON reply) — enrich is strictly
best-effort and never sinks the retitle/close. A truly broken backend
still surfaces at the judge step.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 crates/tj-core/src/dream/llm_backend.rs | 55 +++++++++++++++++++------
 1 file changed, 42 insertions(+), 13 deletions(-)

diff --git a/crates/tj-core/src/dream/llm_backend.rs b/crates/tj-core/src/dream/llm_backend.rs
index 79e87c1..11e1042 100644
--- a/crates/tj-core/src/dream/llm_backend.rs
+++ b/crates/tj-core/src/dream/llm_backend.rs
@@ -25,10 +25,13 @@ impl LlmDreamBackend {
 }
 
 /// Max transcript characters fed to the model in one call. The hard wall is
-/// the ~200k-token context limit (a real session hit ~220k tokens and `claude
-/// -p` returned HTTP 400). We stay well under it and split oversized
-/// transcripts across several calls, merging the events (run_dream dedups).
-const TRANSCRIPT_CHAR_BUDGET: usize = 360_000;
+/// the ~200k-token context window, but `claude -p` is a full Claude Code
+/// instance: its system prompt + tool definitions alone cost ~113k tokens
+/// before our content (measured: a 360k-char chunk was ~91k tokens, yet the
+/// request totalled ~204k and 400'd). So the usable budget is far below the
+/// nominal limit — keep each chunk well under it (~37k tokens) and split the
+/// rest across calls, merging the events (run_dream dedups).
+const TRANSCRIPT_CHAR_BUDGET: usize = 150_000;
 
 impl DreamBackend for LlmDreamBackend {
     fn backfill(&self, input: &BackfillInput) -> anyhow::Result<Vec<BackfillEvent>> {
@@ -39,16 +42,18 @@ impl DreamBackend for LlmDreamBackend {
                 transcript: chunk,
             };
             let prompt = crate::dream::prompt::build_prompt(&chunk_input);
-            let text = self.llm.complete(&prompt, 1024)?;
-            // Backfill is best-effort: a model that replied with prose instead
-            // of the JSON array (e.g. continued the transcript dialogue) yields
-            // nothing for this chunk, but must NOT abort the whole finalize —
-            // the retitle/close still need to run.
-            match parse_backfill_json(&text) {
+            // Backfill is strictly best-effort: ANY per-chunk failure — an
+            // over-budget 400, a transient backend error, or a non-JSON reply
+            // (model continued the transcript dialogue) — is skipped, never
+            // aborting the finalize. A genuinely broken backend still surfaces
+            // at the judge step, which has its own (small, always-sized) call.
+            match self
+                .llm
+                .complete(&prompt, 1024)
+                .and_then(|text| parse_backfill_json(&text))
+            {
                 Ok(evs) => out.extend(evs),
-                Err(e) => {
-                    tracing::warn!(error = %e, "dream backfill: skipping unparseable chunk reply")
-                }
+                Err(e) => tracing::warn!(error = %e, "dream backfill: skipping chunk"),
             }
         }
         Ok(out)
@@ -161,6 +166,30 @@ mod tests {
         assert!(evs.is_empty());
     }
 
+    #[test]
+    fn backfill_skips_chunk_whose_call_errors() {
+        // An over-budget 400 / transient backend error on a chunk must be
+        // swallowed so the surrounding finalize (retitle/close) still runs.
+        struct FailingLlm;
+        impl LlmBackend for FailingLlm {
+            fn complete(&self, _prompt: &str, _max: u32) -> anyhow::Result<String> {
+                Err(anyhow::anyhow!(
+                    "`claude -p` exited with status 1: Prompt is too long"
+                ))
+            }
+            fn name(&self) -> &'static str {
+                "failing"
+            }
+        }
+        let b = LlmDreamBackend::new(Box::new(FailingLlm));
+        let input = BackfillInput {
+            tasks: vec![],
+            transcript: "user: hi\nassistant: hello".into(),
+        };
+        let evs = b.backfill(&input).unwrap();
+        assert!(evs.is_empty());
+    }
+
     #[test]
     fn small_transcript_is_one_chunk() {
         let c = chunk_transcript("a\nb\nc\n", 100);

From 8b8a4514372639ec9b19bfadad68f5f869975589 Mon Sep 17 00:00:00 2001
From: Mher Shahinyan <shahinyanm@gmail.com>
Date: Sat, 13 Jun 2026 20:44:00 +0400
Subject: [PATCH 3/5] fix(complete): timeout claude -p calls + enrich progress
 (no more hang)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A big task makes many sequential claude -p calls (one or more per
session). Without a timeout, a single wedged call hung the whole
`complete`, and with no output the command looked dead. Add a per-call
wall-clock timeout (90s by default, overridable via
TJ_CLAUDE_TIMEOUT_SECS) that kills a stuck claude; stdout/stderr are
drained in threads so a full pipe cannot deadlock the wait. A timed-out
chunk is skipped (enrich is best-effort). Also print an 'enriching N
session(s)…' progress line, which points at --quick, so a multi-minute
run is legible.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 crates/tj-cli/src/main.rs                  |  8 +++
 crates/tj-core/src/classifier/agent_sdk.rs | 67 ++++++++++++++++++++--
 2 files changed, 70 insertions(+), 5 deletions(-)

diff --git a/crates/tj-cli/src/main.rs b/crates/tj-cli/src/main.rs
index 3894399..6b03703 100644
--- a/crates/tj-cli/src/main.rs
+++ b/crates/tj-cli/src/main.rs
@@ -4153,6 +4153,14 @@ fn enrich_task(
     if sessions.is_empty() {
         return Ok(0);
     }
+    // Enrich is the slow part — one (or more, for big transcripts) `claude -p`
+    // call per session. Announce it so a multi-minute run doesn't look hung;
+    // `--quick` skips this entirely.
+    eprintln!(
+        "complete: enriching {} session(s) via {} — can take a few minutes (or use --quick to skip)…",
+        sessions.len(),
+        llm.name()
+    );
     let run_id = ulid::Ulid::new().to_string();
     let dream_backend = tj_core::dream::llm_backend::LlmDreamBackend::new(llm);
     let opts = tj_core::dream::DreamOptions {
diff --git a/crates/tj-core/src/classifier/agent_sdk.rs b/crates/tj-core/src/classifier/agent_sdk.rs
index 69a5b43..239489d 100644
--- a/crates/tj-core/src/classifier/agent_sdk.rs
+++ b/crates/tj-core/src/classifier/agent_sdk.rs
@@ -93,12 +93,71 @@ fn claude_exit_error(
     anyhow!("`claude -p` exited with {status}: {detail}")
 }
 
+/// Per-call wall-clock ceiling for a `claude -p` invocation. A spawned full
+/// Claude Code instance normally answers in seconds; this kills a wedged one so
+/// a multi-chunk enrich can't hang the whole `complete`. Override with
+/// `TJ_CLAUDE_TIMEOUT_SECS`.
+fn claude_timeout() -> std::time::Duration {
+    let secs = std::env::var("TJ_CLAUDE_TIMEOUT_SECS")
+        .ok()
+        .and_then(|s| s.parse::<u64>().ok())
+        .filter(|n| *n > 0)
+        .unwrap_or(90);
+    std::time::Duration::from_secs(secs)
+}
+
+/// Wait for `child` up to `timeout`, draining stdout/stderr concurrently so a
+/// full pipe can't deadlock the wait. On timeout the child is killed and an
+/// error returned; otherwise the captured output is handed back.
+fn wait_with_timeout(
+    mut child: std::process::Child,
+    timeout: std::time::Duration,
+) -> anyhow::Result<std::process::Output> {
+    use std::io::Read;
+    let mut out_pipe = child.stdout.take();
+    let mut err_pipe = child.stderr.take();
+    let so = std::thread::spawn(move || {
+        let mut b = Vec::new();
+        if let Some(p) = out_pipe.as_mut() {
+            let _ = p.read_to_end(&mut b);
+        }
+        b
+    });
+    let se = std::thread::spawn(move || {
+        let mut b = Vec::new();
+        if let Some(p) = err_pipe.as_mut() {
+            let _ = p.read_to_end(&mut b);
+        }
+        b
+    });
+    let start = std::time::Instant::now();
+    let status = loop {
+        if let Some(status) = child.try_wait()? {
+            break status;
+        }
+        if start.elapsed() >= timeout {
+            let _ = child.kill();
+            let _ = child.wait();
+            anyhow::bail!("`claude -p` timed out after {}s", timeout.as_secs());
+        }
+        std::thread::sleep(std::time::Duration::from_millis(150));
+    };
+    Ok(std::process::Output {
+        status,
+        stdout: so.join().unwrap_or_default(),
+        stderr: se.join().unwrap_or_default(),
+    })
+}
+
 impl CommandRunner for ClaudeBinaryRunner {
     fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String> {
-        let output = base_claude_command(model)
+        let child = base_claude_command(model)
             .arg(prompt)
-            .output()
+            .stdout(std::process::Stdio::piped())
+            .stderr(std::process::Stdio::piped())
+            .spawn()
             .context("failed to spawn `claude` (is Claude Code installed and on PATH?)")?;
+        let output = wait_with_timeout(child, claude_timeout())?;
         if !output.status.success() {
             return Err(claude_exit_error(
                 output.status,
@@ -135,9 +194,7 @@ impl CommandRunner for ClaudeBinaryStdinRunner {
             .context("claude stdin was not captured")?
             .write_all(prompt.as_bytes())
             .context("failed to write prompt to claude stdin")?;
-        let output = child
-            .wait_with_output()
-            .context("failed to wait for `claude`")?;
+        let output = wait_with_timeout(child, claude_timeout())?;
         if !output.status.success() {
             return Err(claude_exit_error(
                 output.status,

From 8a8bc38369f6214dbed2aeb51583e2d129cfd5ce Mon Sep 17 00:00:00 2001
From: Mher Shahinyan <shahinyanm@gmail.com>
Date: Sat, 13 Jun 2026 20:44:30 +0400
Subject: [PATCH 4/5] docs(changelog): expand 0.22.2 with enrich hardening
 (sizing, timeout, progress)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0058eb7..ae4feee 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,15 +9,33 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [0.22.2] - 2026-06-13
 
+Hardening pass on `complete`'s enrich step, found by running it on a real
+12-session task. Enrich is now strictly best-effort and can never hang or sink
+the retitle/close; `--quick` skips it entirely for an instant judge-only finalize.
+
 ### Fixed
 - **`complete` survives a non-JSON enrich reply.** When the backfill model
   answered with prose instead of the requested JSON array — e.g. continuing the
   transcript's own dialogue ("Контекст в норме… Что дальше?") — the parse error
-  aborted the whole `complete`, losing the retitle and close. Backfill is now
-  best-effort: an unparseable chunk reply is skipped (with a warning), the parser
-  extracts a JSON array even when wrapped in prose, and the prompt re-asserts
-  "output ONLY the JSON array, do not continue the transcript" after the
-  transcript. Retitle/close always run regardless of what enrich recovers.
+  aborted the whole `complete`, losing the retitle and close. Backfill now skips
+  an unparseable chunk reply (with a warning), the parser extracts a JSON array
+  even when wrapped in prose, and the prompt re-asserts "output ONLY the JSON
+  array, do not continue the transcript" after the transcript.
+- **Enrich chunks are sized for `claude -p`'s overhead.** `claude -p` is a full
+  Claude Code instance whose system prompt + tool definitions cost ~113k tokens
+  before our content, so the earlier 360k-char chunk (~91k tokens) still failed
+  with HTTP 400 at ~204k tokens total. The per-call transcript budget drops to
+  150k chars (~37k tokens), and **any** per-chunk failure (over-budget 400,
+  transient error, non-JSON reply) is skipped rather than aborting — a genuinely
+  broken backend still surfaces at the judge step.
+- **No more apparent hang.** A big task makes many sequential `claude -p` calls;
+  without a timeout one wedged call hung the whole command with no output. Each
+  call now has a wall-clock timeout (90s, `TJ_CLAUDE_TIMEOUT_SECS`) that kills a
+  stuck `claude` (pipes drained in threads to avoid buffer deadlock), and enrich
+  prints an "enriching N session(s)…" progress line so a slow run doesn't look hung.
+- **Legible `claude -p` errors** (carried from the same investigation): a
+  non-zero exit now surfaces the JSON error claude prints on stdout, so failures
+  read as "Prompt is too long · ~204261 tokens" instead of a bare "exit 1".
 
 ## [0.22.1] - 2026-06-13
 

From 6c5cd64d47f9f4663266ebdf5b090a7ba08b1fb8 Mon Sep 17 00:00:00 2001
From: Mher Shahinyan <shahinyanm@gmail.com>
Date: Sat, 13 Jun 2026 20:57:59 +0400
Subject: [PATCH 5/5] feat(complete)!: judge-only by default, enrich opt-in via
 --enrich
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Running complete on real 12-session tasks showed the session-backfill
pass takes 10-15 min (dozens of sequential claude -p spawns, ~113k-token
overhead each) — too slow to be the default. The judge-only path
(retitle + close + outcome) is seconds and delivers ~90% of the value.

Flip the default: complete <id> now finalizes via judgment only; add
--enrich to also backfill missed events from sessions. The old --quick
flag is removed (its behaviour is the default). Behaviour change → 0.23.0.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                      | 18 +++++++++---
 Cargo.lock                        |  6 ++--
 Cargo.toml                        |  2 +-
 crates/tj-cli/Cargo.toml          |  2 +-
 crates/tj-cli/src/main.rs         | 46 +++++++++++++++++--------------
 crates/tj-cli/tests/cli.rs        |  8 +++---
 crates/tj-mcp/Cargo.toml          |  2 +-
 plugin/.claude-plugin/plugin.json |  2 +-
 8 files changed, 51 insertions(+), 35 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ae4feee..75eceee 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,11 +7,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
-## [0.22.2] - 2026-06-13
+## [0.23.0] - 2026-06-13
 
-Hardening pass on `complete`'s enrich step, found by running it on a real
-12-session task. Enrich is now strictly best-effort and can never hang or sink
-the retitle/close; `--quick` skips it entirely for an instant judge-only finalize.
+Finalize, retuned after running `complete` on real 12-session tasks: the fast,
+reliable judge-only path is now the default, and the slow session-enrich pass is
+opt-in.
+
+### Changed
+- **`complete` is judge-only by default; enrich is opt-in via `--enrich`.**
+  Finalizing through the model's judgment (retitle + close + outcome) takes
+  seconds and is what gives ~90% of the value. The session-backfill pass — one
+  `claude -p` call per session, minutes on a big multi-session task — proved too
+  slow to be the default, so it now runs only with `--enrich`. (The old `--quick`
+  flag is gone: its behaviour is the default. Replace `complete <id> --quick`
+  with `complete <id>`, and `complete <id>` with `complete <id> --enrich` if you
+  want the old full behaviour.)
 
 ### Fixed
 - **`complete` survives a non-JSON enrich reply.** When the backfill model
diff --git a/Cargo.lock b/Cargo.lock
index 0213ed3..bec2953 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2572,7 +2572,7 @@ dependencies = [
 
 [[package]]
 name = "task-journal-cli"
-version = "0.22.2"
+version = "0.23.0"
 dependencies = [
  "anyhow",
  "assert_cmd",
@@ -2596,7 +2596,7 @@ dependencies = [
 
 [[package]]
 name = "task-journal-core"
-version = "0.22.2"
+version = "0.23.0"
 dependencies = [
  "anyhow",
  "chrono",
@@ -2621,7 +2621,7 @@ dependencies = [
 
 [[package]]
 name = "task-journal-mcp"
-version = "0.22.2"
+version = "0.23.0"
 dependencies = [
  "anyhow",
  "chrono",
diff --git a/Cargo.toml b/Cargo.toml
index 3fe8092..acf8fae 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,7 +7,7 @@ members = [
 ]
 
 [workspace.package]
-version = "0.22.2"
+version = "0.23.0"
 edition = "2021"
 rust-version = "1.88"
 license = "MIT"
diff --git a/crates/tj-cli/Cargo.toml b/crates/tj-cli/Cargo.toml
index 4ba290c..6f3ce8b 100644
--- a/crates/tj-cli/Cargo.toml
+++ b/crates/tj-cli/Cargo.toml
@@ -23,7 +23,7 @@ default = ["embed"]
 embed = ["tj-core/embed"]
 
 [dependencies]
-tj-core = { package = "task-journal-core", version = "0.22.2", path = "../tj-core", default-features = false }
+tj-core = { package = "task-journal-core", version = "0.23.0", path = "../tj-core", default-features = false }
 anyhow = { workspace = true }
 clap = { workspace = true }
 tracing = { workspace = true }
diff --git a/crates/tj-cli/src/main.rs b/crates/tj-cli/src/main.rs
index 6b03703..28d49a0 100644
--- a/crates/tj-cli/src/main.rs
+++ b/crates/tj-cli/src/main.rs
@@ -871,21 +871,22 @@ enum Commands {
         #[arg(long)]
         backend: Option<String>,
     },
-    /// Finalize a task: enrich its memory from the sessions it touched, fix a
-    /// junk auto-title, and close it IF the events clearly show it is done —
-    /// the model decides from the content. Omit the id to finalize every open
-    /// task in the project (batch, with a reviewable list). One LLM call per
-    /// session for enrich + one judge call per task, via the chosen backend
-    /// (free with `--backend ollama`).
+    /// Finalize a task: fix a junk auto-title and close it IF the events
+    /// clearly show it is done — the model decides from the content, in
+    /// seconds. Omit the id to finalize every open task (batch, with a
+    /// reviewable list). Add `--enrich` to also re-read the task's sessions and
+    /// backfill missed events first — thorough but slow (one `claude -p` call
+    /// per session; minutes on a big multi-session task).
     Complete {
         /// The task id to finalize. Omit to finalize all open tasks (batch).
         task: Option<String>,
         /// Show scope and planned actions without calling the model or writing.
         #[arg(long)]
         dry_run: bool,
-        /// Skip the (heavy) enrich pass; judge/retitle/close from stored events only.
+        /// Also backfill missed events from the task's sessions before judging.
+        /// Thorough but slow (one `claude -p` call per session).
         #[arg(long)]
-        quick: bool,
+        enrich: bool,
         /// Required for batch finalize when stdin is not an interactive terminal.
         #[arg(long)]
         yes: bool,
@@ -2784,12 +2785,12 @@ fn main() -> Result<()> {
         Commands::Complete {
             task,
             dry_run,
-            quick,
+            enrich,
             yes,
             backend,
         } => match task {
-            Some(id) => run_complete_single(&id, dry_run, quick, backend.as_deref())?,
-            None => run_complete_batch(dry_run, quick, yes, backend.as_deref())?,
+            Some(id) => run_complete_single(&id, dry_run, enrich, backend.as_deref())?,
+            None => run_complete_batch(dry_run, enrich, yes, backend.as_deref())?,
         },
         Commands::Export {
             format,
@@ -4214,7 +4215,7 @@ fn task_event_lines(conn: &rusqlite::Connection, task_id: &str) -> anyhow::Resul
 fn finalize_one_task(
     ctx: &ProjectCtx<'_>,
     task_id: &str,
-    quick: bool,
+    enrich: bool,
     dry_run: bool,
     backend: Option<&str>,
 ) -> anyhow::Result<FinalizeOutcome> {
@@ -4223,8 +4224,9 @@ fn finalize_one_task(
     let events_path = ctx.events_path;
     let project_hash = ctx.project_hash;
 
-    // 1. Enrich (unless quick / dry-run) — needs sessions and a backend.
-    if !quick && !dry_run {
+    // 1. Enrich (only when asked, and not on a dry-run) — needs sessions and a
+    // backend. Off by default because it is slow (one claude -p per session).
+    if enrich && !dry_run {
         if let Some(dir) = ctx.project_dir {
             if let Some(llm) = tj_core::llm::backend_from_env(backend)? {
                 out.enriched = enrich_task(conn, events_path, project_hash, dir, task_id, llm)?;
@@ -4339,7 +4341,7 @@ PATH; or pick one via --backend / TJ_BACKEND: anthropic, openai, ollama (free, l
 fn run_complete_single(
     task_id: &str,
     dry_run: bool,
-    quick: bool,
+    enrich: bool,
     backend: Option<&str>,
 ) -> anyhow::Result<()> {
     let cwd = std::env::current_dir()?;
@@ -4360,7 +4362,7 @@ fn run_complete_single(
         project_hash: &project_hash,
         project_dir: project_dir.as_deref(),
     };
-    let out = finalize_one_task(&ctx, task_id, quick, dry_run, backend)?;
+    let out = finalize_one_task(&ctx, task_id, enrich, dry_run, backend)?;
     print_finalize_outcome(task_id, &out);
     Ok(())
 }
@@ -4369,7 +4371,7 @@ fn run_complete_single(
 /// user can prune before confirming. Refuses without a TTY unless `--yes`.
 fn run_complete_batch(
     dry_run: bool,
-    quick: bool,
+    enrich: bool,
     yes: bool,
     backend: Option<&str>,
 ) -> anyhow::Result<()> {
@@ -4425,7 +4427,7 @@ fn run_complete_batch(
     if dry_run {
         println!();
         for (id, _) in &open {
-            finalize_one_task(&ctx, id, quick, true, backend)?;
+            finalize_one_task(&ctx, id, enrich, true, backend)?;
         }
         return Ok(());
     }
@@ -4465,7 +4467,11 @@ fn run_complete_batch(
         println!(
             "\nWill finalize {} task(s){}. Proceed? [y/N]",
             targets.len(),
-            if quick { " (quick: no enrich)" } else { "" }
+            if enrich {
+                " (with --enrich: slow, reads sessions)"
+            } else {
+                ""
+            }
         );
         let mut buf = String::new();
         std::io::stdin().read_line(&mut buf)?;
@@ -4477,7 +4483,7 @@ fn run_complete_batch(
 
     let mut left_open: Vec<(String, String)> = Vec::new();
     for (id, _) in &targets {
-        let out = finalize_one_task(&ctx, id, quick, false, backend)?;
+        let out = finalize_one_task(&ctx, id, enrich, false, backend)?;
         print_finalize_outcome(id, &out);
         if out.skipped_no_backend {
             println!("complete: stopping batch — no LLM backend available.");
diff --git a/crates/tj-cli/tests/cli.rs b/crates/tj-cli/tests/cli.rs
index c90699c..645b804 100644
--- a/crates/tj-cli/tests/cli.rs
+++ b/crates/tj-cli/tests/cli.rs
@@ -5549,10 +5549,10 @@ fn complete_batch_dry_run_lists_open_tasks() {
 /// `claude` on PATH returning a canned judgment. Proves the wiring: junk
 /// title → Rename, done verdict → Close with a persisted outcome. Unix-only
 /// (shell-script stub); the logic itself is covered cross-platform by the
-/// finalize.rs unit tests.
+/// finalize.rs unit tests. Default mode (judge-only, no `--enrich`).
 #[cfg(unix)]
 #[test]
-fn complete_quick_retitles_and_closes_via_fake_backend() {
+fn complete_retitles_and_closes_via_fake_backend() {
     use std::os::unix::fs::PermissionsExt;
 
     let dir = assert_fs::TempDir::new().unwrap();
@@ -5609,14 +5609,14 @@ fn complete_quick_retitles_and_closes_via_fake_backend() {
     .trim()
     .to_string();
 
-    // --quick: skip enrich (no sessions), exercise judge → retitle → close.
+    // Default mode (judge-only): exercise judge → retitle → close.
     Command::cargo_bin("task-journal")
         .unwrap()
         .current_dir(proj.path())
         .env("XDG_DATA_HOME", dir.path())
         .env("PATH", &path_env)
         .env_remove("ANTHROPIC_API_KEY")
-        .args(["complete", &task_id, "--quick"])
+        .args(["complete", &task_id])
         .assert()
         .success()
         .stdout(contains("retitled"))
diff --git a/crates/tj-mcp/Cargo.toml b/crates/tj-mcp/Cargo.toml
index 8d5995c..ac459ec 100644
--- a/crates/tj-mcp/Cargo.toml
+++ b/crates/tj-mcp/Cargo.toml
@@ -17,7 +17,7 @@ path = "src/main.rs"
 
 [dependencies]
 # Lean: the MCP server doesn't embed yet, so it skips the model2vec backend.
-tj-core = { package = "task-journal-core", version = "0.22.2", path = "../tj-core", default-features = false }
+tj-core = { package = "task-journal-core", version = "0.23.0", path = "../tj-core", default-features = false }
 anyhow = { workspace = true }
 tokio = { workspace = true }
 tracing = { workspace = true }
diff --git a/plugin/.claude-plugin/plugin.json b/plugin/.claude-plugin/plugin.json
index b0e406c..79f4891 100644
--- a/plugin/.claude-plugin/plugin.json
+++ b/plugin/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "task-journal",
-  "version": "0.22.2",
+  "version": "0.23.0",
   "description": "Append-only journal of AI-coding task reasoning chains: hypotheses, decisions, rejections, evidence. Renders compact resume packs so an agent can pick up a 2-week-old task with full context.",
   "author": {
     "name": "Mher Shahinyan"