diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0816c5e..75eceee 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,46 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.23.0] - 2026-06-13
+
+Finalize, retuned after running `complete` on real 12-session tasks: the fast,
+reliable judge-only path is now the default, and the slow session-enrich pass is
+opt-in.
+
+### Changed
+- **`complete` is judge-only by default; enrich is opt-in via `--enrich`.**
+  Finalizing through the model's judgment (retitle + close + outcome) takes
+  seconds and is what gives ~90% of the value. The session-backfill pass — one
+  `claude -p` call per session, minutes on a big multi-session task — proved too
+  slow to be the default, so it now runs only with `--enrich`. (The old `--quick`
+  flag is gone: its behaviour is the default. Replace `complete <id> --quick`
+  with `complete <id>`, and `complete <id>` with `complete <id> --enrich` if you
+  want the old full behaviour.)
+
+### Fixed
+- **`complete` survives a non-JSON enrich reply.** When the backfill model
+  answered with prose instead of the requested JSON array — e.g. continuing the
+  transcript's own dialogue ("Контекст в норме… Что дальше?") — the parse error
+  aborted the whole `complete`, losing the retitle and close. Backfill now skips
+  an unparseable chunk reply (with a warning), the parser extracts a JSON array
+  even when wrapped in prose, and the prompt re-asserts "output ONLY the JSON
+  array, do not continue the transcript" after the transcript.
+- **Enrich chunks are sized for `claude -p`'s overhead.** `claude -p` is a full
+  Claude Code instance whose system prompt + tool definitions cost ~113k tokens
+  before our content, so the earlier 360k-char chunk still 400'd at ~204k total.
+  The per-call transcript budget drops to 150k chars (~37k tokens), and **any**
+  per-chunk failure (over-budget 400, transient error, non-JSON) is skipped
+  rather than aborting — a genuinely broken backend still surfaces at the judge
+  step.
+- **No more apparent hang.** A big task makes many sequential `claude -p` calls;
+  without a timeout one wedged call hung the whole command with no output. Each
+  call now has a wall-clock timeout (90s, `TJ_CLAUDE_TIMEOUT_SECS`) that kills a
+  stuck `claude` (pipes drained in threads to avoid buffer deadlock), and enrich
+  prints an "enriching N session(s)…" progress line so a long run shows it is working.
+- **Legible `claude -p` errors** (carried from the same investigation): a
+  non-zero exit now surfaces the JSON error claude prints on stdout, so failures
+  read as "Prompt is too long · ~204261 tokens" instead of a bare "exit 1".
+
 ## [0.22.1] - 2026-06-13
 
 ### Fixed
diff --git a/Cargo.lock b/Cargo.lock
index 1bfc571..bec2953 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2572,7 +2572,7 @@ dependencies = [
 
 [[package]]
 name = "task-journal-cli"
-version = "0.22.1"
+version = "0.23.0"
 dependencies = [
  "anyhow",
  "assert_cmd",
@@ -2596,7 +2596,7 @@ dependencies = [
 
 [[package]]
 name = "task-journal-core"
-version = "0.22.1"
+version = "0.23.0"
 dependencies = [
  "anyhow",
  "chrono",
@@ -2621,7 +2621,7 @@ dependencies = [
 
 [[package]]
 name = "task-journal-mcp"
-version = "0.22.1"
+version = "0.23.0"
 dependencies = [
  "anyhow",
  "chrono",
diff --git a/Cargo.toml b/Cargo.toml
index f9c65b5..acf8fae 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,7 +7,7 @@ members = [
 ]
 
 [workspace.package]
-version = "0.22.1"
+version = "0.23.0"
 edition = "2021"
 rust-version = "1.88"
 license = "MIT"
diff --git a/crates/tj-cli/Cargo.toml b/crates/tj-cli/Cargo.toml
index 1ef884f..6f3ce8b 100644
--- a/crates/tj-cli/Cargo.toml
+++ b/crates/tj-cli/Cargo.toml
@@ -23,7 +23,7 @@ default = ["embed"]
 embed = ["tj-core/embed"]
 
 [dependencies]
-tj-core = { package = "task-journal-core", version = "0.22.1", path = "../tj-core", default-features = false }
+tj-core = { package = "task-journal-core", version = "0.23.0", path = "../tj-core", default-features = false }
 anyhow = { workspace = true }
 clap = { workspace = true }
 tracing = { workspace = true }
diff --git a/crates/tj-cli/src/main.rs b/crates/tj-cli/src/main.rs
index 3894399..28d49a0 100644
--- a/crates/tj-cli/src/main.rs
+++ b/crates/tj-cli/src/main.rs
@@ -871,21 +871,22 @@ enum Commands {
         #[arg(long)]
         backend: Option<String>,
     },
-    /// Finalize a task: enrich its memory from the sessions it touched, fix a
-    /// junk auto-title, and close it IF the events clearly show it is done —
-    /// the model decides from the content. Omit the id to finalize every open
-    /// task in the project (batch, with a reviewable list). One LLM call per
-    /// session for enrich + one judge call per task, via the chosen backend
-    /// (free with `--backend ollama`).
+    /// Finalize a task: fix a junk auto-title and close it IF the events
+    /// clearly show it is done — the model decides from the content, in
+    /// seconds. Omit the id to finalize every open task (batch, with a
+    /// reviewable list). Add `--enrich` to also re-read the task's sessions and
+    /// backfill missed events first — thorough but slow (one `claude -p` call
+    /// per session; minutes on a big multi-session task).
     Complete {
         /// The task id to finalize. Omit to finalize all open tasks (batch).
         task: Option<String>,
         /// Show scope and planned actions without calling the model or writing.
         #[arg(long)]
         dry_run: bool,
-        /// Skip the (heavy) enrich pass; judge/retitle/close from stored events only.
+        /// Also backfill missed events from the task's sessions before judging.
+        /// Thorough but slow (one `claude -p` call per session).
         #[arg(long)]
-        quick: bool,
+        enrich: bool,
         /// Required for batch finalize when stdin is not an interactive terminal.
         #[arg(long)]
         yes: bool,
@@ -2784,12 +2785,12 @@ fn main() -> Result<()> {
         Commands::Complete {
             task,
             dry_run,
-            quick,
+            enrich,
             yes,
             backend,
         } => match task {
-            Some(id) => run_complete_single(&id, dry_run, quick, backend.as_deref())?,
-            None => run_complete_batch(dry_run, quick, yes, backend.as_deref())?,
+            Some(id) => run_complete_single(&id, dry_run, enrich, backend.as_deref())?,
+            None => run_complete_batch(dry_run, enrich, yes, backend.as_deref())?,
         },
         Commands::Export {
             format,
@@ -4153,6 +4154,14 @@ fn enrich_task(
     if sessions.is_empty() {
         return Ok(0);
     }
+    // Enrich is the slow part — one (or more, for big transcripts) `claude -p`
+    // call per session. Announce it so a multi-minute run doesn't look hung;
+    // it only runs under `--enrich`, so dropping that flag skips it entirely.
+    eprintln!(
+        "complete: enriching {} session(s) via {} — can take a few minutes (omit --enrich to skip)…",
+        sessions.len(),
+        llm.name()
+    );
     let run_id = ulid::Ulid::new().to_string();
     let dream_backend = tj_core::dream::llm_backend::LlmDreamBackend::new(llm);
     let opts = tj_core::dream::DreamOptions {
@@ -4206,7 +4215,7 @@ fn task_event_lines(conn: &rusqlite::Connection, task_id: &str) -> anyhow::Resul
 fn finalize_one_task(
     ctx: &ProjectCtx<'_>,
     task_id: &str,
-    quick: bool,
+    enrich: bool,
     dry_run: bool,
     backend: Option<&str>,
 ) -> anyhow::Result<FinalizeOutcome> {
@@ -4215,8 +4224,9 @@ fn finalize_one_task(
     let events_path = ctx.events_path;
     let project_hash = ctx.project_hash;
 
-    // 1. Enrich (unless quick / dry-run) — needs sessions and a backend.
-    if !quick && !dry_run {
+    // 1. Enrich (only when asked, and not on a dry-run) — needs sessions and a
+    // backend. Off by default because it is slow (one claude -p per session).
+    if enrich && !dry_run {
         if let Some(dir) = ctx.project_dir {
             if let Some(llm) = tj_core::llm::backend_from_env(backend)? {
                 out.enriched = enrich_task(conn, events_path, project_hash, dir, task_id, llm)?;
@@ -4331,7 +4341,7 @@ PATH; or pick one via --backend / TJ_BACKEND: anthropic, openai, ollama (free, l
 fn run_complete_single(
     task_id: &str,
     dry_run: bool,
-    quick: bool,
+    enrich: bool,
     backend: Option<&str>,
 ) -> anyhow::Result<()> {
     let cwd = std::env::current_dir()?;
@@ -4352,7 +4362,7 @@ fn run_complete_single(
         project_hash: &project_hash,
         project_dir: project_dir.as_deref(),
     };
-    let out = finalize_one_task(&ctx, task_id, quick, dry_run, backend)?;
+    let out = finalize_one_task(&ctx, task_id, enrich, dry_run, backend)?;
     print_finalize_outcome(task_id, &out);
     Ok(())
 }
@@ -4361,7 +4371,7 @@ fn run_complete_single(
 /// user can prune before confirming. Refuses without a TTY unless `--yes`.
 fn run_complete_batch(
     dry_run: bool,
-    quick: bool,
+    enrich: bool,
     yes: bool,
     backend: Option<&str>,
 ) -> anyhow::Result<()> {
@@ -4417,7 +4427,7 @@ fn run_complete_batch(
     if dry_run {
         println!();
         for (id, _) in &open {
-            finalize_one_task(&ctx, id, quick, true, backend)?;
+            finalize_one_task(&ctx, id, enrich, true, backend)?;
         }
         return Ok(());
     }
@@ -4457,7 +4467,11 @@ fn run_complete_batch(
         println!(
             "\nWill finalize {} task(s){}. Proceed? [y/N]",
             targets.len(),
-            if quick { " (quick: no enrich)" } else { "" }
+            if enrich {
+                " (with --enrich: slow, reads sessions)"
+            } else {
+                ""
+            }
         );
         let mut buf = String::new();
         std::io::stdin().read_line(&mut buf)?;
@@ -4469,7 +4483,7 @@ fn run_complete_batch(
 
     let mut left_open: Vec<(String, String)> = Vec::new();
     for (id, _) in &targets {
-        let out = finalize_one_task(&ctx, id, quick, false, backend)?;
+        let out = finalize_one_task(&ctx, id, enrich, false, backend)?;
         print_finalize_outcome(id, &out);
         if out.skipped_no_backend {
             println!("complete: stopping batch — no LLM backend available.");
diff --git a/crates/tj-cli/tests/cli.rs b/crates/tj-cli/tests/cli.rs
index c90699c..645b804 100644
--- a/crates/tj-cli/tests/cli.rs
+++ b/crates/tj-cli/tests/cli.rs
@@ -5549,10 +5549,10 @@ fn complete_batch_dry_run_lists_open_tasks() {
 /// `claude` on PATH returning a canned judgment. Proves the wiring: junk
 /// title → Rename, done verdict → Close with a persisted outcome. Unix-only
 /// (shell-script stub); the logic itself is covered cross-platform by the
-/// finalize.rs unit tests.
+/// finalize.rs unit tests. Default mode (judge-only, no `--enrich`).
 #[cfg(unix)]
 #[test]
-fn complete_quick_retitles_and_closes_via_fake_backend() {
+fn complete_retitles_and_closes_via_fake_backend() {
     use std::os::unix::fs::PermissionsExt;
 
     let dir = assert_fs::TempDir::new().unwrap();
@@ -5609,14 +5609,14 @@ fn complete_quick_retitles_and_closes_via_fake_backend() {
     .trim()
     .to_string();
 
-    // --quick: skip enrich (no sessions), exercise judge → retitle → close.
+    // Default mode (judge-only): exercise judge → retitle → close.
     Command::cargo_bin("task-journal")
         .unwrap()
         .current_dir(proj.path())
         .env("XDG_DATA_HOME", dir.path())
         .env("PATH", &path_env)
         .env_remove("ANTHROPIC_API_KEY")
-        .args(["complete", &task_id, "--quick"])
+        .args(["complete", &task_id])
         .assert()
         .success()
         .stdout(contains("retitled"))
diff --git a/crates/tj-core/src/classifier/agent_sdk.rs b/crates/tj-core/src/classifier/agent_sdk.rs
index 69a5b43..239489d 100644
--- a/crates/tj-core/src/classifier/agent_sdk.rs
+++ b/crates/tj-core/src/classifier/agent_sdk.rs
@@ -93,12 +93,71 @@ fn claude_exit_error(
     anyhow!("`claude -p` exited with {status}: {detail}")
 }
 
+/// Wall-clock limit for one `claude -p` invocation, used as the deadline when
+/// waiting on the spawned process. Defaults to 90 seconds; set
+/// `TJ_CLAUDE_TIMEOUT_SECS` to a positive whole number of seconds to override.
+/// An unset, unparseable, or zero value falls back to the default.
+fn claude_timeout() -> std::time::Duration {
+    const DEFAULT_SECS: u64 = 90;
+    let configured = match std::env::var("TJ_CLAUDE_TIMEOUT_SECS") {
+        Ok(raw) => raw.parse::<u64>().ok().filter(|&secs| secs != 0),
+        Err(_) => None,
+    };
+    std::time::Duration::from_secs(configured.unwrap_or(DEFAULT_SECS))
+}
+
+/// Wait for `child` to exit, giving up once `timeout` has elapsed.
+///
+/// stdout and stderr are drained on helper threads while the exit status is
+/// polled, so a child that fills a pipe can't deadlock the wait. On exit the
+/// captured output is returned. On timeout the child is killed and reaped and
+/// an error is returned; a failed poll errors with "failed to wait for `claude`".
+fn wait_with_timeout(
+    mut child: std::process::Child,
+    timeout: std::time::Duration,
+) -> anyhow::Result<std::process::Output> {
+    use std::io::Read;
+    // Read one pipe to EOF on its own thread; a missing pipe or a read error
+    // just yields whatever bytes were collected up to that point.
+    fn drain(pipe: Option<impl Read + Send + 'static>) -> std::thread::JoinHandle<Vec<u8>> {
+        std::thread::spawn(move || {
+            let mut buf = Vec::new();
+            if let Some(mut p) = pipe {
+                let _ = p.read_to_end(&mut buf);
+            }
+            buf
+        })
+    }
+    let so = drain(child.stdout.take());
+    let se = drain(child.stderr.take());
+    let start = std::time::Instant::now();
+    let status = loop {
+        if let Some(status) = child.try_wait().context("failed to wait for `claude`")? {
+            break status;
+        }
+        if start.elapsed() >= timeout {
+            let _ = child.kill();
+            let _ = child.wait();
+            anyhow::bail!("`claude -p` timed out after {}s", timeout.as_secs());
+        }
+        std::thread::sleep(std::time::Duration::from_millis(150));
+    };
+    Ok(std::process::Output {
+        status,
+        stdout: so.join().unwrap_or_default(),
+        stderr: se.join().unwrap_or_default(),
+    })
+}
+
 impl CommandRunner for ClaudeBinaryRunner {
     fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String> {
-        let output = base_claude_command(model)
+        let child = base_claude_command(model)
             .arg(prompt)
-            .output()
+            .stdout(std::process::Stdio::piped())
+            .stderr(std::process::Stdio::piped())
+            .spawn()
             .context("failed to spawn `claude` (is Claude Code installed and on PATH?)")?;
+        let output = wait_with_timeout(child, claude_timeout())?;
         if !output.status.success() {
             return Err(claude_exit_error(
                 output.status,
@@ -135,9 +194,7 @@ impl CommandRunner for ClaudeBinaryStdinRunner {
             .context("claude stdin was not captured")?
             .write_all(prompt.as_bytes())
             .context("failed to write prompt to claude stdin")?;
-        let output = child
-            .wait_with_output()
-            .context("failed to wait for `claude`")?;
+        let output = wait_with_timeout(child, claude_timeout())?;
         if !output.status.success() {
             return Err(claude_exit_error(
                 output.status,
diff --git a/crates/tj-core/src/dream/llm_backend.rs b/crates/tj-core/src/dream/llm_backend.rs
index 7ad63fc..11e1042 100644
--- a/crates/tj-core/src/dream/llm_backend.rs
+++ b/crates/tj-core/src/dream/llm_backend.rs
@@ -25,10 +25,13 @@ impl LlmDreamBackend {
 }
 
 /// Max transcript characters fed to the model in one call. The hard wall is
-/// the ~200k-token context limit (a real session hit ~220k tokens and `claude
-/// -p` returned HTTP 400). We stay well under it and split oversized
-/// transcripts across several calls, merging the events (run_dream dedups).
-const TRANSCRIPT_CHAR_BUDGET: usize = 360_000;
+/// the ~200k-token context window, but `claude -p` is a full Claude Code
+/// instance: its system prompt + tool definitions alone cost ~113k tokens
+/// before our content (measured: a 360k-char chunk was ~91k tokens, yet the
+/// request totalled ~204k and 400'd). So the usable budget is far below the
+/// nominal limit — keep each chunk well under it (~37k tokens) and split the
+/// rest across calls, merging the events (run_dream dedups).
+const TRANSCRIPT_CHAR_BUDGET: usize = 150_000;
 
 impl DreamBackend for LlmDreamBackend {
     fn backfill(&self, input: &BackfillInput) -> anyhow::Result<Vec<BackfillEvent>> {
@@ -39,8 +42,19 @@ impl DreamBackend for LlmDreamBackend {
                 transcript: chunk,
             };
             let prompt = crate::dream::prompt::build_prompt(&chunk_input);
-            let text = self.llm.complete(&prompt, 1024)?;
-            out.extend(parse_backfill_json(&text)?);
+            // Backfill is strictly best-effort: ANY per-chunk failure — an
+            // over-budget 400, a transient backend error, or a non-JSON reply
+            // (model continued the transcript dialogue) — is skipped, never
+            // aborting the finalize. A genuinely broken backend still surfaces
+            // at the judge step, which has its own (small, always-sized) call.
+            match self
+                .llm
+                .complete(&prompt, 1024)
+                .and_then(|text| parse_backfill_json(&text))
+            {
+                Ok(evs) => out.extend(evs),
+                Err(e) => tracing::warn!(error = %e, "dream backfill: skipping chunk"),
+            }
         }
         Ok(out)
     }
@@ -86,8 +100,12 @@ pub fn parse_backfill_json(text: &str) -> anyhow::Result<Vec<BackfillEvent>> {
         .trim_start_matches("```")
         .trim_end_matches("```")
         .trim();
-    serde_json::from_str(json_str)
-        .with_context(|| format!("dream JSON parse failed; got: {json_str}"))
+    // Tolerate a JSON array wrapped in prose by slicing to the outer brackets.
+    let slice = match (json_str.find('['), json_str.rfind(']')) {
+        (Some(a), Some(b)) if b > a => &json_str[a..=b],
+        _ => json_str,
+    };
+    serde_json::from_str(slice).with_context(|| format!("dream JSON parse failed; got: {json_str}"))
 }
 
 #[cfg(test)]
@@ -111,6 +129,67 @@ mod tests {
         assert!(parse_backfill_json("[]").unwrap().is_empty());
     }
 
+    #[test]
+    fn parse_extracts_array_wrapped_in_prose() {
+        // Chatter before and after the array must not stop it being parsed.
+        let reply = "Here are the missed events:\n[{\"event_type\":\"finding\",\
+\"task_id\":\"tj-1\",\"text\":\"found\",\"timestamp\":\"2026-06-13T00:00:00Z\"}]\nHope that helps!";
+        assert_eq!(parse_backfill_json(reply).unwrap().len(), 1);
+    }
+
+    #[test]
+    fn parse_errors_on_pure_prose() {
+        // A reply with no array at all must be Err so the backfill loop skips it.
+        let outcome = parse_backfill_json("Контекст в норме. Что дальше?");
+        assert!(outcome.is_err());
+    }
+
+    #[test]
+    fn backfill_skips_unparseable_chunk_reply() {
+        // Stub backend that always answers with chat prose instead of JSON.
+        struct ChattyLlm;
+        impl LlmBackend for ChattyLlm {
+            fn complete(&self, _prompt: &str, _max: u32) -> anyhow::Result<String> {
+                Ok(String::from("Контекст в норме. 566.5k/1M использовано. Что дальше?"))
+            }
+            fn name(&self) -> &'static str {
+                "chatty"
+            }
+        }
+        let dream = LlmDreamBackend::new(Box::new(ChattyLlm));
+        let input = BackfillInput {
+            tasks: Vec::new(),
+            transcript: "user: hi\nassistant: hello".into(),
+        };
+        // The reply can't be parsed, yet backfill must return Ok with no events
+        // so the surrounding finalize (retitle/close) still runs.
+        assert!(dream.backfill(&input).unwrap().is_empty());
+    }
+
+    #[test]
+    fn backfill_skips_chunk_whose_call_errors() {
+        // Stub backend whose every call fails, standing in for an over-budget
+        // 400 or a transient backend error.
+        struct FailingLlm;
+        impl LlmBackend for FailingLlm {
+            fn complete(&self, _prompt: &str, _max: u32) -> anyhow::Result<String> {
+                anyhow::bail!("`claude -p` exited with status 1: Prompt is too long")
+            }
+            fn name(&self) -> &'static str {
+                "failing"
+            }
+        }
+        let dream = LlmDreamBackend::new(Box::new(FailingLlm));
+        let input = BackfillInput {
+            tasks: Vec::new(),
+            transcript: "user: hi\nassistant: hello".into(),
+        };
+        // The failed call is swallowed: backfill still returns Ok, with no
+        // events, so the surrounding finalize (retitle/close) still runs.
+        let events = dream.backfill(&input).unwrap();
+        assert!(events.is_empty());
+    }
+
     #[test]
     fn small_transcript_is_one_chunk() {
         let c = chunk_transcript("a\nb\nc\n", 100);
diff --git a/crates/tj-core/src/dream/prompt.rs b/crates/tj-core/src/dream/prompt.rs
index 210cfc4..9c1f6c1 100644
--- a/crates/tj-core/src/dream/prompt.rs
+++ b/crates/tj-core/src/dream/prompt.rs
@@ -34,7 +34,10 @@ pub fn build_prompt(input: &BackfillInput) -> String {
          - Respond with ONLY a JSON array of objects: \
          {{\"event_type\",\"task_id\",\"text\",\"timestamp\"}}. Empty array if nothing missed.\n\n\
          # Candidate tasks and their existing events\n{tasks}\n\
-         # Transcript\n{transcript}\n",
+         # Transcript\n{transcript}\n\n\
+         Remember: output ONLY the JSON array of missed events described above. \
+         Do NOT reply to, summarise, or continue the transcript; if nothing was \
+         missed, output [].\n",
         types = ALLOWED_TYPES,
         tasks = tasks_block,
         transcript = input.transcript,
diff --git a/crates/tj-mcp/Cargo.toml b/crates/tj-mcp/Cargo.toml
index 906afaf..ac459ec 100644
--- a/crates/tj-mcp/Cargo.toml
+++ b/crates/tj-mcp/Cargo.toml
@@ -17,7 +17,7 @@ path = "src/main.rs"
 
 [dependencies]
 # Lean: the MCP server doesn't embed yet, so it skips the model2vec backend.
-tj-core = { package = "task-journal-core", version = "0.22.1", path = "../tj-core", default-features = false }
+tj-core = { package = "task-journal-core", version = "0.23.0", path = "../tj-core", default-features = false }
 anyhow = { workspace = true }
 tokio = { workspace = true }
 tracing = { workspace = true }
diff --git a/plugin/.claude-plugin/plugin.json b/plugin/.claude-plugin/plugin.json
index bd04d5c..79f4891 100644
--- a/plugin/.claude-plugin/plugin.json
+++ b/plugin/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "task-journal",
-  "version": "0.22.1",
+  "version": "0.23.0",
   "description": "Append-only journal of AI-coding task reasoning chains: hypotheses, decisions, rejections, evidence. Renders compact resume packs so an agent can pick up a 2-week-old task with full context.",
   "author": {
     "name": "Mher Shahinyan"