Digital-Threads · Shahinyanm · Jun 13, 2026 · Jun 13, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,7 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
-## [0.23.0] - 2026-06-13
+## [0.24.0] - 2026-06-13
+
+### Added
+- **`complete` reports tokens spent and saved.** Each finalize now prints what
+  it cost and what it compresses: `complete tj-x: … | spent 1.5k tok ($0.0012) ·
+  saved ~88.0k→1.5k tok (59×)`. **Spent** is exact, pulled from the backend's own
+  usage report (the `claude -p` JSON envelope's `usage`/`total_cost_usd`,
+  Anthropic/OpenAI `usage`), summed across the judge call and any `--enrich`
+  calls. **Saved** is an estimate of memory compression — the raw transcript
+  size of the task's sessions vs its compact pack (≈ chars/4). A batch run ends
+  with a `Totals across N task(s):` line. Backends expose usage via a new
+  `LlmBackend::complete_usage` method (default: no usage), so custom backends
+  keep working unchanged.
 
 Finalize, retuned after running `complete` on real 12-session tasks: the fast,
 reliable judge-only path is now the default, and the slow session-enrich pass is

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -7,7 +7,7 @@ members = [
 ]
 
 [workspace.package]
-version = "0.23.0"
+version = "0.24.0"
 edition = "2021"
 rust-version = "1.88"
 license = "MIT"

diff --git a/crates/tj-cli/Cargo.toml b/crates/tj-cli/Cargo.toml
@@ -23,7 +23,7 @@ default = ["embed"]
 embed = ["tj-core/embed"]
 
 [dependencies]
-tj-core = { package = "task-journal-core", version = "0.23.0", path = "../tj-core", default-features = false }
+tj-core = { package = "task-journal-core", version = "0.24.0", path = "../tj-core", default-features = false }
 anyhow = { workspace = true }
 clap = { workspace = true }
 tracing = { workspace = true }

diff --git a/crates/tj-cli/src/main.rs b/crates/tj-cli/src/main.rs
@@ -4108,6 +4108,89 @@ struct FinalizeOutcome {
     reason: String,
     /// True when no LLM backend was available — nothing was judged or written.
     skipped_no_backend: bool,
+    /// Exact token usage spent on this task (judge + any enrich calls).
+    spent: tj_core::llm::LlmUsage,
+    /// Estimated memory compression: raw session tokens → compact pack tokens.
+    saved: Option<Savings>,
+}
+
+/// Memory-compression estimate: raw session tokens vs compact pack tokens (≈ chars / 4).
+#[derive(Default, Clone, Copy)]
+struct Savings {
+    raw_tokens: u64,
+    pack_tokens: u64,
+}
+
+/// ~tokens from a char count, rounded up (a rough 4-chars-per-token estimate —
+/// enough for an order-of-magnitude "how much memory this compresses" signal).
+fn est_tokens(chars: usize) -> u64 {
+    (chars as u64).div_ceil(4)
+}
+
+/// Estimate how much raw session material a task's compact pack stands in for:
+/// the summed transcript `len()` of the task's sessions vs the pack text `len()`.
+/// `None` if there is no project dir, no sessions, or sessions/pack fail to load.
+fn compute_savings(
+    conn: &rusqlite::Connection,
+    events_path: &std::path::Path,
+    project_dir: Option<&std::path::Path>,
+    task_id: &str,
+) -> Option<Savings> {
+    let dir = project_dir?;
+    let sessions = task_sessions(events_path, dir, task_id).ok()?;
+    if sessions.is_empty() {
+        return None;
+    }
+    let raw_chars: usize = sessions.iter().map(|(_, inp)| inp.transcript.len()).sum();
+    let pack = tj_core::pack::assemble(conn, task_id, tj_core::pack::PackMode::Compact).ok()?;
+    Some(Savings {
+        raw_tokens: est_tokens(raw_chars),
+        pack_tokens: est_tokens(pack.text.len()),
+    })
+}
+
+/// Compact token count: 980 → "980", 3_240 → "3.2k", 88_000 → "88.0k", 204_000 → "204k".
+fn fmt_tokens(n: u64) -> String {
+    if n < 1_000 {
+        n.to_string()
+    } else if n < 100_000 {
+        format!("{:.1}k", n as f64 / 1_000.0)
+    } else {
+        format!("{}k", n / 1_000)
+    }
+}
+
+/// Human spent/saved suffix for a finalize line ("" if nothing to report), e.g.
+/// " | spent 3.2k tok ($0.0012) · saved ~88.0k→1.5k tok (59×)".
+fn stats_suffix(spent: &tj_core::llm::LlmUsage, saved: &Option<Savings>) -> String {
+    let mut parts = Vec::new();
+    if spent.total_tokens() > 0 {
+        let cost = match spent.cost_usd {
+            Some(c) if c > 0.0 => format!(" (${c:.4})"),
+            _ => String::new(),
+        };
+        parts.push(format!(
+            "spent {} tok{}",
+            fmt_tokens(spent.total_tokens()),
+            cost
+        ));
+    }
+    if let Some(s) = saved {
+        if s.pack_tokens > 0 && s.raw_tokens > s.pack_tokens {
+            let factor = s.raw_tokens as f64 / s.pack_tokens as f64;
+            parts.push(format!(
+                "saved ~{}→{} tok ({:.0}×)",
+                fmt_tokens(s.raw_tokens),
+                fmt_tokens(s.pack_tokens),
+                factor
+            ));
+        }
+    }
+    if parts.is_empty() {
+        String::new()
+    } else {
+        format!(" | {}", parts.join(" · "))
+    }
 }
 
 /// Per-project handles threaded through the finalize helpers.
@@ -4149,10 +4232,10 @@ fn enrich_task(
     project_dir: &std::path::Path,
     task_id: &str,
     llm: Box<dyn tj_core::llm::LlmBackend>,
-) -> anyhow::Result<usize> {
+) -> anyhow::Result<(usize, tj_core::llm::LlmUsage)> {
     let sessions = task_sessions(events_path, project_dir, task_id)?;
     if sessions.is_empty() {
-        return Ok(0);
+        return Ok((0, tj_core::llm::LlmUsage::default()));
     }
     // Enrich is the slow part — one (or more, for big transcripts) `claude -p`
     // call per session. Announce it so a multi-minute run doesn't look hung;
@@ -4170,7 +4253,7 @@ fn enrich_task(
     };
     let report =
         tj_core::dream::run_dream(conn, events_path, &opts, &dream_backend, sessions, &run_id)?;
-    Ok(report.events_backfilled)
+    Ok((report.events_backfilled, dream_backend.usage()))
 }
 
 /// Current title for a task ("" if somehow unset).
@@ -4229,7 +4312,10 @@ fn finalize_one_task(
     if enrich && !dry_run {
         if let Some(dir) = ctx.project_dir {
             if let Some(llm) = tj_core::llm::backend_from_env(backend)? {
-                out.enriched = enrich_task(conn, events_path, project_hash, dir, task_id, llm)?;
+                let (n, enrich_usage) =
+                    enrich_task(conn, events_path, project_hash, dir, task_id, llm)?;
+                out.enriched = n;
+                out.spent.add(enrich_usage);
                 tj_core::db::ingest_new_events(conn, events_path, project_hash)?;
             }
         }
@@ -4256,7 +4342,8 @@ fn finalize_one_task(
         out.skipped_no_backend = true;
         return Ok(out);
     };
-    let j = tj_core::finalize::judge(&title, &lines, judge_backend.as_ref())?;
+    let (j, judge_usage) = tj_core::finalize::judge(&title, &lines, judge_backend.as_ref())?;
+    out.spent.add(judge_usage);
     out.done = j.done;
     out.reason = j.reason.clone();
 
@@ -4302,6 +4389,9 @@ fn finalize_one_task(
 
     writer.flush_durable()?;
     tj_core::db::ingest_new_events(conn, events_path, project_hash)?;
+
+    // 6. Estimate the memory compression this finalize represents.
+    out.saved = compute_savings(conn, events_path, ctx.project_dir, task_id);
     Ok(out)
 }
 
@@ -4334,7 +4424,11 @@ PATH; or pick one via --backend / TJ_BACKEND: anthropic, openai, ollama (free, l
     if parts.is_empty() {
         parts.push("no change".to_string());
     }
-    println!("complete {task_id}: {}", parts.join("; "));
+    println!(
+        "complete {task_id}: {}{}",
+        parts.join("; "),
+        stats_suffix(&out.spent, &out.saved)
+    );
 }
 
 /// `complete <id>` — finalize a single task.
@@ -4482,18 +4576,35 @@ fn run_complete_batch(
     }
 
     let mut left_open: Vec<(String, String)> = Vec::new();
+    let mut total_spent = tj_core::llm::LlmUsage::default();
+    let mut total_saved = Savings::default();
+    let mut done_count = 0usize;
     for (id, _) in &targets {
         let out = finalize_one_task(&ctx, id, enrich, false, backend)?;
         print_finalize_outcome(id, &out);
         if out.skipped_no_backend {
             println!("complete: stopping batch — no LLM backend available.");
             return Ok(());
         }
+        total_spent.add(out.spent);
+        if let Some(s) = out.saved {
+            total_saved.raw_tokens += s.raw_tokens;
+            total_saved.pack_tokens += s.pack_tokens;
+        }
+        done_count += 1;
         if !out.closed {
             left_open.push((id.clone(), out.reason.clone()));
         }
     }
 
+    let totals = stats_suffix(&total_spent, &Some(total_saved));
+    if !totals.is_empty() {
+        println!(
+            "\nTotals across {done_count} task(s): {}",
+            totals.trim_start_matches(" | ")
+        );
+    }
+
     if !left_open.is_empty() {
         println!("\nLeft open ({}):", left_open.len());
         for (id, reason) in &left_open {
@@ -5551,6 +5662,43 @@ mod inline_tests {
     // declared before this module begins.
     use super::*;
 
+    #[test]
+    fn fmt_tokens_scales_units() {
+        assert_eq!(fmt_tokens(980), "980");
+        assert_eq!(fmt_tokens(1_500), "1.5k");
+        assert_eq!(fmt_tokens(88_000), "88.0k");
+        assert_eq!(fmt_tokens(204_000), "204k");
+    }
+
+    #[test]
+    fn stats_suffix_shows_spent_and_saved() {
+        let spent = tj_core::llm::LlmUsage {
+            input_tokens: 1200,
+            output_tokens: 300,
+            cost_usd: Some(0.0012),
+        };
+        let saved = Some(Savings {
+            raw_tokens: 90_000,
+            pack_tokens: 1_500,
+        });
+        let s = stats_suffix(&spent, &saved);
+        assert!(s.contains("spent 1.5k tok ($0.0012)"), "{s}");
+        assert!(s.contains("saved ~90.0k→1.5k tok (60×)"), "{s}");
+    }
+
+    #[test]
+    fn stats_suffix_empty_when_nothing_to_report() {
+        let spent = tj_core::llm::LlmUsage::default();
+        assert_eq!(stats_suffix(&spent, &None), "");
+        // Cost omitted when zero/None; tokens still shown.
+        let spent = tj_core::llm::LlmUsage {
+            input_tokens: 500,
+            output_tokens: 0,
+            cost_usd: None,
+        };
+        assert_eq!(stats_suffix(&spent, &None), " | spent 500 tok");
+    }
+
     #[test]
     fn nudge_escalates_only_for_substantial_thin_sessions() {
         // Small session → never escalate, regardless of capture.

diff --git a/crates/tj-cli/tests/cli.rs b/crates/tj-cli/tests/cli.rs
@@ -5563,6 +5563,8 @@ fn complete_retitles_and_closes_via_fake_backend() {
     // whose `result` field is the finalize JSON string.
     let envelope = serde_json::json!({
         "is_error": false,
+        "usage": {"input_tokens": 1200, "output_tokens": 300},
+        "total_cost_usd": 0.0012,
         "result": serde_json::json!({
             "retitle": true,
             "title": "Voucher refund: paid 100% but got 50%",
@@ -5619,6 +5621,7 @@ fn complete_retitles_and_closes_via_fake_backend() {
         .args(["complete", &task_id])
         .assert()
         .success()
+        .stdout(contains("spent 1.5k tok ($0.0012)"))
         .stdout(contains("retitled"))
         .stdout(contains("closed"));
 

diff --git a/crates/tj-core/src/classifier/agent_sdk.rs b/crates/tj-core/src/classifier/agent_sdk.rs
@@ -236,9 +236,9 @@ impl ClaudeCliClassifier {
     }
 }
 
-/// The JSON wrapper emitted by `claude --output-format json`. We only need the
-/// error flag and the `result` string (the model's verdict text); the rest of
-/// the envelope (usage, cost, timings) is ignored.
+/// The JSON wrapper emitted by `claude --output-format json`. We read the error
+/// flag, the `result` string (the model's verdict text), and the usage/cost so
+/// callers can report what a call actually consumed.
 #[derive(serde::Deserialize)]
 struct CliEnvelope {
     #[serde(default)]
@@ -247,6 +247,22 @@ struct CliEnvelope {
     result: Option<String>,
     #[serde(default)]
     subtype: Option<String>,
+    #[serde(default)]
+    usage: Option<EnvelopeUsage>,
+    #[serde(default)]
+    total_cost_usd: Option<f64>,
+}
+
+#[derive(serde::Deserialize, Default)]
+struct EnvelopeUsage {
+    #[serde(default)]
+    input_tokens: u64,
+    #[serde(default)]
+    output_tokens: u64,
+    #[serde(default)]
+    cache_creation_input_tokens: u64,
+    #[serde(default)]
+    cache_read_input_tokens: u64,
 }
 
 impl Classifier for ClaudeCliClassifier {
@@ -266,6 +282,16 @@ pub fn run_claude_json(
     model: &str,
     prompt: &str,
 ) -> anyhow::Result<String> {
+    run_claude_json_usage(runner, model, prompt).map(|(text, _)| text)
+}
+
+/// Like [`run_claude_json`] but also returns the envelope's reported token
+/// usage and cost (zero tokens / `None` cost when the envelope omits them).
+pub fn run_claude_json_usage(
+    runner: &dyn CommandRunner,
+    model: &str,
+    prompt: &str,
+) -> anyhow::Result<(String, crate::llm::LlmUsage)> {
     let stdout = runner.run(model, prompt)?;
     let envelope: CliEnvelope = serde_json::from_str(stdout.trim()).with_context(|| {
         format!(
@@ -279,9 +305,17 @@ pub fn run_claude_json(
             envelope.subtype.as_deref().unwrap_or("unknown")
         ));
     }
-    envelope
+    let u = envelope.usage.unwrap_or_default();
+    let usage = crate::llm::LlmUsage {
+        // Count cache reads/writes as input so the total reflects real context.
+        input_tokens: u.input_tokens + u.cache_creation_input_tokens + u.cache_read_input_tokens,
+        output_tokens: u.output_tokens,
+        cost_usd: envelope.total_cost_usd,
+    };
+    let result = envelope
         .result
-        .ok_or_else(|| anyhow!("claude json wrapper had no `result` field"))
+        .ok_or_else(|| anyhow!("claude json wrapper had no `result` field"))?;
+    Ok((result, usage))
 }
 
 /// Probe whether `claude` resolves on PATH and runs. Cheap (`--version` does