Digital-Threads · Shahinyanm · Jun 13, 2026 · Jun 13, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.24.1] - 2026-06-13
+
+### Changed
+- **Cheaper, honest `complete` stats.** One-shot `claude -p` calls now pass
+  `--disallowed-tools` (we never use tools), keeping the built-in tool schemas
+  out of the prompt and roughly halving the harness overhead. The stats line now
+  leads with the dollar cost `claude -p` reports (its token counts are muddy —
+  a big prompt lands in `cache_creation`, not `input_tokens`) and shows clean
+  token counts only for API backends; token sizes scale to `M`. When a
+  cost-reporting backend is used, a one-line tip points at `--backend anthropic`
+  (direct Haiku API, ~50× cheaper per task by skipping Claude Code's overhead)
+  or `--backend ollama` (free, local).
+
 ## [0.24.0] - 2026-06-13
 
 ### Added

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -7,7 +7,7 @@ members = [
 ]
 
 [workspace.package]
-version = "0.24.0"
+version = "0.24.1"
 edition = "2021"
 rust-version = "1.88"
 license = "MIT"

diff --git a/crates/tj-cli/Cargo.toml b/crates/tj-cli/Cargo.toml
@@ -23,7 +23,7 @@ default = ["embed"]
 embed = ["tj-core/embed"]
 
 [dependencies]
-tj-core = { package = "task-journal-core", version = "0.24.0", path = "../tj-core", default-features = false }
+tj-core = { package = "task-journal-core", version = "0.24.1", path = "../tj-core", default-features = false }
 anyhow = { workspace = true }
 clap = { workspace = true }
 tracing = { workspace = true }

diff --git a/crates/tj-cli/src/main.rs b/crates/tj-cli/src/main.rs
@@ -4149,31 +4149,34 @@ fn compute_savings(
     })
 }
 
-/// Format a token count compactly: 980 → "980", 3_240 → "3.2k", 88_000 → "88k".
+/// Format a token count compactly: 980 → "980", 3_240 → "3.2k", 88_000 → "88.0k",
+/// 250_000 → "250k", 2_760_000 → "2.8M".
 fn fmt_tokens(n: u64) -> String {
     if n < 1_000 {
         n.to_string()
     } else if n < 100_000 {
         format!("{:.1}k", n as f64 / 1_000.0)
-    } else {
+    } else if n < 1_000_000 {
         format!("{}k", n / 1_000)
+    } else {
+        format!("{:.1}M", n as f64 / 1_000_000.0)
     }
 }
 
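 /// Human spent/saved suffix for a finalize line, e.g.
-/// " | spent 3.2k tok ($0.0012) · saved ~88k→1.5k tok (59×)".
+/// " | cost $0.0012 · saved ~88.0k→1.5k tok (59×)", or "spent 3.2k tok" when no cost is reported.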
 fn stats_suffix(spent: &tj_core::llm::LlmUsage, saved: &Option<Savings>) -> String {
     let mut parts = Vec::new();
-    if spent.total_tokens() > 0 {
-        let cost = match spent.cost_usd {
-            Some(c) if c > 0.0 => format!(" (${c:.4})"),
-            _ => String::new(),
-        };
-        parts.push(format!(
-            "spent {} tok{}",
-            fmt_tokens(spent.total_tokens()),
-            cost
-        ));
+    // claude -p reports a (notional) dollar cost but muddy token counts — its
+    // big prompt lands in `cache_creation`, not `input_tokens` — so lead with
+    // the cost there. API backends report no cost but clean tokens, so show
+    // those instead.
+    match spent.cost_usd {
+        Some(c) if c > 0.0 => parts.push(format!("cost ${c:.4}")),
+        _ if spent.total_tokens() > 0 => {
+            parts.push(format!("spent {} tok", fmt_tokens(spent.total_tokens())))
+        }
+        _ => {}
     }
     if let Some(s) = saved {
         if s.pack_tokens > 0 && s.raw_tokens > s.pack_tokens {
@@ -4395,6 +4398,21 @@ fn finalize_one_task(
     Ok(out)
 }
 
+/// A one-line nudge shown when a cost-reporting backend (claude -p) was used:
+/// the same Haiku via a direct API skips Claude Code's harness overhead. Only
+/// claude -p reports a non-zero `cost_usd`, so this fires for it alone.
+fn backend_cost_tip(cost: Option<f64>) -> Option<String> {
+    match cost {
+        Some(c) if c > 0.0 => Some(
+            "tip: that cost is claude -p's Claude Code overhead (notional under a \
+subscription). For ~50× cheaper per task, use --backend anthropic (direct Haiku API, \
+needs ANTHROPIC_API_KEY) — or --backend ollama for free, local."
+                .to_string(),
+        ),
+        _ => None,
+    }
+}
+
 /// Human-readable one-liner for a finalize result.
 fn print_finalize_outcome(task_id: &str, out: &FinalizeOutcome) {
     if out.skipped_no_backend {
@@ -4458,6 +4476,9 @@ fn run_complete_single(
     };
     let out = finalize_one_task(&ctx, task_id, enrich, dry_run, backend)?;
     print_finalize_outcome(task_id, &out);
+    if let Some(tip) = backend_cost_tip(out.spent.cost_usd) {
+        eprintln!("{tip}");
+    }
     Ok(())
 }
 
@@ -4604,6 +4625,9 @@ fn run_complete_batch(
             totals.trim_start_matches(" | ")
         );
     }
+    if let Some(tip) = backend_cost_tip(total_spent.cost_usd) {
+        eprintln!("{tip}");
+    }
 
     if !left_open.is_empty() {
         println!("\nLeft open ({}):", left_open.len());
@@ -5682,10 +5706,26 @@ mod inline_tests {
             pack_tokens: 1_500,
         });
         let s = stats_suffix(&spent, &saved);
-        assert!(s.contains("spent 1.5k tok ($0.0012)"), "{s}");
+        // Cost-reporting backend (claude -p) → lead with cost, not muddy tokens.
+        assert!(s.contains("cost $0.0012"), "{s}");
         assert!(s.contains("saved ~90.0k→1.5k tok (60×)"), "{s}");
     }
 
+    #[test]
+    fn stats_suffix_shows_tokens_for_costless_backend() {
+        // API backend reports clean tokens, no cost → show the token count.
+        let spent = tj_core::llm::LlmUsage {
+            input_tokens: 1800,
+            output_tokens: 200,
+            cost_usd: None,
+        };
+        assert_eq!(
+            stats_suffix(&spent, &None),
+            " | spent 2.0k tok",
+            "API backend should show tokens"
+        );
+    }
+
     #[test]
     fn stats_suffix_empty_when_nothing_to_report() {
         let spent = tj_core::llm::LlmUsage::default();

diff --git a/crates/tj-cli/tests/cli.rs b/crates/tj-cli/tests/cli.rs
@@ -5621,7 +5621,7 @@ fn complete_retitles_and_closes_via_fake_backend() {
         .args(["complete", &task_id])
         .assert()
         .success()
-        .stdout(contains("spent 1.5k tok ($0.0012)"))
+        .stdout(contains("cost $0.0012"))
         .stdout(contains("retitled"))
         .stdout(contains("closed"));
 

diff --git a/crates/tj-core/src/classifier/agent_sdk.rs b/crates/tj-core/src/classifier/agent_sdk.rs
@@ -54,10 +54,21 @@ fn base_claude_command(model: &str) -> Command {
         .arg("--output-format")
         .arg("json")
         .arg("--strict-mcp-config")
+        // We never use tools in these one-shot text calls — denying the
+        // built-in toolset keeps their schemas out of the prompt, roughly
+        // halving the harness overhead. (The cache-creation cost floor
+        // remains; for true pennies use a direct API backend.)
+        .arg("--disallowed-tools")
+        .arg(DISABLED_TOOLS)
         .env(IN_CLASSIFIER_ENV, "1");
     cmd
 }
 
+/// Built-in tools denied in our one-shot `claude -p` calls (we only want a text
+/// completion, never tool use). Listed explicitly because there is no wildcard.
+const DISABLED_TOOLS: &str = "Bash Read Edit Write Glob Grep Task WebFetch \
+WebSearch NotebookEdit TodoWrite BashOutput KillBash";
+
 /// Production runner: invokes the local `claude` binary in print mode, pinned
 /// to the given model, asking for the JSON envelope and an isolated MCP config
 /// (`--strict-mcp-config` keeps the project's own MCP servers — including this
@@ -259,10 +270,6 @@ struct EnvelopeUsage {
     input_tokens: u64,
     #[serde(default)]
     output_tokens: u64,
-    #[serde(default)]
-    cache_creation_input_tokens: u64,
-    #[serde(default)]
-    cache_read_input_tokens: u64,
 }
 
 impl Classifier for ClaudeCliClassifier {
@@ -307,8 +314,14 @@ pub fn run_claude_json_usage(
     }
     let u = envelope.usage.unwrap_or_default();
     let usage = crate::llm::LlmUsage {
-        // Count cache reads/writes as input so the total reflects real context.
-        input_tokens: u.input_tokens + u.cache_creation_input_tokens + u.cache_read_input_tokens,
+        // Only the uncached `input_tokens` — cache_read/creation are left out.
+        // Those are mostly the Claude Code system prompt + tool schemas (harness
+        // overhead), but a big prompt of ours can land in `cache_creation` too,
+        // so for claude -p this count is a lower bound, not the real context.
+        // `cost_usd` below still reflects everything (claude computes it with
+        // the cache discount), which is why the CLI stats line leads with the
+        // cost for this backend instead of the token count.
+        input_tokens: u.input_tokens,
         output_tokens: u.output_tokens,
         cost_usd: envelope.total_cost_usd,
     };

diff --git a/crates/tj-mcp/Cargo.toml b/crates/tj-mcp/Cargo.toml
@@ -17,7 +17,7 @@ path = "src/main.rs"
 
 [dependencies]
 # Lean: the MCP server doesn't embed yet, so it skips the model2vec backend.
-tj-core = { package = "task-journal-core", version = "0.24.0", path = "../tj-core", default-features = false }
+tj-core = { package = "task-journal-core", version = "0.24.1", path = "../tj-core", default-features = false }
 anyhow = { workspace = true }
 tokio = { workspace = true }
 tracing = { workspace = true }

diff --git a/plugin/.claude-plugin/plugin.json b/plugin/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "task-journal",
-  "version": "0.24.0",
+  "version": "0.24.1",
   "description": "Append-only journal of AI-coding task reasoning chains: hypotheses, decisions, rejections, evidence. Renders compact resume packs so an agent can pick up a 2-week-old task with full context.",
   "author": {
     "name": "Mher Shahinyan"