From 04aa1946911d1888da0733cc145372f95ac9bef3 Mon Sep 17 00:00:00 2001 From: Mher Shahinyan Date: Sat, 13 Jun 2026 22:19:47 +0400 Subject: [PATCH 1/2] fix(complete): cheaper claude -p calls + honest cost-led stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One-shot claude -p calls now pass --disallowed-tools (we never use tools), keeping built-in tool schemas out of the prompt — roughly halves the harness overhead. Stats lead with the real dollar cost for claude -p (its token counts are muddy: a big prompt lands in cache_creation, not input_tokens) and show clean tokens only for API backends; sizes scale to M. A tip points at --backend anthropic (~50× cheaper) / ollama (free) when a cost-reporting backend is used. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 13 +++++ Cargo.lock | 6 +- Cargo.toml | 2 +- crates/tj-cli/Cargo.toml | 2 +- crates/tj-cli/src/main.rs | 66 +++++++++++++++++----- crates/tj-cli/tests/cli.rs | 2 +- crates/tj-core/src/classifier/agent_sdk.rs | 25 ++++++-- crates/tj-mcp/Cargo.toml | 2 +- plugin/.claude-plugin/plugin.json | 2 +- 9 files changed, 93 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9df63b6..fa141a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.24.1] - 2026-06-13 + +### Changed +- **Cheaper, honest `complete` stats.** One-shot `claude -p` calls now pass + `--disallowed-tools` (we never use tools), keeping the built-in tool schemas + out of the prompt and roughly halving the harness overhead. The stats line now + leads with the real dollar cost for `claude -p` (whose token counts are muddy — + a big prompt lands in `cache_creation`, not `input_tokens`) and shows clean + token counts only for API backends; token sizes scale to `M`. 
When a + cost-reporting backend is used, a one-line tip points at `--backend anthropic` + (direct Haiku API, ~50× cheaper per task by skipping Claude Code's overhead) + or `--backend ollama` (free, local). + ## [0.24.0] - 2026-06-13 ### Added diff --git a/Cargo.lock b/Cargo.lock index f55b875..6f7a0f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2572,7 +2572,7 @@ dependencies = [ [[package]] name = "task-journal-cli" -version = "0.24.0" +version = "0.24.1" dependencies = [ "anyhow", "assert_cmd", @@ -2596,7 +2596,7 @@ dependencies = [ [[package]] name = "task-journal-core" -version = "0.24.0" +version = "0.24.1" dependencies = [ "anyhow", "chrono", @@ -2621,7 +2621,7 @@ dependencies = [ [[package]] name = "task-journal-mcp" -version = "0.24.0" +version = "0.24.1" dependencies = [ "anyhow", "chrono", diff --git a/Cargo.toml b/Cargo.toml index f292469..58dc42a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [ ] [workspace.package] -version = "0.24.0" +version = "0.24.1" edition = "2021" rust-version = "1.88" license = "MIT" diff --git a/crates/tj-cli/Cargo.toml b/crates/tj-cli/Cargo.toml index ef5f034..7f84716 100644 --- a/crates/tj-cli/Cargo.toml +++ b/crates/tj-cli/Cargo.toml @@ -23,7 +23,7 @@ default = ["embed"] embed = ["tj-core/embed"] [dependencies] -tj-core = { package = "task-journal-core", version = "0.24.0", path = "../tj-core", default-features = false } +tj-core = { package = "task-journal-core", version = "0.24.1", path = "../tj-core", default-features = false } anyhow = { workspace = true } clap = { workspace = true } tracing = { workspace = true } diff --git a/crates/tj-cli/src/main.rs b/crates/tj-cli/src/main.rs index eeff54b..fe5d81a 100644 --- a/crates/tj-cli/src/main.rs +++ b/crates/tj-cli/src/main.rs @@ -4149,14 +4149,17 @@ fn compute_savings( }) } -/// Format a token count compactly: 980 → "980", 3_240 → "3.2k", 88_000 → "88k". 
+/// Format a token count compactly: 980 → "980", 3_240 → "3.2k", 88_000 → "88k", +/// 2_760_000 → "2.8M". fn fmt_tokens(n: u64) -> String { if n < 1_000 { n.to_string() } else if n < 100_000 { format!("{:.1}k", n as f64 / 1_000.0) - } else { + } else if n < 1_000_000 { format!("{}k", n / 1_000) + } else { + format!("{:.1}M", n as f64 / 1_000_000.0) } } @@ -4164,16 +4167,16 @@ fn fmt_tokens(n: u64) -> String { /// " | spent 3.2k tok ($0.0012) · saved ~88k→1.5k tok (59×)". fn stats_suffix(spent: &tj_core::llm::LlmUsage, saved: &Option) -> String { let mut parts = Vec::new(); - if spent.total_tokens() > 0 { - let cost = match spent.cost_usd { - Some(c) if c > 0.0 => format!(" (${c:.4})"), - _ => String::new(), - }; - parts.push(format!( - "spent {} tok{}", - fmt_tokens(spent.total_tokens()), - cost - )); + // claude -p reports a (notional) dollar cost but muddy token counts — its + // big prompt lands in `cache_creation`, not `input_tokens` — so lead with + // the cost there. API backends report no cost but clean tokens, so show + // those instead. + match spent.cost_usd { + Some(c) if c > 0.0 => parts.push(format!("cost ${c:.4}")), + _ if spent.total_tokens() > 0 => { + parts.push(format!("spent {} tok", fmt_tokens(spent.total_tokens()))) + } + _ => {} } if let Some(s) = saved { if s.pack_tokens > 0 && s.raw_tokens > s.pack_tokens { @@ -4395,6 +4398,21 @@ fn finalize_one_task( Ok(out) } +/// A one-line nudge shown when a cost-reporting backend (claude -p) was used: +/// the same Haiku via a direct API skips Claude Code's harness overhead. Only +/// claude -p reports a non-zero `cost_usd`, so this fires for it alone. +fn backend_cost_tip(cost: Option) -> Option { + match cost { + Some(c) if c > 0.0 => Some( + "tip: that cost is claude -p's Claude Code overhead (notional under a \ +subscription). For ~50× cheaper per task, use --backend anthropic (direct Haiku API, \ +needs ANTHROPIC_API_KEY) — or --backend ollama for free, local." 
+ .to_string(), + ), + _ => None, + } +} + /// Human-readable one-liner for a finalize result. fn print_finalize_outcome(task_id: &str, out: &FinalizeOutcome) { if out.skipped_no_backend { @@ -4458,6 +4476,9 @@ fn run_complete_single( }; let out = finalize_one_task(&ctx, task_id, enrich, dry_run, backend)?; print_finalize_outcome(task_id, &out); + if let Some(tip) = backend_cost_tip(out.spent.cost_usd) { + eprintln!("{tip}"); + } Ok(()) } @@ -4604,6 +4625,9 @@ fn run_complete_batch( totals.trim_start_matches(" | ") ); } + if let Some(tip) = backend_cost_tip(total_spent.cost_usd) { + eprintln!("{tip}"); + } if !left_open.is_empty() { println!("\nLeft open ({}):", left_open.len()); @@ -5682,10 +5706,26 @@ mod inline_tests { pack_tokens: 1_500, }); let s = stats_suffix(&spent, &saved); - assert!(s.contains("spent 1.5k tok ($0.0012)"), "{s}"); + // Cost-reporting backend (claude -p) → lead with cost, not muddy tokens. + assert!(s.contains("cost $0.0012"), "{s}"); assert!(s.contains("saved ~90.0k→1.5k tok (60×)"), "{s}"); } + #[test] + fn stats_suffix_shows_tokens_for_costless_backend() { + // API backend reports clean tokens, no cost → show the token count. 
+ let spent = tj_core::llm::LlmUsage { + input_tokens: 1800, + output_tokens: 200, + cost_usd: None, + }; + assert_eq!( + stats_suffix(&spent, &None), + " | spent 2.0k tok", + "API backend should show tokens" + ); + } + #[test] fn stats_suffix_empty_when_nothing_to_report() { let spent = tj_core::llm::LlmUsage::default(); diff --git a/crates/tj-cli/tests/cli.rs b/crates/tj-cli/tests/cli.rs index c55aff0..eff23b5 100644 --- a/crates/tj-cli/tests/cli.rs +++ b/crates/tj-cli/tests/cli.rs @@ -5621,7 +5621,7 @@ fn complete_retitles_and_closes_via_fake_backend() { .args(["complete", &task_id]) .assert() .success() - .stdout(contains("spent 1.5k tok ($0.0012)")) + .stdout(contains("cost $0.0012")) .stdout(contains("retitled")) .stdout(contains("closed")); diff --git a/crates/tj-core/src/classifier/agent_sdk.rs b/crates/tj-core/src/classifier/agent_sdk.rs index 69c5e07..5971ab4 100644 --- a/crates/tj-core/src/classifier/agent_sdk.rs +++ b/crates/tj-core/src/classifier/agent_sdk.rs @@ -54,10 +54,21 @@ fn base_claude_command(model: &str) -> Command { .arg("--output-format") .arg("json") .arg("--strict-mcp-config") + // We never use tools in these one-shot text calls — denying the + // built-in toolset keeps their schemas out of the prompt, roughly + // halving the harness overhead. (The cache-creation cost floor + // remains; for true pennies use a direct API backend.) + .arg("--disallowed-tools") + .arg(DISABLED_TOOLS) .env(IN_CLASSIFIER_ENV, "1"); cmd } +/// Built-in tools denied in our one-shot `claude -p` calls (we only want a text +/// completion, never tool use). Listed explicitly because there is no wildcard. 
+const DISABLED_TOOLS: &str = "Bash Read Edit Write Glob Grep Task WebFetch \ +WebSearch NotebookEdit TodoWrite BashOutput KillBash"; + /// Production runner: invokes the local `claude` binary in print mode, pinned /// to the given model, asking for the JSON envelope and an isolated MCP config /// (`--strict-mcp-config` keeps the project's own MCP servers — including this @@ -259,10 +270,6 @@ struct EnvelopeUsage { input_tokens: u64, #[serde(default)] output_tokens: u64, - #[serde(default)] - cache_creation_input_tokens: u64, - #[serde(default)] - cache_read_input_tokens: u64, } impl Classifier for ClaudeCliClassifier { @@ -307,8 +314,14 @@ pub fn run_claude_json_usage( } let u = envelope.usage.unwrap_or_default(); let usage = crate::llm::LlmUsage { - // Count cache reads/writes as input so the total reflects real context. - input_tokens: u.input_tokens + u.cache_creation_input_tokens + u.cache_read_input_tokens, + // Only our *fresh* prompt tokens — NOT the cached Claude Code system + // prompt + tool schemas (cache_read/creation), which are harness + // overhead, not work the user asked for. The dollar `cost` below still + // reflects everything (claude computes it with the cache discount), so + // a small token count next to a few-cents cost is the honest signal + // that claude -p's overhead dominates — switch to a direct API backend + // to avoid it. + input_tokens: u.input_tokens, output_tokens: u.output_tokens, cost_usd: envelope.total_cost_usd, }; diff --git a/crates/tj-mcp/Cargo.toml b/crates/tj-mcp/Cargo.toml index a82d6f5..86e768b 100644 --- a/crates/tj-mcp/Cargo.toml +++ b/crates/tj-mcp/Cargo.toml @@ -17,7 +17,7 @@ path = "src/main.rs" [dependencies] # Lean: the MCP server doesn't embed yet, so it skips the model2vec backend. 
-tj-core = { package = "task-journal-core", version = "0.24.0", path = "../tj-core", default-features = false } +tj-core = { package = "task-journal-core", version = "0.24.1", path = "../tj-core", default-features = false } anyhow = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } diff --git a/plugin/.claude-plugin/plugin.json b/plugin/.claude-plugin/plugin.json index 2e4366f..8fc90f0 100644 --- a/plugin/.claude-plugin/plugin.json +++ b/plugin/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "task-journal", - "version": "0.24.0", + "version": "0.24.1", "description": "Append-only journal of AI-coding task reasoning chains: hypotheses, decisions, rejections, evidence. Renders compact resume packs so an agent can pick up a 2-week-old task with full context.", "author": { "name": "Mher Shahinyan" From ef3af8861c45e17630ad40d3cf0dcd7b13f2ddf8 Mon Sep 17 00:00:00 2001 From: Mher Shahinyan Date: Sat, 13 Jun 2026 23:16:42 +0400 Subject: [PATCH 2/2] feat(distiller): in-session compaction-segment distiller subagent + advisory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New task-journal-distiller subagent (Haiku, background:true): reads a just-compacted conversation segment from the transcript file and backfills the missed decisions/rejections/findings for the active task via the journal MCP, never closing it. In-session → no separate claude -p call (~5k vs ~46k overhead), non-blocking. After a compaction the SessionStart hook adds a short advisory to delegate to it (hooks can't spawn subagents, so it's advisory; deterministic catch-up stays the net). TJ_DISTILLER_HINT=0 disables the hint. Platform constraints verified via claude-code-guide; see .docs/plans/2026-06-13-compaction-distiller.md. Worker-reliability fix and SessionEnd(clear) catch-up tracked as follow-ups. 
claude-memory-80f Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 14 ++++++- Cargo.lock | 6 +-- Cargo.toml | 2 +- crates/tj-cli/Cargo.toml | 2 +- crates/tj-cli/src/main.rs | 19 ++++++++++ crates/tj-cli/tests/cli.rs | 8 ++++ crates/tj-mcp/Cargo.toml | 2 +- plugin/.claude-plugin/plugin.json | 2 +- plugin/agents/task-journal-distiller.md | 49 +++++++++++++++++++++++++ 9 files changed, 96 insertions(+), 8 deletions(-) create mode 100644 plugin/agents/task-journal-distiller.md diff --git a/CHANGELOG.md b/CHANGELOG.md index fa141a2..09f1ba0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -## [0.24.1] - 2026-06-13 +## [0.25.0] - 2026-06-13 + +### Added +- **In-session compaction distiller.** A new `task-journal-distiller` subagent + (Haiku, `background: true`) reads a just-compacted conversation segment from + the transcript file and backfills the decisions / rejections / findings that + weren't logged yet for the active task — via the journal MCP, never closing a + task. Because it runs as an in-session subagent it costs no separate `claude + -p` call (~5k token overhead vs ~46k) and doesn't block the main chat. After a + compaction, the `SessionStart` hook now adds a short advisory suggesting the + main agent delegate the segment to it (the platform doesn't let a hook spawn a + subagent, so this is advisory; the existing deterministic catch-up remains the + guaranteed safety net). Disable the hint with `TJ_DISTILLER_HINT=0`. 
### Changed - **Cheaper, honest `complete` stats.** One-shot `claude -p` calls now pass diff --git a/Cargo.lock b/Cargo.lock index 6f7a0f7..75af620 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2572,7 +2572,7 @@ dependencies = [ [[package]] name = "task-journal-cli" -version = "0.24.1" +version = "0.25.0" dependencies = [ "anyhow", "assert_cmd", @@ -2596,7 +2596,7 @@ dependencies = [ [[package]] name = "task-journal-core" -version = "0.24.1" +version = "0.25.0" dependencies = [ "anyhow", "chrono", @@ -2621,7 +2621,7 @@ dependencies = [ [[package]] name = "task-journal-mcp" -version = "0.24.1" +version = "0.25.0" dependencies = [ "anyhow", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 58dc42a..b405d35 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [ ] [workspace.package] -version = "0.24.1" +version = "0.25.0" edition = "2021" rust-version = "1.88" license = "MIT" diff --git a/crates/tj-cli/Cargo.toml b/crates/tj-cli/Cargo.toml index 7f84716..e0628d5 100644 --- a/crates/tj-cli/Cargo.toml +++ b/crates/tj-cli/Cargo.toml @@ -23,7 +23,7 @@ default = ["embed"] embed = ["tj-core/embed"] [dependencies] -tj-core = { package = "task-journal-core", version = "0.24.1", path = "../tj-core", default-features = false } +tj-core = { package = "task-journal-core", version = "0.25.0", path = "../tj-core", default-features = false } anyhow = { workspace = true } clap = { workspace = true } tracing = { workspace = true } diff --git a/crates/tj-cli/src/main.rs b/crates/tj-cli/src/main.rs index fe5d81a..b87a70e 100644 --- a/crates/tj-cli/src/main.rs +++ b/crates/tj-cli/src/main.rs @@ -2140,6 +2140,25 @@ fn main() -> Result<()> { bundle.push_str(&reminder); bundle.push_str("\n\n"); } + // Advisory (the hook can't force it): suggest the main agent + // delegate the just-compacted segment to the in-session + // distiller subagent, which backfills missed reasoning from + // the transcript file (which survives compaction) for the + // active task(s). 
Background → never blocks. Gated off by + // TJ_DISTILLER_HINT=0 for users who don't want it. + if std::env::var("TJ_DISTILLER_HINT").as_deref() != Ok("0") { + let transcript_hint = payload + .get("transcript_path") + .and_then(|v| v.as_str()) + .map(|p| format!(" (transcript: {p})")) + .unwrap_or_default(); + bundle.push_str(&format!( + "[task-journal] A compaction just occurred. If decisions, rejections, \ +or findings from before it are not yet in the journal for the active task(s) above, delegate to \ +the `task-journal-distiller` subagent to capture them from the transcript{transcript_hint}. It \ +runs in the background and won't block you; it only fills gaps and never closes tasks.\n\n" + )); + } } for tc in &recent { let pack = tj_core::pack::assemble( diff --git a/crates/tj-cli/tests/cli.rs b/crates/tj-cli/tests/cli.rs index eff23b5..6d85971 100644 --- a/crates/tj-cli/tests/cli.rs +++ b/crates/tj-cli/tests/cli.rs @@ -4516,6 +4516,10 @@ fn session_start_compact_prepends_active_task_reminder() { ctx.contains("Must ship before Friday"), "reminder must include the in-force constraint: {ctx}" ); + assert!( + ctx.contains("task-journal-distiller"), + "compact SessionStart must advise delegating to the distiller subagent: {ctx}" + ); } #[test] @@ -4525,6 +4529,10 @@ fn session_start_startup_has_no_reminder() { !ctx.contains("[Active task after compaction]"), "non-compact SessionStart must NOT inject the reminder: {ctx}" ); + assert!( + !ctx.contains("task-journal-distiller"), + "non-compact SessionStart must NOT advise the distiller: {ctx}" + ); } /// Recursively collect file names under `dir` that match a predicate. diff --git a/crates/tj-mcp/Cargo.toml b/crates/tj-mcp/Cargo.toml index 86e768b..cf4783e 100644 --- a/crates/tj-mcp/Cargo.toml +++ b/crates/tj-mcp/Cargo.toml @@ -17,7 +17,7 @@ path = "src/main.rs" [dependencies] # Lean: the MCP server doesn't embed yet, so it skips the model2vec backend. 
-tj-core = { package = "task-journal-core", version = "0.24.1", path = "../tj-core", default-features = false } +tj-core = { package = "task-journal-core", version = "0.25.0", path = "../tj-core", default-features = false } anyhow = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } diff --git a/plugin/.claude-plugin/plugin.json b/plugin/.claude-plugin/plugin.json index 8fc90f0..d6eb8a4 100644 --- a/plugin/.claude-plugin/plugin.json +++ b/plugin/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "task-journal", - "version": "0.24.1", + "version": "0.25.0", "description": "Append-only journal of AI-coding task reasoning chains: hypotheses, decisions, rejections, evidence. Renders compact resume packs so an agent can pick up a 2-week-old task with full context.", "author": { "name": "Mher Shahinyan" diff --git a/plugin/agents/task-journal-distiller.md b/plugin/agents/task-journal-distiller.md new file mode 100644 index 0000000..d490260 --- /dev/null +++ b/plugin/agents/task-journal-distiller.md @@ -0,0 +1,49 @@ +--- +name: task-journal-distiller +description: Distills a conversation segment into task-journal memory. Use when a compaction just happened (or is about to), or when asked to "capture what we just did" — it reads the segment from the transcript, finds the decisions / rejections / findings that were NOT yet logged for the active task, and records them via the task-journal MCP. Runs in the background so it never blocks the main chat. Does NOT close tasks. +model: haiku +background: true +tools: Read, Bash, Grep, Glob, mcp__plugin_task-journal_task-journal__task_search, mcp__plugin_task-journal_task-journal__task_pack, mcp__plugin_task-journal_task-journal__event_add +--- + +You are the **task-journal distiller**. A segment of a coding conversation is +about to be (or has just been) compacted away. 
Your one job: make sure the +**reasoning** from that segment is preserved in the task journal as typed +events, so nothing is lost and the task does not later look "interrupted". + +You are dispatched with: the active **task id(s)**, the **transcript path** +(a JSONL file), and optionally a **boundary timestamp** (the start of the +segment — usually the task's last recorded event, or the previous compaction). + +## Procedure + +1. **Know what's already recorded.** For the task, call + `task_pack` (or `task_search`) and read its existing events. You will NOT + re-record anything already represented there. +2. **Read the segment.** Read the transcript JSONL file (use `Read`; for large + files read the tail or grep for the boundary timestamp and read forward). + Focus on the assistant/user turns AFTER the boundary timestamp. +3. **Extract only SIGNIFICANT, NOT-yet-logged reasoning** for the task: + - `decision` — a committed choice. Pass `alternatives` (the options weighed). + - `rejection` — an approach ruled out, and why. + - `finding` — a fact verified from code/logs (cite file:line, ids, names). + - `evidence` — a test/benchmark that proved something. + - `constraint` — an external limit discovered. + Skip chatter, restated tool output, greetings, and anything already in the + existing events. When in doubt, leave it out — precision over volume. +4. **Record** each via `event_add(task_id, event_type, text, ...)`. Write in the + user's language, terse and specific. Append-only — never edit. + +## Hard rules + +- **Never close** a task and **never** mark it done — you only fill gaps. +- **Never create** a new task unless the segment clearly pursued a *distinct* + objective with no matching open task; prefer attaching to the given task id. +- **De-dupe ruthlessly** — if the substance is already an event, skip it. +- If the transcript is unreadable or the segment holds nothing new, do nothing + and say so. Doing nothing is a valid, correct outcome. 
+ +## Output + +One terse line: `distilled <N> event(s) into <task-id>: <short summary>` +(or `nothing new to record`). The main agent only needs this summary back.