Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.23.0] - 2026-06-13
## [0.24.0] - 2026-06-13

### Added
- **`complete` reports tokens spent and saved.** Each finalize now prints what
it cost and what it compresses: `complete tj-x: … | spent 1.5k tok ($0.0012) ·
saved ~88.0k→1.5k tok (59×)`. **Spent** is exact, pulled from the backend's own
usage report (the `claude -p` JSON envelope's `usage`/`total_cost_usd`,
Anthropic/OpenAI `usage`), summed across the judge call and any `--enrich`
calls. **Saved** is an estimate of memory compression — the raw transcript
size of the task's sessions vs its compact pack (≈ chars/4). A batch run ends
with a `Totals across N task(s):` line. Backends expose usage via a new
`LlmBackend::complete_usage` method (default: no usage), so custom backends
keep working unchanged.

Finalize, retuned after running `complete` on real 12-session tasks: the fast,
reliable judge-only path is now the default, and the slow session-enrich pass is
Expand Down
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ members = [
]

[workspace.package]
version = "0.23.0"
version = "0.24.0"
edition = "2021"
rust-version = "1.88"
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion crates/tj-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ default = ["embed"]
embed = ["tj-core/embed"]

[dependencies]
tj-core = { package = "task-journal-core", version = "0.23.0", path = "../tj-core", default-features = false }
tj-core = { package = "task-journal-core", version = "0.24.0", path = "../tj-core", default-features = false }
anyhow = { workspace = true }
clap = { workspace = true }
tracing = { workspace = true }
Expand Down
160 changes: 154 additions & 6 deletions crates/tj-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4108,6 +4108,89 @@ struct FinalizeOutcome {
reason: String,
/// True when no LLM backend was available — nothing was judged or written.
skipped_no_backend: bool,
/// Exact token usage spent on this task (judge + any enrich calls).
spent: tj_core::llm::LlmUsage,
/// Estimated memory compression: raw session tokens → compact pack tokens.
saved: Option<Savings>,
}

/// Rough memory-compression estimate for a finalized task (≈ chars / 4).
///
/// Both counts are estimates produced by `est_tokens`, not figures reported by
/// a backend. The derived `Default` yields zeroed counts, which lets a value
/// serve as a running total across several tasks.
#[derive(Default, Clone, Copy)]
struct Savings {
/// Estimated tokens in the raw session transcripts the task touched.
raw_tokens: u64,
/// Estimated tokens in the task's compact pack text.
pack_tokens: u64,
}

/// Approximate a token count from a character count, assuming four characters
/// per token and rounding any partial token up.
///
/// This is intentionally coarse: it only needs to give an order-of-magnitude
/// sense of how much material a pack compresses.
fn est_tokens(chars: usize) -> u64 {
    let count = chars as u64;
    let whole = count / 4;
    // Any remainder means one more (partial) token.
    if count % 4 == 0 {
        whole
    } else {
        whole + 1
    }
}

/// Estimate how much raw session material a task's compact pack stands in for:
/// the summed transcript size of the sessions it touched vs the pack size.
/// `None` when sessions aren't reachable (no project dir).
fn compute_savings(
conn: &rusqlite::Connection,
events_path: &std::path::Path,
project_dir: Option<&std::path::Path>,
task_id: &str,
) -> Option<Savings> {
let dir = project_dir?;
let sessions = task_sessions(events_path, dir, task_id).ok()?;
if sessions.is_empty() {
return None;
}
let raw_chars: usize = sessions.iter().map(|(_, inp)| inp.transcript.len()).sum();
let pack = tj_core::pack::assemble(conn, task_id, tj_core::pack::PackMode::Compact).ok()?;
Some(Savings {
raw_tokens: est_tokens(raw_chars),
pack_tokens: est_tokens(pack.text.len()),
})
}

/// Format a token count compactly: 980 → "980", 3_240 → "3.2k",
/// 88_000 → "88.0k", 204_000 → "204k".
///
/// Three bands are used:
/// - below 1_000 the exact number is printed;
/// - from 1_000 up to (but not including) 100_000 the value is shown in
///   thousands with one decimal place;
/// - from 100_000 upwards the value is shown in whole thousands (truncated).
///
/// Counts just under 100_000 that would round up to "100.0k" in the middle
/// band are printed as "100k" instead, so the output never mixes the
/// one-decimal style with a three-digit thousands figure.
fn fmt_tokens(n: u64) -> String {
    match n {
        0..=999 => n.to_string(),
        1_000..=99_999 => {
            let scaled = format!("{:.1}k", n as f64 / 1_000.0);
            // Rounding can push e.g. 99_999 to "100.0k"; render that the same
            // way 100_000 is rendered.
            if scaled == "100.0k" {
                "100k".to_string()
            } else {
                scaled
            }
        }
        _ => format!("{}k", n / 1_000),
    }
}

/// Build the human-readable spent/saved tail appended to a finalize line, e.g.
/// " | spent 3.2k tok ($0.0012) · saved ~88.0k→1.5k tok (59×)".
///
/// - The "spent" part appears only when the usage total is non-zero; the
///   dollar figure is added only when a cost greater than zero is present.
/// - The "saved" part appears only when savings are given, the pack estimate
///   is non-zero, and the raw estimate is strictly larger than the pack's.
///
/// Returns an empty string when neither part applies, so callers can append
/// the result unconditionally.
fn stats_suffix(spent: &tj_core::llm::LlmUsage, saved: &Option<Savings>) -> String {
    let mut segments: Vec<String> = Vec::new();

    let spent_total = spent.total_tokens();
    if spent_total > 0 {
        let cost_note = spent
            .cost_usd
            .filter(|usd| *usd > 0.0)
            .map(|usd| format!(" (${usd:.4})"))
            .unwrap_or_default();
        segments.push(format!(
            "spent {} tok{}",
            fmt_tokens(spent_total),
            cost_note
        ));
    }

    match saved {
        // Only report a compression ratio that is well-defined and above 1.
        Some(s) if s.pack_tokens > 0 && s.raw_tokens > s.pack_tokens => {
            let ratio = s.raw_tokens as f64 / s.pack_tokens as f64;
            segments.push(format!(
                "saved ~{}→{} tok ({:.0}×)",
                fmt_tokens(s.raw_tokens),
                fmt_tokens(s.pack_tokens),
                ratio
            ));
        }
        _ => {}
    }

    if segments.is_empty() {
        return String::new();
    }
    format!(" | {}", segments.join(" · "))
}

/// Per-project handles threaded through the finalize helpers.
Expand Down Expand Up @@ -4149,10 +4232,10 @@ fn enrich_task(
project_dir: &std::path::Path,
task_id: &str,
llm: Box<dyn tj_core::llm::LlmBackend>,
) -> anyhow::Result<usize> {
) -> anyhow::Result<(usize, tj_core::llm::LlmUsage)> {
let sessions = task_sessions(events_path, project_dir, task_id)?;
if sessions.is_empty() {
return Ok(0);
return Ok((0, tj_core::llm::LlmUsage::default()));
}
// Enrich is the slow part — one (or more, for big transcripts) `claude -p`
// call per session. Announce it so a multi-minute run doesn't look hung;
Expand All @@ -4170,7 +4253,7 @@ fn enrich_task(
};
let report =
tj_core::dream::run_dream(conn, events_path, &opts, &dream_backend, sessions, &run_id)?;
Ok(report.events_backfilled)
Ok((report.events_backfilled, dream_backend.usage()))
}

/// Current title for a task ("" if somehow unset).
Expand Down Expand Up @@ -4229,7 +4312,10 @@ fn finalize_one_task(
if enrich && !dry_run {
if let Some(dir) = ctx.project_dir {
if let Some(llm) = tj_core::llm::backend_from_env(backend)? {
out.enriched = enrich_task(conn, events_path, project_hash, dir, task_id, llm)?;
let (n, enrich_usage) =
enrich_task(conn, events_path, project_hash, dir, task_id, llm)?;
out.enriched = n;
out.spent.add(enrich_usage);
tj_core::db::ingest_new_events(conn, events_path, project_hash)?;
}
}
Expand All @@ -4256,7 +4342,8 @@ fn finalize_one_task(
out.skipped_no_backend = true;
return Ok(out);
};
let j = tj_core::finalize::judge(&title, &lines, judge_backend.as_ref())?;
let (j, judge_usage) = tj_core::finalize::judge(&title, &lines, judge_backend.as_ref())?;
out.spent.add(judge_usage);
out.done = j.done;
out.reason = j.reason.clone();

Expand Down Expand Up @@ -4302,6 +4389,9 @@ fn finalize_one_task(

writer.flush_durable()?;
tj_core::db::ingest_new_events(conn, events_path, project_hash)?;

// 6. Estimate the memory compression this finalize represents.
out.saved = compute_savings(conn, events_path, ctx.project_dir, task_id);
Ok(out)
}

Expand Down Expand Up @@ -4334,7 +4424,11 @@ PATH; or pick one via --backend / TJ_BACKEND: anthropic, openai, ollama (free, l
if parts.is_empty() {
parts.push("no change".to_string());
}
println!("complete {task_id}: {}", parts.join("; "));
println!(
"complete {task_id}: {}{}",
parts.join("; "),
stats_suffix(&out.spent, &out.saved)
);
}

/// `complete <id>` — finalize a single task.
Expand Down Expand Up @@ -4482,18 +4576,35 @@ fn run_complete_batch(
}

let mut left_open: Vec<(String, String)> = Vec::new();
let mut total_spent = tj_core::llm::LlmUsage::default();
let mut total_saved = Savings::default();
let mut done_count = 0usize;
for (id, _) in &targets {
let out = finalize_one_task(&ctx, id, enrich, false, backend)?;
print_finalize_outcome(id, &out);
if out.skipped_no_backend {
println!("complete: stopping batch — no LLM backend available.");
return Ok(());
}
total_spent.add(out.spent);
if let Some(s) = out.saved {
total_saved.raw_tokens += s.raw_tokens;
total_saved.pack_tokens += s.pack_tokens;
}
done_count += 1;
if !out.closed {
left_open.push((id.clone(), out.reason.clone()));
}
}

let totals = stats_suffix(&total_spent, &Some(total_saved));
if !totals.is_empty() {
println!(
"\nTotals across {done_count} task(s): {}",
totals.trim_start_matches(" | ")
);
}

if !left_open.is_empty() {
println!("\nLeft open ({}):", left_open.len());
for (id, reason) in &left_open {
Expand Down Expand Up @@ -5551,6 +5662,43 @@ mod inline_tests {
// declared before this module begins.
use super::*;

#[test]
fn fmt_tokens_scales_units() {
    // (input, expected rendering): plain number, one-decimal thousands (twice),
    // and whole thousands.
    let cases: [(u64, &str); 4] = [
        (980, "980"),
        (1_500, "1.5k"),
        (88_000, "88.0k"),
        (204_000, "204k"),
    ];
    for (tokens, rendered) in cases {
        assert_eq!(fmt_tokens(tokens), rendered);
    }
}

#[test]
fn stats_suffix_shows_spent_and_saved() {
    // 1200 in + 300 out tokens with a non-zero cost, and a 90k → 1.5k pack.
    let usage = tj_core::llm::LlmUsage {
        input_tokens: 1200,
        output_tokens: 300,
        cost_usd: Some(0.0012),
    };
    let compression = Savings {
        raw_tokens: 90_000,
        pack_tokens: 1_500,
    };
    let line = stats_suffix(&usage, &Some(compression));
    // Both halves of the suffix must be present.
    for expected in ["spent 1.5k tok ($0.0012)", "saved ~90.0k→1.5k tok (60×)"] {
        assert!(line.contains(expected), "{line}");
    }
}

#[test]
fn stats_suffix_empty_when_nothing_to_report() {
    // Default usage and no savings: the suffix is empty.
    let no_usage = tj_core::llm::LlmUsage::default();
    let empty_suffix = stats_suffix(&no_usage, &None);
    assert_eq!(empty_suffix, "");

    // Cost omitted when zero/None; tokens still shown.
    let tokens_only = tj_core::llm::LlmUsage {
        input_tokens: 500,
        output_tokens: 0,
        cost_usd: None,
    };
    let spent_suffix = stats_suffix(&tokens_only, &None);
    assert_eq!(spent_suffix, " | spent 500 tok");
}

#[test]
fn nudge_escalates_only_for_substantial_thin_sessions() {
// Small session → never escalate, regardless of capture.
Expand Down
3 changes: 3 additions & 0 deletions crates/tj-cli/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5563,6 +5563,8 @@ fn complete_retitles_and_closes_via_fake_backend() {
// whose `result` field is the finalize JSON string.
let envelope = serde_json::json!({
"is_error": false,
"usage": {"input_tokens": 1200, "output_tokens": 300},
"total_cost_usd": 0.0012,
"result": serde_json::json!({
"retitle": true,
"title": "Voucher refund: paid 100% but got 50%",
Expand Down Expand Up @@ -5619,6 +5621,7 @@ fn complete_retitles_and_closes_via_fake_backend() {
.args(["complete", &task_id])
.assert()
.success()
.stdout(contains("spent 1.5k tok ($0.0012)"))
.stdout(contains("retitled"))
.stdout(contains("closed"));

Expand Down
44 changes: 39 additions & 5 deletions crates/tj-core/src/classifier/agent_sdk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,9 +236,9 @@ impl ClaudeCliClassifier {
}
}

/// The JSON wrapper emitted by `claude --output-format json`. We only need the
/// error flag and the `result` string (the model's verdict text); the rest of
/// the envelope (usage, cost, timings) is ignored.
/// The JSON wrapper emitted by `claude --output-format json`. We read the error
/// flag, the `result` string (the model's verdict text), and the usage/cost so
/// callers can report what a call actually consumed.
#[derive(serde::Deserialize)]
struct CliEnvelope {
#[serde(default)]
Expand All @@ -247,6 +247,22 @@ struct CliEnvelope {
result: Option<String>,
#[serde(default)]
subtype: Option<String>,
#[serde(default)]
usage: Option<EnvelopeUsage>,
#[serde(default)]
total_cost_usd: Option<f64>,
}

/// Token counts read from the `usage` object of the `claude --output-format
/// json` envelope.
///
/// Every field carries `#[serde(default)]`, so a count the envelope leaves out
/// deserializes as 0 instead of failing the parse.
#[derive(serde::Deserialize, Default)]
struct EnvelopeUsage {
/// Input tokens reported for the call.
#[serde(default)]
input_tokens: u64,
/// Output tokens reported for the call.
#[serde(default)]
output_tokens: u64,
/// Reported cache-creation input tokens; counted as input when the usage is
/// converted to `LlmUsage`.
#[serde(default)]
cache_creation_input_tokens: u64,
/// Reported cache-read input tokens; counted as input when the usage is
/// converted to `LlmUsage`.
#[serde(default)]
cache_read_input_tokens: u64,
}

impl Classifier for ClaudeCliClassifier {
Expand All @@ -266,6 +282,16 @@ pub fn run_claude_json(
model: &str,
prompt: &str,
) -> anyhow::Result<String> {
run_claude_json_usage(runner, model, prompt).map(|(text, _)| text)
}

/// Like [`run_claude_json`] but also returns the envelope's reported token
/// usage and cost (zeros when the envelope omits them).
pub fn run_claude_json_usage(
runner: &dyn CommandRunner,
model: &str,
prompt: &str,
) -> anyhow::Result<(String, crate::llm::LlmUsage)> {
let stdout = runner.run(model, prompt)?;
let envelope: CliEnvelope = serde_json::from_str(stdout.trim()).with_context(|| {
format!(
Expand All @@ -279,9 +305,17 @@ pub fn run_claude_json(
envelope.subtype.as_deref().unwrap_or("unknown")
));
}
envelope
let u = envelope.usage.unwrap_or_default();
let usage = crate::llm::LlmUsage {
// Count cache reads/writes as input so the total reflects real context.
input_tokens: u.input_tokens + u.cache_creation_input_tokens + u.cache_read_input_tokens,
output_tokens: u.output_tokens,
cost_usd: envelope.total_cost_usd,
};
let result = envelope
.result
.ok_or_else(|| anyhow!("claude json wrapper had no `result` field"))
.ok_or_else(|| anyhow!("claude json wrapper had no `result` field"))?;
Ok((result, usage))
}

/// Probe whether `claude` resolves on PATH and runs. Cheap (`--version` does
Expand Down
Loading
Loading