Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.24.1] - 2026-06-13

### Changed
- **Cheaper, honest `complete` stats.** One-shot `claude -p` calls now pass
`--disallowed-tools` (we never use tools), keeping the built-in tool schemas
out of the prompt and roughly halving the harness overhead. The stats line now
leads with the real dollar cost for `claude -p` (whose token counts are muddy —
a big prompt lands in `cache_creation`, not `input_tokens`) and shows clean
token counts only for API backends; token sizes scale to `M`. When a
cost-reporting backend is used, a one-line tip points at `--backend anthropic`
(direct Haiku API, ~50× cheaper per task by skipping Claude Code's overhead)
or `--backend ollama` (free, local).

## [0.24.0] - 2026-06-13

### Added
Expand Down
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ members = [
]

[workspace.package]
version = "0.24.0"
version = "0.24.1"
edition = "2021"
rust-version = "1.88"
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion crates/tj-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ default = ["embed"]
embed = ["tj-core/embed"]

[dependencies]
tj-core = { package = "task-journal-core", version = "0.24.0", path = "../tj-core", default-features = false }
tj-core = { package = "task-journal-core", version = "0.24.1", path = "../tj-core", default-features = false }
anyhow = { workspace = true }
clap = { workspace = true }
tracing = { workspace = true }
Expand Down
66 changes: 53 additions & 13 deletions crates/tj-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4149,31 +4149,34 @@ fn compute_savings(
})
}

/// Format a token count compactly, scaling the unit with its magnitude:
/// 980 → "980", 3_240 → "3.2k", 88_000 → "88.0k", 150_000 → "150k",
/// 2_760_000 → "2.8M".
///
/// Tiers:
/// - below 1_000: the plain integer;
/// - 1_000 up to (not including) 100_000: thousands with one decimal place;
/// - 100_000 up to (not including) 1_000_000: whole thousands, truncated;
/// - 1_000_000 and above: millions with one decimal place.
///
/// The one-decimal tiers round, so a value just under a tier boundary can
/// print as the boundary itself (99_999 → "100.0k").
fn fmt_tokens(n: u64) -> String {
    match n {
        0..=999 => n.to_string(),
        // Float division keeps the single fractional digit; the `as` cast is
        // exact for every value in this range.
        1_000..=99_999 => format!("{:.1}k", n as f64 / 1_000.0),
        // Integer division on purpose: three significant digits are enough here.
        100_000..=999_999 => format!("{}k", n / 1_000),
        _ => format!("{:.1}M", n as f64 / 1_000_000.0),
    }
}

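/// Human spent/saved suffix for a finalize line, e.g.
/// " | cost $0.0012 · saved ~90.0k→1.5k tok (60×)" when the backend reports a
/// dollar cost, or " | spent 3.2k tok · saved ~88.0k→1.5k tok (59×)" when only
/// token counts are available.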
fn stats_suffix(spent: &tj_core::llm::LlmUsage, saved: &Option<Savings>) -> String {
let mut parts = Vec::new();
if spent.total_tokens() > 0 {
let cost = match spent.cost_usd {
Some(c) if c > 0.0 => format!(" (${c:.4})"),
_ => String::new(),
};
parts.push(format!(
"spent {} tok{}",
fmt_tokens(spent.total_tokens()),
cost
));
// claude -p reports a (notional) dollar cost but muddy token counts — its
// big prompt lands in `cache_creation`, not `input_tokens` — so lead with
// the cost there. API backends report no cost but clean tokens, so show
// those instead.
match spent.cost_usd {
Some(c) if c > 0.0 => parts.push(format!("cost ${c:.4}")),
_ if spent.total_tokens() > 0 => {
parts.push(format!("spent {} tok", fmt_tokens(spent.total_tokens())))
}
_ => {}
}
if let Some(s) = saved {
if s.pack_tokens > 0 && s.raw_tokens > s.pack_tokens {
Expand Down Expand Up @@ -4395,6 +4398,21 @@ fn finalize_one_task(
Ok(out)
}

/// Builds the optional one-line hint printed after a run that reported a
/// dollar cost.
///
/// Returns `Some(tip)` only when `cost` is present and strictly positive;
/// `None`, zero, negative and NaN costs all produce no tip. Per the project's
/// notes, claude -p is the only backend that reports a non-zero `cost_usd`, so
/// in practice the hint appears for that backend alone and steers the user
/// towards a direct API or local backend.
fn backend_cost_tip(cost: Option<f64>) -> Option<String> {
    const TIP: &str = "tip: that cost is claude -p's Claude Code overhead (notional under a \
        subscription). For ~50× cheaper per task, use --backend anthropic (direct Haiku API, \
        needs ANTHROPIC_API_KEY) — or --backend ollama for free, local.";

    // `usd > 0.0` is false for NaN, so a malformed cost never triggers the tip.
    let was_billed = cost.is_some_and(|usd| usd > 0.0);
    was_billed.then(|| TIP.to_owned())
}

/// Human-readable one-liner for a finalize result.
fn print_finalize_outcome(task_id: &str, out: &FinalizeOutcome) {
if out.skipped_no_backend {
Expand Down Expand Up @@ -4458,6 +4476,9 @@ fn run_complete_single(
};
let out = finalize_one_task(&ctx, task_id, enrich, dry_run, backend)?;
print_finalize_outcome(task_id, &out);
if let Some(tip) = backend_cost_tip(out.spent.cost_usd) {
eprintln!("{tip}");
}
Ok(())
}

Expand Down Expand Up @@ -4604,6 +4625,9 @@ fn run_complete_batch(
totals.trim_start_matches(" | ")
);
}
if let Some(tip) = backend_cost_tip(total_spent.cost_usd) {
eprintln!("{tip}");
}

if !left_open.is_empty() {
println!("\nLeft open ({}):", left_open.len());
Expand Down Expand Up @@ -5682,10 +5706,26 @@ mod inline_tests {
pack_tokens: 1_500,
});
let s = stats_suffix(&spent, &saved);
assert!(s.contains("spent 1.5k tok ($0.0012)"), "{s}");
// Cost-reporting backend (claude -p) → lead with cost, not muddy tokens.
assert!(s.contains("cost $0.0012"), "{s}");
assert!(s.contains("saved ~90.0k→1.5k tok (60×)"), "{s}");
}

#[test]
fn stats_suffix_shows_tokens_for_costless_backend() {
    // A backend that reports token counts but no dollar figure: the suffix
    // should fall back to the combined input + output token count.
    let usage = tj_core::llm::LlmUsage {
        input_tokens: 1800,
        output_tokens: 200,
        cost_usd: None,
    };
    let rendered = stats_suffix(&usage, &None);
    assert_eq!(rendered, " | spent 2.0k tok", "API backend should show tokens");
}

#[test]
fn stats_suffix_empty_when_nothing_to_report() {
let spent = tj_core::llm::LlmUsage::default();
Expand Down
2 changes: 1 addition & 1 deletion crates/tj-cli/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5621,7 +5621,7 @@ fn complete_retitles_and_closes_via_fake_backend() {
.args(["complete", &task_id])
.assert()
.success()
.stdout(contains("spent 1.5k tok ($0.0012)"))
.stdout(contains("cost $0.0012"))
.stdout(contains("retitled"))
.stdout(contains("closed"));

Expand Down
25 changes: 19 additions & 6 deletions crates/tj-core/src/classifier/agent_sdk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,21 @@ fn base_claude_command(model: &str) -> Command {
.arg("--output-format")
.arg("json")
.arg("--strict-mcp-config")
// We never use tools in these one-shot text calls — denying the
// built-in toolset keeps their schemas out of the prompt, roughly
// halving the harness overhead. (The cache-creation cost floor
// remains; for true pennies use a direct API backend.)
.arg("--disallowed-tools")
.arg(DISABLED_TOOLS)
.env(IN_CLASSIFIER_ENV, "1");
cmd
}

/// Space-separated names of the built-in tools passed to `--disallowed-tools`
/// for our one-shot `claude -p` calls. Those calls only need a text
/// completion, never tool use. Every tool is named explicitly because the flag
/// has no wildcard form.
const DISABLED_TOOLS: &str = concat!(
    "Bash Read Edit Write Glob Grep Task WebFetch ",
    "WebSearch NotebookEdit TodoWrite BashOutput KillBash",
);

/// Production runner: invokes the local `claude` binary in print mode, pinned
/// to the given model, asking for the JSON envelope and an isolated MCP config
/// (`--strict-mcp-config` keeps the project's own MCP servers — including this
Expand Down Expand Up @@ -259,10 +270,6 @@ struct EnvelopeUsage {
input_tokens: u64,
#[serde(default)]
output_tokens: u64,
#[serde(default)]
cache_creation_input_tokens: u64,
#[serde(default)]
cache_read_input_tokens: u64,
}

impl Classifier for ClaudeCliClassifier {
Expand Down Expand Up @@ -307,8 +314,14 @@ pub fn run_claude_json_usage(
}
let u = envelope.usage.unwrap_or_default();
let usage = crate::llm::LlmUsage {
// Count cache reads/writes as input so the total reflects real context.
input_tokens: u.input_tokens + u.cache_creation_input_tokens + u.cache_read_input_tokens,
// Only our *fresh* prompt tokens — NOT the cached Claude Code system
// prompt + tool schemas (cache_read/creation), which are harness
// overhead, not work the user asked for. The dollar `cost` below still
// reflects everything (claude computes it with the cache discount), so
// a small token count next to a few-cents cost is the honest signal
// that claude -p's overhead dominates — switch to a direct API backend
// to avoid it.
input_tokens: u.input_tokens,
output_tokens: u.output_tokens,
cost_usd: envelope.total_cost_usd,
};
Expand Down
2 changes: 1 addition & 1 deletion crates/tj-mcp/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ path = "src/main.rs"

[dependencies]
# Lean: the MCP server doesn't embed yet, so it skips the model2vec backend.
tj-core = { package = "task-journal-core", version = "0.24.0", path = "../tj-core", default-features = false }
tj-core = { package = "task-journal-core", version = "0.24.1", path = "../tj-core", default-features = false }
anyhow = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion plugin/.claude-plugin/plugin.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "task-journal",
"version": "0.24.0",
"version": "0.24.1",
"description": "Append-only journal of AI-coding task reasoning chains: hypotheses, decisions, rejections, evidence. Renders compact resume packs so an agent can pick up a 2-week-old task with full context.",
"author": {
"name": "Mher Shahinyan"
Expand Down
Loading