From b194db4cbb6a47b8a621074a8626cd0bfa7eb657 Mon Sep 17 00:00:00 2001 From: Mher Shahinyan Date: Thu, 11 Jun 2026 23:13:52 +0400 Subject: [PATCH] feat(classifier): subscription-native agent-sdk backend via Haiku MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a CLI-backed classifier that reaches an LLM through the local, already-authenticated `claude` binary — no ANTHROPIC_API_KEY needed. Resurrects the v0.7.x cli path that was dropped in v0.8.0, this time documented honestly: since 2026-06-15 a headless `claude -p` draws from the separate Agent SDK credit pool, not the interactive Pro/Max pool. - ClaudeCliClassifier (classifier/agent_sdk.rs) runs `claude -p --model claude-haiku-4-5 --output-format json --strict-mcp-config`, parses the envelope's `result`, reuses the shared parse_verdict. Command execution injected via a CommandRunner trait so the parse path is unit-testable without shelling out. from_env() returns None unless `claude` is on PATH; model overridable via TJ_AGENT_SDK_MODEL. - Factor http.rs's fence-strip+serde parse into classifier::parse_verdict, reused by both http and agent_sdk so they never diverge. - Hybrid is now an ordered LLM chain: heuristic >= 0.7 -> agent-sdk (if claude on PATH) -> api (if key) -> pending/. Reorder via TJ_HYBRID_LLM_ORDER (default agent-sdk,api). - Wire `agent-sdk` into --backend for ingest-hook and classify-worker, and add --backend to install-hooks (baked into the generated hook command). - README Configuration + doctor note + --backend help document the backend and the Agent SDK credit caveat. Tests: agent_sdk parse/threshold/fence/error (fake runner, no live claude); hybrid proves uncertain -> agent-sdk wins and http is never touched. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 19 ++ README.md | 28 ++- crates/tj-cli/src/main.rs | 83 +++++-- crates/tj-core/src/classifier/agent_sdk.rs | 269 +++++++++++++++++++++ crates/tj-core/src/classifier/http.rs | 10 +- crates/tj-core/src/classifier/hybrid.rs | 152 +++++++++--- crates/tj-core/src/classifier/mod.rs | 16 ++ 7 files changed, 518 insertions(+), 59 deletions(-) create mode 100644 crates/tj-core/src/classifier/agent_sdk.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fb0f5d..8237e6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- **`agent-sdk` classifier backend** — subscription-native LLM classification + via the local, already-authenticated `claude` binary, no `ANTHROPIC_API_KEY` + required. `tj_core::classifier::agent_sdk::ClaudeCliClassifier` invokes + `claude -p --model claude-haiku-4-5 --output-format json + --strict-mcp-config`, parses the JSON envelope's `result`, and reuses the + shared verdict parser. Command execution is injected via a `CommandRunner` + trait so the path is unit-testable without shelling out. `from_env()` returns + `None` unless `claude` is on PATH; model overridable with `TJ_AGENT_SDK_MODEL`. + - Wired into `--backend` selection (`ingest-hook`, `classify-worker`) and + added to `install-hooks --backend`, alongside `hybrid` | `api` | `heuristic`. + - **Hybrid fallback is now an ordered chain**: heuristic (≥ 0.7) → `agent-sdk` + (if `claude` on PATH) → `api` (if key) → `pending/`. Reorder via + `TJ_HYBRID_LLM_ORDER` (default `agent-sdk,api`) to prefer the API key. + - **Honest cost note**: since **2026-06-15** a headless `claude -p` run draws + from the separate Agent SDK monthly credit pool (~$20 Pro / $100 Max 5x / + $200 Max 20x at API rates), not the interactive pool. Documented in the + README, `--backend` help, and `doctor`. 
+ ## [0.12.0] ### Added diff --git a/README.md b/README.md index fb2fd0b..a911988 100644 --- a/README.md +++ b/README.md @@ -165,7 +165,7 @@ task-journal pack tj-x9rz1f --mode full | `doctor` | Self-check the install | | `rebuild-state` | Rebuild SQLite from JSONL | | `migrate-project` | Re-key data when a project moves on disk | -| `install-hooks [--scope user\|project]` | Wire Claude Code auto-capture hooks | +| `install-hooks [--scope user\|project] [--backend hybrid\|agent-sdk\|api\|heuristic]` | Wire Claude Code auto-capture hooks | ## MCP tools @@ -181,10 +181,32 @@ The MCP server exposes five tools to Claude Code (and any MCP client): ## Configuration +### Classifier backends + +The auto-capture classifier (a best-effort backstop — explicit self-tagging via the +MCP tools is the primary path) has a heuristic stage plus an optional LLM stage. The +LLM stage has **two** ways to reach a model, pick via `--backend` on `install-hooks` +or `ingest-hook`: + +- **`agent-sdk`** — classify via the local, already-logged-in `claude` binary. **No + `ANTHROPIC_API_KEY` needed** — it rides your Claude subscription. Pinned to Haiku. + ⚠️ Since **2026-06-15** a headless `claude -p` run draws from the separate **Agent + SDK** monthly credit pool (~$20 Pro / $100 Max 5x / $200 Max 20x, at API rates), + not the interactive pool. Classification is Haiku-class and tiny (a few hundred + tokens per chunk), so the credit lasts a long time — but it is not strictly free. +- **`api`** — call the Anthropic API directly. Requires `ANTHROPIC_API_KEY`. + +`--backend=hybrid` (the default) runs the heuristic first, then falls through the LLM +chain `agent-sdk → api`, using whichever backends are available. Reorder the chain +with `TJ_HYBRID_LLM_ORDER` (e.g. `api,agent-sdk` to prefer the API key). With no LLM +backend available, the heuristic still runs and ambiguous chunks queue in `pending/`. 
+ | Env var | Effect | Default | |---------|--------|---------| -| `ANTHROPIC_API_KEY` | Powers the API stage of `--backend=hybrid` (default) and is required for `--backend=api`. Without it, only the offline heuristic runs and ambiguous chunks land in the local pending queue. | _unset_ | -| `TJ_CLASSIFIER_MODEL` | Override the Anthropic model used by the API stage. | `claude-haiku-4-5-20251001` | +| `ANTHROPIC_API_KEY` | Enables the `api` LLM backend (and the `api` link of the hybrid chain). Optional — the `agent-sdk` backend needs no key. | _unset_ | +| `TJ_AGENT_SDK_MODEL` | Override the model the `agent-sdk` backend passes to `claude --model`. | `claude-haiku-4-5` | +| `TJ_HYBRID_LLM_ORDER` | Comma-separated fallback order for `--backend=hybrid`. | `agent-sdk,api` | +| `TJ_CLASSIFIER_MODEL` | Override the Anthropic model used by the `api` backend. | `claude-haiku-4-5-20251001` | | `TJ_AUTO_OPEN_TASKS` | Set to `0` / `false` to disable auto-opening a task from `UserPromptSubmit` when no open task exists. | `1` | ## Event types diff --git a/crates/tj-cli/src/main.rs b/crates/tj-cli/src/main.rs index 3042286..fd09ee4 100644 --- a/crates/tj-cli/src/main.rs +++ b/crates/tj-cli/src/main.rs @@ -506,9 +506,10 @@ fn run_doctor() -> Result { } Ok(_) | Err(_) => { notes.push( - "claude CLI not on PATH — that's fine if you use the API backend \ - (set ANTHROPIC_API_KEY). For the CLI backend (free with Pro/Max), \ - install Claude Code from https://claude.com/claude-code" + "claude CLI not on PATH — that's fine if you use the `api` backend \ + (set ANTHROPIC_API_KEY). For the `agent-sdk` backend (no API key; \ + uses your Claude login, drawing the Agent SDK credit pool since \ + 2026-06-15), install Claude Code from https://claude.com/claude-code" .into(), ); (false, None) @@ -736,6 +737,12 @@ enum Commands { /// `task-journal backfill` afterwards. Onboarding shortcut. 
#[arg(long)] backfill: bool, + /// Classifier backend baked into the installed hook command: + /// "hybrid" (default), "agent-sdk", "api", or "heuristic". Use + /// "agent-sdk" to classify via the local `claude` login without an + /// ANTHROPIC_API_KEY (see `ingest-hook --help` for the credit note). + #[arg(long, default_value = "hybrid")] + backend: String, }, /// Show local classifier and journal statistics. Stats, @@ -838,13 +845,18 @@ enum Commands { #[arg(long)] text: Option, /// Classifier backend: - /// - "hybrid" (default) — keyword heuristic first (free, offline); - /// Anthropic API fallback when uncertain (needs ANTHROPIC_API_KEY). - /// - "api" — always call the Anthropic API. Best quality, paid. + /// - "hybrid" (default) — keyword heuristic first (free, offline), + /// then the configured LLM fallback chain (agent-sdk, then api; + /// reorder with TJ_HYBRID_LLM_ORDER). Only available backends run. + /// - "agent-sdk" — classify via the local, already-logged-in `claude` + /// binary; no ANTHROPIC_API_KEY needed. Pinned to Haiku (override + /// with TJ_AGENT_SDK_MODEL). NOTE: since 2026-06-15 a headless + /// `claude -p` draws from the separate Agent SDK monthly credit + /// pool (~$20 Pro / $100 Max 5x / $200 Max 20x at API rates), not + /// the interactive pool. Classification is tiny, so it lasts. + /// - "api" — always call the Anthropic API. Needs ANTHROPIC_API_KEY. /// - "heuristic" — heuristic only, no LLM. Fastest, lowest coverage. - /// - "cli" — deprecated alias for hybrid. The old `claude -p` path - /// was removed in v0.8.0 because Anthropic now bills it - /// separately from Pro/Max. + /// - "cli" — removed in v0.8.0; use "agent-sdk" (its resurrection). #[arg(long, default_value = "hybrid")] backend: String, /// Test/dev override: bypass classifier and force this event type. Hidden from --help. @@ -864,7 +876,8 @@ enum Commands { /// at a time. Hidden from --help; not a public API. 
#[command(hide = true)] ClassifyWorker { - /// Classifier backend: "hybrid", "api", or "heuristic". Defaults to hybrid. + /// Classifier backend: "hybrid", "agent-sdk", "api", or "heuristic". + /// Defaults to hybrid. #[arg(long, default_value = "hybrid")] backend: String, }, @@ -1378,6 +1391,7 @@ fn main() -> Result<()> { scope, uninstall, backfill, + backend, } => { let settings_path = match scope.as_str() { "user" => { @@ -1473,11 +1487,25 @@ fn main() -> Result<()> { // at env vars Claude Code never sets and therefore always // fed the classifier empty text. Stdin-only is the correct // wiring (see claude-memory-rsw). - // No --backend flag: the binary's default (hybrid) wins. - // Hybrid = free heuristic first, Anthropic API fallback when - // uncertain. Users wanting always-api can edit settings.json - // and add `--backend=api`. - let cmd = "task-journal ingest-hook || true"; + // Bake the selected backend into the hook command. Default + // "hybrid" stays flag-free (heuristic first, then the agent-sdk + // → api fallback chain). A non-default backend — e.g. + // `--backend=agent-sdk` for subscription users with no API key + // — is passed through so the spawned classify-worker honors it. 
+ if !matches!( + backend.as_str(), + "hybrid" | "agent-sdk" | "api" | "heuristic" + ) { + anyhow::bail!( + "unknown --backend: {backend} (expected `hybrid`, `agent-sdk`, `api`, or `heuristic`)" + ); + } + let cmd_string = if backend == "hybrid" { + "task-journal ingest-hook || true".to_string() + } else { + format!("task-journal ingest-hook --backend={backend} || true") + }; + let cmd = cmd_string.as_str(); let entries = serde_json::json!({ "UserPromptSubmit": [{ "matcher": "", "hooks": [{ "type": "command", "command": cmd }] }], "PostToolUse": [{ "matcher": "", "hooks": [{ "type": "command", "command": cmd }] }], @@ -2241,6 +2269,15 @@ fn main() -> Result<()> { Box::new(tj_core::classifier::hybrid::HybridClassifier::from_env()) } "api" => Box::new(tj_core::classifier::http::AnthropicClassifier::from_env()?), + "agent-sdk" => Box::new( + tj_core::classifier::agent_sdk::ClaudeCliClassifier::from_env() + .ok_or_else(|| { + anyhow::anyhow!( + "agent-sdk backend selected but no `claude` binary on PATH — \ + install Claude Code (https://claude.com/claude-code) or pick another --backend" + ) + })?, + ), "heuristic" => { // Heuristic-only: no LLM at all. Trades coverage // for absolute zero-cost / offline operation. 
@@ -2262,7 +2299,7 @@ fn main() -> Result<()> { Box::new(HeuristicOnly) } other => anyhow::bail!( - "unknown backend: {other} (expected `hybrid`, `api`, or `heuristic`)" + "unknown backend: {other} (expected `hybrid`, `agent-sdk`, `api`, or `heuristic`)" ), }; let input = tj_core::classifier::ClassifyInput { @@ -3859,6 +3896,14 @@ fn process_pending_entry( let classifier: Box = match backend { "hybrid" | "" => Box::new(tj_core::classifier::hybrid::HybridClassifier::from_env()), "api" => Box::new(tj_core::classifier::http::AnthropicClassifier::from_env()?), + "agent-sdk" => Box::new( + tj_core::classifier::agent_sdk::ClaudeCliClassifier::from_env().ok_or_else(|| { + anyhow::anyhow!( + "agent-sdk backend selected but no `claude` binary on PATH — \ + install Claude Code (https://claude.com/claude-code) or pick another --backend" + ) + })?, + ), "heuristic" => { use tj_core::classifier::heuristic::try_heuristic; use tj_core::classifier::{ClassifyInput, ClassifyOutput}; @@ -3874,9 +3919,9 @@ fn process_pending_entry( } Box::new(HeuristicOnly) } - other => { - anyhow::bail!("unknown backend: {other} (expected `hybrid`, `api`, or `heuristic`)") - } + other => anyhow::bail!( + "unknown backend: {other} (expected `hybrid`, `agent-sdk`, `api`, or `heuristic`)" + ), }; let input = tj_core::classifier::ClassifyInput { text: text.clone(), diff --git a/crates/tj-core/src/classifier/agent_sdk.rs b/crates/tj-core/src/classifier/agent_sdk.rs new file mode 100644 index 0000000..b6bb9ff --- /dev/null +++ b/crates/tj-core/src/classifier/agent_sdk.rs @@ -0,0 +1,269 @@ +//! Claude CLI ("agent SDK") classifier backend. +//! +//! Runs the locally-installed, already-authenticated `claude` binary in +//! non-interactive print mode, pinned to Haiku, to classify a chunk *without* +//! an `ANTHROPIC_API_KEY`. This resurrects the v0.7.x `cli` backend that was +//! removed in v0.8.0 — but honestly: since **2026-06-15** a headless +//! 
`claude -p` run draws from the separate **Agent SDK** monthly credit pool +//! (~$20 Pro / $100 Max 5x / $200 Max 20x, at API rates), not the interactive +//! Pro/Max pool. Classification is Haiku-class and tiny (a few hundred tokens +//! per chunk), so the credit lasts a long time — but it is not strictly free. +//! +//! The command execution is abstracted behind [`CommandRunner`] so the parsing +//! path is unit-testable with a fake; the suite never shells out to `claude`. + +use super::{Classifier, ClassifyInput, ClassifyOutput}; +use anyhow::{anyhow, Context}; +use std::process::Command; + +/// Default model. `claude --model` accepts the short alias and resolves it to +/// the current dated id (`claude-haiku-4-5-20251001`). Override with +/// `TJ_AGENT_SDK_MODEL`. +pub const DEFAULT_MODEL: &str = "claude-haiku-4-5"; + +/// "Run the classifier command and hand back its raw stdout." The production +/// impl shells out to `claude`; tests inject a fake returning canned JSON. +pub trait CommandRunner: Send + Sync { + /// Run the classification for `prompt` against `model`, returning the raw + /// stdout (the `--output-format json` wrapper) on success. + fn run(&self, model: &str, prompt: &str) -> anyhow::Result; +} + +/// Production runner: invokes the local `claude` binary in print mode, pinned +/// to the given model, asking for the JSON envelope and an isolated MCP config +/// (`--strict-mcp-config` keeps the project's own MCP servers — including this +/// very journal — out of the classification subprocess). 
+pub struct ClaudeBinaryRunner;
+
+impl CommandRunner for ClaudeBinaryRunner {
+    fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String> {
+        let output = Command::new("claude")
+            .args(["-p", prompt, "--model", model])
+            .args(["--output-format", "json", "--strict-mcp-config"])
+            // When run from a hook this process may hold a piped stdin. The
+            // prompt travels as an argument, so the child needs nothing from
+            // stdin; give it a closed one rather than a pipe it could wait on.
+            .stdin(std::process::Stdio::null())
+            .output()
+            .context("failed to spawn `claude` (is Claude Code installed and on PATH?)")?;
+        if !output.status.success() {
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            anyhow::bail!("`claude -p` exited with {}: {}", output.status, stderr.trim());
+        }
+        Ok(String::from_utf8_lossy(&output.stdout).into_owned())
+    }
+}
+
+/// LLM classifier that sends the prompt through a [`CommandRunner`] (the local
+/// `claude` binary in production) using the model it was constructed with.
+pub struct ClaudeCliClassifier {
+    model: String,
+    runner: Box<dyn CommandRunner>,
+}
+
+impl ClaudeCliClassifier {
+    /// Build from environment. `None` unless `claude` is on PATH (probed with
+    /// `claude --version`), so the caller can fall through to the next backend.
+    /// Model: `TJ_AGENT_SDK_MODEL`, or Haiku when that is unset or blank.
+    pub fn from_env() -> Option<Self> {
+        if !claude_on_path() {
+            return None;
+        }
+        let model = std::env::var("TJ_AGENT_SDK_MODEL")
+            .ok()
+            .filter(|m| !m.trim().is_empty())
+            .unwrap_or_else(|| DEFAULT_MODEL.into());
+        Some(Self {
+            model,
+            runner: Box::new(ClaudeBinaryRunner),
+        })
+    }
+
+    /// Test/dev constructor: inject a fake runner and an explicit model so the
+    /// parse path can be exercised without a live `claude` login.
+    pub fn with_runner(model: impl Into<String>, runner: Box<dyn CommandRunner>) -> Self {
+        Self {
+            model: model.into(),
+            runner,
+        }
+    }
+}
+
+/// The JSON wrapper emitted by `claude --output-format json`. We only need the
+/// error flag and the `result` string (the model's verdict text); the rest of
+/// the envelope (usage, cost, timings) is ignored.
+#[derive(serde::Deserialize)]
+struct CliEnvelope {
+    #[serde(default)]
+    is_error: bool,
+    #[serde(default)]
+    result: Option<String>,
+    #[serde(default)]
+    subtype: Option<String>,
+}
+
+impl Classifier for ClaudeCliClassifier {
+    fn classify(&self, input: &ClassifyInput) -> anyhow::Result<ClassifyOutput> {
+        let prompt = crate::classifier::prompt::build(input);
+        let stdout = self.runner.run(&self.model, &prompt)?;
+        let stdout = stdout.trim();
+        let envelope: CliEnvelope = serde_json::from_str(stdout).with_context(|| {
+            format!("claude --output-format json wrapper parse failed; got: {stdout}")
+        })?;
+        if envelope.is_error {
+            // Keep any `result` text that came with the failure; it may be absent.
+            anyhow::bail!(
+                "claude reported an error (subtype={}): {}",
+                envelope.subtype.as_deref().unwrap_or("unknown"),
+                envelope.result.as_deref().unwrap_or("no detail provided")
+            );
+        }
+        let verdict = envelope
+            .result
+            .ok_or_else(|| anyhow!("claude json wrapper had no `result` field"))?;
+        super::parse_verdict(&verdict)
+    }
+}
+
+/// Probe whether `claude` resolves on PATH and runs. Cheap (`--version` does
+/// no network) and tolerant — any spawn/exec failure means "not available".
+fn claude_on_path() -> bool {
+    Command::new("claude")
+        .arg("--version")
+        .output()
+        .map(|o| o.status.success())
+        .unwrap_or(false)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::classifier::{decide_status, CONFIDENCE_THRESHOLD};
+    use crate::event::{EventStatus, EventType};
+
+    /// Fake runner: returns canned stdout, ignoring model/prompt. Captures the
+    /// model it was asked for so tests can assert the pin.
+ struct FakeRunner { + canned: String, + seen_model: std::sync::Mutex>, + } + + impl FakeRunner { + fn new(canned: impl Into) -> Self { + Self { + canned: canned.into(), + seen_model: std::sync::Mutex::new(None), + } + } + } + + impl CommandRunner for FakeRunner { + fn run(&self, model: &str, _prompt: &str) -> anyhow::Result { + *self.seen_model.lock().unwrap() = Some(model.to_string()); + Ok(self.canned.clone()) + } + } + + fn input() -> ClassifyInput { + ClassifyInput { + text: "We adopted Rust for the journal core.".into(), + author_hint: "assistant".into(), + recent_tasks: vec![], + } + } + + fn envelope(result_json: &str) -> String { + serde_json::json!({ + "type": "result", + "subtype": "success", + "is_error": false, + "result": result_json, + }) + .to_string() + } + + #[test] + fn parses_canned_verdict_into_classify_output() { + let verdict = r#"{"event_type":"decision","task_id_guess":"tj-x","confidence":0.93,"evidence_strength":null,"suggested_text":"Adopt Rust."}"#; + let c = ClaudeCliClassifier::with_runner( + DEFAULT_MODEL, + Box::new(FakeRunner::new(envelope(verdict))), + ); + let out = c.classify(&input()).unwrap(); + assert_eq!(out.event_type, EventType::Decision); + assert_eq!(out.task_id_guess.as_deref(), Some("tj-x")); + assert!((out.confidence - 0.93).abs() < 1e-6); + // 0.93 >= 0.85 → confirmed. + assert_eq!(decide_status(out.confidence), EventStatus::Confirmed); + } + + /// Adapter so a test can keep an `Arc` handle to inspect the runner after + /// it is boxed into the classifier. 
+ struct ArcRunner(std::sync::Arc); + impl CommandRunner for ArcRunner { + fn run(&self, model: &str, prompt: &str) -> anyhow::Result { + self.0.run(model, prompt) + } + } + + #[test] + fn pins_the_configured_model() { + let verdict = r#"{"event_type":"finding","task_id_guess":null,"confidence":0.9,"evidence_strength":null,"suggested_text":"x"}"#; + let captured = std::sync::Arc::new(FakeRunner::new(envelope(verdict))); + let c = ClaudeCliClassifier::with_runner( + "claude-haiku-4-5", + Box::new(ArcRunner(captured.clone())), + ); + let _ = c.classify(&input()).unwrap(); + assert_eq!( + captured.seen_model.lock().unwrap().as_deref(), + Some("claude-haiku-4-5"), + "classifier must pin the model it was constructed with" + ); + } + + #[test] + fn decide_status_at_the_0_85_threshold() { + for (conf, expect) in [ + (0.85_f64, EventStatus::Confirmed), + (0.84_f64, EventStatus::Suggested), + ] { + let verdict = format!( + r#"{{"event_type":"evidence","task_id_guess":null,"confidence":{conf},"evidence_strength":"strong","suggested_text":"t"}}"# + ); + let c = ClaudeCliClassifier::with_runner( + DEFAULT_MODEL, + Box::new(FakeRunner::new(envelope(&verdict))), + ); + let out = c.classify(&input()).unwrap(); + assert!((out.confidence - conf).abs() < 1e-6); + assert_eq!(decide_status(out.confidence), expect); + assert_eq!(CONFIDENCE_THRESHOLD, 0.85); + } + } + + #[test] + fn tolerates_code_fence_wrapped_verdict() { + let verdict = "```json\n{\"event_type\":\"rejection\",\"task_id_guess\":null,\"confidence\":0.88,\"evidence_strength\":null,\"suggested_text\":\"won't work\"}\n```"; + let c = ClaudeCliClassifier::with_runner( + DEFAULT_MODEL, + Box::new(FakeRunner::new(envelope(verdict))), + ); + let out = c.classify(&input()).unwrap(); + assert_eq!(out.event_type, EventType::Rejection); + } + + #[test] + fn errors_when_claude_reports_is_error() { + let canned = serde_json::json!({ + "type": "result", + "subtype": "error_during_execution", + "is_error": true, + "result": null, + }) 
+ .to_string(); + let c = ClaudeCliClassifier::with_runner(DEFAULT_MODEL, Box::new(FakeRunner::new(canned))); + let err = c.classify(&input()).unwrap_err(); + assert!(format!("{err}").contains("error"), "got: {err}"); + } +} diff --git a/crates/tj-core/src/classifier/http.rs b/crates/tj-core/src/classifier/http.rs index 3956ef8..c828687 100644 --- a/crates/tj-core/src/classifier/http.rs +++ b/crates/tj-core/src/classifier/http.rs @@ -87,15 +87,7 @@ impl Classifier for AnthropicClassifier { .map(|b| b.text.clone()) .ok_or_else(|| anyhow!("no text content in response"))?; - let json_str = text - .trim() - .trim_start_matches("```json") - .trim_start_matches("```") - .trim_end_matches("```") - .trim(); - let out: ClassifyOutput = serde_json::from_str(json_str) - .with_context(|| format!("classifier JSON parse failed; got: {json_str}"))?; - Ok(out) + super::parse_verdict(&text) } } diff --git a/crates/tj-core/src/classifier/hybrid.rs b/crates/tj-core/src/classifier/hybrid.rs index 7d8fa00..3ae5e09 100644 --- a/crates/tj-core/src/classifier/hybrid.rs +++ b/crates/tj-core/src/classifier/hybrid.rs @@ -1,39 +1,69 @@ -//! Hybrid classifier — heuristic-first, LLM fallback. +//! Hybrid classifier — heuristic-first, LLM fallback chain. //! //! Tries the cheap, zero-network heuristic first. If a rule fires with //! confidence >= `min_heuristic_confidence`, returns the heuristic verdict. -//! Otherwise escalates to the HTTP (Anthropic API) backend — which -//! requires `ANTHROPIC_API_KEY`. When no key is set and the heuristic -//! is uncertain, the classifier errors out (caller should drop the -//! chunk into the pending queue for later retry rather than guess). +//! Otherwise it walks an ordered chain of LLM backends and returns the first +//! that succeeds: //! -//! This replaces the v0.7.x `cli` backend that relied on `claude -p`. -//! Anthropic changed `claude -p` to bill against tokens separately -//! from the Pro/Max subscription, breaking the "free fallback" promise -//! 
the cli backend was built on. +//! heuristic (>= 0.7) → agent-sdk (local `claude` login) → api (key) → bail +//! +//! The order is configurable via `TJ_HYBRID_LLM_ORDER` (default +//! `"agent-sdk,api"`); set it to `"api,agent-sdk"` to prefer the API key when +//! both are available. Only *available* backends join the chain (agent-sdk +//! needs `claude` on PATH; api needs `ANTHROPIC_API_KEY`). When the chain is +//! empty and the heuristic is uncertain, the classifier errors out and the +//! caller drops the chunk into the pending queue for later retry. +//! +//! The `agent-sdk` backend resurrects the v0.7.x `claude -p` path that was +//! removed in v0.8.0 — see [`super::agent_sdk`] for the honest note on the +//! post-2026-06-15 Agent SDK credit pool. +use super::agent_sdk::ClaudeCliClassifier; use super::heuristic::try_heuristic; +#[cfg(test)] use super::http::AnthropicClassifier; use super::{Classifier, ClassifyInput, ClassifyOutput}; /// Confidence the heuristic must reach to skip the LLM fallback. Below -/// this, the chunk is ambiguous enough that the API call is worth the -/// cost. +/// this, the chunk is ambiguous enough that the LLM call is worth the cost. const DEFAULT_MIN_HEURISTIC_CONFIDENCE: f64 = 0.7; +/// Default fallback order when `TJ_HYBRID_LLM_ORDER` is unset: prefer the +/// subscription-native agent-sdk backend over the paid API key. +const DEFAULT_LLM_ORDER: &str = "agent-sdk,api"; + pub struct HybridClassifier { - http: Option, + /// Ordered LLM fallbacks, tried after the heuristic is uncertain. The + /// first to return `Ok` wins. Empty = heuristic-only (uncertain → bail). + llm_chain: Vec>, min_heuristic_confidence: f64, } impl HybridClassifier { - /// Build from environment. Picks up `ANTHROPIC_API_KEY` if present; - /// without it, the hybrid still works for chunks the heuristic - /// handles confidently, but uncertain chunks will fail (caller - /// queues them in pending/). + /// Build from environment. 
The LLM chain is assembled from + /// `TJ_HYBRID_LLM_ORDER` (default `agent-sdk,api`), including only the + /// backends that are actually available right now. pub fn from_env() -> Self { + let order = + std::env::var("TJ_HYBRID_LLM_ORDER").unwrap_or_else(|_| DEFAULT_LLM_ORDER.into()); + let mut llm_chain: Vec> = Vec::new(); + for kind in order.split(',').map(str::trim) { + match kind { + "agent-sdk" => { + if let Some(c) = ClaudeCliClassifier::from_env() { + llm_chain.push(Box::new(c)); + } + } + "api" => { + if let Ok(c) = super::http::AnthropicClassifier::from_env() { + llm_chain.push(Box::new(c)); + } + } + _ => {} // unknown token: ignore rather than fail the hook + } + } Self { - http: AnthropicClassifier::from_env().ok(), + llm_chain, min_heuristic_confidence: DEFAULT_MIN_HEURISTIC_CONFIDENCE, } } @@ -42,14 +72,29 @@ impl HybridClassifier { /// (e.g. one pointed at a mock server) without touching env vars. #[cfg(test)] pub fn with_http(http: Option, min_conf: f64) -> Self { + let llm_chain: Vec> = match http { + Some(h) => vec![Box::new(h)], + None => vec![], + }; Self { - http, + llm_chain, + min_heuristic_confidence: min_conf, + } + } + + /// Test-only constructor — supply the LLM fallback chain directly (e.g. an + /// agent-sdk classifier backed by a fake runner, followed by a panicking + /// double to prove it is never reached). 
+ #[cfg(test)] + pub fn with_llm_chain(llm_chain: Vec>, min_conf: f64) -> Self { + Self { + llm_chain, min_heuristic_confidence: min_conf, } } pub fn has_llm_fallback(&self) -> bool { - self.http.is_some() + !self.llm_chain.is_empty() } } @@ -60,19 +105,29 @@ impl Classifier for HybridClassifier { return Ok(out); } } - match &self.http { - Some(h) => h.classify(input), - None => anyhow::bail!( - "hybrid: heuristic uncertain and ANTHROPIC_API_KEY not set — \ + if self.llm_chain.is_empty() { + anyhow::bail!( + "hybrid: heuristic uncertain and no LLM backend available \ + (no `claude` on PATH for agent-sdk, no ANTHROPIC_API_KEY for api) — \ chunk left in pending queue for later retry" - ), + ); } + let mut last_err = None; + for backend in &self.llm_chain { + match backend.classify(input) { + Ok(out) => return Ok(out), + Err(e) => last_err = Some(e), + } + } + // The chain is non-empty, so at least one backend ran and errored. + Err(last_err.expect("non-empty chain must produce an error on full failure")) } } #[cfg(test)] mod tests { use super::*; + use crate::classifier::agent_sdk::{ClaudeCliClassifier, CommandRunner}; use crate::classifier::TaskContext; use crate::event::EventType; @@ -91,8 +146,7 @@ mod tests { #[test] fn heuristic_hit_skips_http_even_when_available() { - // Build a hybrid with `http` set to a *dummy* that would error if called. - // Heuristic catches the decision phrase, so http never runs. + // Heuristic catches the decision phrase, so the (empty) chain never runs. let hybrid = HybridClassifier::with_http(None, 0.7); let out = hybrid .classify(&ctx( @@ -120,17 +174,59 @@ mod tests { #[test] fn from_env_constructs_without_key() { // SAFETY: tests in this crate do not concurrently read these env vars. 
- let prev = std::env::var("ANTHROPIC_API_KEY").ok(); + let prev_key = std::env::var("ANTHROPIC_API_KEY").ok(); + // Force heuristic-only by disabling both LLM backends via an order that + // names no real one, so this stays deterministic regardless of whether + // a `claude` binary happens to be on the test machine's PATH. + let prev_order = std::env::var("TJ_HYBRID_LLM_ORDER").ok(); unsafe { std::env::remove_var("ANTHROPIC_API_KEY"); + std::env::set_var("TJ_HYBRID_LLM_ORDER", "none"); } let hybrid = HybridClassifier::from_env(); assert!(!hybrid.has_llm_fallback()); unsafe { - match prev { + match prev_key { Some(v) => std::env::set_var("ANTHROPIC_API_KEY", v), None => std::env::remove_var("ANTHROPIC_API_KEY"), } + match prev_order { + Some(v) => std::env::set_var("TJ_HYBRID_LLM_ORDER", v), + None => std::env::remove_var("TJ_HYBRID_LLM_ORDER"), + } } } + + #[test] + fn uncertain_heuristic_prefers_agent_sdk_and_never_touches_http() { + // agent-sdk (backed by a fake runner) returns Ok first; the http double + // panics if reached — proving the chain stops at the first success. 
+ struct OkRunner; + impl CommandRunner for OkRunner { + fn run(&self, _model: &str, _prompt: &str) -> anyhow::Result { + Ok(serde_json::json!({ + "type": "result", + "is_error": false, + "result": r#"{"event_type":"decision","task_id_guess":null,"confidence":0.9,"evidence_strength":null,"suggested_text":"Adopt X."}"#, + }) + .to_string()) + } + } + struct PanicBackend; + impl Classifier for PanicBackend { + fn classify(&self, _input: &ClassifyInput) -> anyhow::Result { + panic!("http backend must not be reached when agent-sdk succeeds"); + } + } + + let agent = ClaudeCliClassifier::with_runner("claude-haiku-4-5", Box::new(OkRunner)); + let hybrid = + HybridClassifier::with_llm_chain(vec![Box::new(agent), Box::new(PanicBackend)], 0.7); + let out = hybrid + .classify(&ctx( + "Browsing the call site of refundProcessor to understand the dispatch.", + )) + .unwrap(); + assert_eq!(out.event_type, EventType::Decision); + } } diff --git a/crates/tj-core/src/classifier/mod.rs b/crates/tj-core/src/classifier/mod.rs index 72d9bcd..4748d17 100644 --- a/crates/tj-core/src/classifier/mod.rs +++ b/crates/tj-core/src/classifier/mod.rs @@ -54,6 +54,22 @@ pub fn decide_status(confidence: f64) -> EventStatus { } } +/// Parse a model's raw text reply into a strict-JSON `ClassifyOutput`, +/// tolerating ```json code-fence wrapping. Shared by the HTTP and agent-sdk +/// backends so the two never diverge on how they read the verdict. +pub(crate) fn parse_verdict(text: &str) -> anyhow::Result { + use anyhow::Context; + let json_str = text + .trim() + .trim_start_matches("```json") + .trim_start_matches("```") + .trim_end_matches("```") + .trim(); + serde_json::from_str(json_str) + .with_context(|| format!("classifier JSON parse failed; got: {json_str}")) +} + +pub mod agent_sdk; pub mod heuristic; pub mod http; pub mod hybrid;