From b8ba7ea595453b65146b919dd1683b2c186a74af Mon Sep 17 00:00:00 2001 From: Mher Shahinyan Date: Sat, 13 Jun 2026 12:48:51 +0400 Subject: [PATCH] =?UTF-8?q?feat(memory):=20consolidation=20via=20direct=20?= =?UTF-8?q?Haiku=20API=20=E2=80=94=20Pillar=20C=20complete=20(0.19.0)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `task-journal consolidate` distils a project's recurring decisions and constraints into durable semantic/procedural facts (bd claude-memory-8oy). - tj-core::consolidate: a strict summarisation prompt + parser + a direct Anthropic Haiku API call (mockito-tested). Direct API, not `claude -p`: post-2026-06-15 both bill as extra usage, but claude -p also boots the whole environment (~tens of k tokens) per call — the direct API sends only our ~7k-token prompt (~1c/run vs 5-10c). - tj-core::db: high_signal_events / find_task_by_title / task_event_texts. - tj-cli `consolidate [--max-facts N]`: gathers decisions/constraints/rejections, calls Haiku once, stores facts as events (tier=semantic|procedural, consolidated=true, derived_from=[source event ids]) in a reusable per-project "Project conventions (consolidated)" task, then embeds + syncs them to the global recall index. De-dups on re-run. SAFETY: manual command only, one call per run, never on a hook → cannot spend automatically. No ANTHROPIC_API_KEY => skips cleanly, writes nothing. No heuristic fallback (would manufacture noise). Tests: parse/build/empty/mock API call (core, 5); CLI consolidate stores + de-dups + surfaces in recall, and skips without a key spending nothing (2). Clean on default and --no-default-features. This completes Pillar C (preferences + consolidation) and the core memory platform. 1.0 remains a separate, later decision (real-world soak + API freeze + WSL project_hash fix). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 26 ++++ Cargo.lock | 7 +- Cargo.toml | 2 +- crates/tj-cli/Cargo.toml | 3 +- crates/tj-cli/src/main.rs | 119 +++++++++++++++ crates/tj-cli/tests/cli.rs | 151 +++++++++++++++++++ crates/tj-core/src/consolidate.rs | 241 ++++++++++++++++++++++++++++++ crates/tj-core/src/db.rs | 46 ++++++ crates/tj-core/src/lib.rs | 1 + crates/tj-mcp/Cargo.toml | 2 +- plugin/.claude-plugin/plugin.json | 2 +- 11 files changed, 593 insertions(+), 7 deletions(-) create mode 100644 crates/tj-core/src/consolidate.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 8009ffb..a281257 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,32 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.19.0] - 2026-06-13 + +### Added +- **Consolidation — Pillar C complete.** `task-journal consolidate` distils a + project's recurring decisions and constraints into a handful of durable + **semantic** / **procedural** facts ("refunds always go through the ledger", + "PR into main, squash-merge"), stored as events in a per-project + *"Project conventions (consolidated)"* task with provenance + (`derived_from`), and surfaced in `ask`/`recall`. + - **Manual and opt-in.** It makes exactly **one direct Anthropic Haiku API + call per run, only when you run it** — never wired to a hook, so it can + never spend automatically. Roughly 1¢ per run. + - The **direct API** (not `claude -p`) is used on purpose: post-2026-06-15 + both bill as extra usage, but `claude -p` also boots the whole environment + (~tens of k tokens) per call; the direct API sends only the ~7k-token + prompt. + - **No `ANTHROPIC_API_KEY` → it skips cleanly** with a message and writes + nothing. There is no heuristic fallback (it would manufacture low-trust + facts). Re-running de-duplicates. + - `--max-facts N` caps output; `TJ_CONSOLIDATE_MODEL` overrides the model. 
+ +### Internal +- `tj-core::consolidate` (prompt, parse, direct-API call, mockito-tested) + + `db::high_signal_events` / `find_task_by_title` / `task_event_texts`. CLI + `consolidate`. + ## [0.18.0] - 2026-06-12 ### Added diff --git a/Cargo.lock b/Cargo.lock index 8ad55ec..529d049 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2572,7 +2572,7 @@ dependencies = [ [[package]] name = "task-journal-cli" -version = "0.18.0" +version = "0.19.0" dependencies = [ "anyhow", "assert_cmd", @@ -2581,6 +2581,7 @@ dependencies = [ "clap", "crossterm", "libc", + "mockito", "predicates", "ratatui", "rusqlite", @@ -2595,7 +2596,7 @@ dependencies = [ [[package]] name = "task-journal-core" -version = "0.18.0" +version = "0.19.0" dependencies = [ "anyhow", "chrono", @@ -2620,7 +2621,7 @@ dependencies = [ [[package]] name = "task-journal-mcp" -version = "0.18.0" +version = "0.19.0" dependencies = [ "anyhow", "chrono", diff --git a/Cargo.toml b/Cargo.toml index d0854c7..a4ae95d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [ ] [workspace.package] -version = "0.18.0" +version = "0.19.0" edition = "2021" rust-version = "1.88" license = "MIT" diff --git a/crates/tj-cli/Cargo.toml b/crates/tj-cli/Cargo.toml index 21c7c7e..709658c 100644 --- a/crates/tj-cli/Cargo.toml +++ b/crates/tj-cli/Cargo.toml @@ -23,7 +23,7 @@ default = ["embed"] embed = ["tj-core/embed"] [dependencies] -tj-core = { package = "task-journal-core", version = "0.18.0", path = "../tj-core", default-features = false } +tj-core = { package = "task-journal-core", version = "0.19.0", path = "../tj-core", default-features = false } anyhow = { workspace = true } clap = { workspace = true } tracing = { workspace = true } @@ -45,3 +45,4 @@ assert_fs = { workspace = true } predicates = { workspace = true } assert_cmd = ">=2, <2.2.1" rusqlite = { workspace = true } +mockito = { workspace = true } diff --git a/crates/tj-cli/src/main.rs b/crates/tj-cli/src/main.rs index 29059a7..c1965ea 100644 --- 
a/crates/tj-cli/src/main.rs +++ b/crates/tj-cli/src/main.rs @@ -652,6 +652,16 @@ enum Commands { }, /// List your stored user preferences. Preferences, + /// Distil this project's recurring decisions and constraints into durable + /// semantic/procedural facts (Pillar C). MANUAL and opt-in — it makes ONE + /// direct Haiku API call per run (needs ANTHROPIC_API_KEY; ~1c/run) and is + /// never wired to a hook, so it can't spend automatically. Facts are stored + /// as events in a per-project "conventions" task and surface in ask/recall. + Consolidate { + /// Maximum number of facts to produce. + #[arg(long, default_value_t = 8)] + max_facts: usize, + }, /// Render and print the resume pack for a task. Pack { /// Task id (e.g. tj-7f3a). @@ -1284,6 +1294,9 @@ fn main() -> Result<()> { } } } + Commands::Consolidate { max_facts } => { + run_consolidate(max_facts)?; + } Commands::Event { task_id, r#type, @@ -3904,6 +3917,112 @@ fn emit_session_context(ctx: &str) { println!("{env}"); } +const CONSOLIDATE_TASK_TITLE: &str = "Project conventions (consolidated)"; + +/// Manual consolidation: read this project's recurring decisions/constraints, +/// distil them into durable facts via one direct Haiku API call, and store the +/// facts as events in a per-project conventions task. Skips cleanly (no spend) +/// when ANTHROPIC_API_KEY is absent. 
+fn run_consolidate(max_facts: usize) -> anyhow::Result<()> { + let cwd = std::env::current_dir()?; + let project_hash = tj_core::project_hash::from_path(&cwd)?; + let events_path = tj_core::paths::events_dir()?.join(format!("{project_hash}.jsonl")); + let state_path = tj_core::paths::state_dir()?.join(format!("{project_hash}.sqlite")); + if !events_path.exists() { + anyhow::bail!("no events file at {events_path:?}"); + } + let conn = tj_core::db::open(&state_path)?; + tj_core::db::ingest_new_events(&conn, &events_path, &project_hash)?; + + let sources = tj_core::db::high_signal_events(&conn, 200)?; + if sources.is_empty() { + println!("nothing to consolidate — no decisions/constraints/rejections recorded yet"); + return Ok(()); + } + let texts: Vec = sources.iter().map(|(_, t)| t.clone()).collect(); + let source_ids: Vec = sources.iter().map(|(id, _)| id.clone()).collect(); + + let consolidator = match tj_core::consolidate::Consolidator::from_env(max_facts) { + Ok(c) => c, + Err(e) => { + println!("skipped: {e}. Set ANTHROPIC_API_KEY to enable consolidation (~1c/run)."); + return Ok(()); + } + }; + eprintln!( + "consolidating {} high-signal event(s) via {} …", + texts.len(), + consolidator.model + ); + let facts = consolidator.consolidate(&texts)?; + if facts.is_empty() { + println!("no durable facts found"); + return Ok(()); + } + + // Reuse the per-project conventions task, or create it. + let task_id = match tj_core::db::find_task_by_title(&conn, CONSOLIDATE_TASK_TITLE)? 
{ + Some(id) => id, + None => { + let id = tj_core::new_task_id(); + let mut ev = tj_core::event::Event::new( + id.clone(), + tj_core::event::EventType::Open, + tj_core::event::Author::User, + tj_core::event::Source::Cli, + CONSOLIDATE_TASK_TITLE.to_string(), + ); + ev.meta = serde_json::json!({ "title": CONSOLIDATE_TASK_TITLE }); + let mut w = tj_core::storage::JsonlWriter::open(&events_path)?; + w.append(&ev)?; + w.flush_durable()?; + tj_core::db::ingest_new_events(&conn, &events_path, &project_hash)?; + id + } + }; + + // De-dup against facts already stored in the conventions task. + let existing: std::collections::HashSet = + tj_core::db::task_event_texts(&conn, &task_id)? + .into_iter() + .collect(); + + let mut writer = tj_core::storage::JsonlWriter::open(&events_path)?; + let mut written = 0usize; + for f in &facts { + if existing.contains(&f.text) { + continue; + } + let mut ev = tj_core::event::Event::new( + task_id.clone(), + tj_core::event::EventType::Finding, + tj_core::event::Author::Agent, + tj_core::event::Source::Cli, + f.text.clone(), + ); + ev.meta = serde_json::json!({ + "memory_tier": f.tier, + "consolidated": true, + "derived_from": source_ids, + }); + writer.append(&ev)?; + written += 1; + } + writer.flush_durable()?; + + // Index the new facts and push them to the global recall index. 
+ tj_core::db::ingest_new_events(&conn, &events_path, &project_hash)?; + let embedder = tj_core::embed::default_embedder(); + let now = chrono::Utc::now().to_rfc3339(); + tj_core::db::embed_pending(&conn, &project_hash, embedder.as_ref(), &now, 512)?; + sync_global_memory(&conn, &project_hash); + + println!( + "consolidated {written} new fact(s) into task {task_id} (\"{CONSOLIDATE_TASK_TITLE}\")" + ); + Ok(()) +} + fn auto_open_task_from_prompt( events_path: &std::path::Path, project_hash: &str, diff --git a/crates/tj-cli/tests/cli.rs b/crates/tj-cli/tests/cli.rs index 532180f..3d4ab96 100644 --- a/crates/tj-cli/tests/cli.rs +++ b/crates/tj-cli/tests/cli.rs @@ -5209,3 +5209,154 @@ fn stats_reports_memory_preferences_count() { .success() .stdout(contains("preferences: 1")); } + +#[test] +fn consolidate_writes_facts_to_conventions_task_and_dedups() { + // Pillar C: `consolidate` distils decisions into durable facts via one + // (mocked) Haiku call and stores them in a per-project conventions task. + // Re-running de-dups. TJ_CONSOLIDATE_BASE_URL points at the mock; TJ_EMBED + // forces the deterministic embedder. 
+ let mut server = mockito::Server::new(); + let mock = server + .mock("POST", "/v1/messages") + .with_status(200) + .with_header("content-type", "application/json") + .with_body( + serde_json::json!({ + "id": "m", "type": "message", "role": "assistant", + "content": [{"type": "text", + "text": "[semantic] Refunds always route through the idempotent ledger\n[procedural] PR into main, squash-merge"}] + }) + .to_string(), + ) + .expect_at_least(1) + .create(); + + let xdg = assert_fs::TempDir::new().unwrap(); + let proj = assert_fs::TempDir::new().unwrap(); + + let tid = String::from_utf8( + Command::cargo_bin("task-journal") + .unwrap() + .current_dir(proj.path()) + .env("XDG_DATA_HOME", xdg.path()) + .args(["create", "Payments"]) + .assert() + .success() + .get_output() + .stdout + .clone(), + ) + .unwrap() + .trim() + .to_string(); + Command::cargo_bin("task-journal") + .unwrap() + .current_dir(proj.path()) + .env("XDG_DATA_HOME", xdg.path()) + .args([ + "event", + &tid, + "--type", + "decision", + "--text", + "chose the idempotent ledger for refunds", + ]) + .assert() + .success(); + + let run = || { + Command::cargo_bin("task-journal") + .unwrap() + .current_dir(proj.path()) + .env("XDG_DATA_HOME", xdg.path()) + .env("ANTHROPIC_API_KEY", "test-key") + .env("TJ_CONSOLIDATE_BASE_URL", server.url()) + .env("TJ_EMBED", "hash") + .args(["consolidate"]) + .assert() + .success() + .get_output() + .stdout + .clone() + }; + + let first = String::from_utf8(run()).unwrap(); + assert!( + first.contains("consolidated 2 new fact(s)"), + "first run must store 2 facts; got: {first:?}" + ); + // Second run: same facts already present -> de-duped to 0. + let second = String::from_utf8(run()).unwrap(); + assert!( + second.contains("consolidated 0 new fact(s)"), + "second run must de-dup; got: {second:?}" + ); + mock.assert(); + + // The fact is now recallable. 
+ let recall = String::from_utf8( + Command::cargo_bin("task-journal") + .unwrap() + .env("XDG_DATA_HOME", xdg.path()) + .env("TJ_EMBED", "hash") + .args(["recall", "refund ledger idempotent", "--k", "3"]) + .assert() + .success() + .get_output() + .stdout + .clone(), + ) + .unwrap(); + assert!( + recall.contains("ledger"), + "consolidated fact must surface in cross-project recall; got: {recall:?}" + ); +} + +#[test] +fn consolidate_skips_without_api_key_and_spends_nothing() { + // Safety: with no ANTHROPIC_API_KEY, consolidate makes no call and creates + // no facts — it can never spend automatically. + let xdg = assert_fs::TempDir::new().unwrap(); + let proj = assert_fs::TempDir::new().unwrap(); + let tid = String::from_utf8( + Command::cargo_bin("task-journal") + .unwrap() + .current_dir(proj.path()) + .env("XDG_DATA_HOME", xdg.path()) + .args(["create", "Scheduler"]) + .assert() + .success() + .get_output() + .stdout + .clone(), + ) + .unwrap() + .trim() + .to_string(); + Command::cargo_bin("task-journal") + .unwrap() + .current_dir(proj.path()) + .env("XDG_DATA_HOME", xdg.path()) + .args([ + "event", + &tid, + "--type", + "decision", + "--text", + "use postgres advisory locks for cron", + ]) + .assert() + .success(); + + Command::cargo_bin("task-journal") + .unwrap() + .current_dir(proj.path()) + .env("XDG_DATA_HOME", xdg.path()) + .env_remove("ANTHROPIC_API_KEY") + .args(["consolidate"]) + .assert() + .success() + .stdout(contains("skipped")); +} diff --git a/crates/tj-core/src/consolidate.rs b/crates/tj-core/src/consolidate.rs new file mode 100644 index 0000000..c1935f2 --- /dev/null +++ b/crates/tj-core/src/consolidate.rs @@ -0,0 +1,241 @@ +//! Memory consolidation (Pillar C): distil a project's recurring decisions and +//! constraints into a handful of durable semantic/procedural facts via a direct +//! Anthropic Haiku API call. +//! +//! Direct API, not `claude -p`: post-2026-06-15 both bill as extra usage, but +//! 
`claude -p` also boots the whole user environment (~tens of k tokens) on +//! every call, while the direct API sends only our ~7k-token prompt — roughly +//! 1c per run versus 5-10c. This is a MANUAL command (one call per run, only +//! when the user asks), so it never resembles the per-prompt classifier burn. +//! No `ANTHROPIC_API_KEY` → the caller skips cleanly; we never fall back to a +//! heuristic, which would manufacture low-trust "facts". + +use anyhow::{anyhow, Context}; +use serde::{Deserialize, Serialize}; +use std::time::Duration; + +/// Cheapest capable model for the summarisation step. +pub const DEFAULT_MODEL: &str = "claude-haiku-4-5-20251001"; + +/// A distilled fact and which tier it belongs to. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ConsolidatedFact { + /// "semantic" (a durable truth about the system) or "procedural" (how the + /// team works). + pub tier: String, + pub text: String, +} + +/// Direct-API consolidator. +pub struct Consolidator { + pub api_key: String, + pub model: String, + pub base_url: String, + pub timeout: Duration, + pub max_facts: usize, +} + +impl Consolidator { + /// Build from the environment. Errors (so the caller can skip cleanly) when + /// `ANTHROPIC_API_KEY` is absent. Model overridable via `TJ_CONSOLIDATE_MODEL`. + pub fn from_env(max_facts: usize) -> anyhow::Result { + let api_key = std::env::var("ANTHROPIC_API_KEY").map_err(|_| { + anyhow!("consolidation needs ANTHROPIC_API_KEY for the direct Haiku API") + })?; + let model = std::env::var("TJ_CONSOLIDATE_MODEL").unwrap_or_else(|_| DEFAULT_MODEL.into()); + // TJ_CONSOLIDATE_BASE_URL overrides the endpoint (used by tests to point + // at a local mock); production always hits the real Anthropic API. 
+ let base_url = std::env::var("TJ_CONSOLIDATE_BASE_URL") + .unwrap_or_else(|_| "https://api.anthropic.com".into()); + Ok(Self { + api_key, + model, + base_url, + timeout: Duration::from_secs(60), + max_facts: max_facts.max(1), + }) + } + + /// Summarise the given event texts into durable facts. Empty input → no + /// call. Returns whatever facts the model produced (possibly none). + pub fn consolidate(&self, events: &[String]) -> anyhow::Result> { + if events.is_empty() { + return Ok(Vec::new()); + } + let prompt = build_prompt(events, self.max_facts); + let body = MessagesRequest { + model: &self.model, + max_tokens: 512, + messages: vec![MessageIn { + role: "user", + content: &prompt, + }], + }; + let url = format!("{}/v1/messages", self.base_url); + let resp: MessagesResponse = ureq::post(&url) + .timeout(self.timeout) + .set("x-api-key", &self.api_key) + .set("anthropic-version", "2023-06-01") + .set("content-type", "application/json") + .send_json(serde_json::to_value(&body)?) + .context("Anthropic API request failed")? + .into_json() + .context("decode Anthropic response")?; + let text = resp + .content + .iter() + .find(|b| b.kind == "text") + .map(|b| b.text.clone()) + .ok_or_else(|| anyhow!("no text content in response"))?; + Ok(parse_facts(&text)) + } +} + +/// The summarisation prompt. Deliberately strict: durable-only, fixed line +/// format, "output nothing" escape hatch so the model doesn't pad. +pub fn build_prompt(events: &[String], max_facts: usize) -> String { + let joined = events + .iter() + .map(|e| format!("- {}", e.trim())) + .collect::>() + .join("\n"); + format!( + "You are given decisions and constraints recorded across ONE software \ +project. 
Distil them into at most {max_facts} DURABLE facts — stable \ +conventions or architectural truths that hold across the project, not one-off \ +details.\n\n\ +Rules:\n\ +- One fact per line.\n\ +- Each line MUST start with `[semantic]` (a durable truth about the system) or \ +`[procedural]` (how the team works).\n\ +- Keep each fact to one short sentence.\n\ +- If nothing is durable enough, output nothing at all.\n\n\ +Decisions and constraints:\n{joined}" + ) +} + +/// Parse the model reply into facts. Accepts lines like `[semantic] ...` or +/// `- [procedural] ...`; ignores anything else. +pub fn parse_facts(text: &str) -> Vec { + let mut out = Vec::new(); + for raw in text.lines() { + let line = raw.trim().trim_start_matches(['-', '*', ' ']).trim(); + for tier in ["semantic", "procedural"] { + let tag = format!("[{tier}]"); + if let Some(rest) = line.strip_prefix(&tag) { + let fact = rest.trim(); + if fact.chars().count() >= 6 { + out.push(ConsolidatedFact { + tier: tier.to_string(), + text: fact.to_string(), + }); + } + break; + } + } + } + out +} + +#[derive(Serialize)] +struct MessagesRequest<'a> { + model: &'a str, + max_tokens: u32, + messages: Vec>, +} +#[derive(Serialize)] +struct MessageIn<'a> { + role: &'a str, + content: &'a str, +} +#[derive(Deserialize)] +struct MessagesResponse { + content: Vec, +} +#[derive(Deserialize)] +struct ContentBlock { + #[serde(rename = "type")] + kind: String, + #[serde(default)] + text: String, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_facts_extracts_tagged_lines() { + let reply = "[semantic] Refunds route through the idempotent ledger\n\ + - [procedural] PR into main, squash-merge\n\ + some preamble that should be ignored\n\ + [bogus] not a real tier"; + let facts = parse_facts(reply); + assert_eq!(facts.len(), 2); + assert_eq!(facts[0].tier, "semantic"); + assert!(facts[0].text.contains("idempotent ledger")); + assert_eq!(facts[1].tier, "procedural"); + 
assert!(facts[1].text.contains("squash-merge")); + } + + #[test] + fn parse_facts_empty_on_no_tagged_lines() { + assert!(parse_facts("nothing durable here").is_empty()); + assert!(parse_facts("").is_empty()); + } + + #[test] + fn build_prompt_includes_events_and_cap() { + let p = build_prompt(&["chose ledger".into(), "PR into main".into()], 5); + assert!(p.contains("at most 5")); + assert!(p.contains("- chose ledger")); + assert!(p.contains("- PR into main")); + assert!(p.contains("[semantic]") && p.contains("[procedural]")); + } + + #[test] + fn consolidate_empty_input_makes_no_call() { + // base_url is unreachable; empty input must short-circuit before any + // request, so this must not error. + let c = Consolidator { + api_key: "x".into(), + model: "m".into(), + base_url: "http://127.0.0.1:1".into(), + timeout: Duration::from_millis(50), + max_facts: 5, + }; + assert!(c.consolidate(&[]).unwrap().is_empty()); + } + + #[test] + fn consolidate_calls_api_and_parses() { + let mut server = mockito::Server::new(); + let m = server + .mock("POST", "/v1/messages") + .with_status(200) + .with_header("content-type", "application/json") + .with_body( + serde_json::json!({ + "id": "msg", + "type": "message", + "role": "assistant", + "content": [{"type": "text", "text": "[semantic] Always use the ledger\n[procedural] TDD here"}] + }) + .to_string(), + ) + .create(); + + let c = Consolidator { + api_key: "test".into(), + model: "claude-haiku-4-5-20251001".into(), + base_url: server.url(), + timeout: Duration::from_secs(5), + max_facts: 5, + }; + let facts = c.consolidate(&["chose ledger".into()]).unwrap(); + m.assert(); + assert_eq!(facts.len(), 2); + assert_eq!(facts[0].tier, "semantic"); + assert_eq!(facts[1].tier, "procedural"); + } +} diff --git a/crates/tj-core/src/db.rs b/crates/tj-core/src/db.rs index 7748b8e..658021f 100644 --- a/crates/tj-core/src/db.rs +++ b/crates/tj-core/src/db.rs @@ -1135,6 +1135,52 @@ pub fn upsert_embedding( Ok(()) } +/// High-signal events 
(decisions, constraints, rejections) for consolidation — +/// `(event_id, text)`, newest first, capped at `limit`. +pub fn high_signal_events( + conn: &Connection, + limit: usize, +) -> anyhow::Result> { + let mut stmt = conn.prepare( + "SELECT f.event_id, f.text + FROM search_fts f + JOIN events_index ei ON ei.event_id = f.event_id + WHERE f.type IN ('decision', 'constraint', 'rejection') + ORDER BY ei.timestamp DESC + LIMIT ?1", + )?; + let rows = stmt.query_map(rusqlite::params![limit as i64], |r| { + Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)) + })?; + let mut out = Vec::new(); + for r in rows { + out.push(r?); + } + Ok(out) +} + +/// First task whose title exactly matches `title`, if any — used to find the +/// reusable per-project consolidation task. +pub fn find_task_by_title(conn: &Connection, title: &str) -> anyhow::Result> { + let mut stmt = conn.prepare("SELECT task_id FROM tasks WHERE title = ?1 LIMIT 1")?; + let mut rows = stmt.query(rusqlite::params![title])?; + match rows.next()? { + Some(row) => Ok(Some(row.get(0)?)), + None => Ok(None), + } +} + +/// Texts of all events under a task (for de-duplicating consolidated facts). +pub fn task_event_texts(conn: &Connection, task_id: &str) -> anyhow::Result> { + let mut stmt = conn.prepare("SELECT text FROM search_fts WHERE task_id = ?1")?; + let rows = stmt.query_map(rusqlite::params![task_id], |r| r.get::<_, String>(0))?; + let mut out = Vec::new(); + for r in rows { + out.push(r?); + } + Ok(out) +} + /// Number of stored embeddings for a project (test/stats helper). 
pub fn count_embeddings(conn: &Connection, project_hash: &str) -> anyhow::Result { let n: i64 = conn.query_row( diff --git a/crates/tj-core/src/lib.rs b/crates/tj-core/src/lib.rs index 0280c82..e13a3be 100644 --- a/crates/tj-core/src/lib.rs +++ b/crates/tj-core/src/lib.rs @@ -50,6 +50,7 @@ mod task_id_tests { pub mod artifacts; pub mod classifier; pub mod completeness; +pub mod consolidate; pub mod db; pub mod dream; pub mod embed; diff --git a/crates/tj-mcp/Cargo.toml b/crates/tj-mcp/Cargo.toml index db3dd90..fc41df8 100644 --- a/crates/tj-mcp/Cargo.toml +++ b/crates/tj-mcp/Cargo.toml @@ -17,7 +17,7 @@ path = "src/main.rs" [dependencies] # Lean: the MCP server doesn't embed yet, so it skips the model2vec backend. -tj-core = { package = "task-journal-core", version = "0.18.0", path = "../tj-core", default-features = false } +tj-core = { package = "task-journal-core", version = "0.19.0", path = "../tj-core", default-features = false } anyhow = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } diff --git a/plugin/.claude-plugin/plugin.json b/plugin/.claude-plugin/plugin.json index 9010280..d3fd32b 100644 --- a/plugin/.claude-plugin/plugin.json +++ b/plugin/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "task-journal", - "version": "0.18.0", + "version": "0.19.0", "description": "Append-only journal of AI-coding task reasoning chains: hypotheses, decisions, rejections, evidence. Renders compact resume packs so an agent can pick up a 2-week-old task with full context.", "author": { "name": "Mher Shahinyan"