From 81bc955ecb9027cc943a4c440bd46fa7b89c8b7b Mon Sep 17 00:00:00 2001 From: Mher Shahinyan Date: Fri, 12 Jun 2026 19:10:24 +0400 Subject: [PATCH 1/2] feat(memory): global cross-project recall index + recall command (P2.1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pillar B foundation (bd claude-memory-9z9): recall reasoning across ALL projects, not just the current repo. - tj-core::memory: a global SQLite index (data_dir/memory.sqlite) mirroring high-signal events (decisions, rejections, constraints) with their embeddings from every project. sync_from_project (idempotent, flags superseded decisions), search (cosine top-k, down-ranks contradicted), count. paths::memory_db(). - tj-cli: `task-journal recall "<query>"` — cross-project semantic search of prior decisions/rejections/constraints. embed/ask now sync the project into the global index (best-effort, never fails the command). Tests: sync+search across a project (core, ranks the right decision + model-scoped), CLI recall surfacing a decision from a *different* project via the shared index. Green on default and --no-default-features. Next (P2.2): opt-in proactive injection on UserPromptSubmit — fast keyword path, hard token budget, dedup, gated; default stays quiet. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/tj-cli/src/main.rs | 47 +++++++ crates/tj-cli/tests/cli.rs | 77 ++++++++++++ crates/tj-core/src/lib.rs | 1 + crates/tj-core/src/memory.rs | 233 +++++++++++++++++++++++++++++++++++ crates/tj-core/src/paths.rs | 6 + 5 files changed, 364 insertions(+) create mode 100644 crates/tj-core/src/memory.rs diff --git a/crates/tj-cli/src/main.rs b/crates/tj-cli/src/main.rs index 6fa1e93..40d9ace 100644 --- a/crates/tj-cli/src/main.rs +++ b/crates/tj-cli/src/main.rs @@ -632,6 +632,16 @@ enum Commands { #[arg(long, default_value_t = 5)] k: usize, }, + /// Cross-project recall (Pillar B): search EVERY project's decisions, + /// rejections and constraints for reasoning relevant to the query — + /// prior choices and dead-ends from your whole history, not just this repo. + Recall { + /// The topic / approach to check against prior reasoning. + query: String, + /// Maximum number of results. + #[arg(long, default_value_t = 5)] + k: usize, + }, /// Render and print the resume pack for a task. Pack { /// Task id (e.g. tj-7f3a). @@ -1162,6 +1172,7 @@ fn main() -> Result<()> { break; } } + sync_global_memory(&conn, &project_hash); println!( "embedded {total} event(s) with model {} ({} dim)", embedder.model_id(), @@ -1184,6 +1195,7 @@ fn main() -> Result<()> { // latest events without the user running `embed` first. 
let now = chrono::Utc::now().to_rfc3339(); tj_core::db::embed_pending(&conn, &project_hash, embedder.as_ref(), &now, 512)?; + sync_global_memory(&conn, &project_hash); let qv = embedder.embed_one(&query)?; let hits = @@ -1200,6 +1212,29 @@ fn main() -> Result<()> { } } } + Commands::Recall { query, k } => { + let global_path = tj_core::paths::memory_db()?; + if !global_path.exists() { + println!("global memory is empty — run `ask` or `embed` in a project first"); + return Ok(()); + } + let global = tj_core::memory::open(&global_path)?; + let embedder = tj_core::embed::default_embedder(); + let qv = embedder.embed_one(&query)?; + let hits = tj_core::memory::search(&global, &qv, embedder.model_id(), k)?; + if hits.is_empty() { + println!("no relevant prior reasoning found"); + } else { + for h in hits { + let snippet: String = h.text.chars().take(100).collect(); + let proj: String = h.project_hash.chars().take(8).collect(); + println!( + "{:.3} [{}] {} ({}/{})", + h.score, h.event_type, snippet, proj, h.task_id + ); + } + } + } Commands::Event { task_id, r#type, @@ -3645,6 +3680,18 @@ fn recent_task_contexts( /// the first line trimmed to 80 chars; goal is the prompt trimmed to /// 200 chars. Returns a TaskContext so the classifier has somewhere /// to attach the same prompt as the first real event. +/// Best-effort sync of a project's high-signal events into the global +/// cross-project memory index. Never fails the caller — a slightly stale recall +/// index is fine; a broken `ask`/`embed` is not. 
+fn sync_global_memory(project_conn: &rusqlite::Connection, project_hash: &str) { + let result = tj_core::paths::memory_db() + .and_then(tj_core::memory::open) + .and_then(|g| tj_core::memory::sync_from_project(&g, project_conn, project_hash)); + if let Err(e) = result { + tracing::debug!("global memory sync skipped: {e:#}"); + } +} + fn auto_open_task_from_prompt( events_path: &std::path::Path, project_hash: &str, diff --git a/crates/tj-cli/tests/cli.rs b/crates/tj-cli/tests/cli.rs index 38a8d49..ec2b9b8 100644 --- a/crates/tj-cli/tests/cli.rs +++ b/crates/tj-cli/tests/cli.rs @@ -4931,3 +4931,80 @@ fn ask_with_model2vec_handles_paraphrase() { "model2vec must rank the refund decision first for a paraphrase; got: {first:?}" ); } + +#[test] +fn recall_surfaces_decision_from_another_project() { + // Pillar B: a decision made in project A must be recallable while working + // anywhere, via the shared global index. Two distinct cwds => two + // project_hashes => one XDG_DATA_HOME (one memory.sqlite). Hash embedder + // for determinism. + let xdg = assert_fs::TempDir::new().unwrap(); + let proj_a = assert_fs::TempDir::new().unwrap(); + let proj_b = assert_fs::TempDir::new().unwrap(); + + let seed = |cwd: &std::path::Path, title: &str, decision: &str| { + let tid = String::from_utf8( + Command::cargo_bin("task-journal") + .unwrap() + .current_dir(cwd) + .env("XDG_DATA_HOME", xdg.path()) + .args(["create", title]) + .assert() + .success() + .get_output() + .stdout + .clone(), + ) + .unwrap() + .trim() + .to_string(); + Command::cargo_bin("task-journal") + .unwrap() + .current_dir(cwd) + .env("XDG_DATA_HOME", xdg.path()) + .args(["event", &tid, "--type", "decision", "--text", decision]) + .assert() + .success(); + // embed --backfill syncs this project's decisions into the global index. 
+ Command::cargo_bin("task-journal") + .unwrap() + .current_dir(cwd) + .env("XDG_DATA_HOME", xdg.path()) + .env("TJ_EMBED", "hash") + .args(["embed", "--backfill"]) + .assert() + .success(); + }; + + seed( + proj_a.path(), + "Payments", + "chose to route refunds through the idempotent payment ledger", + ); + seed( + proj_b.path(), + "Scheduler", + "use postgres advisory locks for cron leader election", + ); + + // Recall from a third location — global, cwd-independent. + let out = String::from_utf8( + Command::cargo_bin("task-journal") + .unwrap() + .env("XDG_DATA_HOME", xdg.path()) + .env("TJ_EMBED", "hash") + .args(["recall", "refund ledger idempotent", "--k", "3"]) + .assert() + .success() + .get_output() + .stdout + .clone(), + ) + .unwrap(); + + let first = out.lines().next().unwrap_or(""); + assert!( + first.contains("refund") || first.contains("ledger"), + "cross-project recall must surface project A's refund decision first; got: {first:?}\nfull:\n{out}" + ); +} diff --git a/crates/tj-core/src/lib.rs b/crates/tj-core/src/lib.rs index 27ace2e..0280c82 100644 --- a/crates/tj-core/src/lib.rs +++ b/crates/tj-core/src/lib.rs @@ -56,6 +56,7 @@ pub mod embed; pub mod event; pub mod frontmatter; pub mod fts; +pub mod memory; pub mod pack; pub mod paths; pub mod project_hash; diff --git a/crates/tj-core/src/memory.rs b/crates/tj-core/src/memory.rs new file mode 100644 index 0000000..6eb223a --- /dev/null +++ b/crates/tj-core/src/memory.rs @@ -0,0 +1,233 @@ +//! Global cross-project memory index (Pillar B). +//! +//! A single SQLite file (`data_dir/memory.sqlite`) mirrors the *high-signal* +//! events — decisions, rejections, constraints (and, later, consolidated +//! semantic/procedural/preference facts) — from every project, together with +//! their embeddings. This is what lets the agent recall relevant prior +//! reasoning across its whole history, not just the current repo — the thing +//! single-project memory tools can't do. +//! +//! 
The index is a denormalised cache: the per-project JSONL logs remain the +//! source of truth. It is rebuilt idempotently by [`sync_from_project`] and +//! queried by [`search`]. + +use rusqlite::Connection; + +/// Event types worth surfacing proactively: a committed choice, a ruled-out +/// path, or an external limit. These are the reasoning the agent most wants +/// before repeating itself. +pub const HIGH_SIGNAL_TYPES: [&str; 3] = ["decision", "rejection", "constraint"]; + +const SCHEMA: &str = r#" +CREATE TABLE IF NOT EXISTS global_memory ( + event_id TEXT PRIMARY KEY, + project_hash TEXT NOT NULL, + task_id TEXT NOT NULL, + type TEXT NOT NULL, + tier TEXT NOT NULL DEFAULT 'episodic', + text TEXT NOT NULL, + model TEXT NOT NULL, + dim INTEGER NOT NULL, + vec BLOB NOT NULL, + created_at TEXT NOT NULL, + superseded INTEGER NOT NULL DEFAULT 0 +); +CREATE INDEX IF NOT EXISTS idx_gm_type ON global_memory(type); +CREATE INDEX IF NOT EXISTS idx_gm_model ON global_memory(model); +"#; + +/// Open (creating + migrating) the global memory database at `path`. +pub fn open(path: impl AsRef) -> anyhow::Result { + if let Some(parent) = path.as_ref().parent() { + std::fs::create_dir_all(parent)?; + } + let conn = Connection::open(path)?; + conn.execute_batch(SCHEMA)?; + Ok(conn) +} + +/// A cross-project recall hit. +pub struct GlobalHit { + pub event_id: String, + pub project_hash: String, + pub task_id: String, + pub event_type: String, + pub tier: String, + pub text: String, + pub score: f32, +} + +/// Copy this project's high-signal embedded events into the global index. +/// Idempotent (`INSERT OR REPLACE` on `event_id`); call after embedding a +/// project. Returns how many rows were synced. `superseded` is flagged from the +/// `decisions.superseded_by` projection so contradicted decisions can be +/// down-ranked at query time. 
+pub fn sync_from_project( + global: &Connection, + project: &Connection, + project_hash: &str, +) -> anyhow::Result { + let placeholders = HIGH_SIGNAL_TYPES + .iter() + .map(|_| "?") + .collect::>() + .join(","); + let sql = format!( + "SELECT e.event_id, e.task_id, f.type, e.tier, f.text, e.model, e.dim, e.vec, e.created_at, + CASE WHEN d.superseded_by IS NOT NULL THEN 1 ELSE 0 END + FROM embeddings e + JOIN search_fts f ON f.event_id = e.event_id + LEFT JOIN decisions d ON d.decision_id = e.event_id + WHERE f.type IN ({placeholders})" + ); + let mut stmt = project.prepare(&sql)?; + let rows = stmt.query_map(rusqlite::params_from_iter(HIGH_SIGNAL_TYPES.iter()), |r| { + Ok(( + r.get::<_, String>(0)?, // event_id + r.get::<_, String>(1)?, // task_id + r.get::<_, String>(2)?, // type + r.get::<_, String>(3)?, // tier + r.get::<_, String>(4)?, // text + r.get::<_, String>(5)?, // model + r.get::<_, i64>(6)?, // dim + r.get::<_, Vec>(7)?, // vec + r.get::<_, String>(8)?, // created_at + r.get::<_, i64>(9)?, // superseded + )) + })?; + + let mut n = 0usize; + for row in rows { + let (event_id, task_id, ty, tier, text, model, dim, vec, created_at, superseded) = row?; + global.execute( + "INSERT OR REPLACE INTO global_memory + (event_id, project_hash, task_id, type, tier, text, model, dim, vec, created_at, superseded) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)", + rusqlite::params![ + event_id, project_hash, task_id, ty, tier, text, model, dim, vec, created_at, superseded + ], + )?; + n += 1; + } + Ok(n) +} + +/// Semantic search across the whole global index for the embedder's `model`. +/// Returns the top `k` hits by cosine, with a small penalty applied to +/// superseded/contradicted entries so live reasoning ranks above stale. 
+pub fn search( + conn: &Connection, + query_vec: &[f32], + model: &str, + k: usize, +) -> anyhow::Result> { + let mut stmt = conn.prepare( + "SELECT event_id, project_hash, task_id, type, tier, text, vec, superseded + FROM global_memory WHERE model = ?1", + )?; + let rows = stmt.query_map(rusqlite::params![model], |r| { + Ok(( + r.get::<_, String>(0)?, + r.get::<_, String>(1)?, + r.get::<_, String>(2)?, + r.get::<_, String>(3)?, + r.get::<_, String>(4)?, + r.get::<_, String>(5)?, + r.get::<_, Vec>(6)?, + r.get::<_, i64>(7)?, + )) + })?; + + let mut hits = Vec::new(); + for row in rows { + let (event_id, project_hash, task_id, event_type, tier, text, blob, superseded) = row?; + let mut score = crate::embed::cosine(query_vec, &crate::embed::from_blob(&blob)); + if superseded != 0 { + score -= 0.1; // down-rank contradicted reasoning + } + hits.push(GlobalHit { + event_id, + project_hash, + task_id, + event_type, + tier, + text, + score, + }); + } + hits.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + }); + hits.truncate(k); + Ok(hits) +} + +/// Count of indexed entries (test/stats helper). +pub fn count(conn: &Connection) -> anyhow::Result { + let n: i64 = conn.query_row("SELECT COUNT(*) FROM global_memory", [], |r| r.get(0))?; + Ok(n as usize) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::embed::Embedder; + + fn finding(text: &str) -> crate::event::Event { + // A decision event so it passes the HIGH_SIGNAL_TYPES filter. 
+ crate::event::Event::new( + "tj-x", + crate::event::EventType::Decision, + crate::event::Author::User, + crate::event::Source::Cli, + text.into(), + ) + } + + #[test] + fn sync_then_search_finds_cross_project_decision() { + let d = tempfile::TempDir::new().unwrap(); + let proj = crate::db::open(d.path().join("p.sqlite")).unwrap(); + let global = open(d.path().join("memory.sqlite")).unwrap(); + let emb = crate::embed::HashEmbedder::new(256); + + for text in [ + "chose to route refunds through the idempotent payment ledger", + "use postgres advisory locks for the cron job leader election", + ] { + crate::db::index_event(&proj, &finding(text)).unwrap(); + } + crate::db::embed_pending(&proj, "projhash", &emb, "t", 100).unwrap(); + + let synced = sync_from_project(&global, &proj, "projhash").unwrap(); + assert_eq!(synced, 2); + assert_eq!(count(&global).unwrap(), 2); + + let q = emb.embed_one("refund ledger idempotent").unwrap(); + let hits = search(&global, &q, emb.model_id(), 5).unwrap(); + assert!(!hits.is_empty()); + assert!( + hits[0].text.contains("refund"), + "the refund decision must rank first across the global index, got: {}", + hits[0].text + ); + assert_eq!(hits[0].project_hash, "projhash"); + } + + #[test] + fn search_filters_by_model() { + let d = tempfile::TempDir::new().unwrap(); + let proj = crate::db::open(d.path().join("p.sqlite")).unwrap(); + let global = open(d.path().join("memory.sqlite")).unwrap(); + let emb = crate::embed::HashEmbedder::new(64); + crate::db::index_event(&proj, &finding("decided to adopt the outbox pattern")).unwrap(); + crate::db::embed_pending(&proj, "ph", &emb, "t", 100).unwrap(); + sync_from_project(&global, &proj, "ph").unwrap(); + + let q = emb.embed_one("outbox").unwrap(); + assert_eq!(search(&global, &q, "other-model", 5).unwrap().len(), 0); + assert_eq!(search(&global, &q, emb.model_id(), 5).unwrap().len(), 1); + } +} diff --git a/crates/tj-core/src/paths.rs b/crates/tj-core/src/paths.rs index 6f288fc..d854209 100644 
--- a/crates/tj-core/src/paths.rs +++ b/crates/tj-core/src/paths.rs @@ -38,6 +38,12 @@ pub fn metrics_dir() -> anyhow::Result { Ok(data_dir()?.join("metrics")) } +/// Global cross-project memory index (Pillar B). One SQLite file aggregating +/// high-signal events + embeddings from every project. +pub fn memory_db() -> anyhow::Result { + Ok(data_dir()?.join("memory.sqlite")) +} + pub fn project_storage_dir(project_hash: &str) -> anyhow::Result { Ok(data_dir()?.join(project_hash)) } From d722d7aa9f1e814ef243f769b77e4cdd4fc17d32 Mon Sep 17 00:00:00 2001 From: Mher Shahinyan Date: Fri, 12 Jun 2026 19:17:04 +0400 Subject: [PATCH 2/2] =?UTF-8?q?feat(memory):=20proactive=20cross-project?= =?UTF-8?q?=20recall=20=E2=80=94=20opt-in=20injection=20hook=20(P2.2,=200.?= =?UTF-8?q?16.0)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completes Pillar B (bd claude-memory-9z9). - tj-core::memory: FTS5 mirror on the global index + keyword_search — a fast, embedding-free lookup cheap enough to run on every prompt. - tj-cli `recall-hook`: opt-in UserPromptSubmit injector. Reads the prompt, keyword-searches the global index for relevant prior decisions/rejections/ constraints across all projects, and emits a budgeted additionalContext block — a guardrail before the agent re-decides or repeats a dead-end. Never blocks the prompt (silent on miss/error). Gated by TJ_PROACTIVE_RECALL=0; budget via TJ_RECALL_BUDGET_CHARS / TJ_RECALL_K; reuses the classifier recursion guard. - install-hooks --proactive-recall wires it into UserPromptSubmit alongside the nudge; OFF by default (no behaviour change unless opted in). is_tj uninstall filter updated so re-install stays idempotent. Tests: keyword_search match/no-match (core), recall-hook injects + is gated (CLI), install-hooks --proactive-recall wires it and default does not (CLI). Green on default and --no-default-features. Version 0.16.0; inter-crate version reqs bumped to resolve the release. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 25 ++++++ Cargo.lock | 6 +- Cargo.toml | 2 +- crates/tj-cli/Cargo.toml | 2 +- crates/tj-cli/src/main.rs | 119 ++++++++++++++++++++++++++- crates/tj-cli/tests/cli.rs | 131 ++++++++++++++++++++++++++++++ crates/tj-core/src/memory.rs | 90 ++++++++++++++++++++ crates/tj-mcp/Cargo.toml | 2 +- plugin/.claude-plugin/plugin.json | 2 +- 9 files changed, 371 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f41848a..093423f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.16.0] - 2026-06-12 + +### Added +- **Cross-project memory — Pillar B.** The journal now recalls relevant prior + reasoning across your *entire* history, not just the current repo — something + single-project memory tools can't do. + - `task-journal recall "<query>"` — semantic search over **every** project's + decisions, rejections and constraints. Surfaces prior choices and + dead-ends from anywhere you've worked. + - A global index (`data_dir/memory.sqlite`) mirrors high-signal events + + embeddings from all projects; `ask`/`embed` keep it current automatically + (best-effort, never failing the command). Contradicted (superseded) + decisions are down-ranked. + - **Opt-in proactive recall** (`install-hooks --proactive-recall`): a + UserPromptSubmit hook that injects a budgeted block of relevant prior + decisions/rejections/constraints **before you act** — a guardrail against + re-deciding or repeating a dead-end. Off by default; uses a fast keyword + path (no model load on the prompt path); gated by `TJ_PROACTIVE_RECALL=0`, + budgeted by `TJ_RECALL_BUDGET_CHARS` / `TJ_RECALL_K`. + +### Internal +- `tj-core::memory` — global index schema (+ FTS5), `sync_from_project`, + semantic `search`, fast `keyword_search`. `paths::memory_db()`. CLI + `recall` / `recall-hook`; `install-hooks --proactive-recall` wiring. 
+ ## [0.15.0] - 2026-06-12 ### Added diff --git a/Cargo.lock b/Cargo.lock index 2801a4d..be59017 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2572,7 +2572,7 @@ dependencies = [ [[package]] name = "task-journal-cli" -version = "0.15.0" +version = "0.16.0" dependencies = [ "anyhow", "assert_cmd", @@ -2595,7 +2595,7 @@ dependencies = [ [[package]] name = "task-journal-core" -version = "0.15.0" +version = "0.16.0" dependencies = [ "anyhow", "chrono", @@ -2620,7 +2620,7 @@ dependencies = [ [[package]] name = "task-journal-mcp" -version = "0.15.0" +version = "0.16.0" dependencies = [ "anyhow", "clap", diff --git a/Cargo.toml b/Cargo.toml index aab1968..7ec2050 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [ ] [workspace.package] -version = "0.15.0" +version = "0.16.0" edition = "2021" rust-version = "1.88" license = "MIT" diff --git a/crates/tj-cli/Cargo.toml b/crates/tj-cli/Cargo.toml index 36db2a8..ed8322e 100644 --- a/crates/tj-cli/Cargo.toml +++ b/crates/tj-cli/Cargo.toml @@ -23,7 +23,7 @@ default = ["embed"] embed = ["tj-core/embed"] [dependencies] -tj-core = { package = "task-journal-core", version = "0.15.0", path = "../tj-core", default-features = false } +tj-core = { package = "task-journal-core", version = "0.16.0", path = "../tj-core", default-features = false } anyhow = { workspace = true } clap = { workspace = true } tracing = { workspace = true } diff --git a/crates/tj-cli/src/main.rs b/crates/tj-cli/src/main.rs index 40d9ace..196c251 100644 --- a/crates/tj-cli/src/main.rs +++ b/crates/tj-cli/src/main.rs @@ -780,6 +780,13 @@ enum Commands { /// the classifier, honoring `--backend`). #[arg(long)] auto_capture: bool, + /// Opt in to proactive cross-project recall (Pillar B). Adds a + /// UserPromptSubmit hook that injects relevant prior decisions/ + /// rejections/constraints from any project before you act. Off by + /// default (it surfaces extra context on every prompt). 
Fast keyword + /// path, no model; gated at runtime by TJ_PROACTIVE_RECALL=0. + #[arg(long)] + proactive_recall: bool, }, /// Show local classifier and journal statistics. Stats, @@ -930,6 +937,14 @@ enum Commands { /// default. Hidden from --help; not a human command. #[command(hide = true)] Nudge, + /// Opt-in proactive recall hook (Pillar B). On UserPromptSubmit, injects a + /// budgeted additionalContext block of prior decisions/rejections/ + /// constraints from ANY project relevant to the prompt — a guardrail + /// against re-deciding or repeating a dead-end. Fast keyword path, no + /// model. Wired only by `install-hooks --proactive-recall`. Gated by + /// TJ_PROACTIVE_RECALL=0. Hidden from --help; not a human command. + #[command(hide = true)] + RecallHook, /// Cross-task search for `rejection` events matching a topic. Helpful /// when the agent is about to repeat a path that was already turned /// down — query the topic, see the prior rejection. @@ -1529,6 +1544,7 @@ fn main() -> Result<()> { backfill, backend, auto_capture, + proactive_recall, } => { let settings_path = match scope.as_str() { "user" => { @@ -1676,13 +1692,35 @@ fn main() -> Result<()> { ); } } + if proactive_recall { + // Append the recall injector to the UserPromptSubmit hooks, + // keeping whatever is already there (nudge, and ingest when + // --auto-capture is also set). + let obj = entries.as_object_mut().expect("entries is an object"); + let ups = obj + .entry("UserPromptSubmit") + .or_insert_with(|| serde_json::json!([{ "matcher": "", "hooks": [] }])); + if let Some(hooks) = ups + .as_array_mut() + .and_then(|a| a.get_mut(0)) + .and_then(|e| e.get_mut("hooks")) + .and_then(|h| h.as_array_mut()) + { + hooks.push(serde_json::json!({ + "type": "command", + "command": "task-journal recall-hook || true", + })); + } + } // MERGE our entries into the existing `hooks` block — touch ONLY // task-journal hooks, never clobber other plugins' hooks. 
For each // event we (a) strip any prior task-journal entry (idempotent // re-install) then (b) append ours, leaving foreign hooks and // untouched events intact. let is_tj = |c: &str| { - c.contains("task-journal ingest-hook") || c.contains("task-journal nudge") + c.contains("task-journal ingest-hook") + || c.contains("task-journal nudge") + || c.contains("task-journal recall-hook") }; let hooks_block = hooks_obj .entry("hooks".to_string()) @@ -3073,6 +3111,9 @@ fn main() -> Result<()> { }); print!("{env}"); } + Commands::RecallHook => { + run_recall_hook()?; + } Commands::Rejected { topic, all_projects, @@ -3692,6 +3733,82 @@ fn sync_global_memory(project_conn: &rusqlite::Connection, project_hash: &str) { } } +/// Proactive recall injector (opt-in hook). Reads the UserPromptSubmit payload +/// from stdin, keyword-searches the global index for relevant prior +/// decisions/rejections/constraints across all projects, and emits a budgeted +/// `additionalContext` block. Never blocks the prompt: any miss, empty result, +/// or error exits silently with no output. +fn run_recall_hook() -> anyhow::Result<()> { + // Opt-out and recursion guard (never inject into our own classifier spawn). + if std::env::var("TJ_PROACTIVE_RECALL").as_deref() == Ok("0") { + return Ok(()); + } + if std::env::var(tj_core::classifier::agent_sdk::IN_CLASSIFIER_ENV).is_ok() { + return Ok(()); + } + let global_path = tj_core::paths::memory_db()?; + if !global_path.exists() { + return Ok(()); + } + + use std::io::Read; + let mut buf = String::new(); + if std::io::stdin().read_to_string(&mut buf).is_err() || buf.trim().is_empty() { + return Ok(()); + } + // The UserPromptSubmit payload carries the prompt under `prompt`; fall back + // to the raw stdin if it isn't JSON. 
+ let prompt = serde_json::from_str::(&buf) + .ok() + .and_then(|v| { + v.get("prompt") + .and_then(|p| p.as_str()) + .map(|s| s.to_string()) + }) + .unwrap_or(buf); + if prompt.trim().is_empty() { + return Ok(()); + } + + let conn = tj_core::memory::open(&global_path)?; + let k: usize = std::env::var("TJ_RECALL_K") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(3); + let hits = tj_core::memory::keyword_search(&conn, &prompt, k)?; + if hits.is_empty() { + return Ok(()); + } + + let budget: usize = std::env::var("TJ_RECALL_BUDGET_CHARS") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(900); + let mut ctx = String::from( + "📓 task-journal — relevant prior reasoning from your history (you may have decided this before):\n", + ); + for h in &hits { + let snippet: String = h.text.chars().take(160).collect(); + let proj: String = h.project_hash.chars().take(8).collect(); + let line = format!( + "⚠ [{}] {} (project {proj}, {})\n", + h.event_type, snippet, h.task_id + ); + if ctx.len() + line.len() > budget { + break; + } + ctx.push_str(&line); + } + let env = serde_json::json!({ + "hookSpecificOutput": { + "hookEventName": "UserPromptSubmit", + "additionalContext": ctx.trim_end(), + } + }); + print!("{env}"); + Ok(()) +} + fn auto_open_task_from_prompt( events_path: &std::path::Path, project_hash: &str, diff --git a/crates/tj-cli/tests/cli.rs b/crates/tj-cli/tests/cli.rs index ec2b9b8..80f614d 100644 --- a/crates/tj-cli/tests/cli.rs +++ b/crates/tj-cli/tests/cli.rs @@ -5008,3 +5008,134 @@ fn recall_surfaces_decision_from_another_project() { "cross-project recall must surface project A's refund decision first; got: {first:?}\nfull:\n{out}" ); } + +#[test] +fn install_hooks_proactive_recall_wires_recall_hook() { + // --proactive-recall adds the recall injector to UserPromptSubmit alongside + // the nudge; the default install must NOT wire it (off by default). 
+ let dir = assert_fs::TempDir::new().unwrap(); + Command::cargo_bin("task-journal") + .unwrap() + .env("HOME", dir.path()) + .args(["install-hooks", "--scope", "user", "--proactive-recall"]) + .assert() + .success(); + let content = + std::fs::read_to_string(dir.path().join(".claude").join("settings.json")).unwrap(); + assert!( + content.contains("task-journal recall-hook"), + "--proactive-recall must wire the recall-hook; got: {content}" + ); + assert!( + content.contains("task-journal nudge"), + "nudge must remain alongside recall-hook" + ); + + let dir2 = assert_fs::TempDir::new().unwrap(); + Command::cargo_bin("task-journal") + .unwrap() + .env("HOME", dir2.path()) + .args(["install-hooks", "--scope", "user"]) + .assert() + .success(); + let c2 = std::fs::read_to_string(dir2.path().join(".claude").join("settings.json")).unwrap(); + assert!( + !c2.contains("recall-hook"), + "default install must not wire proactive recall" + ); +} + +#[test] +fn recall_hook_injects_relevant_prior_reasoning() { + // Pillar B proactive injection: a decision recorded in a project must be + // surfaced as additionalContext when a later prompt (anywhere) shares its + // terms. Gated by TJ_PROACTIVE_RECALL=0. 
+ let xdg = assert_fs::TempDir::new().unwrap(); + let proj = assert_fs::TempDir::new().unwrap(); + + let tid = String::from_utf8( + Command::cargo_bin("task-journal") + .unwrap() + .current_dir(proj.path()) + .env("XDG_DATA_HOME", xdg.path()) + .args(["create", "Payments"]) + .assert() + .success() + .get_output() + .stdout + .clone(), + ) + .unwrap() + .trim() + .to_string(); + Command::cargo_bin("task-journal") + .unwrap() + .current_dir(proj.path()) + .env("XDG_DATA_HOME", xdg.path()) + .args([ + "event", + &tid, + "--type", + "decision", + "--text", + "chose the idempotent payment ledger for refunds", + ]) + .assert() + .success(); + Command::cargo_bin("task-journal") + .unwrap() + .current_dir(proj.path()) + .env("XDG_DATA_HOME", xdg.path()) + .env("TJ_EMBED", "hash") + .args(["embed", "--backfill"]) + .assert() + .success(); + + let payload = serde_json::json!({ + "hook_event_name": "UserPromptSubmit", + "prompt": "should we add a refund ledger to billing?" + }) + .to_string(); + + let body = String::from_utf8( + Command::cargo_bin("task-journal") + .unwrap() + .env("XDG_DATA_HOME", xdg.path()) + .args(["recall-hook"]) + .write_stdin(payload.clone()) + .assert() + .success() + .get_output() + .stdout + .clone(), + ) + .unwrap(); + assert!( + body.contains("additionalContext"), + "recall-hook must emit additionalContext; got: {body:?}" + ); + assert!( + body.contains("ledger"), + "must surface the ledger decision; got: {body}" + ); + + // Gate: TJ_PROACTIVE_RECALL=0 suppresses all output. 
+ let gated = String::from_utf8( + Command::cargo_bin("task-journal") + .unwrap() + .env("XDG_DATA_HOME", xdg.path()) + .env("TJ_PROACTIVE_RECALL", "0") + .args(["recall-hook"]) + .write_stdin(payload) + .assert() + .success() + .get_output() + .stdout + .clone(), + ) + .unwrap(); + assert!( + gated.trim().is_empty(), + "TJ_PROACTIVE_RECALL=0 must suppress injection; got: {gated:?}" + ); +} diff --git a/crates/tj-core/src/memory.rs b/crates/tj-core/src/memory.rs index 6eb223a..3b399e5 100644 --- a/crates/tj-core/src/memory.rs +++ b/crates/tj-core/src/memory.rs @@ -34,6 +34,7 @@ CREATE TABLE IF NOT EXISTS global_memory ( ); CREATE INDEX IF NOT EXISTS idx_gm_type ON global_memory(type); CREATE INDEX IF NOT EXISTS idx_gm_model ON global_memory(model); +CREATE VIRTUAL TABLE IF NOT EXISTS global_fts USING fts5(event_id UNINDEXED, text); "#; /// Open (creating + migrating) the global memory database at `path`. @@ -107,11 +108,68 @@ pub fn sync_from_project( event_id, project_hash, task_id, ty, tier, text, model, dim, vec, created_at, superseded ], )?; + // Mirror into FTS5 for the fast keyword path (proactive hook). + global.execute( + "DELETE FROM global_fts WHERE event_id = ?1", + rusqlite::params![event_id], + )?; + global.execute( + "INSERT INTO global_fts(event_id, text) VALUES (?1, ?2)", + rusqlite::params![event_id, text], + )?; n += 1; } Ok(n) } +/// Fast keyword (FTS5) search over the global index — no embedding, so it's +/// cheap enough to run on every prompt in the proactive hook (loading a model +/// per prompt would be too slow). Builds an OR query from the prompt's +/// alphanumeric tokens (≥4 chars) and ranks by BM25. 
+pub fn keyword_search(conn: &Connection, prompt: &str, k: usize) -> anyhow::Result> { + let mut seen = std::collections::HashSet::new(); + let terms: Vec = prompt + .split(|c: char| !c.is_alphanumeric()) + .filter(|t| t.chars().count() >= 4) + .map(|t| t.to_lowercase()) + .filter(|t| seen.insert(t.clone())) + .collect(); + if terms.is_empty() { + return Ok(Vec::new()); + } + let query = terms.join(" OR "); + let mut stmt = conn.prepare( + "SELECT g.event_id, g.project_hash, g.task_id, g.type, g.tier, g.text, g.superseded, + bm25(global_fts) + FROM global_fts + JOIN global_memory g ON g.event_id = global_fts.event_id + WHERE global_fts MATCH ?1 + ORDER BY bm25(global_fts) + LIMIT ?2", + )?; + let rows = stmt.query_map(rusqlite::params![query, k as i64], |r| { + let bm: f64 = r.get(7)?; + let superseded: i64 = r.get(6)?; + // BM25 is lower-is-better; negate so higher == more relevant, then + // nudge contradicted reasoning down. + let score = (-bm) as f32 - if superseded != 0 { 0.5 } else { 0.0 }; + Ok(GlobalHit { + event_id: r.get(0)?, + project_hash: r.get(1)?, + task_id: r.get(2)?, + event_type: r.get(3)?, + tier: r.get(4)?, + text: r.get(5)?, + score, + }) + })?; + let mut out = Vec::new(); + for row in rows { + out.push(row?); + } + Ok(out) +} + /// Semantic search across the whole global index for the embedder's `model`. /// Returns the top `k` hits by cosine, with a small penalty applied to /// superseded/contradicted entries so live reasoning ranks above stale. 
@@ -216,6 +274,38 @@ mod tests { assert_eq!(hits[0].project_hash, "projhash"); } + #[test] + fn keyword_search_matches_prompt_terms() { + let d = tempfile::TempDir::new().unwrap(); + let proj = crate::db::open(d.path().join("p.sqlite")).unwrap(); + let global = open(d.path().join("memory.sqlite")).unwrap(); + let emb = crate::embed::HashEmbedder::new(64); + crate::db::index_event( + &proj, + &finding("chose the idempotent payment ledger for refunds"), + ) + .unwrap(); + crate::db::index_event( + &proj, + &finding("rejected kafka for the audit log; too heavy"), + ) + .unwrap(); + crate::db::embed_pending(&proj, "ph", &emb, "t", 100).unwrap(); + sync_from_project(&global, &proj, "ph").unwrap(); + + let hits = keyword_search(&global, "should we add a refund ledger here?", 5).unwrap(); + assert!( + !hits.is_empty(), + "prompt terms must match the ledger decision" + ); + assert!(hits[0].text.contains("ledger")); + + // No overlapping ≥4-char term => no hit. + assert!(keyword_search(&global, "tiny ui css fix", 5) + .unwrap() + .is_empty()); + } + #[test] fn search_filters_by_model() { let d = tempfile::TempDir::new().unwrap(); diff --git a/crates/tj-mcp/Cargo.toml b/crates/tj-mcp/Cargo.toml index d67cbab..81ca77e 100644 --- a/crates/tj-mcp/Cargo.toml +++ b/crates/tj-mcp/Cargo.toml @@ -17,7 +17,7 @@ path = "src/main.rs" [dependencies] # Lean: the MCP server doesn't embed yet, so it skips the model2vec backend. 
-tj-core = { package = "task-journal-core", version = "0.15.0", path = "../tj-core", default-features = false } +tj-core = { package = "task-journal-core", version = "0.16.0", path = "../tj-core", default-features = false } anyhow = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } diff --git a/plugin/.claude-plugin/plugin.json b/plugin/.claude-plugin/plugin.json index 8577ef5..baa1c66 100644 --- a/plugin/.claude-plugin/plugin.json +++ b/plugin/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "task-journal", - "version": "0.15.0", + "version": "0.16.0", "description": "Append-only journal of AI-coding task reasoning chains: hypotheses, decisions, rejections, evidence. Renders compact resume packs so an agent can pick up a 2-week-old task with full context.", "author": { "name": "Mher Shahinyan"