From 6cf8387ca62d71be1f9f3ded312e9f0bc3903986 Mon Sep 17 00:00:00 2001 From: Justin Davis Date: Sat, 23 May 2026 10:56:49 +1000 Subject: [PATCH 1/3] =?UTF-8?q?feat(recall=5Fcheck):=20hybrid=20retrieval?= =?UTF-8?q?=20=E2=80=94=20FTS5=20+=20Vectorize=20+=20RRF=20fusion=20(CLA-1?= =?UTF-8?q?09)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hybrid search adds BM25-ranked lexical matching alongside the existing semantic (cosine) search. Lexical catches exact-term hits cosine misses (names, jargon, unique phrases); semantic catches conceptual hits lexical misses. Together substantially better than either alone, and the default architecture for production retrieval systems. ## Pieces * **migrations/0007_fts5_hybrid_retrieval.sql** — standalone FTS5 virtual table indexing content/summary/entity/tags. Insert/update/delete triggers keep it in sync with memories. Backfill INSERT seeds the index with pre-CLA-109 rows. Smoke-tested locally end-to-end: triggers fire correctly on each mutation; partial content updates preserve other indexed fields; column-scoped queries work. * **src/hybrid.rs** (new, non-wasm-gated) — pure-logic module with build_fts_query (free-text → safe FTS5 MATCH expression: tokenise, trim non-alphanumeric edges, quote each token, OR-join — quoting defends against FTS5 keywords like AND/OR/NOT/NEAR), rrf_fuse (Reciprocal Rank Fusion with configurable per-retriever weights; default k=60 per Cormack et al. 2009), and cosine_similarity (shared with worker_mmr, single source of truth). 13 unit tests covering tokenisation edge cases, RRF correctness, weight=0 degeneration, k constant effect. * **src/worker_vectorize.rs** — adds get_by_ids binding + helper for fetching stored vectors by id (no query, no score). Enables FTS-only hits to participate in MMR diversity ranking. * **src/worker_store.rs** — fts_search(db, match_expr, limit) executes BM25-ranked queries against memories_fts, returns ids in rank order. * **src/worker_mcp.rs** — tool_recall_check rewritten: 1. Vectorize query (existing) + FTS query in parallel-ish (sequential, ~5ms saved isn't worth the complexity). 2. FTS-only hits get vectors via get_by_ids, cosine computed, threshold-filtered. Lexical hits no longer invisible to MMR. 3. RRF fuses the two rankings; pool narrows to fused top-(limit*3). The narrowing is what gives fusion influence over the final ranking — without it, MMR's cosine-relevance metric would dominate and FTS contribution would be wasted. 4. CLA-108 metadata filters apply (unchanged). 5. MMR rerank within the narrowed pool (unchanged). 6. Header surfaces active fts_weight for transparency. ## Tunable knob `ONEIRO_HYBRID_FTS_WEIGHT` env var (default 1.0) scales the lexical leg's RRF contribution. Set to 0.0 to disable FTS entirely (degenerates to pre-CLA-109 semantic-only behaviour) — kill switch in case hybrid regresses on some query class. Set > 1.0 to over-weight lexical. Tunable without redeploy. ## Risk / migration notes The 0007 migration is non-destructive (additive: new table + triggers + backfill INSERT). Existing memories get indexed in place. Rollback path exists (DROP TRIGGER + DROP TABLE) but migrations are one-way in practice — branch can be abandoned cheaply up to ship. recall_orient is unchanged — it's fixed-recent + orientation, not topic-driven, so hybrid retrieval doesn't apply. 154 cargo tests pass (was 141 + 13 new). cargo check wasm32 clean. worker-build --release produces 28.1KB bundle. --- migrations/0007_fts5_hybrid_retrieval.sql | 58 +++++ src/hybrid.rs | 278 ++++++++++++++++++++++ src/lib.rs | 1 + src/main.rs | 1 + src/worker_mcp.rs | 145 +++++++++-- src/worker_mmr.rs | 21 +- src/worker_store.rs | 35 +++ src/worker_vectorize.rs | 40 ++++ 8 files changed, 537 insertions(+), 42 deletions(-) create mode 100644 migrations/0007_fts5_hybrid_retrieval.sql create mode 100644 src/hybrid.rs diff --git a/migrations/0007_fts5_hybrid_retrieval.sql b/migrations/0007_fts5_hybrid_retrieval.sql new file mode 100644 index 0000000..6b86ad7 --- /dev/null +++ b/migrations/0007_fts5_hybrid_retrieval.sql @@ -0,0 +1,58 @@ +-- migrations/0007_fts5_hybrid_retrieval.sql — Hybrid retrieval (CLA-109). +-- +-- BM25-ranked full-text search via SQLite's FTS5 virtual table, queried +-- in parallel with Vectorize at recall time and fused with Reciprocal +-- Rank Fusion. Lexical search catches exact-term hits cosine misses +-- (names, jargon, unique phrases). Semantic search catches conceptual +-- hits lexical misses. Together substantially better than either alone. +-- +-- Standalone (not external-content) FTS5 table — keeps the index self- +-- contained at the cost of ~2x storage for the indexed columns. At our +-- scale (~500 memories × ~500 bytes each = ~250KB extra) the tradeoff +-- favours simplicity. External-content would couple the index to the +-- memories table's rowid, which is implicit and brittle to schema work. +-- +-- Triggers keep memories_fts in sync with memories. The backfill INSERT +-- at the end seeds the index with rows that pre-date this migration. +-- Direct INSERTs into memories_fts (like the backfill) don't fire the +-- memories_fts_insert trigger, so the backfill is safe and won't double- +-- index. The migration framework guarantees one-shot apply. + +CREATE VIRTUAL TABLE memories_fts USING fts5( + id UNINDEXED, -- carried for SELECT, not tokenised for MATCH + content, + summary, + entity, -- empty string when memories.entity IS NULL + tags, -- JSON-stringified; unicode61 tokeniser handles + -- the brackets/quotes fine — splits on those + -- non-alphanumeric chars as boundaries. + tokenize='unicode61 remove_diacritics 1' +); + +-- AFTER triggers fire post-mutation, so memories_fts always sees the +-- committed state. COALESCE on entity/tags handles the NULL columns +-- (FTS5 stores empty strings for missing token fields). +CREATE TRIGGER memories_fts_insert AFTER INSERT ON memories BEGIN + INSERT INTO memories_fts(id, content, summary, entity, tags) + VALUES (new.id, new.content, new.summary, + COALESCE(new.entity, ''), COALESCE(new.tags, '[]')); +END; + +CREATE TRIGGER memories_fts_update AFTER UPDATE ON memories BEGIN + UPDATE memories_fts SET + content = new.content, + summary = new.summary, + entity = COALESCE(new.entity, ''), + tags = COALESCE(new.tags, '[]') + WHERE id = new.id; +END; + +CREATE TRIGGER memories_fts_delete AFTER DELETE ON memories BEGIN + DELETE FROM memories_fts WHERE id = old.id; +END; + +-- Backfill: index every existing row. Pre-CLA-109 deploys have memories +-- without a corresponding FTS5 entry; this catches them up. +INSERT INTO memories_fts(id, content, summary, entity, tags) +SELECT id, content, summary, COALESCE(entity, ''), COALESCE(tags, '[]') +FROM memories; diff --git a/src/hybrid.rs b/src/hybrid.rs new file mode 100644 index 0000000..3c0c528 --- /dev/null +++ b/src/hybrid.rs @@ -0,0 +1,278 @@ +// hybrid.rs — Hybrid retrieval primitives for recall_check (CLA-109). +// +// Pure logic that's testable on native (worker_mcp.rs and worker_store.rs +// are both wasm-gated). Two pieces here: +// +// build_fts_query(raw) — user free-text → safe FTS5 MATCH expression. +// Whitespace-tokenises, strips non-alphanumeric +// edges, quotes each token, OR-joins. Quoting +// defends against tokens that happen to be FTS5 +// keywords (AND, OR, NOT, NEAR) — quoted forms +// are literal tokens, not operators. +// +// rrf_fuse(fts, vector, fts_weight, k) — Reciprocal Rank Fusion of two +// ranked id lists. Score per retriever per hit: +// weight * 1 / (k + rank) +// Summed across retrievers, sorted descending. +// k=60 is the original RRF paper's constant +// (Cormack et al. 2009); has held up in practice. +// RRF doesn't require normalising scores across +// retrievers — works directly off rank, which +// is why it's the safe default when fusing +// systems with incompatible score distributions +// (BM25 vs cosine similarity). +// +// Both functions are pure — no I/O, no env access — so they compile on +// every target and live alongside their unit tests. + +use std::collections::HashMap; + +/// RRF constant from the original paper. Increasing it flattens the +/// score curve (more weight to lower-ranked hits); decreasing it +/// sharpens to favour top ranks. 60 is the established default and +/// rarely benefits from tuning. +pub const DEFAULT_RRF_K: f64 = 60.0; + +/// Cosine similarity between two equal-length vectors. Used by MMR +/// rerank (worker_mmr.rs) for the diversity term and by hybrid +/// retrieval (worker_mcp.rs) to score FTS-only candidates after a +/// getByIds vector fetch. Pure function; testable on native. +/// +/// Returns 0.0 in the degenerate denominator case rather than NaN — +/// defensive against zero-length vectors. Workers AI / Vectorize never +/// emit a zero embedding for non-empty input, so this branch is +/// unreachable in practice but the safety net is cheap. +pub fn cosine_similarity(a: &[f64], b: &[f64]) -> f64 { + let mut dot = 0.0; + let mut na = 0.0; + let mut nb = 0.0; + for (x, y) in a.iter().zip(b.iter()) { + dot += x * y; + na += x * x; + nb += y * y; + } + let denom = na.sqrt() * nb.sqrt(); + if denom == 0.0 { + 0.0 + } else { + dot / denom + } +} + +/// Build a safe FTS5 MATCH expression from user free-text. +/// +/// Returns `None` when the input has no usable tokens (empty or pure +/// punctuation) — caller should treat that as "skip the FTS leg of the +/// hybrid query." Returns `Some(expr)` otherwise; `expr` is a properly +/// quoted, OR-joined token list ready to pass directly into a `MATCH ?` +/// bind parameter. +/// +/// Tokenisation rules: +/// * split on whitespace +/// * strip leading/trailing non-alphanumeric characters per token +/// * drop empty tokens +/// * strip inner double quotes (defensive — shouldn't reach here) +/// * wrap each surviving token in double quotes +/// * join with `" OR "` +/// +/// Quoting each token makes FTS5 treat it as a literal — so a token that +/// happens to be "AND" or "OR" is a search term, not a syntax keyword. +pub fn build_fts_query(raw: &str) -> Option { + let tokens: Vec = raw + .split_whitespace() + .map(|t| { + t.trim_matches(|c: char| !c.is_alphanumeric()) + .replace('"', "") + }) + .filter(|t| !t.is_empty()) + .map(|t| format!("\"{}\"", t)) + .collect(); + if tokens.is_empty() { + None + } else { + Some(tokens.join(" OR ")) + } +} + +/// Reciprocal Rank Fusion of two ranked id lists. +/// +/// `fts_ranking` and `vector_ranking` are id lists in rank order (best +/// first, position 0). Each id gets a score from each list it appears +/// in: `weight * 1 / (k + rank_1_indexed)`. Per-list scores sum across +/// retrievers. The output is sorted descending by fused score. +/// +/// `fts_weight` scales the lexical leg's contribution. The vector leg +/// is fixed at weight 1.0 (the established baseline). `fts_weight = 0.0` +/// disables FTS entirely (degenerate vector-only ranking). `fts_weight = +/// 1.0` is equal weighting. Anything > 1.0 over-weights lexical. +/// +/// Returns `Vec<(id, fused_score)>`. Score is informational — the rank +/// order is what downstream consumers use. +pub fn rrf_fuse( + fts_ranking: &[String], + vector_ranking: &[String], + fts_weight: f64, + k: f64, +) -> Vec<(String, f64)> { + let mut scores: HashMap = HashMap::new(); + if fts_weight > 0.0 { + for (rank, id) in fts_ranking.iter().enumerate() { + let contribution = fts_weight / (k + (rank + 1) as f64); + *scores.entry(id.clone()).or_insert(0.0) += contribution; + } + } + for (rank, id) in vector_ranking.iter().enumerate() { + let contribution = 1.0 / (k + (rank + 1) as f64); + *scores.entry(id.clone()).or_insert(0.0) += contribution; + } + let mut out: Vec<(String, f64)> = scores.into_iter().collect(); + out.sort_by(|a, b| { + b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal) + }); + out +} + +#[cfg(test)] +mod tests { + use super::*; + + fn ids(s: &[&str]) -> Vec { + s.iter().map(|x| x.to_string()).collect() + } + + // ---- build_fts_query ---- + + #[test] + fn empty_input_returns_none() { + assert!(build_fts_query("").is_none()); + assert!(build_fts_query(" ").is_none()); + assert!(build_fts_query("!!!").is_none()); // pure punctuation + } + + #[test] + fn single_token() { + assert_eq!(build_fts_query("rover").as_deref(), Some("\"rover\"")); + } + + #[test] + fn multi_token_or_joins() { + assert_eq!( + build_fts_query("rover camera heartbeat").as_deref(), + Some("\"rover\" OR \"camera\" OR \"heartbeat\""), + ); + } + + #[test] + fn fts5_keywords_get_quoted_as_literal_tokens() { + // AND/OR/NOT/NEAR are FTS5 syntax keywords; quoting makes them + // literal search terms instead of operators. + let q = build_fts_query("rover AND camera").unwrap(); + assert!(q.contains("\"AND\"")); + assert!(q.contains("\"rover\"")); + assert!(q.contains("\"camera\"")); + } + + #[test] + fn punctuation_stripped_from_token_edges() { + // Trailing comma and inner apostrophe are noise; the inner + // apostrophe is alphanumeric-adjacent so trim_matches leaves + // it. We only strip edge non-alphanumeric chars. + let q = build_fts_query("chopper, justin's").unwrap(); + assert!(q.contains("\"chopper\"")); + assert!(q.contains("justin")); // either "justin" or "justin's" — both fine + } + + #[test] + fn inner_double_quotes_stripped() { + // Defensive — a user query containing a literal double quote + // would otherwise break the FTS5 syntax. + let q = build_fts_query("say \"hello\"").unwrap(); + assert!(!q.contains("\\\"")); + // The "hello" word survives (without its inner quotes), as a + // properly-quoted FTS5 token. + assert!(q.contains("\"hello\"")); + } + + // ---- rrf_fuse ---- + + #[test] + fn empty_inputs_produce_empty_output() { + assert!(rrf_fuse(&[], &[], 1.0, DEFAULT_RRF_K).is_empty()); + } + + #[test] + fn single_retriever_orders_by_rank() { + let fts = ids(&["a", "b", "c"]); + let out = rrf_fuse(&fts, &[], 1.0, DEFAULT_RRF_K); + // FTS-only with default weight: order preserved. + let ordered: Vec<&str> = out.iter().map(|(id, _)| id.as_str()).collect(); + assert_eq!(ordered, vec!["a", "b", "c"]); + } + + #[test] + fn overlapping_rankings_boost_shared_hits() { + // "shared" appears top of both lists; should beat any item that + // appears in only one. + let fts = ids(&["shared", "fts_only"]); + let vec = ids(&["shared", "vec_only"]); + let out = rrf_fuse(&fts, &vec, 1.0, DEFAULT_RRF_K); + assert_eq!(out[0].0, "shared"); + // "shared" gets contributions from both retrievers; the others + // from one each. So its score is roughly double. + assert!(out[0].1 > out[1].1); + } + + #[test] + fn fts_weight_zero_degenerates_to_vector_only() { + // With fts_weight=0, the FTS leg contributes nothing — the + // ranking is purely vector's ordering. + let fts = ids(&["fts_top", "fts_second"]); + let vec = ids(&["vec_top", "vec_second"]); + let out = rrf_fuse(&fts, &vec, 0.0, DEFAULT_RRF_K); + let ordered: Vec<&str> = out.iter().map(|(id, _)| id.as_str()).collect(); + assert_eq!(ordered, vec!["vec_top", "vec_second"]); + } + + #[test] + fn fts_weight_higher_than_one_increases_lexical_influence() { + // Item at rank 0 of FTS, rank 2 of vector vs item at rank 2 of + // FTS, rank 0 of vector. With fts_weight=1 they'd roughly tie; + // with fts_weight=3 the FTS-top item should win. + let fts = ids(&["a", "b", "c"]); + let vec = ids(&["c", "b", "a"]); + let balanced = rrf_fuse(&fts, &vec, 1.0, DEFAULT_RRF_K); + let fts_heavy = rrf_fuse(&fts, &vec, 3.0, DEFAULT_RRF_K); + // Balanced: a and c tie (mirror rankings); b in the middle. + // FTS-heavy: a wins outright. + let _ = balanced; // not asserting balanced tie behaviour — order undefined + assert_eq!(fts_heavy[0].0, "a"); + } + + #[test] + fn rrf_is_robust_to_disjoint_rankings() { + // No overlap between retrievers: every hit appears once. + let fts = ids(&["a"]); + let vec = ids(&["b"]); + let out = rrf_fuse(&fts, &vec, 1.0, DEFAULT_RRF_K); + assert_eq!(out.len(), 2); + // Both at rank 0 with equal weights → equal scores. + assert!((out[0].1 - out[1].1).abs() < 1e-12); + } + + #[test] + fn rrf_k_constant_affects_rank_curve() { + // Higher k flattens the score curve. With k=1, rank-1 hits get + // 1/2 = 0.5; rank-10 gets 1/11 = 0.09 — sharp. + // With k=100, rank-1 gets 1/101 ≈ 0.0099; rank-10 gets 1/110 ≈ + // 0.0091 — much flatter. + let fts = ids(&["a"]); + let vec: Vec = (0..10).map(|i| format!("v{}", i)).collect(); + let sharp = rrf_fuse(&fts, &vec, 1.0, 1.0); + let flat = rrf_fuse(&fts, &vec, 1.0, 100.0); + // In sharp: gap between rank-1 and rank-10 should be much bigger + // than in flat. + let sharp_gap = sharp[0].1 - sharp[sharp.len() - 1].1; + let flat_gap = flat[0].1 - flat[flat.len() - 1].1; + assert!(sharp_gap > flat_gap); + } +} diff --git a/src/lib.rs b/src/lib.rs index d0972b1..d1c6b5b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,6 +14,7 @@ mod api_key; mod audit; mod dialectic_validation; mod embed; +mod hybrid; mod key_rate; mod memory; diff --git a/src/main.rs b/src/main.rs index ce47c2d..1896795 100644 --- a/src/main.rs +++ b/src/main.rs @@ -23,6 +23,7 @@ mod auth_ctx; #[allow(dead_code)] mod dialectic_validation; mod embed; +mod hybrid; mod key_rate; mod memory; mod store; diff --git a/src/worker_mcp.rs b/src/worker_mcp.rs index e38a19f..a0ed130 100644 --- a/src/worker_mcp.rs +++ b/src/worker_mcp.rs @@ -14,6 +14,7 @@ // recall_specific, review, reframe, forget, reflect, remember_with_image) // and the OAuth path for non-rover callers. +use crate::hybrid::{self, DEFAULT_RRF_K}; use crate::memory::{Memory, MemoryType}; use crate::worker_auth_ctx::{self, AuthCtx}; use crate::worker_orient::format_memory; @@ -748,6 +749,24 @@ struct RecallCheckArgs { tags: Vec, } +/// Read `ONEIRO_HYBRID_FTS_WEIGHT` from worker env (CLA-109). Default +/// 1.0 (equal weighting with vector). Set to 0.0 to disable the FTS +/// leg entirely (semantic-only — pre-CLA-109 behaviour). Tunable knob +/// for A/B without redeploying schema changes. +fn read_fts_weight(env: &Env) -> f64 { + let raw = match env.var("ONEIRO_HYBRID_FTS_WEIGHT") { + Ok(v) => v.to_string(), + Err(_) => return 1.0, + }; + raw.parse::().unwrap_or_else(|_| { + worker::console_error!( + "ONEIRO_HYBRID_FTS_WEIGHT={:?} unparseable; using default 1.0", + raw + ); + 1.0 + }) +} + async fn tool_recall_check( env: &Env, db: &D1Database, @@ -773,26 +792,73 @@ async fn tool_recall_check( let filters_active = entity_filter.is_some() || memory_type_filter.is_some() || !args.tags.is_empty(); + let fts_weight = read_fts_weight(env); + let hybrid_active = fts_weight > 0.0; + let query_emb = worker_embed::embed_query(env, &args.topic) .await .map_err(|e| format!("embed_query: {:?}", e))?; // Oversample wider when filters are active — entity / memory_type / tags - // are evaluated post-Vectorize (current index has no metadata; pushdown - // would require re-upserting every vector with metadata, deferred to - // CLA-109). A 90%-discriminating filter on a 10-deep oversample leaves - // ~1 candidate — not enough to fill `limit` or give MMR diversity room. - // Doubling the floor + ceiling keeps the post-filter survivor pool deep - // enough without taxing free-tier Vectorize quotas. + // filter post-Vectorize (no metadata pushdown in the index, would + // require re-upserting every vector with metadata). Hybrid is + // similarly post-fusion. A 90%-discriminating filter on a 10-deep + // oversample leaves ~1 candidate — not enough to fill `limit` or + // feed MMR. Doubling the floor + ceiling keeps the post-filter + // survivor pool deep enough without taxing free-tier quotas. let oversample_base = if filters_active { limit * 8 } else { limit * 4 }; let oversample_min = if filters_active { 20 } else { 10 }; let oversample = (oversample_base.clamp(oversample_min, 100)) as u32; - let matches = worker_vectorize::query_top_k_with_vectors(env, &query_emb, oversample) + + // ── Vector leg ──────────────────────────────────────────────── + let vector_matches = worker_vectorize::query_top_k_with_vectors(env, &query_emb, oversample) .await .map_err(|e| format!("vectorize query: {:?}", e))?; - let above_threshold: Vec = matches + let mut above_threshold: Vec = vector_matches .into_iter() .filter(|m| m.score >= min_similarity) .collect(); + let vector_ranking: Vec = above_threshold.iter().map(|m| m.id.clone()).collect(); + + // ── FTS leg (skipped if weight == 0) ────────────────────────── + let fts_ranking: Vec = if hybrid_active { + match hybrid::build_fts_query(&args.topic) { + Some(expr) => worker_store::fts_search(db, &expr, oversample) + .await + .map_err(|e| format!("fts_search: {:?}", e))?, + None => Vec::new(), + } + } else { + Vec::new() + }; + + // ── Bridge: FTS-only hits get their vectors via getByIds so they ── + // can participate in cosine threshold + MMR diversity. Without + // this, lexical-only hits would be invisible to MMR. + let vector_id_set: std::collections::HashSet<&str> = + above_threshold.iter().map(|m| m.id.as_str()).collect(); + let fts_only_ids: Vec<&str> = fts_ranking + .iter() + .filter(|id| !vector_id_set.contains(id.as_str())) + .map(String::as_str) + .collect(); + if !fts_only_ids.is_empty() { + let fetched = worker_vectorize::get_by_ids(env, &fts_only_ids) + .await + .map_err(|e| format!("vectorize get_by_ids: {:?}", e))?; + for sv in fetched { + if sv.values.is_empty() { + continue; + } + let score = hybrid::cosine_similarity(&query_emb, &sv.values); + if score >= min_similarity { + above_threshold.push(worker_vectorize::VectorMatchWithVector { + id: sv.id, + score, + values: sv.values, + }); + } + } + } if above_threshold.is_empty() { return Ok(format!( @@ -801,26 +867,62 @@ async fn tool_recall_check( )); } - // When filters are active, fetch all above-threshold candidates from D1 - // up front so the metadata filter can run before MMR (otherwise MMR - // would diversify candidates we're about to throw away). When no - // filters are set, keep the original lean flow: MMR first, then a - // smaller D1 fetch for just the survivors. + // Capture id → cosine_score for display BEFORE we consume pool into + // the rerank/filter pipeline below. Used to render per-row `sim:` + // values to the reader. + let id_to_score: std::collections::HashMap = above_threshold + .iter() + .map(|m| (m.id.clone(), m.score)) + .collect(); + + // ── Fuse rankings → narrow candidate pool to fused top-(limit*3) ── + // RRF determines which candidates compete; MMR diversifies within + // that subset. The narrowing is what gives fusion influence over + // the final ranking — without it, MMR's cosine-relevance metric + // would dominate and FTS contribution would be wasted. + let pool: Vec = if hybrid_active + && !fts_ranking.is_empty() + { + let fused = hybrid::rrf_fuse(&fts_ranking, &vector_ranking, fts_weight, DEFAULT_RRF_K); + let narrow_to = (limit * 3).max(limit).min(above_threshold.len()); + let kept_ids: std::collections::HashSet<&str> = fused + .iter() + .take(narrow_to) + .map(|(id, _)| id.as_str()) + .collect(); + // Reorder above_threshold by fused-rank position so MMR's first + // candidate (the highest-cosine in pool) at least comes from the + // top of the fused list when ties or near-ties exist. + let fused_pos: std::collections::HashMap<&str, usize> = fused + .iter() + .enumerate() + .map(|(i, (id, _))| (id.as_str(), i)) + .collect(); + let mut kept: Vec = above_threshold + .into_iter() + .filter(|m| kept_ids.contains(m.id.as_str())) + .collect(); + kept.sort_by_key(|m| fused_pos.get(m.id.as_str()).copied().unwrap_or(usize::MAX)); + kept + } else { + above_threshold + }; + + // ── CLA-108 metadata filters + MMR rerank ───────────────────── let (reranked_ids, memories): (Vec, Vec) = if filters_active { - let candidate_ids: Vec<&str> = above_threshold.iter().map(|m| m.id.as_str()).collect(); + let candidate_ids: Vec<&str> = pool.iter().map(|m| m.id.as_str()).collect(); let candidates = worker_store::get_many(db, &candidate_ids) .await .map_err(|e| format!("get_many: {:?}", e))?; let candidate_lookup: std::collections::HashMap<&str, &Memory> = candidates.iter().map(|m| (m.id.as_str(), m)).collect(); - let filtered_matches: Vec = above_threshold - .iter() + let filtered_matches: Vec = pool + .into_iter() .filter(|vm| { candidate_lookup .get(vm.id.as_str()) .is_some_and(|m| m.matches_filter(entity_filter, memory_type_filter, &args.tags)) }) - .cloned() .collect(); if filtered_matches.is_empty() { return Ok(format!( @@ -835,7 +937,7 @@ async fn tool_recall_check( .collect(); (reranked, kept) } else { - let reranked = crate::worker_mmr::mmr_rerank(&query_emb, &above_threshold, limit, 0.7); + let reranked = crate::worker_mmr::mmr_rerank(&query_emb, &pool, limit, 0.7); let ids: Vec<&str> = reranked.iter().map(String::as_str).collect(); let mems = worker_store::get_many(db, &ids) .await @@ -855,6 +957,9 @@ async fn tool_recall_check( "═══ Oneiro Check ═══\nStore: {} ep, {} sem, {} ori | Topic: \"{}\" | Threshold: {:.2}", ep, sem, ori, args.topic, min_similarity ); + if hybrid_active { + header.push_str(&format!(" | Hybrid: fts_w={:.1}", fts_weight)); + } if filters_active { let mut bits: Vec = Vec::new(); if let Some(e) = entity_filter { @@ -873,10 +978,6 @@ async fn tool_recall_check( // Display in MMR selection order, keeping each memory's raw similarity // score for the reader (the order is MMR, the per-row sim is cosine). - let id_to_score: std::collections::HashMap<&str, f64> = above_threshold - .iter() - .map(|m| (m.id.as_str(), m.score)) - .collect(); for id in &reranked_ids { if let Some(m) = memories.iter().find(|m| &m.id == id) { let sim = id_to_score.get(m.id.as_str()).copied().unwrap_or(0.0); diff --git a/src/worker_mmr.rs b/src/worker_mmr.rs index 3352151..08c82a5 100644 --- a/src/worker_mmr.rs +++ b/src/worker_mmr.rs @@ -26,28 +26,9 @@ #![cfg(target_family = "wasm")] +use crate::hybrid::cosine_similarity; use crate::worker_vectorize::VectorMatchWithVector; -/// Cosine similarity between two equal-length vectors. Assumes neither -/// is the zero vector (Workers AI / Vectorize never emit a zero embedding -/// for non-empty input). Returns 0.0 in the degenerate denominator case -/// rather than NaN — defensive but unreachable in practice. -fn cosine_similarity(a: &[f64], b: &[f64]) -> f64 { - let mut dot = 0.0; - let mut na = 0.0; - let mut nb = 0.0; - for (x, y) in a.iter().zip(b.iter()) { - dot += x * y; - na += x * x; - nb += y * y; - } - let denom = na.sqrt() * nb.sqrt(); - if denom <= f64::EPSILON { - return 0.0; - } - dot / denom -} - /// Rerank a candidate set (already roughly relevance-ordered from /// Vectorize) to balance relevance against diversity. Returns ids of /// the top `k` MMR-selected matches in the order they were selected diff --git a/src/worker_store.rs b/src/worker_store.rs index 18c4a6a..b42ad65 100644 --- a/src/worker_store.rs +++ b/src/worker_store.rs @@ -316,6 +316,41 @@ pub async fn get_recent_episodics(db: &D1Database, limit: usize) -> Result Result> { + if match_expr.is_empty() { + return Ok(Vec::new()); + } + #[derive(serde::Deserialize)] + struct FtsHit { + id: String, + } + let rows: Vec = db + .prepare( + "SELECT id FROM memories_fts + WHERE memories_fts MATCH ? + ORDER BY rank + LIMIT ?", + ) + .bind(&[match_expr.into(), limit.into()])? + .all() + .await? + .results()?; + Ok(rows.into_iter().map(|r| r.id).collect()) +} + /// Memories filtered by entity, ranked by strength. pub async fn recall_by_entity( db: &D1Database, diff --git a/src/worker_vectorize.rs b/src/worker_vectorize.rs index f5b4db1..d540e29 100644 --- a/src/worker_vectorize.rs +++ b/src/worker_vectorize.rs @@ -44,6 +44,15 @@ extern "C" { /// deleteByIds(ids: string[]) → Promise #[wasm_bindgen(method, js_name = "deleteByIds", catch)] fn delete_by_ids(this: &VectorizeIndex, ids: JsValue) -> Result; + + /// getByIds(ids: string[]) → Promise + /// Returns each id's stored vector (no score — there's no query to + /// score against). Used by hybrid retrieval (CLA-109) to fetch + /// vectors for ids that FTS surfaced but Vectorize's top-K query + /// didn't include — so those candidates can still participate in + /// MMR diversity ranking. + #[wasm_bindgen(method, js_name = "getByIds", catch)] + fn get_by_ids(this: &VectorizeIndex, ids: JsValue) -> Result; } impl EnvBinding for VectorizeIndex { @@ -90,6 +99,15 @@ struct QueryResponseWithVectors { matches: Vec, } +/// getByIds returns bare `{id, values}` objects — no score (there was no +/// query). Used by hybrid retrieval to pull vectors for FTS-only hits. +#[derive(Debug, Deserialize)] +pub struct StoredVector { + pub id: String, + #[serde(default)] + pub values: Vec, +} + fn from_env(env: &Env, binding_name: &str) -> Result { env.get_binding::(binding_name) } @@ -175,3 +193,25 @@ pub async fn query_top_k_with_vectors( .map_err(|e| Error::JsError(format!("Vectorize response parse failed: {}", e)))?; Ok(response.matches) } + +/// Fetch stored vectors by id — no query, no scoring, just a lookup. +/// Used by hybrid retrieval (CLA-109): when FTS surfaces an id that +/// wasn't in Vectorize's top-K, we still want it to participate in MMR +/// diversity ranking, which needs the vector. +/// +/// Ids not present in the index are silently dropped from the returned +/// list (Cloudflare's behaviour, not ours). Caller should not assume +/// the response length matches the request length. +pub async fn get_by_ids(env: &Env, ids: &[&str]) -> Result> { + if ids.is_empty() { + return Ok(Vec::new()); + } + let index = from_env(env, "VECTORS")?; + let ids_arr: Array = ids.iter().map(|s| JsValue::from_str(s)).collect(); + let promise = index.get_by_ids(ids_arr.into()).map_err(js_err)?; + let result = JsFuture::from(promise).await.map_err(js_err)?; + // getByIds returns a bare array, not a `{matches: [...]}` envelope. + let vectors: Vec = serde_wasm_bindgen::from_value(result) + .map_err(|e| Error::JsError(format!("Vectorize getByIds parse failed: {}", e)))?; + Ok(vectors) +} From 343dc81e655b0bb2cbe5dd3aaf5b6d2841920bc8 Mon Sep 17 00:00:00 2001 From: Justin Davis Date: Sat, 23 May 2026 11:20:10 +1000 Subject: [PATCH 2/3] fix(fts): trim match_expr defensively in fts_search (CLA-109) PR #39 review (CodeRabbit). The only current caller (build_fts_query) can't produce whitespace-only output, but fts_search is pub and a future caller might pre-construct a malformed expression. A whitespace-only MATCH would error on FTS5's syntax check rather than degenerating cleanly to empty. Shadow match_expr with its trimmed form so the empty-check and the bind both see the same canonical value. --- src/worker_store.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/worker_store.rs b/src/worker_store.rs index b42ad65..2e84f13 100644 --- a/src/worker_store.rs +++ b/src/worker_store.rs @@ -330,6 +330,11 @@ pub async fn fts_search( match_expr: &str, limit: u32, ) -> Result> { + // Trim defensively: build_fts_query (our only current caller) can't + // produce whitespace-only output, but fts_search is pub and a future + // caller might. A whitespace-only MATCH expression would error on + // FTS5's syntax check rather than degenerating cleanly to empty. + let match_expr = match_expr.trim(); if match_expr.is_empty() { return Ok(Vec::new()); } From f0e0cdaa7fede75b1bb92a6c884a2877c5b6ad8d Mon Sep 17 00:00:00 2001 From: Justin Davis Date: Sat, 23 May 2026 11:34:37 +1000 Subject: [PATCH 3/3] feat(recall): img indicator on memory headers (CLA-109) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hybrid retrieval demo exposed the gap directly: surfacing memories by topic works, but you can't tell which candidates have images attached without calling recall_image speculatively. The "I saw the photo in the bucket, surface it via search" workflow stalls at the "which one has the bytes" step. Adds a terse `| img` indicator to both display paths: * format_memory (worker_orient.rs) — used by recall_orient and recall_specific. Image-carrying rows now render as `[episodic | 5d ago | str:0.50 | id:abc12345 | img]`. * tool_recall_check inline render (worker_mcp.rs) — image-carrying rows now render as `[sim:0.77 | str:1.00 | abc12345 | img]`. Both formats only emit the suffix when m.image_hash.is_some() — empty case is byte-identical to pre-change output, so nothing else has to change. Reader sees `img` and knows recall_image will succeed against the id without a speculative round-trip. --- src/worker_mcp.rs | 4 +++- src/worker_orient.rs | 9 ++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/worker_mcp.rs b/src/worker_mcp.rs index a0ed130..5e67525 100644 --- a/src/worker_mcp.rs +++ b/src/worker_mcp.rs @@ -981,11 +981,13 @@ async fn tool_recall_check( for id in &reranked_ids { if let Some(m) = memories.iter().find(|m| &m.id == id) { let sim = id_to_score.get(m.id.as_str()).copied().unwrap_or(0.0); + let img = if m.image_hash.is_some() { " | img" } else { "" }; out.push_str(&format!( - "[sim:{:.2} | str:{:.2} | {}]\n{}\n", + "[sim:{:.2} | str:{:.2} | {}{}]\n{}\n", sim, m.strength, &m.id[..8], + img, m.summary )); } diff --git a/src/worker_orient.rs b/src/worker_orient.rs index 198b99f..d8ab9d1 100644 --- a/src/worker_orient.rs +++ b/src/worker_orient.rs @@ -101,8 +101,14 @@ pub(crate) fn format_memory(m: &Memory) -> String { .as_deref() .map(|s| format!(" via:{}", s)) .unwrap_or_default(); + // Image indicator — terse so it doesn't bloat the header. The reader + // sees `img` and knows recall_image will succeed against this id. + // Without it: the "I came home excited about a photo" workflow + // forces speculative recall_image calls to discover which candidate + // carries the bytes. + let img = if m.image_hash.is_some() { " | img" } else { "" }; format!( - "[{} | {} | str:{:.2} | {} | id:{}{}{}]\n{}\n", + "[{} | {} | str:{:.2} | {} | id:{}{}{}{}]\n{}\n", type_label, age_str, m.strength, @@ -110,6 +116,7 @@ pub(crate) fn format_memory(m: &Memory) -> String { &m.id[..8], tags_str, by, + img, m.content ) }