From 6cf8387ca62d71be1f9f3ded312e9f0bc3903986 Mon Sep 17 00:00:00 2001
From: Justin Davis <justin.davis@energyq.com.au>
Date: Sat, 23 May 2026 10:56:49 +1000
Subject: [PATCH 1/3] =?UTF-8?q?feat(recall=5Fcheck):=20hybrid=20retrieval?=
 =?UTF-8?q?=20=E2=80=94=20FTS5=20+=20Vectorize=20+=20RRF=20fusion=20(CLA-1?=
 =?UTF-8?q?09)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Hybrid search adds BM25-ranked lexical matching alongside the existing
semantic (cosine) search. Lexical catches exact-term hits cosine misses
(names, jargon, unique phrases); semantic catches conceptual hits
lexical misses. Together substantially better than either alone, and
the default architecture for production retrieval systems.

## Pieces

* **migrations/0007_fts5_hybrid_retrieval.sql** — standalone FTS5 virtual
  table indexing content/summary/entity/tags. Insert/update/delete
  triggers keep it in sync with memories. Backfill INSERT seeds the
  index with pre-CLA-109 rows. Smoke-tested locally end-to-end: triggers
  fire correctly on each mutation; partial content updates preserve
  other indexed fields; column-scoped queries work.

* **src/hybrid.rs** (new, non-wasm-gated) — pure-logic module with
  build_fts_query (free-text → safe FTS5 MATCH expression: tokenise,
  trim non-alphanumeric edges, quote each token, OR-join — quoting
  defends against FTS5 keywords like AND/OR/NOT/NEAR), rrf_fuse
  (Reciprocal Rank Fusion with configurable per-retriever weights;
  default k=60 per Cormack et al. 2009), and cosine_similarity
  (shared with worker_mmr, single source of truth). 13 unit tests
  covering tokenisation edge cases, RRF correctness, weight=0
  degeneration, k constant effect.

* **src/worker_vectorize.rs** — adds get_by_ids binding + helper for
  fetching stored vectors by id (no query, no score). Enables FTS-only
  hits to participate in MMR diversity ranking.

* **src/worker_store.rs** — fts_search(db, match_expr, limit) executes
  BM25-ranked queries against memories_fts, returns ids in rank order.

* **src/worker_mcp.rs** — tool_recall_check rewritten:
  1. Vectorize query (existing) + FTS query in parallel-ish (sequential,
     ~5ms saved isn't worth the complexity).
  2. FTS-only hits get vectors via get_by_ids, cosine computed,
     threshold-filtered. Lexical hits no longer invisible to MMR.
  3. RRF fuses the two rankings; pool narrows to fused top-(limit*3).
     The narrowing is what gives fusion influence over the final
     ranking — without it, MMR's cosine-relevance metric would
     dominate and FTS contribution would be wasted.
  4. CLA-108 metadata filters apply (unchanged).
  5. MMR rerank within the narrowed pool (unchanged).
  6. Header surfaces active fts_weight for transparency.

## Tunable knob

`ONEIRO_HYBRID_FTS_WEIGHT` env var (default 1.0) scales the lexical leg's
RRF contribution. Set to 0.0 to disable FTS entirely (degenerates to
pre-CLA-109 semantic-only behaviour) — kill switch in case hybrid
regresses on some query class. Set > 1.0 to over-weight lexical. Tunable
without redeploy.

## Risk / migration notes

The 0007 migration is non-destructive (additive: new table + triggers +
backfill INSERT). Existing memories get indexed in place. Rollback path
exists (DROP TRIGGER + DROP TABLE) but migrations are one-way in
practice — branch can be abandoned cheaply up to ship.

recall_orient is unchanged — it's fixed-recent + orientation, not
topic-driven, so hybrid retrieval doesn't apply.

154 cargo tests pass (was 141 + 13 new). cargo check wasm32 clean.
worker-build --release produces 28.1KB bundle.
---
 migrations/0007_fts5_hybrid_retrieval.sql |  58 +++++
 src/hybrid.rs                             | 278 ++++++++++++++++++++++
 src/lib.rs                                |   1 +
 src/main.rs                               |   1 +
 src/worker_mcp.rs                         | 145 +++++++++--
 src/worker_mmr.rs                         |  21 +-
 src/worker_store.rs                       |  35 +++
 src/worker_vectorize.rs                   |  40 ++++
 8 files changed, 537 insertions(+), 42 deletions(-)
 create mode 100644 migrations/0007_fts5_hybrid_retrieval.sql
 create mode 100644 src/hybrid.rs

diff --git a/migrations/0007_fts5_hybrid_retrieval.sql b/migrations/0007_fts5_hybrid_retrieval.sql
new file mode 100644
index 0000000..6b86ad7
--- /dev/null
+++ b/migrations/0007_fts5_hybrid_retrieval.sql
@@ -0,0 +1,58 @@
+-- migrations/0007_fts5_hybrid_retrieval.sql — Hybrid retrieval (CLA-109).
+--
+-- BM25-ranked full-text search via SQLite's FTS5 virtual table, queried
+-- in parallel with Vectorize at recall time and fused with Reciprocal
+-- Rank Fusion. Lexical search catches exact-term hits cosine misses
+-- (names, jargon, unique phrases). Semantic search catches conceptual
+-- hits lexical misses. Together substantially better than either alone.
+--
+-- Standalone (not external-content) FTS5 table — keeps the index self-
+-- contained at the cost of ~2x storage for the indexed columns. At our
+-- scale (~500 memories × ~500 bytes each = ~250KB extra) the tradeoff
+-- favours simplicity. External-content would couple the index to the
+-- memories table's rowid, which is implicit and brittle to schema work.
+--
+-- Triggers keep memories_fts in sync with memories. The backfill INSERT
+-- at the end seeds the index with rows that pre-date this migration.
+-- Direct INSERTs into memories_fts (like the backfill) don't fire the
+-- memories_fts_insert trigger, so the backfill is safe and won't double-
+-- index. The migration framework guarantees one-shot apply.
+
+CREATE VIRTUAL TABLE memories_fts USING fts5(
+    id UNINDEXED,        -- carried for SELECT, not tokenised for MATCH
+    content,
+    summary,
+    entity,              -- empty string when memories.entity IS NULL
+    tags,                -- JSON-stringified; unicode61 tokeniser handles
+                         -- the brackets/quotes fine — splits on those
+                         -- non-alphanumeric chars as boundaries.
+    tokenize='unicode61 remove_diacritics 1'
+);
+
+-- AFTER triggers fire post-mutation, so memories_fts always sees the
+-- committed state. COALESCE on entity/tags handles the NULL columns
+-- (FTS5 stores empty strings for missing token fields).
+CREATE TRIGGER memories_fts_insert AFTER INSERT ON memories BEGIN
+    INSERT INTO memories_fts(id, content, summary, entity, tags)
+    VALUES (new.id, new.content, new.summary,
+            COALESCE(new.entity, ''), COALESCE(new.tags, '[]'));
+END;
+
+CREATE TRIGGER memories_fts_update AFTER UPDATE ON memories BEGIN
+    UPDATE memories_fts SET
+        content = new.content,
+        summary = new.summary,
+        entity  = COALESCE(new.entity, ''),
+        tags    = COALESCE(new.tags, '[]')
+    WHERE id = new.id;
+END;
+
+CREATE TRIGGER memories_fts_delete AFTER DELETE ON memories BEGIN
+    DELETE FROM memories_fts WHERE id = old.id;
+END;
+
+-- Backfill: index every existing row. Pre-CLA-109 deploys have memories
+-- without a corresponding FTS5 entry; this catches them up.
+INSERT INTO memories_fts(id, content, summary, entity, tags)
+SELECT id, content, summary, COALESCE(entity, ''), COALESCE(tags, '[]')
+FROM memories;
diff --git a/src/hybrid.rs b/src/hybrid.rs
new file mode 100644
index 0000000..3c0c528
--- /dev/null
+++ b/src/hybrid.rs
@@ -0,0 +1,278 @@
+// hybrid.rs — Hybrid retrieval primitives for recall_check (CLA-109).
+//
+// Pure logic that's testable on native (worker_mcp.rs and worker_store.rs
+// are both wasm-gated). Two pieces here:
+//
+//   build_fts_query(raw) — user free-text → safe FTS5 MATCH expression.
+//                          Whitespace-tokenises, strips non-alphanumeric
+//                          edges, quotes each token, OR-joins. Quoting
+//                          defends against tokens that happen to be FTS5
+//                          keywords (AND, OR, NOT, NEAR) — quoted forms
+//                          are literal tokens, not operators.
+//
+//   rrf_fuse(fts, vector, fts_weight, k) — Reciprocal Rank Fusion of two
+//                          ranked id lists. Score per retriever per hit:
+//                              weight * 1 / (k + rank)
+//                          Summed across retrievers, sorted descending.
+//                          k=60 is the original RRF paper's constant
+//                          (Cormack et al. 2009); has held up in practice.
+//                          RRF doesn't require normalising scores across
+//                          retrievers — works directly off rank, which
+//                          is why it's the safe default when fusing
+//                          systems with incompatible score distributions
+//                          (BM25 vs cosine similarity).
+//
+// Both functions are pure — no I/O, no env access — so they compile on
+// every target and live alongside their unit tests.
+
+use std::collections::HashMap;
+
+/// RRF constant from the original paper. Increasing it flattens the
+/// score curve (more weight to lower-ranked hits); decreasing it
+/// sharpens to favour top ranks. 60 is the established default and
+/// rarely benefits from tuning.
+pub const DEFAULT_RRF_K: f64 = 60.0;
+
+/// Cosine similarity between two equal-length vectors. Used by MMR
+/// rerank (worker_mmr.rs) for the diversity term and by hybrid
+/// retrieval (worker_mcp.rs) to score FTS-only candidates after a
+/// getByIds vector fetch. Pure function; testable on native.
+///
+/// Returns 0.0 in the degenerate denominator case rather than NaN —
+/// defensive against zero-length vectors. Workers AI / Vectorize never
+/// emit a zero embedding for non-empty input, so this branch is
+/// unreachable in practice but the safety net is cheap.
+pub fn cosine_similarity(a: &[f64], b: &[f64]) -> f64 {
+    let mut dot = 0.0;
+    let mut na = 0.0;
+    let mut nb = 0.0;
+    for (x, y) in a.iter().zip(b.iter()) {
+        dot += x * y;
+        na += x * x;
+        nb += y * y;
+    }
+    let denom = na.sqrt() * nb.sqrt();
+    if denom == 0.0 {
+        0.0
+    } else {
+        dot / denom
+    }
+}
+
+/// Build a safe FTS5 MATCH expression from user free-text.
+///
+/// Returns `None` when the input has no usable tokens (empty or pure
+/// punctuation) — caller should treat that as "skip the FTS leg of the
+/// hybrid query." Returns `Some(expr)` otherwise; `expr` is a properly
+/// quoted, OR-joined token list ready to pass directly into a `MATCH ?`
+/// bind parameter.
+///
+/// Tokenisation rules:
+///   * split on whitespace
+///   * strip leading/trailing non-alphanumeric characters per token
+///   * drop empty tokens
+///   * strip inner double quotes (defensive — shouldn't reach here)
+///   * wrap each surviving token in double quotes
+///   * join with `" OR "`
+///
+/// Quoting each token makes FTS5 treat it as a literal — so a token that
+/// happens to be "AND" or "OR" is a search term, not a syntax keyword.
+pub fn build_fts_query(raw: &str) -> Option<String> {
+    let tokens: Vec<String> = raw
+        .split_whitespace()
+        .map(|t| {
+            t.trim_matches(|c: char| !c.is_alphanumeric())
+                .replace('"', "")
+        })
+        .filter(|t| !t.is_empty())
+        .map(|t| format!("\"{}\"", t))
+        .collect();
+    if tokens.is_empty() {
+        None
+    } else {
+        Some(tokens.join(" OR "))
+    }
+}
+
+/// Reciprocal Rank Fusion of two ranked id lists.
+///
+/// `fts_ranking` and `vector_ranking` are id lists in rank order (best
+/// first, position 0). Each id gets a score from each list it appears
+/// in: `weight * 1 / (k + rank_1_indexed)`. Per-list scores sum across
+/// retrievers. The output is sorted descending by fused score.
+///
+/// `fts_weight` scales the lexical leg's contribution. The vector leg
+/// is fixed at weight 1.0 (the established baseline). `fts_weight = 0.0`
+/// disables FTS entirely (degenerate vector-only ranking). `fts_weight =
+/// 1.0` is equal weighting. Anything > 1.0 over-weights lexical.
+///
+/// Returns `Vec<(id, fused_score)>`. Score is informational — the rank
+/// order is what downstream consumers use.
+pub fn rrf_fuse(
+    fts_ranking: &[String],
+    vector_ranking: &[String],
+    fts_weight: f64,
+    k: f64,
+) -> Vec<(String, f64)> {
+    let mut scores: HashMap<String, f64> = HashMap::new();
+    if fts_weight > 0.0 {
+        for (rank, id) in fts_ranking.iter().enumerate() {
+            let contribution = fts_weight / (k + (rank + 1) as f64);
+            *scores.entry(id.clone()).or_insert(0.0) += contribution;
+        }
+    }
+    for (rank, id) in vector_ranking.iter().enumerate() {
+        let contribution = 1.0 / (k + (rank + 1) as f64);
+        *scores.entry(id.clone()).or_insert(0.0) += contribution;
+    }
+    let mut out: Vec<(String, f64)> = scores.into_iter().collect();
+    out.sort_by(|a, b| {
+        b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)
+    });
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn ids(s: &[&str]) -> Vec<String> {
+        s.iter().map(|x| x.to_string()).collect()
+    }
+
+    // ---- build_fts_query ----
+
+    #[test]
+    fn empty_input_returns_none() {
+        assert!(build_fts_query("").is_none());
+        assert!(build_fts_query("   ").is_none());
+        assert!(build_fts_query("!!!").is_none()); // pure punctuation
+    }
+
+    #[test]
+    fn single_token() {
+        assert_eq!(build_fts_query("rover").as_deref(), Some("\"rover\""));
+    }
+
+    #[test]
+    fn multi_token_or_joins() {
+        assert_eq!(
+            build_fts_query("rover camera heartbeat").as_deref(),
+            Some("\"rover\" OR \"camera\" OR \"heartbeat\""),
+        );
+    }
+
+    #[test]
+    fn fts5_keywords_get_quoted_as_literal_tokens() {
+        // AND/OR/NOT/NEAR are FTS5 syntax keywords; quoting makes them
+        // literal search terms instead of operators.
+        let q = build_fts_query("rover AND camera").unwrap();
+        assert!(q.contains("\"AND\""));
+        assert!(q.contains("\"rover\""));
+        assert!(q.contains("\"camera\""));
+    }
+
+    #[test]
+    fn punctuation_stripped_from_token_edges() {
+        // Trailing comma and inner apostrophe are noise; the inner
+        // apostrophe is alphanumeric-adjacent so trim_matches leaves
+        // it. We only strip edge non-alphanumeric chars.
+        let q = build_fts_query("chopper, justin's").unwrap();
+        assert!(q.contains("\"chopper\""));
+        assert!(q.contains("justin")); // either "justin" or "justin's" — both fine
+    }
+
+    #[test]
+    fn inner_double_quotes_stripped() {
+        // Defensive — a user query containing a literal double quote
+        // would otherwise break the FTS5 syntax.
+        let q = build_fts_query("say \"hello\"").unwrap();
+        assert!(!q.contains("\\\""));
+        // The "hello" word survives (without its inner quotes), as a
+        // properly-quoted FTS5 token.
+        assert!(q.contains("\"hello\""));
+    }
+
+    // ---- rrf_fuse ----
+
+    #[test]
+    fn empty_inputs_produce_empty_output() {
+        assert!(rrf_fuse(&[], &[], 1.0, DEFAULT_RRF_K).is_empty());
+    }
+
+    #[test]
+    fn single_retriever_orders_by_rank() {
+        let fts = ids(&["a", "b", "c"]);
+        let out = rrf_fuse(&fts, &[], 1.0, DEFAULT_RRF_K);
+        // FTS-only with default weight: order preserved.
+        let ordered: Vec<&str> = out.iter().map(|(id, _)| id.as_str()).collect();
+        assert_eq!(ordered, vec!["a", "b", "c"]);
+    }
+
+    #[test]
+    fn overlapping_rankings_boost_shared_hits() {
+        // "shared" appears top of both lists; should beat any item that
+        // appears in only one.
+        let fts = ids(&["shared", "fts_only"]);
+        let vec = ids(&["shared", "vec_only"]);
+        let out = rrf_fuse(&fts, &vec, 1.0, DEFAULT_RRF_K);
+        assert_eq!(out[0].0, "shared");
+        // "shared" gets contributions from both retrievers; the others
+        // from one each. So its score is roughly double.
+        assert!(out[0].1 > out[1].1);
+    }
+
+    #[test]
+    fn fts_weight_zero_degenerates_to_vector_only() {
+        // With fts_weight=0, the FTS leg contributes nothing — the
+        // ranking is purely vector's ordering.
+        let fts = ids(&["fts_top", "fts_second"]);
+        let vec = ids(&["vec_top", "vec_second"]);
+        let out = rrf_fuse(&fts, &vec, 0.0, DEFAULT_RRF_K);
+        let ordered: Vec<&str> = out.iter().map(|(id, _)| id.as_str()).collect();
+        assert_eq!(ordered, vec!["vec_top", "vec_second"]);
+    }
+
+    #[test]
+    fn fts_weight_higher_than_one_increases_lexical_influence() {
+        // Item at rank 0 of FTS, rank 2 of vector vs item at rank 2 of
+        // FTS, rank 0 of vector. With fts_weight=1 they'd roughly tie;
+        // with fts_weight=3 the FTS-top item should win.
+        let fts = ids(&["a", "b", "c"]);
+        let vec = ids(&["c", "b", "a"]);
+        let balanced = rrf_fuse(&fts, &vec, 1.0, DEFAULT_RRF_K);
+        let fts_heavy = rrf_fuse(&fts, &vec, 3.0, DEFAULT_RRF_K);
+        // Balanced: a and c tie (mirror rankings); b in the middle.
+        // FTS-heavy: a wins outright.
+        let _ = balanced; // not asserting balanced tie behaviour — order undefined
+        assert_eq!(fts_heavy[0].0, "a");
+    }
+
+    #[test]
+    fn rrf_is_robust_to_disjoint_rankings() {
+        // No overlap between retrievers: every hit appears once.
+        let fts = ids(&["a"]);
+        let vec = ids(&["b"]);
+        let out = rrf_fuse(&fts, &vec, 1.0, DEFAULT_RRF_K);
+        assert_eq!(out.len(), 2);
+        // Both at rank 0 with equal weights → equal scores.
+        assert!((out[0].1 - out[1].1).abs() < 1e-12);
+    }
+
+    #[test]
+    fn rrf_k_constant_affects_rank_curve() {
+        // Higher k flattens the score curve. With k=1, rank-1 hits get
+        // 1/2 = 0.5; rank-10 gets 1/11 = 0.09 — sharp.
+        // With k=100, rank-1 gets 1/101 ≈ 0.0099; rank-10 gets 1/110 ≈
+        // 0.0091 — much flatter.
+        let fts = ids(&["a"]);
+        let vec: Vec<String> = (0..10).map(|i| format!("v{}", i)).collect();
+        let sharp = rrf_fuse(&fts, &vec, 1.0, 1.0);
+        let flat = rrf_fuse(&fts, &vec, 1.0, 100.0);
+        // In sharp: gap between rank-1 and rank-10 should be much bigger
+        // than in flat.
+        let sharp_gap = sharp[0].1 - sharp[sharp.len() - 1].1;
+        let flat_gap = flat[0].1 - flat[flat.len() - 1].1;
+        assert!(sharp_gap > flat_gap);
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index d0972b1..d1c6b5b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -14,6 +14,7 @@ mod api_key;
 mod audit;
 mod dialectic_validation;
 mod embed;
+mod hybrid;
 mod key_rate;
 mod memory;
 
diff --git a/src/main.rs b/src/main.rs
index ce47c2d..1896795 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -23,6 +23,7 @@ mod auth_ctx;
 #[allow(dead_code)]
 mod dialectic_validation;
 mod embed;
+mod hybrid;
 mod key_rate;
 mod memory;
 mod store;
diff --git a/src/worker_mcp.rs b/src/worker_mcp.rs
index e38a19f..a0ed130 100644
--- a/src/worker_mcp.rs
+++ b/src/worker_mcp.rs
@@ -14,6 +14,7 @@
 // recall_specific, review, reframe, forget, reflect, remember_with_image)
 // and the OAuth path for non-rover callers.
 
+use crate::hybrid::{self, DEFAULT_RRF_K};
 use crate::memory::{Memory, MemoryType};
 use crate::worker_auth_ctx::{self, AuthCtx};
 use crate::worker_orient::format_memory;
@@ -748,6 +749,24 @@ struct RecallCheckArgs {
     tags: Vec<String>,
 }
 
+/// Read `ONEIRO_HYBRID_FTS_WEIGHT` from worker env (CLA-109). Default
+/// 1.0 (equal weighting with vector). Set to 0.0 to disable the FTS
+/// leg entirely (semantic-only — pre-CLA-109 behaviour). Tunable knob
+/// for A/B without redeploying schema changes.
+fn read_fts_weight(env: &Env) -> f64 {
+    let raw = match env.var("ONEIRO_HYBRID_FTS_WEIGHT") {
+        Ok(v) => v.to_string(),
+        Err(_) => return 1.0,
+    };
+    raw.parse::<f64>().unwrap_or_else(|_| {
+        worker::console_error!(
+            "ONEIRO_HYBRID_FTS_WEIGHT={:?} unparseable; using default 1.0",
+            raw
+        );
+        1.0
+    })
+}
+
 async fn tool_recall_check(
     env: &Env,
     db: &D1Database,
@@ -773,26 +792,73 @@ async fn tool_recall_check(
     let filters_active =
         entity_filter.is_some() || memory_type_filter.is_some() || !args.tags.is_empty();
 
+    let fts_weight = read_fts_weight(env);
+    let hybrid_active = fts_weight > 0.0;
+
     let query_emb = worker_embed::embed_query(env, &args.topic)
         .await
         .map_err(|e| format!("embed_query: {:?}", e))?;
     // Oversample wider when filters are active — entity / memory_type / tags
-    // are evaluated post-Vectorize (current index has no metadata; pushdown
-    // would require re-upserting every vector with metadata, deferred to
-    // CLA-109). A 90%-discriminating filter on a 10-deep oversample leaves
-    // ~1 candidate — not enough to fill `limit` or give MMR diversity room.
-    // Doubling the floor + ceiling keeps the post-filter survivor pool deep
-    // enough without taxing free-tier Vectorize quotas.
+    // filter post-Vectorize (no metadata pushdown in the index, would
+    // require re-upserting every vector with metadata). Hybrid is
+    // similarly post-fusion. A 90%-discriminating filter on a 10-deep
+    // oversample leaves ~1 candidate — not enough to fill `limit` or
+    // feed MMR. Doubling the floor + ceiling keeps the post-filter
+    // survivor pool deep enough without taxing free-tier quotas.
     let oversample_base = if filters_active { limit * 8 } else { limit * 4 };
     let oversample_min = if filters_active { 20 } else { 10 };
     let oversample = (oversample_base.clamp(oversample_min, 100)) as u32;
-    let matches = worker_vectorize::query_top_k_with_vectors(env, &query_emb, oversample)
+
+    // ── Vector leg ────────────────────────────────────────────────
+    let vector_matches = worker_vectorize::query_top_k_with_vectors(env, &query_emb, oversample)
         .await
         .map_err(|e| format!("vectorize query: {:?}", e))?;
-    let above_threshold: Vec<worker_vectorize::VectorMatchWithVector> = matches
+    let mut above_threshold: Vec<worker_vectorize::VectorMatchWithVector> = vector_matches
         .into_iter()
         .filter(|m| m.score >= min_similarity)
         .collect();
+    let vector_ranking: Vec<String> = above_threshold.iter().map(|m| m.id.clone()).collect();
+
+    // ── FTS leg (skipped if weight == 0) ──────────────────────────
+    let fts_ranking: Vec<String> = if hybrid_active {
+        match hybrid::build_fts_query(&args.topic) {
+            Some(expr) => worker_store::fts_search(db, &expr, oversample)
+                .await
+                .map_err(|e| format!("fts_search: {:?}", e))?,
+            None => Vec::new(),
+        }
+    } else {
+        Vec::new()
+    };
+
+    // ── Bridge: FTS-only hits get their vectors via getByIds so they ──
+    //    can participate in cosine threshold + MMR diversity. Without
+    //    this, lexical-only hits would be invisible to MMR.
+    let vector_id_set: std::collections::HashSet<&str> =
+        above_threshold.iter().map(|m| m.id.as_str()).collect();
+    let fts_only_ids: Vec<&str> = fts_ranking
+        .iter()
+        .filter(|id| !vector_id_set.contains(id.as_str()))
+        .map(String::as_str)
+        .collect();
+    if !fts_only_ids.is_empty() {
+        let fetched = worker_vectorize::get_by_ids(env, &fts_only_ids)
+            .await
+            .map_err(|e| format!("vectorize get_by_ids: {:?}", e))?;
+        for sv in fetched {
+            if sv.values.is_empty() {
+                continue;
+            }
+            let score = hybrid::cosine_similarity(&query_emb, &sv.values);
+            if score >= min_similarity {
+                above_threshold.push(worker_vectorize::VectorMatchWithVector {
+                    id: sv.id,
+                    score,
+                    values: sv.values,
+                });
+            }
+        }
+    }
 
     if above_threshold.is_empty() {
         return Ok(format!(
@@ -801,26 +867,62 @@ async fn tool_recall_check(
         ));
     }
 
-    // When filters are active, fetch all above-threshold candidates from D1
-    // up front so the metadata filter can run before MMR (otherwise MMR
-    // would diversify candidates we're about to throw away). When no
-    // filters are set, keep the original lean flow: MMR first, then a
-    // smaller D1 fetch for just the survivors.
+    // Capture id → cosine_score for display BEFORE we consume pool into
+    // the rerank/filter pipeline below. Used to render per-row `sim:`
+    // values to the reader.
+    let id_to_score: std::collections::HashMap<String, f64> = above_threshold
+        .iter()
+        .map(|m| (m.id.clone(), m.score))
+        .collect();
+
+    // ── Fuse rankings → narrow candidate pool to fused top-(limit*3) ──
+    //    RRF determines which candidates compete; MMR diversifies within
+    //    that subset. The narrowing is what gives fusion influence over
+    //    the final ranking — without it, MMR's cosine-relevance metric
+    //    would dominate and FTS contribution would be wasted.
+    let pool: Vec<worker_vectorize::VectorMatchWithVector> = if hybrid_active
+        && !fts_ranking.is_empty()
+    {
+        let fused = hybrid::rrf_fuse(&fts_ranking, &vector_ranking, fts_weight, DEFAULT_RRF_K);
+        let narrow_to = (limit * 3).max(limit).min(above_threshold.len());
+        let kept_ids: std::collections::HashSet<&str> = fused
+            .iter()
+            .take(narrow_to)
+            .map(|(id, _)| id.as_str())
+            .collect();
+        // Reorder above_threshold by fused-rank position so MMR's first
+        // candidate (the highest-cosine in pool) at least comes from the
+        // top of the fused list when ties or near-ties exist.
+        let fused_pos: std::collections::HashMap<&str, usize> = fused
+            .iter()
+            .enumerate()
+            .map(|(i, (id, _))| (id.as_str(), i))
+            .collect();
+        let mut kept: Vec<worker_vectorize::VectorMatchWithVector> = above_threshold
+            .into_iter()
+            .filter(|m| kept_ids.contains(m.id.as_str()))
+            .collect();
+        kept.sort_by_key(|m| fused_pos.get(m.id.as_str()).copied().unwrap_or(usize::MAX));
+        kept
+    } else {
+        above_threshold
+    };
+
+    // ── CLA-108 metadata filters + MMR rerank ─────────────────────
     let (reranked_ids, memories): (Vec<String>, Vec<Memory>) = if filters_active {
-        let candidate_ids: Vec<&str> = above_threshold.iter().map(|m| m.id.as_str()).collect();
+        let candidate_ids: Vec<&str> = pool.iter().map(|m| m.id.as_str()).collect();
         let candidates = worker_store::get_many(db, &candidate_ids)
             .await
             .map_err(|e| format!("get_many: {:?}", e))?;
         let candidate_lookup: std::collections::HashMap<&str, &Memory> =
             candidates.iter().map(|m| (m.id.as_str(), m)).collect();
-        let filtered_matches: Vec<worker_vectorize::VectorMatchWithVector> = above_threshold
-            .iter()
+        let filtered_matches: Vec<worker_vectorize::VectorMatchWithVector> = pool
+            .into_iter()
             .filter(|vm| {
                 candidate_lookup
                     .get(vm.id.as_str())
                     .is_some_and(|m| m.matches_filter(entity_filter, memory_type_filter, &args.tags))
             })
-            .cloned()
             .collect();
         if filtered_matches.is_empty() {
             return Ok(format!(
@@ -835,7 +937,7 @@ async fn tool_recall_check(
             .collect();
         (reranked, kept)
     } else {
-        let reranked = crate::worker_mmr::mmr_rerank(&query_emb, &above_threshold, limit, 0.7);
+        let reranked = crate::worker_mmr::mmr_rerank(&query_emb, &pool, limit, 0.7);
         let ids: Vec<&str> = reranked.iter().map(String::as_str).collect();
         let mems = worker_store::get_many(db, &ids)
             .await
@@ -855,6 +957,9 @@ async fn tool_recall_check(
         "═══ Oneiro Check ═══\nStore: {} ep, {} sem, {} ori | Topic: \"{}\" | Threshold: {:.2}",
         ep, sem, ori, args.topic, min_similarity
     );
+    if hybrid_active {
+        header.push_str(&format!(" | Hybrid: fts_w={:.1}", fts_weight));
+    }
     if filters_active {
         let mut bits: Vec<String> = Vec::new();
         if let Some(e) = entity_filter {
@@ -873,10 +978,6 @@ async fn tool_recall_check(
 
     // Display in MMR selection order, keeping each memory's raw similarity
     // score for the reader (the order is MMR, the per-row sim is cosine).
-    let id_to_score: std::collections::HashMap<&str, f64> = above_threshold
-        .iter()
-        .map(|m| (m.id.as_str(), m.score))
-        .collect();
     for id in &reranked_ids {
         if let Some(m) = memories.iter().find(|m| &m.id == id) {
             let sim = id_to_score.get(m.id.as_str()).copied().unwrap_or(0.0);
diff --git a/src/worker_mmr.rs b/src/worker_mmr.rs
index 3352151..08c82a5 100644
--- a/src/worker_mmr.rs
+++ b/src/worker_mmr.rs
@@ -26,28 +26,9 @@
 
 #![cfg(target_family = "wasm")]
 
+use crate::hybrid::cosine_similarity;
 use crate::worker_vectorize::VectorMatchWithVector;
 
-/// Cosine similarity between two equal-length vectors. Assumes neither
-/// is the zero vector (Workers AI / Vectorize never emit a zero embedding
-/// for non-empty input). Returns 0.0 in the degenerate denominator case
-/// rather than NaN — defensive but unreachable in practice.
-fn cosine_similarity(a: &[f64], b: &[f64]) -> f64 {
-    let mut dot = 0.0;
-    let mut na = 0.0;
-    let mut nb = 0.0;
-    for (x, y) in a.iter().zip(b.iter()) {
-        dot += x * y;
-        na += x * x;
-        nb += y * y;
-    }
-    let denom = na.sqrt() * nb.sqrt();
-    if denom <= f64::EPSILON {
-        return 0.0;
-    }
-    dot / denom
-}
-
 /// Rerank a candidate set (already roughly relevance-ordered from
 /// Vectorize) to balance relevance against diversity. Returns ids of
 /// the top `k` MMR-selected matches in the order they were selected
diff --git a/src/worker_store.rs b/src/worker_store.rs
index 18c4a6a..b42ad65 100644
--- a/src/worker_store.rs
+++ b/src/worker_store.rs
@@ -316,6 +316,41 @@ pub async fn get_recent_episodics(db: &D1Database, limit: usize) -> Result<Vec<M
     Ok(rows.into_iter().map(MemoryRow::into_memory).collect())
 }
 
+/// BM25-ranked full-text search via the memories_fts virtual table
+/// (CLA-109). Returns memory IDs in rank order (best match first). The
+/// caller passes a pre-built FTS5 MATCH expression — use
+/// `crate::hybrid::build_fts_query` to construct one safely from user
+/// free-text. An empty / None query short-circuits (caller skips FTS).
+///
+/// BM25 is FTS5's default ranking function; we don't expose the raw
+/// score because RRF fusion uses rank, not score. Returning ids in
+/// order is the contract.
+pub async fn fts_search(
+    db: &D1Database,
+    match_expr: &str,
+    limit: u32,
+) -> Result<Vec<String>> {
+    if match_expr.is_empty() {
+        return Ok(Vec::new());
+    }
+    #[derive(serde::Deserialize)]
+    struct FtsHit {
+        id: String,
+    }
+    let rows: Vec<FtsHit> = db
+        .prepare(
+            "SELECT id FROM memories_fts
+             WHERE memories_fts MATCH ?
+             ORDER BY rank
+             LIMIT ?",
+        )
+        .bind(&[match_expr.into(), limit.into()])?
+        .all()
+        .await?
+        .results()?;
+    Ok(rows.into_iter().map(|r| r.id).collect())
+}
+
 /// Memories filtered by entity, ranked by strength.
 pub async fn recall_by_entity(
     db: &D1Database,
diff --git a/src/worker_vectorize.rs b/src/worker_vectorize.rs
index f5b4db1..d540e29 100644
--- a/src/worker_vectorize.rs
+++ b/src/worker_vectorize.rs
@@ -44,6 +44,15 @@ extern "C" {
     /// deleteByIds(ids: string[]) → Promise<VectorizeAsyncMutation>
     #[wasm_bindgen(method, js_name = "deleteByIds", catch)]
     fn delete_by_ids(this: &VectorizeIndex, ids: JsValue) -> Result<Promise, JsValue>;
+
+    /// getByIds(ids: string[]) → Promise<VectorizeVector[]>
+    /// Returns each id's stored vector (no score — there's no query to
+    /// score against). Used by hybrid retrieval (CLA-109) to fetch
+    /// vectors for ids that FTS surfaced but Vectorize's top-K query
+    /// didn't include — so those candidates can still participate in
+    /// MMR diversity ranking.
+    #[wasm_bindgen(method, js_name = "getByIds", catch)]
+    fn get_by_ids(this: &VectorizeIndex, ids: JsValue) -> Result<Promise, JsValue>;
 }
 
 impl EnvBinding for VectorizeIndex {
@@ -90,6 +99,15 @@ struct QueryResponseWithVectors {
     matches: Vec<VectorMatchWithVector>,
 }
 
+/// getByIds returns bare `{id, values}` objects — no score (there was no
+/// query). Used by hybrid retrieval to pull vectors for FTS-only hits.
+#[derive(Debug, Deserialize)]
+pub struct StoredVector {
+    pub id: String,
+    #[serde(default)]
+    pub values: Vec<f64>,
+}
+
 fn from_env(env: &Env, binding_name: &str) -> Result<VectorizeIndex> {
     env.get_binding::<VectorizeIndex>(binding_name)
 }
@@ -175,3 +193,25 @@ pub async fn query_top_k_with_vectors(
         .map_err(|e| Error::JsError(format!("Vectorize response parse failed: {}", e)))?;
     Ok(response.matches)
 }
+
+/// Fetch stored vectors by id — no query, no scoring, just a lookup.
+/// Used by hybrid retrieval (CLA-109): when FTS surfaces an id that
+/// wasn't in Vectorize's top-K, we still want it to participate in MMR
+/// diversity ranking, which needs the vector.
+///
+/// Ids not present in the index are silently dropped from the returned
+/// list (Cloudflare's behaviour, not ours). Caller should not assume
+/// the response length matches the request length.
+pub async fn get_by_ids(env: &Env, ids: &[&str]) -> Result<Vec<StoredVector>> {
+    if ids.is_empty() {
+        return Ok(Vec::new());
+    }
+    let index = from_env(env, "VECTORS")?;
+    let ids_arr: Array = ids.iter().map(|s| JsValue::from_str(s)).collect();
+    let promise = index.get_by_ids(ids_arr.into()).map_err(js_err)?;
+    let result = JsFuture::from(promise).await.map_err(js_err)?;
+    // getByIds returns a bare array, not a `{matches: [...]}` envelope.
+    let vectors: Vec<StoredVector> = serde_wasm_bindgen::from_value(result)
+        .map_err(|e| Error::JsError(format!("Vectorize getByIds parse failed: {}", e)))?;
+    Ok(vectors)
+}

From 343dc81e655b0bb2cbe5dd3aaf5b6d2841920bc8 Mon Sep 17 00:00:00 2001
From: Justin Davis <justin.davis@energyq.com.au>
Date: Sat, 23 May 2026 11:20:10 +1000
Subject: [PATCH 2/3] fix(fts): trim match_expr defensively in fts_search
 (CLA-109)

PR #39 review (CodeRabbit). The only current caller (build_fts_query)
can't produce whitespace-only output, but fts_search is pub and a
future caller might pre-construct a malformed expression. A
whitespace-only MATCH would error on FTS5's syntax check rather than
degenerating cleanly to empty.

Shadow match_expr with its trimmed form so the empty-check and the
bind both see the same canonical value.
---
 src/worker_store.rs | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/worker_store.rs b/src/worker_store.rs
index b42ad65..2e84f13 100644
--- a/src/worker_store.rs
+++ b/src/worker_store.rs
@@ -330,6 +330,11 @@ pub async fn fts_search(
     match_expr: &str,
     limit: u32,
 ) -> Result<Vec<String>> {
+    // Trim defensively: build_fts_query (our only current caller) can't
+    // produce whitespace-only output, but fts_search is pub and a future
+    // caller might. A whitespace-only MATCH expression would error on
+    // FTS5's syntax check rather than degenerating cleanly to empty.
+    let match_expr = match_expr.trim();
     if match_expr.is_empty() {
         return Ok(Vec::new());
     }

From f0e0cdaa7fede75b1bb92a6c884a2877c5b6ad8d Mon Sep 17 00:00:00 2001
From: Justin Davis <justin.davis@energyq.com.au>
Date: Sat, 23 May 2026 11:34:37 +1000
Subject: [PATCH 3/3] feat(recall): img indicator on memory headers (CLA-109)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The hybrid retrieval demo exposed the gap directly: surfacing memories
by topic works, but you can't tell which candidates have images
attached without calling recall_image speculatively. The "I saw the
photo in the bucket, surface it via search" workflow stalls at the
"which one has the bytes" step.

Adds a terse `| img` indicator to both display paths:

  * format_memory (worker_orient.rs) — used by recall_orient and
    recall_specific. Image-carrying rows now render as
    `[episodic | 5d ago | str:0.50 | id:abc12345 | img]`.

  * tool_recall_check inline render (worker_mcp.rs) — image-carrying
    rows now render as `[sim:0.77 | str:1.00 | abc12345 | img]`.

Both formats only emit the suffix when m.image_hash.is_some() — empty
case is byte-identical to pre-change output, so nothing else has to
change. Reader sees `img` and knows recall_image will succeed against
the id without a speculative round-trip.
---
 src/worker_mcp.rs    | 4 +++-
 src/worker_orient.rs | 9 ++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/worker_mcp.rs b/src/worker_mcp.rs
index a0ed130..5e67525 100644
--- a/src/worker_mcp.rs
+++ b/src/worker_mcp.rs
@@ -981,11 +981,13 @@ async fn tool_recall_check(
     for id in &reranked_ids {
         if let Some(m) = memories.iter().find(|m| &m.id == id) {
             let sim = id_to_score.get(m.id.as_str()).copied().unwrap_or(0.0);
+            let img = if m.image_hash.is_some() { " | img" } else { "" };
             out.push_str(&format!(
-                "[sim:{:.2} | str:{:.2} | {}]\n{}\n",
+                "[sim:{:.2} | str:{:.2} | {}{}]\n{}\n",
                 sim,
                 m.strength,
                 &m.id[..8],
+                img,
                 m.summary
             ));
         }
diff --git a/src/worker_orient.rs b/src/worker_orient.rs
index 198b99f..d8ab9d1 100644
--- a/src/worker_orient.rs
+++ b/src/worker_orient.rs
@@ -101,8 +101,14 @@ pub(crate) fn format_memory(m: &Memory) -> String {
         .as_deref()
         .map(|s| format!(" via:{}", s))
         .unwrap_or_default();
+    // Image indicator — terse so it doesn't bloat the header. The reader
+    // sees `img` and knows recall_image will succeed against this id.
+    // Without it: the "I came home excited about a photo" workflow
+    // forces speculative recall_image calls to discover which candidate
+    // carries the bytes.
+    let img = if m.image_hash.is_some() { " | img" } else { "" };
     format!(
-        "[{} | {} | str:{:.2} | {} | id:{}{}{}]\n{}\n",
+        "[{} | {} | str:{:.2} | {} | id:{}{}{}{}]\n{}\n",
         type_label,
         age_str,
         m.strength,
@@ -110,6 +116,7 @@ pub(crate) fn format_memory(m: &Memory) -> String {
         &m.id[..8],
         tags_str,
         by,
+        img,
         m.content
     )
 }