From 51efd49b12623505e1da2d2a7a453b4a2628fdae Mon Sep 17 00:00:00 2001
From: Lisa <lisa.welsch1985@gmail.com>
Date: Mon, 13 Apr 2026 18:16:08 +0200
Subject: [PATCH 01/12] fix: repair broken README doc links and convert docs
 index to MDX

- Replace docs/user/*.md links with lip-sigma.vercel.app equivalents
- Convert docs/index.astro to index.mdx so markdown renders correctly

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 README.md                                         | 15 ++++++++-------
 website/src/pages/docs/{index.astro => index.mdx} | 10 +++++-----
 2 files changed, 13 insertions(+), 12 deletions(-)
 rename website/src/pages/docs/{index.astro => index.mdx} (87%)
diff --git a/README.md b/README.md
index acc8c53..6aea39a 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 # LIP — Linked Incremental Protocol
 
-[![Website](https://img.shields.io/badge/website-lip.dev-8b5cf6?style=flat-square&logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAyNCAyNCI+PHBhdGggZmlsbD0id2hpdGUiIGQ9Ik0xMiAyQzYuNDggMiAyIDYuNDggMiAxMnM0LjQ4IDEwIDEwIDEwIDEwLTQuNDggMTAtMTBTMTcuNTIgMiAxMiAyek0xMSA3djZsNS4yNSAzLjE1LS44NSAxLjQyTDEwIDE0VjdoMXoiLz48L3N2Zz4=)](https://lip.dev)
+[![Website](https://img.shields.io/badge/website-lip--sigma.vercel.app-8b5cf6?style=flat-square&logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAyNCAyNCI+PHBhdGggZmlsbD0id2hpdGUiIGQ9Ik0xMiAyQzYuNDggMiAyIDYuNDggMiAxMnM0LjQ4IDEwIDEwIDEwIDEwLTQuNDggMTAtMTBTMTcuNTIgMiAxMiAyek0xMSA3djZsNS4yNSAzLjE1LS44NSAxLjQyTDEwIDE0VjdoMXoiLz48L3N2Zz4=)](https://lip-sigma.vercel.app)
+[![Docs](https://img.shields.io/badge/docs-lip--sigma.vercel.app%2Fdocs-06b6d4?style=flat-square)](https://lip-sigma.vercel.app/docs)
 [![Crates.io](https://img.shields.io/crates/v/lip-cli?style=flat-square&color=8b5cf6)](https://crates.io/crates/lip-cli)
 [![License: MIT](https://img.shields.io/badge/license-MIT-06b6d4?style=flat-square)](LICENSE)
 [![Rust 1.78+](https://img.shields.io/badge/rust-1.78+-06b6d4?style=flat-square&logo=rust)](https://www.rust-lang.org)
@@ -47,7 +48,7 @@ All of them have built custom, proprietary, incompatible code graph layers to an
 
 **LIP is not a replacement for LSP either.** It ships an LSP bridge (`lip lsp`) so any editor sees it as a standard language server. The difference is that LIP's answers come from a persistent live graph rather than an in-memory server that restarts cold.
 
-See [docs/user/comparisons.md](docs/user/comparisons.md) for the full breakdown, including when to use all three together.
+See [LSP, SCIP & LIP](https://lip-sigma.vercel.app/docs/comparisons) for the full breakdown, including when to use all three together.
 
 ---
 
@@ -167,11 +168,11 @@ lip slice --cargo                                        # index ./Cargo.toml de
 lip slice --npm                                          # index ./package.json deps
 lip slice --pub                                          # index ./pubspec.yaml deps
 lip slice --pip                                          # index pip-installed packages
-lip slice --cargo --push --registry https://registry.lip.dev
+lip slice --cargo --push --registry https://your-registry.internal
 
-# Fetch / publish slices directly
-lip fetch <sha256-hash> --registry https://registry.lip.dev
-lip push  slice.json    --registry https://registry.lip.dev
+# Fetch / publish slices
+lip fetch <sha256-hash> --registry https://your-registry.internal
+lip push  slice.json    --registry https://your-registry.internal
 
 # Force re-index specific files from disk (v1.6)
 lip query reindex-files file:///src/auth.rs file:///src/generated/schema.rs
@@ -245,7 +246,7 @@ lip query export-embeddings file:///src/auth.rs file:///src/session.rs --output
 4. Make changes
 5. `lip_annotation_set` — release locks, leave notes
 
-See [docs/user/mcp-integration.md](docs/user/mcp-integration.md) for full tool reference.
+See [MCP Integration docs](https://lip-sigma.vercel.app/docs/mcp) for full tool reference.
 
 ---
 
diff --git a/website/src/pages/docs/index.astro b/website/src/pages/docs/index.mdx
similarity index 87%
rename from website/src/pages/docs/index.astro
rename to website/src/pages/docs/index.mdx
index 406b471..3887abe 100644
--- a/website/src/pages/docs/index.astro
+++ b/website/src/pages/docs/index.mdx
@@ -1,9 +1,11 @@
 ---
-import DocsLayout from '../../layouts/DocsLayout.astro';
+layout: ../../layouts/DocsLayout.astro
+title: Documentation
+description: LIP documentation — guides, CLI reference, daemon protocol, MCP integration, and more.
 ---
-<DocsLayout title="Documentation" description="LIP documentation — guides, CLI reference, daemon protocol, MCP integration, and more.">
 
-<h1>LIP Documentation</h1>
+# LIP Documentation
+
 <p class="lead">Everything you need to install, run, and integrate LIP — the persistent, incremental code intelligence daemon.</p>
 
 ---
@@ -37,5 +39,3 @@ Full documentation for every CLI command, daemon configuration option, and proto
 |---|---|
 | [Glossary](/docs/glossary) | Blast radius, confidence tiers, WAL journal, Merkle sync, symbol URI, and more |
 | [Protocol Specification](/docs/spec) | Full design document — wire format, symbol URIs, security, architecture |
-
-</DocsLayout>

From cca6e8fe33d4b5d84a8c0204ebbcb806f79b7315 Mon Sep 17 00:00:00 2001
From: Lisa <lisa.welsch1985@gmail.com>
Date: Wed, 15 Apr 2026 13:17:46 +0200
Subject: [PATCH 02/12] =?UTF-8?q?feat:=20v2.1.0=20=E2=80=94=20stream=5Fcon?=
 =?UTF-8?q?text=20(token-budgeted=20RAG=20context=20streaming)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements LIP 2.1.0 proposal: callers stream symbols ranked by relevance
to a cursor position and stop reading when their prompt budget is full,
instead of fetching top-k and locally truncating.

- ClientMessage::StreamContext { file_uri, cursor_position, max_tokens, model? }
- ServerMessage::SymbolInfo { symbol_info, relevance_score, token_cost }
- ServerMessage::EndStream { reason, emitted, total_candidates, error? }
  with EndStreamReason = budget_reached | exhausted | error
- Daemon writes one frame at a time; back-pressure throttles ranking,
  BrokenPipe aborts the walk on client disconnect.
- Relevance order (spec §2.3): cursor symbol → callers (blast-radius CPG)
  → callees → related types.
- Conservative chars÷4 + 8 token-cost estimate per spec §2.4.
- protocol_version bumped 1 → 2 in HandshakeResult.
- New `lip stream-context <uri> <line:col> --max-tokens N` CLI subcommand.
- Integration tests cover zero-budget, cursor out-of-range, and v2 handshake.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md                            |  15 ++
 Cargo.lock                              |   6 +-
 Cargo.toml                              |   2 +-
 bindings/rust/src/daemon/session.rs     | 230 +++++++++++++++++++++++-
 bindings/rust/src/query_graph/types.rs  | 120 ++++++++++++-
 bindings/rust/tests/integration.rs      | 151 ++++++++++++++++
 tools/lip-cli/src/cmd/mod.rs            |   1 +
 tools/lip-cli/src/cmd/stream_context.rs |  81 +++++++++
 tools/lip-cli/src/main.rs               |   3 +
 9 files changed, 602 insertions(+), 7 deletions(-)
 create mode 100644 tools/lip-cli/src/cmd/stream_context.rs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2952b53..7465f5b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,21 @@ All notable changes to this project are documented here.
 
 ---
 
+## [2.1.0] — 2026-04-15
+
+### Added
+
+**v2.1 — `stream_context`: token-budgeted RAG context streaming**
+
+- **`StreamContext { file_uri, cursor_position, max_tokens, model? }`** — new streaming wire message. Daemon ranks symbols relevant to the cursor and emits one `SymbolInfo { symbol_info, relevance_score, token_cost }` frame at a time, terminating with exactly one `EndStream { reason, emitted, total_candidates, error? }` frame. Reasons: `budget_reached`, `exhausted`, `error`. Replaces the broken "fetch top-k, locally truncate to prompt budget" pattern with stream-until-full. Spec §9.2.
+- **Relevance ordering** (spec §2.3): direct symbol at cursor → callers (from blast-radius CPG walk) → callees / references → related types.
+- **Token-cost estimate**: conservative `ceil((len(signature) + len(documentation)) / 4) + 8` per symbol.
+- **Back-pressure**: daemon does not buffer ahead of the socket. `BrokenPipe` from a closing client aborts the ranking walk cleanly. `StreamContext` is rejected from `Batch` / `BatchQuery`.
+- **`protocol_version` bumped from `1` → `2`** in `HandshakeResult`. Clients can detect streaming support via handshake.
+- **`lip stream-context <file_uri> <line:col> --max-tokens N [--model M]`** — new CLI subcommand prints frames as JSON for manual testing.
+
+---
+
 ## [2.0.0] — 2026-04-13
 
 ### Added
diff --git a/Cargo.lock b/Cargo.lock
index f2bb1f0..b07921f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1074,7 +1074,7 @@ checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
 
 [[package]]
 name = "lip"
-version = "2.0.0"
+version = "2.1.0"
 dependencies = [
  "anyhow",
  "criterion",
@@ -1111,7 +1111,7 @@ dependencies = [
 
 [[package]]
 name = "lip-cli"
-version = "2.0.0"
+version = "2.1.0"
 dependencies = [
  "anyhow",
  "clap",
@@ -1130,7 +1130,7 @@ dependencies = [
 
 [[package]]
 name = "lip-registry"
-version = "2.0.0"
+version = "2.1.0"
 dependencies = [
  "anyhow",
  "axum",
diff --git a/Cargo.toml b/Cargo.toml
index b518da7..01bd6e5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,7 +7,7 @@ members  = [
 ]
 
 [workspace.package]
-version    = "2.0.0"
+version    = "2.1.0"
 edition    = "2021"
 authors    = ["Lisa Welsch <lisa@tastehub.io>"]
 license    = "MIT"
diff --git a/bindings/rust/src/daemon/session.rs b/bindings/rust/src/daemon/session.rs
index c3d2561..702e369 100644
--- a/bindings/rust/src/daemon/session.rs
+++ b/bindings/rust/src/daemon/session.rs
@@ -17,7 +17,7 @@ use super::watcher::{uri_to_path, FileWatcherHandle};
 
 /// Monotonic protocol version. Bumped only on breaking wire-format changes.
 /// Clients can detect drift by comparing against this value in `HandshakeResult`.
-const PROTOCOL_VERSION: u32 = 1;
+const PROTOCOL_VERSION: u32 = 2;
 
 /// Per-connection session state.
 pub struct Session {
@@ -96,6 +96,25 @@ impl Session {
                 }
             };
 
+            // Streaming requests bypass the unary handle/response cycle:
+            // they write N frames + an end_stream terminator directly.
+            if let ClientMessage::StreamContext {
+                file_uri,
+                cursor_position,
+                max_tokens,
+                model: _,
+            } = msg
+            {
+                if let Err(e) = self
+                    .handle_stream_context(&mut stream, file_uri, cursor_position, max_tokens)
+                    .await
+                {
+                    error!("stream_context write error: {e}");
+                    break;
+                }
+                continue;
+            }
+
             let response = self.handle(msg).await;
             if let Err(e) = write_message(&mut stream, &response).await {
                 error!("write error: {e}");
@@ -1497,8 +1516,213 @@ impl Session {
                     query_model,
                 }
             }
+
+            // Streaming variant — caught earlier in `run`. Reached only if a
+            // client embedded one inside a Batch / BatchQuery, which is not
+            // supported.
+            ClientMessage::StreamContext { .. } => ServerMessage::Error {
+                message: "stream_context is a streaming request and cannot be \
+                          batched or nested"
+                    .into(),
+            },
+        }
+    }
+
+    /// Handle a [`ClientMessage::StreamContext`] by streaming `symbol_info`
+    /// frames followed by exactly one `end_stream` terminator.
+    ///
+    /// Frames are written one at a time with no internal buffering — the
+    /// daemon's `write_message` blocks on socket back-pressure, which throttles
+    /// ranking work when the client stops reading. A closed socket surfaces
+    /// as `BrokenPipe` and aborts the loop cleanly.
+    async fn handle_stream_context(
+        &self,
+        stream: &mut UnixStream,
+        file_uri: String,
+        cursor_position: OwnedRange,
+        max_tokens: u32,
+    ) -> anyhow::Result<()> {
+        use crate::query_graph::types::EndStreamReason;
+
+        // Validate cursor position. "Outside the file" = not tracked, or line
+        // beyond the file's line count.
+        let line_count_opt = {
+            let db = self.db.lock().await;
+            db.file_source_text(&file_uri)
+                .map(|t| t.lines().count() as i32)
+        };
+        let Some(line_count) = line_count_opt else {
+            let term = ServerMessage::EndStream {
+                reason: EndStreamReason::Error,
+                emitted: 0,
+                total_candidates: 0,
+                error: Some("cursor_out_of_range".into()),
+            };
+            write_message(stream, &term).await?;
+            return Ok(());
+        };
+        if cursor_position.start_line < 0 || cursor_position.start_line >= line_count {
+            let term = ServerMessage::EndStream {
+                reason: EndStreamReason::Error,
+                emitted: 0,
+                total_candidates: 0,
+                error: Some("cursor_out_of_range".into()),
+            };
+            write_message(stream, &term).await?;
+            return Ok(());
+        }
+
+        // Rank candidates relative to the cursor.
+        let candidates = {
+            let mut db = self.db.lock().await;
+            rank_context_candidates(
+                &mut db,
+                &file_uri,
+                cursor_position.start_line,
+                cursor_position.start_char,
+            )
+        };
+        let total_candidates = candidates.len() as u32;
+
+        // Empty-budget short-circuit: emit terminator immediately. Per spec
+        // this counts as `budget_reached` (acceptance criterion 2).
+        if max_tokens == 0 {
+            let term = ServerMessage::EndStream {
+                reason: EndStreamReason::BudgetReached,
+                emitted: 0,
+                total_candidates,
+                error: None,
+            };
+            write_message(stream, &term).await?;
+            return Ok(());
+        }
+
+        let mut emitted: u32 = 0;
+        let mut spent: u64 = 0;
+        let mut reason = EndStreamReason::Exhausted;
+
+        for (sym, score) in candidates {
+            let cost = estimate_token_cost(&sym);
+            if spent + cost as u64 > max_tokens as u64 {
+                reason = EndStreamReason::BudgetReached;
+                break;
+            }
+            let frame = ServerMessage::SymbolInfo {
+                symbol_info: sym,
+                relevance_score: score,
+                token_cost: cost,
+            };
+            // BrokenPipe / EBADF aborts the walk — client closed early.
+            write_message(stream, &frame).await?;
+            spent += cost as u64;
+            emitted += 1;
+        }
+
+        let term = ServerMessage::EndStream {
+            reason,
+            emitted,
+            total_candidates,
+            error: None,
+        };
+        write_message(stream, &term).await?;
+        Ok(())
+    }
+}
+
+/// Conservative chars÷4 + 8 token estimate per spec §2.4.
+fn estimate_token_cost(sym: &crate::schema::OwnedSymbolInfo) -> u32 {
+    let sig_len = sym.signature.as_deref().map(str::len).unwrap_or(0);
+    let doc_len = sym.documentation.as_deref().map(str::len).unwrap_or(0);
+    ((sig_len + doc_len) as u32).div_ceil(4) + 8
+}
+
+/// Rank symbols by relevance to a cursor inside `file_uri` (spec §2.3 ordering):
+/// 1. The symbol the cursor is on (definition).
+/// 2. Callers — symbols whose blast-radius walk reaches the target.
+/// 3. Callees / references — outgoing relationships of the target.
+/// 4. Related types — relationships flagged `is_type_definition`.
+///
+/// Within a tier, frames are ordered by descending heuristic score.
+fn rank_context_candidates(
+    db: &mut crate::query_graph::LipDatabase,
+    file_uri: &str,
+    line: i32,
+    col: i32,
+) -> Vec<(crate::schema::OwnedSymbolInfo, f32)> {
+    use std::collections::HashSet;
+
+    let mut out: Vec<(crate::schema::OwnedSymbolInfo, f32)> = Vec::new();
+    let mut seen: HashSet<String> = HashSet::new();
+
+    let target_uri_opt = db.symbol_at_position(file_uri, line, col);
+
+    // Tier 1 — direct definition.
+    if let Some(ref target_uri) = target_uri_opt {
+        if let Some(sym) = db.symbol_by_uri(target_uri) {
+            seen.insert(sym.uri.clone());
+            out.push((sym, 1.0));
+        }
+    }
+
+    // Tier 2 — callers from the blast-radius CPG walk.
+    if let Some(ref target_uri) = target_uri_opt {
+        let blast = db.blast_radius_for(target_uri);
+        let mut callers: Vec<_> = blast
+            .direct_items
+            .iter()
+            .chain(blast.transitive_items.iter())
+            .filter(|item| !item.symbol_uri.is_empty())
+            .cloned()
+            .collect();
+        callers.sort_by_key(|item| item.distance);
+        for item in callers {
+            if !seen.insert(item.symbol_uri.clone()) {
+                continue;
+            }
+            if let Some(sym) = db.symbol_by_uri(&item.symbol_uri) {
+                let score = (0.9 - 0.1 * item.distance as f32).max(0.1);
+                out.push((sym, score));
+            }
+        }
+    }
+
+    // Tier 3 + 4 — outgoing relationships (callees, then types).
+    if let Some(ref target_uri) = target_uri_opt {
+        if let Some(target) = db.symbol_by_uri(target_uri) {
+            // Callees and plain references first.
+            for rel in target
+                .relationships
+                .iter()
+                .filter(|r| !r.is_type_definition)
+                .cloned()
+                .collect::<Vec<_>>()
+            {
+                if !seen.insert(rel.target_uri.clone()) {
+                    continue;
+                }
+                if let Some(sym) = db.symbol_by_uri(&rel.target_uri) {
+                    out.push((sym, 0.5));
+                }
+            }
+            // Related types last.
+            for rel in target
+                .relationships
+                .iter()
+                .filter(|r| r.is_type_definition)
+                .cloned()
+                .collect::<Vec<_>>()
+            {
+                if !seen.insert(rel.target_uri.clone()) {
+                    continue;
+                }
+                if let Some(sym) = db.symbol_by_uri(&rel.target_uri) {
+                    out.push((sym, 0.4));
+                }
+            }
         }
     }
+
+    out
 }
 
 // ── Batch query helper ────────────────────────────────────────────────────────
@@ -2044,6 +2268,10 @@ fn process_query_sync(
         ClientMessage::ExplainMatch { .. } => {
             err("ExplainMatch requires async HTTP; not permitted in BatchQuery")
         }
+
+        ClientMessage::StreamContext { .. } => {
+            err("StreamContext is a streaming request; not permitted in BatchQuery")
+        }
     }
 }
 
diff --git a/bindings/rust/src/query_graph/types.rs b/bindings/rust/src/query_graph/types.rs
index 3ee1df6..85457f6 100644
--- a/bindings/rust/src/query_graph/types.rs
+++ b/bindings/rust/src/query_graph/types.rs
@@ -444,6 +444,47 @@ pub enum ServerMessage {
         /// The embedding model used to score the chunks.
         query_model: String,
     },
+
+    // ── v2.1 features ────────────────────────────────────────────────────
+    /// One frame of a [`ClientMessage::StreamContext`] response: a single
+    /// ranked symbol with its estimated prompt token cost.
+    ///
+    /// Wire tag is `"symbol_info"`. Multiple frames precede the
+    /// [`ServerMessage::EndStream`] terminator.
+    SymbolInfo {
+        symbol_info: crate::schema::OwnedSymbolInfo,
+        /// Heuristic score in `[0.0, 1.0]`; higher = more relevant to the cursor.
+        relevance_score: f32,
+        /// Estimated prompt-token cost of this symbol's serialised context.
+        token_cost: u32,
+    },
+
+    /// Terminator frame for a [`ClientMessage::StreamContext`] response.
+    ///
+    /// Wire tag is `"end_stream"`. Exactly one terminator follows N
+    /// [`ServerMessage::SymbolInfo`] frames.
+    EndStream {
+        reason: EndStreamReason,
+        /// Number of `SymbolInfo` frames emitted before this terminator.
+        emitted: u32,
+        /// Total candidate symbols the daemon considered.
+        total_candidates: u32,
+        /// Set only when `reason == EndStreamReason::Error`.
+        #[serde(skip_serializing_if = "Option::is_none")]
+        error: Option<String>,
+    },
+}
+
+/// Why a [`ServerMessage::EndStream`] terminated a context stream.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum EndStreamReason {
+    /// Daemon emitted enough symbols to reach `max_tokens`.
+    BudgetReached,
+    /// No more relevant candidates exist.
+    Exhausted,
+    /// An error terminated the stream; see [`ServerMessage::EndStream::error`].
+    Error,
 }
 
 /// A contiguous region of a file that contributes to a semantic match.
@@ -870,6 +911,22 @@ pub enum ClientMessage {
         /// Override the embedding model for this request.
         model: Option<String>,
     },
+
+    // ── v2.1 features ────────────────────────────────────────────────────
+    /// Stream symbols ordered by relevance to `cursor_position` in `file_uri`,
+    /// stopping when the caller closes the connection or when the daemon has
+    /// emitted enough symbols to reach `max_tokens` estimated prompt cost.
+    ///
+    /// Response is N [`ServerMessage::SymbolInfo`] frames followed by exactly
+    /// one [`ServerMessage::EndStream`] terminator.
+    StreamContext {
+        file_uri: String,
+        cursor_position: OwnedRange,
+        max_tokens: u32,
+        /// Optional: restrict to a specific embedding model.
+        #[serde(default)]
+        model: Option<String>,
+    },
 }
 
 impl ClientMessage {
@@ -975,7 +1032,7 @@ mod tests {
     fn handshake_result_round_trips() {
         let msg = ServerMessage::HandshakeResult {
             daemon_version: "1.5.0".into(),
-            protocol_version: 1,
+            protocol_version: 2,
         };
         let rt = round_trip_server(&msg);
         let ServerMessage::HandshakeResult {
@@ -986,7 +1043,66 @@ mod tests {
             panic!("wrong variant");
         };
         assert_eq!(daemon_version, "1.5.0");
-        assert_eq!(protocol_version, 1);
+        assert_eq!(protocol_version, 2);
+    }
+
+    #[test]
+    fn stream_context_request_round_trips() {
+        let msg = ClientMessage::StreamContext {
+            file_uri: "file:///src/main.rs".into(),
+            cursor_position: OwnedRange {
+                start_line: 10,
+                start_char: 4,
+                end_line: 10,
+                end_char: 4,
+            },
+            max_tokens: 4096,
+            model: None,
+        };
+        let json = serde_json::to_value(&msg).unwrap();
+        assert_eq!(json["type"], "stream_context");
+        assert_eq!(json["max_tokens"], 4096);
+        let rt = round_trip_client(&msg);
+        let ClientMessage::StreamContext {
+            file_uri,
+            max_tokens,
+            ..
+        } = rt
+        else {
+            panic!("wrong variant");
+        };
+        assert_eq!(file_uri, "file:///src/main.rs");
+        assert_eq!(max_tokens, 4096);
+    }
+
+    #[test]
+    fn end_stream_frame_round_trips() {
+        let msg = ServerMessage::EndStream {
+            reason: EndStreamReason::BudgetReached,
+            emitted: 3,
+            total_candidates: 12,
+            error: None,
+        };
+        let json = serde_json::to_value(&msg).unwrap();
+        assert_eq!(json["type"], "end_stream");
+        assert_eq!(json["reason"], "budget_reached");
+        // Optional `error` field omitted when None.
+        assert!(json.get("error").is_none());
+
+        let rt = round_trip_server(&msg);
+        let ServerMessage::EndStream {
+            reason,
+            emitted,
+            total_candidates,
+            error,
+        } = rt
+        else {
+            panic!("wrong variant");
+        };
+        assert_eq!(reason, EndStreamReason::BudgetReached);
+        assert_eq!(emitted, 3);
+        assert_eq!(total_candidates, 12);
+        assert!(error.is_none());
     }
 
     #[test]
diff --git a/bindings/rust/tests/integration.rs b/bindings/rust/tests/integration.rs
index aaaf018..a349bb1 100644
--- a/bindings/rust/tests/integration.rs
+++ b/bindings/rust/tests/integration.rs
@@ -541,3 +541,154 @@ async fn daemon_annotations_survive_restart() {
         let _ = task.await;
     }
 }
+
+// ─── stream_context (LIP 2.1.0) ──────────────────────────────────────────────
+
+async fn recv_stream_frame(stream: &mut UnixStream) -> anyhow::Result<ServerMessage> {
+    // Filter push notifications (`IndexChanged`, `SymbolUpgraded`) that may
+    // have been queued from earlier upserts.
+    recv(stream).await
+}
+
+#[tokio::test]
+async fn stream_context_zero_budget_terminates_immediately() {
+    use lip::query_graph::types::EndStreamReason;
+    use lip::schema::OwnedRange;
+
+    let dir = tempfile::tempdir().expect("tempdir");
+    let socket = dir.path().join("lip_stream_zero.sock");
+    let daemon = LipDaemon::new(&socket);
+    let task = tokio::spawn(async move { daemon.run().await.ok() });
+    tokio::time::sleep(Duration::from_millis(20)).await;
+
+    let mut client = UnixStream::connect(&socket).await.expect("connect");
+    let uri = "lip://local/test@0.1/budget.rs";
+    let source = "pub fn foo() {}\n";
+    send(
+        &mut client,
+        &ClientMessage::Delta {
+            seq: 1,
+            action: Action::Upsert,
+            document: make_doc(uri, source),
+        },
+    )
+    .await
+    .unwrap();
+    let _ = recv(&mut client).await.unwrap();
+
+    send(
+        &mut client,
+        &ClientMessage::StreamContext {
+            file_uri: uri.into(),
+            cursor_position: OwnedRange {
+                start_line: 0,
+                start_char: 0,
+                end_line: 0,
+                end_char: 0,
+            },
+            max_tokens: 0,
+            model: None,
+        },
+    )
+    .await
+    .unwrap();
+
+    let frame = recv_stream_frame(&mut client).await.unwrap();
+    match frame {
+        ServerMessage::EndStream {
+            reason, emitted, ..
+        } => {
+            assert_eq!(reason, EndStreamReason::BudgetReached);
+            assert_eq!(emitted, 0);
+        }
+        other => panic!("expected EndStream first frame, got {other:?}"),
+    }
+
+    task.abort();
+    let _ = task.await;
+}
+
+#[tokio::test]
+async fn stream_context_cursor_out_of_range_errors() {
+    use lip::query_graph::types::EndStreamReason;
+    use lip::schema::OwnedRange;
+
+    let dir = tempfile::tempdir().expect("tempdir");
+    let socket = dir.path().join("lip_stream_oob.sock");
+    let daemon = LipDaemon::new(&socket);
+    let task = tokio::spawn(async move { daemon.run().await.ok() });
+    tokio::time::sleep(Duration::from_millis(20)).await;
+
+    let mut client = UnixStream::connect(&socket).await.expect("connect");
+    let uri = "lip://local/test@0.1/oob.rs";
+    let source = "pub fn foo() {}\n"; // 1 line
+    send(
+        &mut client,
+        &ClientMessage::Delta {
+            seq: 1,
+            action: Action::Upsert,
+            document: make_doc(uri, source),
+        },
+    )
+    .await
+    .unwrap();
+    let _ = recv(&mut client).await.unwrap();
+
+    send(
+        &mut client,
+        &ClientMessage::StreamContext {
+            file_uri: uri.into(),
+            cursor_position: OwnedRange {
+                start_line: 9999,
+                start_char: 0,
+                end_line: 9999,
+                end_char: 0,
+            },
+            max_tokens: 4096,
+            model: None,
+        },
+    )
+    .await
+    .unwrap();
+
+    let frame = recv_stream_frame(&mut client).await.unwrap();
+    match frame {
+        ServerMessage::EndStream { reason, error, .. } => {
+            assert_eq!(reason, EndStreamReason::Error);
+            assert_eq!(error.as_deref(), Some("cursor_out_of_range"));
+        }
+        other => panic!("expected EndStream(error), got {other:?}"),
+    }
+
+    task.abort();
+    let _ = task.await;
+}
+
+#[tokio::test]
+async fn stream_context_handshake_advertises_v2() {
+    let dir = tempfile::tempdir().expect("tempdir");
+    let socket = dir.path().join("lip_stream_hs.sock");
+    let daemon = LipDaemon::new(&socket);
+    let task = tokio::spawn(async move { daemon.run().await.ok() });
+    tokio::time::sleep(Duration::from_millis(20)).await;
+
+    let mut client = UnixStream::connect(&socket).await.expect("connect");
+    send(
+        &mut client,
+        &ClientMessage::Handshake {
+            client_version: Some("test".into()),
+        },
+    )
+    .await
+    .unwrap();
+    let resp = recv(&mut client).await.unwrap();
+    match resp {
+        ServerMessage::HandshakeResult {
+            protocol_version, ..
+        } => assert_eq!(protocol_version, 2),
+        other => panic!("expected HandshakeResult, got {other:?}"),
+    }
+
+    task.abort();
+    let _ = task.await;
+}
diff --git a/tools/lip-cli/src/cmd/mod.rs b/tools/lip-cli/src/cmd/mod.rs
index 571c095..0d6aa0a 100644
--- a/tools/lip-cli/src/cmd/mod.rs
+++ b/tools/lip-cli/src/cmd/mod.rs
@@ -9,3 +9,4 @@ pub mod mcp;
 pub mod push;
 pub mod query;
 pub mod slice;
+pub mod stream_context;
diff --git a/tools/lip-cli/src/cmd/stream_context.rs b/tools/lip-cli/src/cmd/stream_context.rs
new file mode 100644
index 0000000..4408a28
--- /dev/null
+++ b/tools/lip-cli/src/cmd/stream_context.rs
@@ -0,0 +1,81 @@
+use std::path::PathBuf;
+
+use clap::Args;
+use tokio::io::{AsyncReadExt, AsyncWriteExt};
+use tokio::net::UnixStream;
+
+use lip::query_graph::{ClientMessage, ServerMessage};
+use lip::schema::OwnedRange;
+
+use crate::output;
+
+#[derive(Args)]
+pub struct StreamContextArgs {
+    /// Path to the daemon Unix socket.
+    #[arg(long, default_value = "/tmp/lip-daemon.sock")]
+    pub socket: PathBuf,
+
+    /// File URI to stream context for (e.g. `file:///src/main.rs`).
+    pub file_uri: String,
+
+    /// Cursor position as `LINE:COL` (0-based).
+    pub position: String,
+
+    /// Maximum estimated prompt-token budget across all streamed symbols.
+    #[arg(long, default_value_t = 4096)]
+    pub max_tokens: u32,
+
+    /// Optional embedding model override.
+    #[arg(long)]
+    pub model: Option<String>,
+}
+
+pub async fn run(args: StreamContextArgs) -> anyhow::Result<()> {
+    let (line, col) = parse_position(&args.position)?;
+
+    let msg = ClientMessage::StreamContext {
+        file_uri: args.file_uri,
+        cursor_position: OwnedRange {
+            start_line: line,
+            start_char: col,
+            end_line: line,
+            end_char: col,
+        },
+        max_tokens: args.max_tokens,
+        model: args.model,
+    };
+
+    let mut stream = UnixStream::connect(&args.socket).await.map_err(|e| {
+        anyhow::anyhow!("cannot connect to daemon at {}: {e}", args.socket.display())
+    })?;
+    let body = serde_json::to_vec(&msg)?;
+    stream.write_all(&(body.len() as u32).to_be_bytes()).await?;
+    stream.write_all(&body).await?;
+
+    loop {
+        let mut len_buf = [0u8; 4];
+        stream.read_exact(&mut len_buf).await?;
+        let len = u32::from_be_bytes(len_buf) as usize;
+        let mut buf = vec![0u8; len];
+        stream.read_exact(&mut buf).await?;
+        let frame: ServerMessage = serde_json::from_slice(&buf)?;
+        output::print_json(&frame)?;
+        if matches!(frame, ServerMessage::EndStream { .. }) {
+            break;
+        }
+    }
+    Ok(())
+}
+
+fn parse_position(s: &str) -> anyhow::Result<(i32, i32)> {
+    let (l, c) = s
+        .split_once(':')
+        .ok_or_else(|| anyhow::anyhow!("position must be LINE:COL, got `{s}`"))?;
+    let line: i32 = l
+        .parse()
+        .map_err(|e| anyhow::anyhow!("invalid line `{l}`: {e}"))?;
+    let col: i32 = c
+        .parse()
+        .map_err(|e| anyhow::anyhow!("invalid col `{c}`: {e}"))?;
+    Ok((line, col))
+}
diff --git a/tools/lip-cli/src/main.rs b/tools/lip-cli/src/main.rs
index ad74d54..18f2af0 100644
--- a/tools/lip-cli/src/main.rs
+++ b/tools/lip-cli/src/main.rs
@@ -45,6 +45,8 @@ enum Commands {
     Mcp(cmd::mcp::McpArgs),
     /// Build pre-computed dependency slices for Cargo, npm, or pub packages.
     Slice(cmd::slice::SliceArgs),
+    /// Stream token-budgeted RAG context frames for a cursor position.
+    StreamContext(cmd::stream_context::StreamContextArgs),
 }
 
 #[tokio::main]
@@ -68,5 +70,6 @@ async fn main() -> anyhow::Result<()> {
         Commands::Annotate(args) => cmd::annotate::run(args).await,
         Commands::Mcp(args) => cmd::mcp::run(args).await,
         Commands::Slice(args) => cmd::slice::run(args).await,
+        Commands::StreamContext(args) => cmd::stream_context::run(args).await,
     }
 }

From ef7cc5521c4b40df7420adc9faeb71941da41c0d Mon Sep 17 00:00:00 2001
From: Lisa <lisa.welsch1985@gmail.com>
Date: Wed, 15 Apr 2026 13:18:58 +0200
Subject: [PATCH 03/12] docs: mention v2.1 stream_context in README status

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 6aea39a..c1e2cdf 100644
--- a/README.md
+++ b/README.md
@@ -379,7 +379,7 @@ Requires Rust 1.78+. No system `protoc` required.
 
 ## Status
 
-v2.0 — `ExplainMatch` (chunk-level explanation: which lines in a result file drove the match), model provenance (`FileStatus` exposes the embedding model per file; `IndexStatus` warns when the index contains mixed-model vectors). v1.9: `filter` glob + `min_score` on all NN calls, `GetCentroid`, `QueryStaleEmbeddings`. v1.8: `FindBoundaries`, `SemanticDiff`, `QueryNearestInStore` (cross-repo federation), `QueryNoveltyScore`, `ExtractTerminology`, `PruneDeleted`. v1.7: 6 semantic retrieval primitives. v1.6: `ReindexFiles`, `Similarity`, `QueryExpansion`, `Cluster`, `ExportEmbeddings`. Wire format is JSON.
+v2.1 — `StreamContext` (token-budgeted RAG context streaming): callers stream symbols ranked by relevance to a cursor and stop reading when the prompt budget is full instead of fetching top-k and locally truncating; `protocol_version` bumped to `2`. v2.0 — `ExplainMatch` (chunk-level explanation: which lines in a result file drove the match), model provenance (`FileStatus` exposes the embedding model per file; `IndexStatus` warns when the index contains mixed-model vectors). v1.9: `filter` glob + `min_score` on all NN calls, `GetCentroid`, `QueryStaleEmbeddings`. v1.8: `FindBoundaries`, `SemanticDiff`, `QueryNearestInStore` (cross-repo federation), `QueryNoveltyScore`, `ExtractTerminology`, `PruneDeleted`. v1.7: 6 semantic retrieval primitives. v1.6: `ReindexFiles`, `Similarity`, `QueryExpansion`, `Cluster`, `ExportEmbeddings`. Wire format is JSON.
 
 ---
 

From c1c485fb922e31101e230207aa7b229f25930ff9 Mon Sep 17 00:00:00 2001
From: Lisa <lisa.welsch1985@gmail.com>
Date: Wed, 15 Apr 2026 13:21:23 +0200
Subject: [PATCH 04/12] =?UTF-8?q?feat:=20embed=5Ftext=20=E2=80=94=20unary?=
 =?UTF-8?q?=20text-to-vector=20embedding?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the gap left by EmbeddingBatch (URI-only) and QueryNearestByText
(embeds internally but discards the vector). Callers re-ranking with
their own scoring — centroid arithmetic, federated nearest-neighbour,
lexical-then-semantic re-rank — get the embedding directly instead of
having to build a centroid out of nearest-neighbour seeds.

- ClientMessage::EmbedText { text, model? }
- ServerMessage::EmbedTextResult { vector, embedding_model }
- Handler reuses EmbeddingClient::embed_texts; returns the model
  actually used (after any override).
- Rejected from Batch / BatchQuery (async HTTP, like other embedding ops).
- Round-trip + integration tests; CHANGELOG entry.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md                           |  4 ++
 bindings/rust/src/daemon/session.rs    | 23 ++++++++++
 bindings/rust/src/query_graph/types.rs | 62 ++++++++++++++++++++++++++
 bindings/rust/tests/integration.rs     | 40 +++++++++++++++++
 4 files changed, 129 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7465f5b..00e0112 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,10 @@ All notable changes to this project are documented here.
 
 ### Added
 
+**v2.1 — `embed_text`: unary text-to-vector embedding**
+
+- **`EmbedText { text, model? }`** — embed an arbitrary text string and return the raw vector. Closes the gap left by `EmbeddingBatch` (URI-only) and `QueryNearestByText` (embeds internally but discards the vector). Callers re-ranking with their own scoring (centroid arithmetic, federated nearest-neighbour, lexical-then-semantic re-rank) get the embedding directly instead of building a centroid out of nearest-neighbour seeds. Returns `EmbedTextResult { vector: Vec<f32>, embedding_model: String }`. Not permitted inside `BatchQuery` (requires async HTTP).
+
 **v2.1 — `stream_context`: token-budgeted RAG context streaming**
 
 - **`StreamContext { file_uri, cursor_position, max_tokens, model? }`** — new streaming wire message. Daemon ranks symbols relevant to the cursor and emits one `SymbolInfo { symbol_info, relevance_score, token_cost }` frame at a time, terminating with exactly one `EndStream { reason, emitted, total_candidates, error? }` frame. Reasons: `budget_reached`, `exhausted`, `error`. Replaces the broken "fetch top-k, locally truncate to prompt budget" pattern with stream-until-full. Spec §9.2.
diff --git a/bindings/rust/src/daemon/session.rs b/bindings/rust/src/daemon/session.rs
index 702e369..5b2349d 100644
--- a/bindings/rust/src/daemon/session.rs
+++ b/bindings/rust/src/daemon/session.rs
@@ -1517,6 +1517,25 @@ impl Session {
                 }
             }
 
+            // ── v2.1: EmbedText ─────────────────────────────────────────
+            ClientMessage::EmbedText { text, model } => {
+                let Some(client) = self.embedding_client.as_ref().as_ref() else {
+                    return ServerMessage::Error {
+                        message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
+                    };
+                };
+                let texts = vec![text];
+                match client.embed_texts(&texts, model.as_deref()).await {
+                    Ok((mut vecs, used_model)) => ServerMessage::EmbedTextResult {
+                        vector: vecs.pop().unwrap_or_default(),
+                        embedding_model: used_model,
+                    },
+                    Err(e) => ServerMessage::Error {
+                        message: format!("embedding failed: {e}"),
+                    },
+                }
+            }
+
             // Streaming variant — caught earlier in `run`. Reached only if a
             // client embedded one inside a Batch / BatchQuery, which is not
             // supported.
@@ -2272,6 +2291,10 @@ fn process_query_sync(
         ClientMessage::StreamContext { .. } => {
             err("StreamContext is a streaming request; not permitted in BatchQuery")
         }
+
+        ClientMessage::EmbedText { .. } => {
+            err("EmbedText requires async HTTP; not permitted in BatchQuery")
+        }
     }
 }
 
diff --git a/bindings/rust/src/query_graph/types.rs b/bindings/rust/src/query_graph/types.rs
index 85457f6..6347c72 100644
--- a/bindings/rust/src/query_graph/types.rs
+++ b/bindings/rust/src/query_graph/types.rs
@@ -446,6 +446,14 @@ pub enum ServerMessage {
     },
 
     // ── v2.1 features ────────────────────────────────────────────────────
+    /// Response to [`ClientMessage::EmbedText`].
+    EmbedTextResult {
+        /// Raw embedding vector. Empty when the endpoint returned no data.
+        vector: Vec<f32>,
+        /// Model that produced the vector (after any client-side override).
+        embedding_model: String,
+    },
+
     /// One frame of a [`ClientMessage::StreamContext`] response: a single
     /// ranked symbol with its estimated prompt token cost.
     ///
@@ -913,6 +921,19 @@ pub enum ClientMessage {
     },
 
     // ── v2.1 features ────────────────────────────────────────────────────
+    /// Embed an arbitrary text string and return the raw vector.
+    ///
+    /// Closes the gap left by `EmbeddingBatch` (URI-only) and `QueryNearestByText`
+    /// (embeds internally but discards the vector). Callers that want to feed
+    /// the embedding into their own scoring (re-ranking, centroid arithmetic,
+    /// federated nearest-neighbour) need the vector itself.
+    EmbedText {
+        text: String,
+        /// Optional model override. `None` uses the daemon's default.
+        #[serde(default)]
+        model: Option<String>,
+    },
+
     /// Stream symbols ordered by relevance to `cursor_position` in `file_uri`,
     /// stopping when the caller closes the connection or when the daemon has
     /// emitted enough symbols to reach `max_tokens` estimated prompt cost.
@@ -1046,6 +1067,47 @@ mod tests {
         assert_eq!(protocol_version, 2);
     }
 
+    #[test]
+    fn embed_text_request_round_trips() {
+        let msg = ClientMessage::EmbedText {
+            text: "verify token expiry".into(),
+            model: Some("text-embedding-3-small".into()),
+        };
+        let json = serde_json::to_value(&msg).unwrap();
+        assert_eq!(json["type"], "embed_text");
+        assert_eq!(json["text"], "verify token expiry");
+        assert_eq!(json["model"], "text-embedding-3-small");
+
+        let rt = round_trip_client(&msg);
+        let ClientMessage::EmbedText { text, model } = rt else {
+            panic!("wrong variant");
+        };
+        assert_eq!(text, "verify token expiry");
+        assert_eq!(model.as_deref(), Some("text-embedding-3-small"));
+    }
+
+    #[test]
+    fn embed_text_result_round_trips() {
+        let msg = ServerMessage::EmbedTextResult {
+            vector: vec![0.1, 0.2, -0.3],
+            embedding_model: "text-embedding-3-small".into(),
+        };
+        let json = serde_json::to_value(&msg).unwrap();
+        assert_eq!(json["type"], "embed_text_result");
+        assert_eq!(json["embedding_model"], "text-embedding-3-small");
+
+        let rt = round_trip_server(&msg);
+        let ServerMessage::EmbedTextResult {
+            vector,
+            embedding_model,
+        } = rt
+        else {
+            panic!("wrong variant");
+        };
+        assert_eq!(vector, vec![0.1, 0.2, -0.3]);
+        assert_eq!(embedding_model, "text-embedding-3-small");
+    }
+
     #[test]
     fn stream_context_request_round_trips() {
         let msg = ClientMessage::StreamContext {
diff --git a/bindings/rust/tests/integration.rs b/bindings/rust/tests/integration.rs
index a349bb1..1d1deae 100644
--- a/bindings/rust/tests/integration.rs
+++ b/bindings/rust/tests/integration.rs
@@ -664,6 +664,46 @@ async fn stream_context_cursor_out_of_range_errors() {
     let _ = task.await;
 }
 
+#[tokio::test]
+async fn embed_text_without_endpoint_returns_error() {
+    // No `LIP_EMBEDDING_URL` set in the test process → embedding client is None
+    // → daemon returns the documented configuration error.
+    let dir = tempfile::tempdir().expect("tempdir");
+    let socket = dir.path().join("lip_embed_text.sock");
+    let daemon = LipDaemon::new(&socket);
+    let task = tokio::spawn(async move { daemon.run().await.ok() });
+    tokio::time::sleep(Duration::from_millis(20)).await;
+
+    let mut client = UnixStream::connect(&socket).await.expect("connect");
+    send(
+        &mut client,
+        &ClientMessage::EmbedText {
+            text: "verify token expiry".into(),
+            model: None,
+        },
+    )
+    .await
+    .unwrap();
+
+    let resp = recv(&mut client).await.unwrap();
+    match resp {
+        ServerMessage::Error { message } => {
+            assert!(
+                message.contains("LIP_EMBEDDING_URL"),
+                "expected configuration error, got {message:?}"
+            );
+        }
+        ServerMessage::EmbedTextResult { vector, .. } => {
+            // If a real embedding endpoint is configured in CI, fall through.
+            assert!(!vector.is_empty(), "vector should be non-empty");
+        }
+        other => panic!("expected Error or EmbedTextResult, got {other:?}"),
+    }
+
+    task.abort();
+    let _ = task.await;
+}
+
 #[tokio::test]
 async fn stream_context_handshake_advertises_v2() {
     let dir = tempfile::tempdir().expect("tempdir");

From b43e63f997fcc7f88bbbf14ab89a5c3ecdae9836 Mon Sep 17 00:00:00 2001
From: Lisa <lisa.welsch1985@gmail.com>
Date: Wed, 15 Apr 2026 13:28:29 +0200
Subject: [PATCH 05/12] feat(2.1): capability discovery + graceful
 unknown-variant handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- HandshakeResult gains supported_messages: Vec<String> listing every
  ClientMessage type tag this daemon understands. Clients can probe for
  individual features (stream_context, embed_text) without comparing
  protocol_version integers. Field is #[serde(default)] so older daemons
  yield an empty vector.
- ServerMessage::UnknownMessage { message_type, supported } — when the
  client sends a well-formed envelope whose type tag is unknown, the
  daemon replies with UnknownMessage (carrying the tag + supported list)
  and keeps the socket open, instead of closing after a generic parse
  Error. Lets forward-compatible clients downgrade gracefully.
- Added integration tests for both behaviors; run loop inspects the
  parse error text for "unknown variant" to distinguish recoverable
  unknown-tag cases from malformed JSON.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md                           |  5 ++
 bindings/rust/src/daemon/session.rs    | 24 ++++++-
 bindings/rust/src/query_graph/types.rs | 89 ++++++++++++++++++++++++++
 bindings/rust/tests/integration.rs     | 89 ++++++++++++++++++++++++++
 4 files changed, 204 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 00e0112..fe1cc4f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,11 @@ All notable changes to this project are documented here.
 
 ### Added
 
+**v2.1 — Capability discovery + graceful unknown-variant handling**
+
+- **`HandshakeResult.supported_messages: Vec<String>`** — handshake response now lists every `ClientMessage` `type` tag this daemon understands. Lets clients probe for an individual message (e.g. `stream_context`, `embed_text`) without writing "handshake then pray" code or comparing `protocol_version` integers. Field is `#[serde(default)]`; older daemons predating this field yield an empty vector, which clients should treat as "fall back to `protocol_version`."
+- **`ServerMessage::UnknownMessage { message_type, supported }`** — when a client sends a well-formed JSON envelope whose `type` tag is unknown, the daemon now replies with `UnknownMessage` (carrying the tag plus the same supported list as handshake) *and keeps the socket open*, instead of closing after a generic parse `Error`. Lets forward-compatible clients downgrade gracefully to a supported call instead of reconnecting.
+
 **v2.1 — `embed_text`: unary text-to-vector embedding**
 
 - **`EmbedText { text, model? }`** — embed an arbitrary text string and return the raw vector. Closes the gap left by `EmbeddingBatch` (URI-only) and `QueryNearestByText` (embeds internally but discards the vector). Callers re-ranking with their own scoring (centroid arithmetic, federated nearest-neighbour, lexical-then-semantic re-rank) get the embedding directly instead of building a centroid out of nearest-neighbour seeds. Returns `EmbedTextResult { vector: Vec<f32>, embedding_model: String }`. Not permitted inside `BatchQuery` (requires async HTTP).
diff --git a/bindings/rust/src/daemon/session.rs b/bindings/rust/src/daemon/session.rs
index 5b2349d..494abd0 100644
--- a/bindings/rust/src/daemon/session.rs
+++ b/bindings/rust/src/daemon/session.rs
@@ -88,10 +88,26 @@ impl Session {
                 Ok(m) => m,
                 Err(e) => {
                     warn!("parse error: {e}");
-                    let err = ServerMessage::Error {
-                        message: e.to_string(),
+                    let err_text = e.to_string();
+                    // Unknown-variant parses are recoverable: the JSON was
+                    // well-formed but carried a `type` tag this daemon
+                    // doesn't know. Surface it as `UnknownMessage` with the
+                    // supported list so the client can fall back gracefully
+                    // instead of dropping the connection.
+                    let response = if err_text.contains("unknown variant") {
+                        let message_type = serde_json::from_slice::<serde_json::Value>(&msg_bytes)
+                            .ok()
+                            .and_then(|v| {
+                                v.get("type").and_then(|t| t.as_str()).map(str::to_owned)
+                            });
+                        ServerMessage::UnknownMessage {
+                            message_type,
+                            supported: ClientMessage::supported_messages(),
+                        }
+                    } else {
+                        ServerMessage::Error { message: err_text }
                     };
-                    let _ = write_message(&mut stream, &err).await;
+                    let _ = write_message(&mut stream, &response).await;
                     continue;
                 }
             };
@@ -777,6 +793,7 @@ impl Session {
                 ServerMessage::HandshakeResult {
                     daemon_version: env!("CARGO_PKG_VERSION").to_owned(),
                     protocol_version: PROTOCOL_VERSION,
+                    supported_messages: ClientMessage::supported_messages(),
                 }
             }
 
@@ -1939,6 +1956,7 @@ fn process_query_sync(
         ClientMessage::Handshake { .. } => ok(ServerMessage::HandshakeResult {
             daemon_version: env!("CARGO_PKG_VERSION").to_owned(),
             protocol_version: PROTOCOL_VERSION,
+            supported_messages: ClientMessage::supported_messages(),
         }),
 
         // BatchAnnotationGet is a pure read — safe inside a batch.
diff --git a/bindings/rust/src/query_graph/types.rs b/bindings/rust/src/query_graph/types.rs
index 6347c72..d066841 100644
--- a/bindings/rust/src/query_graph/types.rs
+++ b/bindings/rust/src/query_graph/types.rs
@@ -306,6 +306,16 @@ pub enum ServerMessage {
         daemon_version: String,
         /// Monotonic integer bumped only on breaking wire-format changes.
         protocol_version: u32,
+        /// Snake-case names of every `ClientMessage` `type` tag this daemon
+        /// understands. Lets clients probe support for an individual message
+        /// without writing "handshake then pray" code — a forward-compatible
+        /// alternative to comparing `protocol_version` integers.
+        ///
+        /// Older daemons predating this field omit it; serde defaults to an
+        /// empty vector on the client side, which clients should treat as
+        /// "unknown — fall back to `protocol_version`."
+        #[serde(default)]
+        supported_messages: Vec<String>,
     },
 
     // ── v1.6 features ────────────────────────────────────────────────────
@@ -446,6 +456,18 @@ pub enum ServerMessage {
     },
 
     // ── v2.1 features ────────────────────────────────────────────────────
+    /// Sent in place of [`ServerMessage::Error`] when the client sent a
+    /// well-formed JSON object whose `"type"` tag is not recognised by this
+    /// daemon. The connection stays open so the client can fall back to a
+    /// supported message instead of disconnecting.
+    UnknownMessage {
+        /// The unrecognised `type` tag, when extractable from the request.
+        message_type: Option<String>,
+        /// Snake-case names of every `ClientMessage` `type` tag this daemon
+        /// understands — same list as `HandshakeResult.supported_messages`.
+        supported: Vec<String>,
+    },
+
     /// Response to [`ClientMessage::EmbedText`].
     EmbedTextResult {
         /// Raw embedding vector. Empty when the endpoint returned no data.
@@ -951,6 +973,69 @@ pub enum ClientMessage {
 }
 
 impl ClientMessage {
+    /// Snake-case `type` tags of every variant this daemon understands.
+    ///
+    /// Returned by `Handshake` and `UnknownMessage` so clients can probe
+    /// support for individual messages without parsing protocol-version
+    /// integers. Order is stable; callers that compare lists should sort
+    /// or hash first.
+    pub fn supported_messages() -> Vec<String> {
+        [
+            "manifest",
+            "delta",
+            "query_definition",
+            "query_references",
+            "query_hover",
+            "query_blast_radius",
+            "query_workspace_symbols",
+            "query_document_symbols",
+            "query_dead_symbols",
+            "annotation_set",
+            "annotation_get",
+            "annotation_list",
+            "annotation_workspace_list",
+            "batch_query",
+            "batch",
+            "similar_symbols",
+            "query_stale_files",
+            "load_slice",
+            "embedding_batch",
+            "query_index_status",
+            "query_file_status",
+            "query_nearest",
+            "query_nearest_by_text",
+            "batch_query_nearest_by_text",
+            "query_nearest_by_symbol",
+            "batch_annotation_get",
+            "handshake",
+            "reindex_files",
+            "similarity",
+            "query_expansion",
+            "cluster",
+            "export_embeddings",
+            "query_nearest_by_contrast",
+            "query_outliers",
+            "query_semantic_drift",
+            "similarity_matrix",
+            "find_semantic_counterpart",
+            "query_coverage",
+            "find_boundaries",
+            "semantic_diff",
+            "query_nearest_in_store",
+            "query_novelty_score",
+            "extract_terminology",
+            "prune_deleted",
+            "get_centroid",
+            "query_stale_embeddings",
+            "explain_match",
+            "embed_text",
+            "stream_context",
+        ]
+        .iter()
+        .map(|s| (*s).to_owned())
+        .collect()
+    }
+
     /// Returns `true` for any message that may appear inside a [`ClientMessage::Batch`].
     /// A `Batch` itself is excluded to prevent nesting. `LoadSlice` is also excluded
     /// because it requires mutable database access outside the read-only batch lock.
@@ -1054,17 +1139,21 @@ mod tests {
         let msg = ServerMessage::HandshakeResult {
             daemon_version: "1.5.0".into(),
             protocol_version: 2,
+            supported_messages: ClientMessage::supported_messages(),
         };
         let rt = round_trip_server(&msg);
         let ServerMessage::HandshakeResult {
             daemon_version,
             protocol_version,
+            supported_messages,
         } = rt
         else {
             panic!("wrong variant");
         };
         assert_eq!(daemon_version, "1.5.0");
         assert_eq!(protocol_version, 2);
+        assert!(supported_messages.contains(&"handshake".to_string()));
+        assert!(supported_messages.contains(&"stream_context".to_string()));
     }
 
     #[test]
diff --git a/bindings/rust/tests/integration.rs b/bindings/rust/tests/integration.rs
index 1d1deae..b726b13 100644
--- a/bindings/rust/tests/integration.rs
+++ b/bindings/rust/tests/integration.rs
@@ -732,3 +732,92 @@ async fn stream_context_handshake_advertises_v2() {
     task.abort();
     let _ = task.await;
 }
+
+#[tokio::test]
+async fn handshake_advertises_supported_messages() {
+    let dir = tempfile::tempdir().expect("tempdir");
+    let socket = dir.path().join("lip_caps.sock");
+    let daemon = LipDaemon::new(&socket);
+    let task = tokio::spawn(async move { daemon.run().await.ok() });
+    tokio::time::sleep(Duration::from_millis(20)).await;
+
+    let mut client = UnixStream::connect(&socket).await.expect("connect");
+    send(
+        &mut client,
+        &ClientMessage::Handshake {
+            client_version: Some("test".into()),
+        },
+    )
+    .await
+    .unwrap();
+
+    let resp = recv(&mut client).await.unwrap();
+    match resp {
+        ServerMessage::HandshakeResult {
+            supported_messages, ..
+        } => {
+            assert!(supported_messages.contains(&"handshake".to_string()));
+            assert!(supported_messages.contains(&"stream_context".to_string()));
+            assert!(supported_messages.contains(&"embed_text".to_string()));
+            assert!(!supported_messages.contains(&"nonexistent_message".to_string()));
+        }
+        other => panic!("expected HandshakeResult, got {other:?}"),
+    }
+
+    task.abort();
+    let _ = task.await;
+}
+
+#[tokio::test]
+async fn unknown_variant_returns_unknown_message_and_keeps_connection() {
+    let dir = tempfile::tempdir().expect("tempdir");
+    let socket = dir.path().join("lip_unknown.sock");
+    let daemon = LipDaemon::new(&socket);
+    let task = tokio::spawn(async move { daemon.run().await.ok() });
+    tokio::time::sleep(Duration::from_millis(20)).await;
+
+    let mut client = UnixStream::connect(&socket).await.expect("connect");
+
+    // Hand-craft an envelope with an unknown `type` tag — the daemon should
+    // recognise this as recoverable and reply with `UnknownMessage` rather
+    // than closing the socket.
+    let bogus = serde_json::json!({
+        "type": "summon_kraken",
+        "payload": {"when": "at_dawn"},
+    });
+    let body = serde_json::to_vec(&bogus).unwrap();
+    client
+        .write_all(&(body.len() as u32).to_be_bytes())
+        .await
+        .unwrap();
+    client.write_all(&body).await.unwrap();
+
+    let resp = recv(&mut client).await.unwrap();
+    match resp {
+        ServerMessage::UnknownMessage {
+            message_type,
+            supported,
+        } => {
+            assert_eq!(message_type.as_deref(), Some("summon_kraken"));
+            assert!(supported.contains(&"handshake".to_string()));
+        }
+        other => panic!("expected UnknownMessage, got {other:?}"),
+    }
+
+    // Connection must still be usable: send a Handshake after the error.
+    send(
+        &mut client,
+        &ClientMessage::Handshake {
+            client_version: Some("test".into()),
+        },
+    )
+    .await
+    .unwrap();
+    match recv(&mut client).await.unwrap() {
+        ServerMessage::HandshakeResult { .. } => {}
+        other => panic!("expected HandshakeResult after recovery, got {other:?}"),
+    }
+
+    task.abort();
+    let _ = task.await;
+}

From cb7cf289930523bf98383ddbb05f4c700c12625a Mon Sep 17 00:00:00 2001
From: Lisa <lisa.welsch1985@gmail.com>
Date: Wed, 15 Apr 2026 13:42:01 +0200
Subject: [PATCH 06/12] feat(2.1): structured error codes on all Error
 responses (#11)

- Added `ErrorCode` enum (unknown_message_type, unknown_model,
  cursor_out_of_range, index_locked, internal) and `code: ErrorCode`
  field on `ServerMessage::Error`. Clients can now branch on a stable
  category instead of string-matching `message`.
- `#[serde(default)]` keeps the wire backwards-compatible: older
  daemons deserialize as `ErrorCode::Internal`.
- Classified all 10 "embedding endpoint not configured / no cached
  embedding for URI" sites as `unknown_model`; everything else
  defaults to `internal`. Integration test asserts the UnknownModel
  code on embed_text when LIP_EMBEDDING_URL is unset.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md                           |  6 ++++
 bindings/rust/src/daemon/session.rs    | 44 +++++++++++++++++++++++---
 bindings/rust/src/query_graph/mod.rs   |  4 +--
 bindings/rust/src/query_graph/types.rs | 35 ++++++++++++++++++++
 bindings/rust/tests/integration.rs     |  9 ++++--
 tools/lip-cli/src/cmd/mcp.rs           |  4 +--
 6 files changed, 91 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fe1cc4f..902296f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,12 @@ All notable changes to this project are documented here.
 
 ### Added
 
+**v2.1 — Structured error codes on all `Error` responses**
+
+- **`ServerMessage::Error { message, code }`** — `code: ErrorCode` is a stable, machine-readable category. Clients branch on this instead of string-matching `message`. Added `#[serde(default)]`; older daemons predating this field deserialize as `ErrorCode::Internal`, so forward-compatible clients should treat `Internal` as "no classification available."
+- **`ErrorCode`** enum with a deliberately small, stable set: `unknown_message_type`, `unknown_model`, `cursor_out_of_range`, `index_locked`, `internal` (default). Adding a code is non-breaking; renaming or removing one is breaking.
+- **Daemon classification so far**: all 10 "embedding not configured / no cached embedding for URI" errors now carry `code: unknown_model`; everything else is `code: internal`. Specific codes will be wired in as each call-site picks one.
+
 **v2.1 — Capability discovery + graceful unknown-variant handling**
 
 - **`HandshakeResult.supported_messages: Vec<String>`** — handshake response now lists every `ClientMessage` `type` tag this daemon understands. Lets clients probe for an individual message (e.g. `stream_context`, `embed_text`) without writing "handshake then pray" code or comparing `protocol_version` integers. Field is `#[serde(default)]`; older daemons predating this field yield an empty vector, which clients should treat as "fall back to `protocol_version`."
diff --git a/bindings/rust/src/daemon/session.rs b/bindings/rust/src/daemon/session.rs
index 494abd0..e8c6c07 100644
--- a/bindings/rust/src/daemon/session.rs
+++ b/bindings/rust/src/daemon/session.rs
@@ -6,7 +6,7 @@ use tokio::net::UnixStream;
 use tokio::sync::{broadcast, mpsc, Mutex};
 use tracing::{debug, error, info, warn};
 
-use crate::query_graph::{BatchQueryResult, ClientMessage, LipDatabase, ServerMessage};
+use crate::query_graph::{BatchQueryResult, ClientMessage, ErrorCode, LipDatabase, ServerMessage};
 use crate::schema::{Action, IndexingState, OwnedAnnotationEntry, OwnedRange};
 
 use super::embedding::EmbeddingClient;
@@ -105,7 +105,10 @@ impl Session {
                             supported: ClientMessage::supported_messages(),
                         }
                     } else {
-                        ServerMessage::Error { message: err_text }
+                        ServerMessage::Error {
+                            message: err_text,
+                            code: ErrorCode::Internal,
+                        }
                     };
                     let _ = write_message(&mut stream, &response).await;
                     continue;
@@ -439,6 +442,7 @@ impl Session {
                     let _ = bad; // already matched by is_batchable
                     return ServerMessage::Error {
                         message: "nested Batch not allowed".into(),
+                        code: ErrorCode::Internal,
                     };
                 }
                 let mut results = Vec::with_capacity(requests.len());
@@ -489,6 +493,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
+                        code: ErrorCode::UnknownModel,
                     };
                 };
                 // Separate URIs that already have a cached embedding from those
@@ -531,6 +536,7 @@ impl Session {
                         Err(e) => {
                             return ServerMessage::Error {
                                 message: format!("embedding failed: {e}"),
+                                code: ErrorCode::Internal,
                             }
                         }
                     }
@@ -635,6 +641,7 @@ impl Session {
                 let Some(query_vec) = db.get_file_embedding(&uri).cloned() else {
                     return ServerMessage::Error {
                         message: format!("no embedding for {uri} — call EmbeddingBatch first"),
+                        code: ErrorCode::UnknownModel,
                     };
                 };
                 let results = db.nearest_by_vector(
@@ -657,6 +664,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
+                        code: ErrorCode::UnknownModel,
                     };
                 };
                 let texts = vec![text];
@@ -665,6 +673,7 @@ impl Session {
                     Err(e) => {
                         return ServerMessage::Error {
                             message: format!("embedding failed: {e}"),
+                            code: ErrorCode::Internal,
                         }
                     }
                 };
@@ -686,6 +695,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
+                        code: ErrorCode::UnknownModel,
                     };
                 };
                 // Embed all queries in one HTTP batch call; no lock held during await.
@@ -694,6 +704,7 @@ impl Session {
                     Err(e) => {
                         return ServerMessage::Error {
                             message: format!("embedding failed: {e}"),
+                            code: ErrorCode::Internal,
                         }
                     }
                 };
@@ -714,6 +725,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
+                        code: ErrorCode::UnknownModel,
                     };
                 };
                 // Check cache — avoid re-embedding the same symbol repeatedly.
@@ -741,6 +753,7 @@ impl Session {
                             None => {
                                 return ServerMessage::Error {
                                     message: format!("symbol not found: {symbol_uri}"),
+                                    code: ErrorCode::Internal,
                                 }
                             }
                         }
@@ -753,6 +766,7 @@ impl Session {
                             Err(e) => {
                                 return ServerMessage::Error {
                                     message: format!("embedding failed: {e}"),
+                                    code: ErrorCode::Internal,
                                 }
                             }
                         };
@@ -862,6 +876,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
+                        code: ErrorCode::UnknownModel,
                     };
                 };
                 let (mut vecs, _) = match client.embed_texts(&[query], model.as_deref()).await {
@@ -869,6 +884,7 @@ impl Session {
                     Err(e) => {
                         return ServerMessage::Error {
                             message: format!("embedding failed: {e}"),
+                            code: ErrorCode::Internal,
                         }
                     }
                 };
@@ -1002,6 +1018,7 @@ impl Session {
                         message: "both URIs must have cached embeddings with matching \
                                   dimensions — call embedding_batch first"
                             .into(),
+                        code: ErrorCode::Internal,
                     },
                 }
             }
@@ -1071,6 +1088,7 @@ impl Session {
                         message: format!(
                             "{uri} has no cached embedding — call embedding_batch first"
                         ),
+                        code: ErrorCode::Internal,
                     };
                 };
                 let q_norm: f32 = qv.iter().map(|x| x * x).sum::<f32>().sqrt();
@@ -1155,6 +1173,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
+                        code: ErrorCode::UnknownModel,
                     };
                 };
                 let chunk_size = chunk_lines.max(1);
@@ -1181,6 +1200,7 @@ impl Session {
                     Err(e) => {
                         return ServerMessage::Error {
                             message: format!("embedding failed: {e}"),
+                            code: ErrorCode::Internal,
                         }
                     }
                 };
@@ -1220,6 +1240,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
+                        code: ErrorCode::UnknownModel,
                     };
                 };
                 let (mut vecs, _) = match client
@@ -1230,12 +1251,14 @@ impl Session {
                     Err(e) => {
                         return ServerMessage::Error {
                             message: format!("embedding failed: {e}"),
+                            code: ErrorCode::Internal,
                         }
                     }
                 };
                 if vecs.len() < 2 {
                     return ServerMessage::Error {
                         message: "embedding service returned fewer vectors than expected".into(),
+                        code: ErrorCode::Internal,
                     };
                 }
                 let vb = vecs.pop().unwrap();
@@ -1286,6 +1309,7 @@ impl Session {
                         message: format!(
                             "{uri} has no cached embedding — call embedding_batch first"
                         ),
+                        code: ErrorCode::Internal,
                     };
                 };
                 let q_norm: f32 = qv.iter().map(|x| x * x).sum::<f32>().sqrt();
@@ -1417,6 +1441,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
+                        code: ErrorCode::UnknownModel,
                     };
                 };
                 let effective_top_k = if top_k == 0 { 5 } else { top_k };
@@ -1440,6 +1465,7 @@ impl Session {
                             Err(e) => {
                                 return ServerMessage::Error {
                                     message: format!("embedding failed: {e}"),
+                                    code: ErrorCode::Internal,
                                 }
                             }
                         }
@@ -1449,6 +1475,7 @@ impl Session {
                 if query_vec.is_empty() {
                     return ServerMessage::Error {
                         message: "could not obtain query embedding".into(),
+                        code: ErrorCode::Internal,
                     };
                 }
 
@@ -1492,6 +1519,7 @@ impl Session {
                         Err(e) => {
                             return ServerMessage::Error {
                                 message: format!("embedding failed: {e}"),
+                                code: ErrorCode::Internal,
                             }
                         }
                     };
@@ -1539,6 +1567,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
+                        code: ErrorCode::UnknownModel,
                     };
                 };
                 let texts = vec![text];
@@ -1549,6 +1578,7 @@ impl Session {
                     },
                     Err(e) => ServerMessage::Error {
                         message: format!("embedding failed: {e}"),
+                        code: ErrorCode::Internal,
                     },
                 }
             }
@@ -1560,6 +1590,7 @@ impl Session {
                 message: "stream_context is a streaming request and cannot be \
                           batched or nested"
                     .into(),
+                code: ErrorCode::Internal,
             },
         }
     }
@@ -2366,6 +2397,7 @@ mod tests {
         let (a, b) = tokio::net::UnixStream::pair().unwrap();
         let msg = ServerMessage::Error {
             message: "hello framing".to_owned(),
+            code: ErrorCode::Internal,
         };
 
         // Writer task.
@@ -2382,7 +2414,7 @@ mod tests {
 
         let decoded: ServerMessage = serde_json::from_slice(&bytes).unwrap();
         match decoded {
-            ServerMessage::Error { message } => assert_eq!(message, "hello framing"),
+            ServerMessage::Error { message, .. } => assert_eq!(message, "hello framing"),
             other => panic!("unexpected variant: {other:?}"),
         }
     }
@@ -2392,6 +2424,7 @@ mod tests {
         let payload = "x".repeat(65_536);
         let msg = ServerMessage::Error {
             message: payload.clone(),
+            code: ErrorCode::Internal,
         };
 
         let (a, b) = tokio::net::UnixStream::pair().unwrap();
@@ -2406,7 +2439,7 @@ mod tests {
 
         let decoded: ServerMessage = serde_json::from_slice(&bytes).unwrap();
         match decoded {
-            ServerMessage::Error { message } => assert_eq!(message, payload),
+            ServerMessage::Error { message, .. } => assert_eq!(message, payload),
             other => panic!("unexpected variant: {other:?}"),
         }
     }
@@ -2420,6 +2453,7 @@ mod tests {
             for i in 0u32..5 {
                 let msg = ServerMessage::Error {
                     message: i.to_string(),
+                    code: ErrorCode::Internal,
                 };
                 write_message(&mut a, &msg).await.unwrap();
             }
@@ -2430,7 +2464,7 @@ mod tests {
             let bytes = read_message(&mut b).await.unwrap();
             let decoded: ServerMessage = serde_json::from_slice(&bytes).unwrap();
             match decoded {
-                ServerMessage::Error { message } => assert_eq!(message, i.to_string()),
+                ServerMessage::Error { message, .. } => assert_eq!(message, i.to_string()),
                 other => panic!("unexpected variant: {other:?}"),
             }
         }
diff --git a/bindings/rust/src/query_graph/mod.rs b/bindings/rust/src/query_graph/mod.rs
index 2433baa..b9e5cea 100644
--- a/bindings/rust/src/query_graph/mod.rs
+++ b/bindings/rust/src/query_graph/mod.rs
@@ -39,6 +39,6 @@ pub mod types;
 
 pub use db::LipDatabase;
 pub use types::{
-    ApiSurface, BatchQueryResult, BlastRadiusResult, ClientMessage, ImpactItem, RiskLevel,
-    ServerMessage, SimilarSymbol,
+    ApiSurface, BatchQueryResult, BlastRadiusResult, ClientMessage, ErrorCode, ImpactItem,
+    RiskLevel, ServerMessage, SimilarSymbol,
 };
diff --git a/bindings/rust/src/query_graph/types.rs b/bindings/rust/src/query_graph/types.rs
index d066841..3dd2165 100644
--- a/bindings/rust/src/query_graph/types.rs
+++ b/bindings/rust/src/query_graph/types.rs
@@ -237,7 +237,13 @@ pub enum ServerMessage {
         stale_uris: Vec<String>,
     },
     Error {
+        /// Human-readable error string. Still free-form.
         message: String,
+        /// Machine-readable code. Clients branch on this instead of
+        /// string-matching `message`. Defaults to
+        /// [`ErrorCode::Internal`] on older daemons that predate this field.
+        #[serde(default)]
+        code: ErrorCode,
     },
     /// Response to [`ClientMessage::EmbeddingBatch`].
     ///
@@ -505,6 +511,35 @@ pub enum ServerMessage {
     },
 }
 
+/// Stable, machine-readable category for [`ServerMessage::Error`].
+///
+/// Clients branch on this field instead of string-matching the free-form
+/// `message`. Older daemons predating this field deserialize as
+/// [`ErrorCode::Internal`] via `#[serde(default)]`, so forward-compatible
+/// clients should treat `Internal` as "no classification available."
+///
+/// The set is intentionally small and stable. New codes are additive —
+/// adding one is non-breaking; renaming or removing one is breaking.
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ErrorCode {
+    /// The request used a `type` tag this daemon does not understand.
+    /// Preferred reply is [`ServerMessage::UnknownMessage`]; this code
+    /// exists for legacy paths that still emit `Error`.
+    UnknownMessageType,
+    /// The embedding model name is not configured on this daemon.
+    UnknownModel,
+    /// A cursor position (line/col or byte offset) fell outside the
+    /// target file. Emitted e.g. by `StreamContext`.
+    CursorOutOfRange,
+    /// A writer or exclusive index operation is in progress; the
+    /// request cannot proceed right now. Retry is safe.
+    IndexLocked,
+    /// Anything not captured by a more specific code. Default.
+    #[default]
+    Internal,
+}
+
 /// Why a [`ServerMessage::EndStream`] terminated a context stream.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(rename_all = "snake_case")]
diff --git a/bindings/rust/tests/integration.rs b/bindings/rust/tests/integration.rs
index b726b13..6f5758e 100644
--- a/bindings/rust/tests/integration.rs
+++ b/bindings/rust/tests/integration.rs
@@ -5,7 +5,7 @@ use tokio::io::{AsyncReadExt, AsyncWriteExt};
 use tokio::net::UnixStream;
 
 use lip::daemon::LipDaemon;
-use lip::query_graph::{ClientMessage, ServerMessage};
+use lip::query_graph::{ClientMessage, ErrorCode, ServerMessage};
 use lip::schema::{Action, IndexingState, OwnedDocument};
 
 // ─── Framing helpers (client side) ───────────────────────────────────────────
@@ -687,11 +687,16 @@ async fn embed_text_without_endpoint_returns_error() {
 
     let resp = recv(&mut client).await.unwrap();
     match resp {
-        ServerMessage::Error { message } => {
+        ServerMessage::Error { message, code } => {
             assert!(
                 message.contains("LIP_EMBEDDING_URL"),
                 "expected configuration error, got {message:?}"
             );
+            assert_eq!(
+                code,
+                ErrorCode::UnknownModel,
+                "expected UnknownModel code, got {code:?}"
+            );
         }
         ServerMessage::EmbedTextResult { vector, .. } => {
             // If a real embedding endpoint is configured in CI, fall through.
diff --git a/tools/lip-cli/src/cmd/mcp.rs b/tools/lip-cli/src/cmd/mcp.rs
index 7e2d768..4c2bf03 100644
--- a/tools/lip-cli/src/cmd/mcp.rs
+++ b/tools/lip-cli/src/cmd/mcp.rs
@@ -477,7 +477,7 @@ fn format_response(tool: &str, msg: &ServerMessage) -> String {
             .iter()
             .enumerate()
             .map(|(i, msg)| match msg {
-                ServerMessage::Error { message } => format!("[{i}] error: {message}"),
+                ServerMessage::Error { message, .. } => format!("[{i}] error: {message}"),
                 other => format!("[{i}]\n{}", format_response(tool, other)),
             })
             .collect::<Vec<_>>()
@@ -760,7 +760,7 @@ fn format_response(tool: &str, msg: &ServerMessage) -> String {
             }
             out.trim_end().to_owned()
         }
-        ServerMessage::Error { message } => format!("LIP error: {message}"),
+        ServerMessage::Error { message, .. } => format!("LIP error: {message}"),
         // Catch-all: emit JSON so nothing is silently lost.
         other => serde_json::to_string_pretty(other).unwrap_or_default(),
     }

From b3b672155bae02794223efa19daf81952874f8fd Mon Sep 17 00:00:00 2001
From: Lisa <lisa.welsch1985@gmail.com>
Date: Wed, 15 Apr 2026 13:47:29 +0200
Subject: [PATCH 07/12] fix(query_expansion): honor model pin, restrict ranking
 to matching model (#12)

QueryExpansion embedded the query with the requested model but then
ranked candidates across all stored symbol embeddings regardless of
which model produced them. Cross-model cosine scores are meaningless,
so the returned "expansion terms" were noise whenever the index held
mixed-model vectors.

- Added `model_filter: Option<&str>` parameter to
  `LipDatabase::nearest_symbol_by_vector`. When `Some(m)`, only
  symbols whose stored embedding was produced by `m` are scored.
- QueryExpansion handler captures the actual model returned by
  `embed_texts` (previously discarded) and passes it as the filter,
  guaranteeing query-vector and candidate-vectors share a model.
- SimilarSymbols (resolves from a URI's own cached embedding) keeps
  the old unfiltered behavior by passing `None`.
- Consolidated v2.1 CHANGELOG into one heading per user request.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md                        | 30 ++++++++---------------
 bindings/rust/src/daemon/session.rs | 32 ++++++++++++++++---------
 bindings/rust/src/query_graph/db.rs | 37 ++++++++++++++++++++++++++---
 3 files changed, 65 insertions(+), 34 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 902296f..36ba3a6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,29 +8,19 @@ All notable changes to this project are documented here.
 
 ### Added
 
-**v2.1 — Structured error codes on all `Error` responses**
-
-- **`ServerMessage::Error { message, code }`** — `code: ErrorCode` is a stable, machine-readable category. Clients branch on this instead of string-matching `message`. Added `#[serde(default)]`; older daemons predating this field deserialize as `ErrorCode::Internal`, so forward-compatible clients should treat `Internal` as "no classification available."
-- **`ErrorCode`** enum with a deliberately small, stable set: `unknown_message_type`, `unknown_model`, `cursor_out_of_range`, `index_locked`, `internal` (default). Adding a code is non-breaking; renaming or removing one is breaking.
-- **Daemon classification so far**: all 10 "embedding not configured / no cached embedding for URI" errors now carry `code: unknown_model`; everything else is `code: internal`. Specific codes will be wired in as each call-site picks one.
-
-**v2.1 — Capability discovery + graceful unknown-variant handling**
-
-- **`HandshakeResult.supported_messages: Vec<String>`** — handshake response now lists every `ClientMessage` `type` tag this daemon understands. Lets clients probe for an individual message (e.g. `stream_context`, `embed_text`) without writing "handshake then pray" code or comparing `protocol_version` integers. Field is `#[serde(default)]`; older daemons predating this field yield an empty vector, which clients should treat as "fall back to `protocol_version`."
-- **`ServerMessage::UnknownMessage { message_type, supported }`** — when a client sends a well-formed JSON envelope whose `type` tag is unknown, the daemon now replies with `UnknownMessage` (carrying the tag plus the same supported list as handshake) *and keeps the socket open*, instead of closing after a generic parse `Error`. Lets forward-compatible clients downgrade gracefully to a supported call instead of reconnecting.
-
-**v2.1 — `embed_text`: unary text-to-vector embedding**
-
-- **`EmbedText { text, model? }`** — embed an arbitrary text string and return the raw vector. Closes the gap left by `EmbeddingBatch` (URI-only) and `QueryNearestByText` (embeds internally but discards the vector). Callers re-ranking with their own scoring (centroid arithmetic, federated nearest-neighbour, lexical-then-semantic re-rank) get the embedding directly instead of building a centroid out of nearest-neighbour seeds. Returns `EmbedTextResult { vector: Vec<f32>, embedding_model: String }`. Not permitted inside `BatchQuery` (requires async HTTP).
-
-**v2.1 — `stream_context`: token-budgeted RAG context streaming**
-
 - **`StreamContext { file_uri, cursor_position, max_tokens, model? }`** — new streaming wire message. Daemon ranks symbols relevant to the cursor and emits one `SymbolInfo { symbol_info, relevance_score, token_cost }` frame at a time, terminating with exactly one `EndStream { reason, emitted, total_candidates, error? }` frame. Reasons: `budget_reached`, `exhausted`, `error`. Replaces the broken "fetch top-k, locally truncate to prompt budget" pattern with stream-until-full. Spec §9.2.
-- **Relevance ordering** (spec §2.3): direct symbol at cursor → callers (from blast-radius CPG walk) → callees / references → related types.
-- **Token-cost estimate**: conservative `ceil((len(signature) + len(documentation)) / 4) + 8` per symbol.
-- **Back-pressure**: daemon does not buffer ahead of the socket. `BrokenPipe` from a closing client aborts the ranking walk cleanly. `StreamContext` is rejected from `Batch` / `BatchQuery`.
+- **Relevance ordering** (spec §2.3): direct symbol at cursor → callers (from blast-radius CPG walk) → callees / references → related types. Conservative token-cost estimate `ceil((len(signature) + len(documentation)) / 4) + 8` per symbol. Daemon does not buffer ahead of the socket; `BrokenPipe` from a closing client aborts the ranking walk cleanly. `StreamContext` is rejected from `Batch` / `BatchQuery`.
 - **`protocol_version` bumped from `1` → `2`** in `HandshakeResult`. Clients can detect streaming support via handshake.
 - **`lip stream-context <file_uri> <line:col> --max-tokens N [--model M]`** — new CLI subcommand prints frames as JSON for manual testing.
+- **`EmbedText { text, model? }`** — embed an arbitrary text string and return the raw vector. Closes the gap left by `EmbeddingBatch` (URI-only) and `QueryNearestByText` (embeds internally but discards the vector). Callers re-ranking with their own scoring (centroid arithmetic, federated nearest-neighbour, lexical-then-semantic re-rank) get the embedding directly instead of building a centroid out of nearest-neighbour seeds. Returns `EmbedTextResult { vector: Vec<f32>, embedding_model: String }`. Not permitted inside `BatchQuery` (requires async HTTP).
+- **`HandshakeResult.supported_messages: Vec<String>`** — handshake response now lists every `ClientMessage` `type` tag this daemon understands. Lets clients probe for an individual message (e.g. `stream_context`, `embed_text`) without writing "handshake then pray" code or comparing `protocol_version` integers. Field is `#[serde(default)]`; older daemons yield an empty vector, which clients should treat as "fall back to `protocol_version`."
+- **`ServerMessage::UnknownMessage { message_type, supported }`** — when a client sends a well-formed JSON envelope whose `type` tag is unknown, the daemon now replies with `UnknownMessage` (carrying the tag plus the same supported list as handshake) *and keeps the socket open*, instead of closing after a generic parse `Error`. Lets forward-compatible clients downgrade gracefully to a supported call instead of reconnecting.
+- **`ServerMessage::Error { message, code }`** — `code: ErrorCode` is a stable, machine-readable category. Clients branch on this instead of string-matching `message`. `#[serde(default)]`; older daemons deserialize as `ErrorCode::Internal`.
+- **`ErrorCode`** enum — small, stable set: `unknown_message_type`, `unknown_model`, `cursor_out_of_range`, `index_locked`, `internal` (default). Adding a code is non-breaking; renaming or removing one is breaking. All 10 "embedding not configured / no cached embedding for URI" errors carry `code: unknown_model`; everything else currently defaults to `internal`.
+
+### Fixed
+
+- **`QueryExpansion` now honors the caller's model pin.** Previously the handler embedded the query with the requested model but then ranked candidates across *all* stored symbol embeddings regardless of which model produced them — cross-model cosine scores are not meaningful, so the returned "expansion terms" were effectively noise whenever the index held mixed-model vectors. Handler now captures the actual model returned by `embed_texts` and passes it through a new `model_filter: Option<&str>` parameter on `LipDatabase::nearest_symbol_by_vector`, restricting candidates to symbols embedded with the same model. `SimilarSymbols` (which resolves from a URI's own cached embedding) keeps the old unfiltered behavior by passing `None`.
 
 ---
 
diff --git a/bindings/rust/src/daemon/session.rs b/bindings/rust/src/daemon/session.rs
index e8c6c07..85f5ac5 100644
--- a/bindings/rust/src/daemon/session.rs
+++ b/bindings/rust/src/daemon/session.rs
@@ -779,8 +779,12 @@ impl Session {
                     v
                 };
                 let db = self.db.lock().await;
-                let results =
-                    db.nearest_symbol_by_vector(&query_vec, top_k, Some(symbol_uri.as_str()));
+                let results = db.nearest_symbol_by_vector(
+                    &query_vec,
+                    top_k,
+                    Some(symbol_uri.as_str()),
+                    None,
+                );
                 ServerMessage::NearestResult { results }
             }
 
@@ -879,18 +883,24 @@ impl Session {
                         code: ErrorCode::UnknownModel,
                     };
                 };
-                let (mut vecs, _) = match client.embed_texts(&[query], model.as_deref()).await {
-                    Ok(r) => r,
-                    Err(e) => {
-                        return ServerMessage::Error {
-                            message: format!("embedding failed: {e}"),
-                            code: ErrorCode::Internal,
+                let (mut vecs, actual_model) =
+                    match client.embed_texts(&[query], model.as_deref()).await {
+                        Ok(r) => r,
+                        Err(e) => {
+                            return ServerMessage::Error {
+                                message: format!("embedding failed: {e}"),
+                                code: ErrorCode::Internal,
+                            }
                         }
-                    }
-                };
+                    };
                 let query_vec = vecs.pop().unwrap_or_default();
                 let mut db = self.db.lock().await;
-                let hits = db.nearest_symbol_by_vector(&query_vec, top_k, None);
+                // Pin the search to symbols embedded with the same model that
+                // produced `query_vec`. Cross-model cosine scores are
+                // meaningless, so silently mixing them gives the caller noisy
+                // "expansion terms" that rank random symbols highest.
+                let hits =
+                    db.nearest_symbol_by_vector(&query_vec, top_k, None, Some(&actual_model));
                 // Resolve display names; fall back to URI fragment.
                 let mut terms = Vec::with_capacity(hits.len());
                 for item in hits {
diff --git a/bindings/rust/src/query_graph/db.rs b/bindings/rust/src/query_graph/db.rs
index cd2424b..25632d4 100644
--- a/bindings/rust/src/query_graph/db.rs
+++ b/bindings/rust/src/query_graph/db.rs
@@ -910,6 +910,7 @@ impl LipDatabase {
         query_vec: &[f32],
         top_k: usize,
         exclude_uri: Option<&str>,
+        model_filter: Option<&str>,
     ) -> Vec<crate::query_graph::types::NearestItem> {
         let q_norm: f32 = query_vec.iter().map(|x| x * x).sum::<f32>().sqrt();
         if q_norm == 0.0 || top_k == 0 {
@@ -919,6 +920,17 @@ impl LipDatabase {
             .symbol_embeddings
             .iter()
             .filter(|(uri, _)| exclude_uri.map(|e| e != uri.as_str()).unwrap_or(true))
+            .filter(|(uri, _)| {
+                // When `model_filter` is set, skip any symbol whose stored
+                // embedding was produced by a different model — cross-model
+                // cosine scores are not meaningful.
+                model_filter.is_none_or(|want| {
+                    self.symbol_embedding_models
+                        .get(uri.as_str())
+                        .map(|m| m == want)
+                        .unwrap_or(false)
+                })
+            })
             .filter_map(|(uri, vec)| {
                 if vec.len() != query_vec.len() {
                     return None;
@@ -2872,7 +2884,7 @@ impl Greeter {
         db.set_symbol_embedding("lip://local/f.rs#baz", vec![0.0, 0.0, 1.0], "test-model");
 
         let query = vec![1.0_f32, 0.0, 0.0];
-        let results = db.nearest_symbol_by_vector(&query, 3, None);
+        let results = db.nearest_symbol_by_vector(&query, 3, None, None);
         assert_eq!(results.len(), 3);
         assert_eq!(results[0].uri, "lip://local/f.rs#foo");
         assert!(
@@ -2889,7 +2901,7 @@ impl Greeter {
         db.set_symbol_embedding("lip://local/f.rs#bar", vec![0.9, 0.1], "test-model");
 
         let query = vec![1.0_f32, 0.0];
-        let results = db.nearest_symbol_by_vector(&query, 5, Some("lip://local/f.rs#foo"));
+        let results = db.nearest_symbol_by_vector(&query, 5, Some("lip://local/f.rs#foo"), None);
         assert!(
             !results.iter().any(|r| r.uri == "lip://local/f.rs#foo"),
             "excluded URI must not appear in results"
@@ -2900,10 +2912,29 @@ impl Greeter {
     #[test]
     fn nearest_symbol_by_vector_empty_store_returns_empty() {
         let db = LipDatabase::new();
-        let results = db.nearest_symbol_by_vector(&[1.0, 0.0], 5, None);
+        let results = db.nearest_symbol_by_vector(&[1.0, 0.0], 5, None, None);
         assert!(results.is_empty());
     }
 
+    #[test]
+    fn nearest_symbol_by_vector_filters_by_model() {
+        let mut db = LipDatabase::new();
+        // Two symbols with near-identical vectors but different embedding
+        // models. A query pinned to model-a must not match the model-b symbol
+        // even though the raw cosine score would be high.
+        db.set_symbol_embedding("lip://local/f.rs#alpha", vec![1.0, 0.0], "model-a");
+        db.set_symbol_embedding("lip://local/f.rs#beta", vec![1.0, 0.0], "model-b");
+
+        let query = vec![1.0_f32, 0.0];
+
+        let all = db.nearest_symbol_by_vector(&query, 5, None, None);
+        assert_eq!(all.len(), 2, "without filter both symbols rank");
+
+        let pinned = db.nearest_symbol_by_vector(&query, 5, None, Some("model-a"));
+        assert_eq!(pinned.len(), 1);
+        assert_eq!(pinned[0].uri, "lip://local/f.rs#alpha");
+    }
+
     // ── outliers ──────────────────────────────────────────────────────────
 
     #[test]

From ef453c63f3b1b785875566061a6cdad494d36248 Mon Sep 17 00:00:00 2001
From: Lisa <lisa.welsch1985@gmail.com>
Date: Wed, 15 Apr 2026 14:01:43 +0200
Subject: [PATCH 08/12] feat(2.1): split ErrorCode conflations, drift-guard
 supported_messages (#13)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-release polish for 2.1.0. Once the tag ships, ErrorCode becomes
load-bearing with its current meaning — splitting the conflations now
is free; splitting them later is a breaking contract change.

Error codes split:
- `EmbeddingNotConfigured` — daemon has no LIP_EMBEDDING_URL
- `NoEmbedding`            — URI not yet embedded; retry after batch
- `InvalidRequest`         — client-side misuse (nested Batch,
                             StreamContext inside Batch)

Previously all three collapsed into `UnknownModel` / `Internal`, so
clients could not tell "retry after embedding" from "model broken"
from "my request shape is wrong."

Drift guard:
- `ClientMessage::variant_tag` is an exhaustive-match method; adding
  a new variant breaks compilation until acknowledged. The paired
  `supported_messages_covers_all_variants` test then enforces that
  the new tag also appears in the handshake capability list.

Also:
- benches/framing.rs lost the `code` field on ServerMessage::Error
  since it was added to the struct; repaired so `cargo bench` compiles.
- CHANGELOG 2.1.0 reformatted with v2.0-style subsection headers and
  corrected to reflect the new, non-conflated error codes.

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                           |  20 +-
 bindings/rust/benches/framing.rs       |   3 +-
 bindings/rust/src/daemon/session.rs    |  30 +--
 bindings/rust/src/query_graph/types.rs | 317 ++++++++++++++++++++++++-
 4 files changed, 352 insertions(+), 18 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 36ba3a6..1cee93c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,15 +8,33 @@ All notable changes to this project are documented here.
 
 ### Added
 
+**v2.1 — Streaming context + forward-compat primitives**
+
+**Streaming**
+
 - **`StreamContext { file_uri, cursor_position, max_tokens, model? }`** — new streaming wire message. Daemon ranks symbols relevant to the cursor and emits one `SymbolInfo { symbol_info, relevance_score, token_cost }` frame at a time, terminating with exactly one `EndStream { reason, emitted, total_candidates, error? }` frame. Reasons: `budget_reached`, `exhausted`, `error`. Replaces the broken "fetch top-k, locally truncate to prompt budget" pattern with stream-until-full. Spec §9.2.
 - **Relevance ordering** (spec §2.3): direct symbol at cursor → callers (from blast-radius CPG walk) → callees / references → related types. Conservative token-cost estimate `ceil((len(signature) + len(documentation)) / 4) + 8` per symbol. Daemon does not buffer ahead of the socket; `BrokenPipe` from a closing client aborts the ranking walk cleanly. `StreamContext` is rejected from `Batch` / `BatchQuery`.
 - **`protocol_version` bumped from `1` → `2`** in `HandshakeResult`. Clients can detect streaming support via handshake.
 - **`lip stream-context <file_uri> <line:col> --max-tokens N [--model M]`** — new CLI subcommand prints frames as JSON for manual testing.
+
+**New primitives**
+
 - **`EmbedText { text, model? }`** — embed an arbitrary text string and return the raw vector. Closes the gap left by `EmbeddingBatch` (URI-only) and `QueryNearestByText` (embeds internally but discards the vector). Callers re-ranking with their own scoring (centroid arithmetic, federated nearest-neighbour, lexical-then-semantic re-rank) get the embedding directly instead of building a centroid out of nearest-neighbour seeds. Returns `EmbedTextResult { vector: Vec<f32>, embedding_model: String }`. Not permitted inside `BatchQuery` (requires async HTTP).
+
+**Forward-compat & capability discovery**
+
 - **`HandshakeResult.supported_messages: Vec<String>`** — handshake response now lists every `ClientMessage` `type` tag this daemon understands. Lets clients probe for an individual message (e.g. `stream_context`, `embed_text`) without writing "handshake then pray" code or comparing `protocol_version` integers. Field is `#[serde(default)]`; older daemons yield an empty vector, which clients should treat as "fall back to `protocol_version`."
 - **`ServerMessage::UnknownMessage { message_type, supported }`** — when a client sends a well-formed JSON envelope whose `type` tag is unknown, the daemon now replies with `UnknownMessage` (carrying the tag plus the same supported list as handshake) *and keeps the socket open*, instead of closing after a generic parse `Error`. Lets forward-compatible clients downgrade gracefully to a supported call instead of reconnecting.
 - **`ServerMessage::Error { message, code }`** — `code: ErrorCode` is a stable, machine-readable category. Clients branch on this instead of string-matching `message`. `#[serde(default)]`; older daemons deserialize as `ErrorCode::Internal`.
-- **`ErrorCode`** enum — small, stable set: `unknown_message_type`, `unknown_model`, `cursor_out_of_range`, `index_locked`, `internal` (default). Adding a code is non-breaking; renaming or removing one is breaking. All 10 "embedding not configured / no cached embedding for URI" errors carry `code: unknown_model`; everything else currently defaults to `internal`.
+- **`ErrorCode`** enum — small, stable set: `unknown_message_type`, `unknown_model`, `embedding_not_configured`, `no_embedding`, `cursor_out_of_range`, `index_locked`, `invalid_request`, `internal` (default). Adding a code is non-breaking; renaming or removing one is breaking.
+  - `embedding_not_configured` — daemon has no embedding service (`LIP_EMBEDDING_URL` unset).
+  - `no_embedding` — URI has no cached embedding yet; call `EmbeddingBatch` first.
+  - `unknown_model` — caller asked for a model the daemon does not recognize.
+  - `invalid_request` — request was well-formed on the wire but used incorrectly (e.g. nested `Batch`, or `StreamContext` inside a `Batch`). Distinct from `internal` so clients can avoid retry loops on caller-side mistakes.
+
+**Drift guard**
+
+- **`ClientMessage::variant_tag`** + `supported_messages_covers_all_variants` test — exhaustive-match helper plus paired test that fails compilation when a new `ClientMessage` variant is added without being advertised in `supported_messages()`. Prevents capability-list drift from silently shrinking the handshake surface.
 
 ### Fixed
 
diff --git a/bindings/rust/benches/framing.rs b/bindings/rust/benches/framing.rs
index 9fa98c1..3cb48aa 100644
--- a/bindings/rust/benches/framing.rs
+++ b/bindings/rust/benches/framing.rs
@@ -7,7 +7,7 @@ use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criteri
 use tokio::runtime::Runtime;
 
 use lip::daemon::session::{read_message, write_message};
-use lip::query_graph::ServerMessage;
+use lip::query_graph::{ErrorCode, ServerMessage};
 
 fn make_rt() -> Runtime {
     tokio::runtime::Builder::new_current_thread()
@@ -19,6 +19,7 @@ fn make_rt() -> Runtime {
 fn make_message(payload_bytes: usize) -> ServerMessage {
     ServerMessage::Error {
         message: "x".repeat(payload_bytes),
+        code: ErrorCode::Internal,
     }
 }
 
diff --git a/bindings/rust/src/daemon/session.rs b/bindings/rust/src/daemon/session.rs
index 85f5ac5..1223f52 100644
--- a/bindings/rust/src/daemon/session.rs
+++ b/bindings/rust/src/daemon/session.rs
@@ -442,7 +442,7 @@ impl Session {
                     let _ = bad; // already matched by is_batchable
                     return ServerMessage::Error {
                         message: "nested Batch not allowed".into(),
-                        code: ErrorCode::Internal,
+                        code: ErrorCode::InvalidRequest,
                     };
                 }
                 let mut results = Vec::with_capacity(requests.len());
@@ -493,7 +493,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
-                        code: ErrorCode::UnknownModel,
+                        code: ErrorCode::EmbeddingNotConfigured,
                     };
                 };
                 // Separate URIs that already have a cached embedding from those
@@ -641,7 +641,7 @@ impl Session {
                 let Some(query_vec) = db.get_file_embedding(&uri).cloned() else {
                     return ServerMessage::Error {
                         message: format!("no embedding for {uri} — call EmbeddingBatch first"),
-                        code: ErrorCode::UnknownModel,
+                        code: ErrorCode::NoEmbedding,
                     };
                 };
                 let results = db.nearest_by_vector(
@@ -664,7 +664,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
-                        code: ErrorCode::UnknownModel,
+                        code: ErrorCode::EmbeddingNotConfigured,
                     };
                 };
                 let texts = vec![text];
@@ -695,7 +695,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
-                        code: ErrorCode::UnknownModel,
+                        code: ErrorCode::EmbeddingNotConfigured,
                     };
                 };
                 // Embed all queries in one HTTP batch call; no lock held during await.
@@ -725,7 +725,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
-                        code: ErrorCode::UnknownModel,
+                        code: ErrorCode::EmbeddingNotConfigured,
                     };
                 };
                 // Check cache — avoid re-embedding the same symbol repeatedly.
@@ -880,7 +880,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
-                        code: ErrorCode::UnknownModel,
+                        code: ErrorCode::EmbeddingNotConfigured,
                     };
                 };
                 let (mut vecs, actual_model) =
@@ -1028,7 +1028,7 @@ impl Session {
                         message: "both URIs must have cached embeddings with matching \
                                   dimensions — call embedding_batch first"
                             .into(),
-                        code: ErrorCode::Internal,
+                        code: ErrorCode::NoEmbedding,
                     },
                 }
             }
@@ -1098,7 +1098,7 @@ impl Session {
                         message: format!(
                             "{uri} has no cached embedding — call embedding_batch first"
                         ),
-                        code: ErrorCode::Internal,
+                        code: ErrorCode::NoEmbedding,
                     };
                 };
                 let q_norm: f32 = qv.iter().map(|x| x * x).sum::<f32>().sqrt();
@@ -1183,7 +1183,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
-                        code: ErrorCode::UnknownModel,
+                        code: ErrorCode::EmbeddingNotConfigured,
                     };
                 };
                 let chunk_size = chunk_lines.max(1);
@@ -1250,7 +1250,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
-                        code: ErrorCode::UnknownModel,
+                        code: ErrorCode::EmbeddingNotConfigured,
                     };
                 };
                 let (mut vecs, _) = match client
@@ -1319,7 +1319,7 @@ impl Session {
                         message: format!(
                             "{uri} has no cached embedding — call embedding_batch first"
                         ),
-                        code: ErrorCode::Internal,
+                        code: ErrorCode::NoEmbedding,
                     };
                 };
                 let q_norm: f32 = qv.iter().map(|x| x * x).sum::<f32>().sqrt();
@@ -1451,7 +1451,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
-                        code: ErrorCode::UnknownModel,
+                        code: ErrorCode::EmbeddingNotConfigured,
                     };
                 };
                 let effective_top_k = if top_k == 0 { 5 } else { top_k };
@@ -1577,7 +1577,7 @@ impl Session {
                 let Some(client) = self.embedding_client.as_ref().as_ref() else {
                     return ServerMessage::Error {
                         message: "embedding not configured — set LIP_EMBEDDING_URL".into(),
-                        code: ErrorCode::UnknownModel,
+                        code: ErrorCode::EmbeddingNotConfigured,
                     };
                 };
                 let texts = vec![text];
@@ -1600,7 +1600,7 @@ impl Session {
                 message: "stream_context is a streaming request and cannot be \
                           batched or nested"
                     .into(),
-                code: ErrorCode::Internal,
+                code: ErrorCode::InvalidRequest,
             },
         }
     }
diff --git a/bindings/rust/src/query_graph/types.rs b/bindings/rust/src/query_graph/types.rs
index 3dd2165..2be60ba 100644
--- a/bindings/rust/src/query_graph/types.rs
+++ b/bindings/rust/src/query_graph/types.rs
@@ -527,14 +527,30 @@ pub enum ErrorCode {
     /// Preferred reply is [`ServerMessage::UnknownMessage`]; this code
     /// exists for legacy paths that still emit `Error`.
     UnknownMessageType,
-    /// The embedding model name is not configured on this daemon.
+    /// The caller asked for an embedding model this daemon does not
+    /// recognize. Retrying is pointless until the model is configured.
     UnknownModel,
+    /// The daemon has no embedding service configured at all
+    /// (`LIP_EMBEDDING_URL` unset). Distinct from [`UnknownModel`]:
+    /// this is a daemon-side configuration gap, not a caller problem.
+    EmbeddingNotConfigured,
+    /// The requested URI has no cached embedding yet. The remedy is to
+    /// call `EmbeddingBatch` first; the model itself is fine. Clients
+    /// can distinguish this from [`UnknownModel`] / [`EmbeddingNotConfigured`]
+    /// to drive "index-then-retry" flows instead of giving up.
+    NoEmbedding,
     /// A cursor position (line/col or byte offset) fell outside the
     /// target file. Emitted e.g. by `StreamContext`.
     CursorOutOfRange,
     /// A writer or exclusive index operation is in progress; the
     /// request cannot proceed right now. Retry is safe.
     IndexLocked,
+    /// The request was well-formed on the wire but used incorrectly —
+    /// e.g. a nested `Batch`, or a `StreamContext` submitted inside a
+    /// `Batch`. Callers should not blindly retry; the request must be
+    /// changed. Distinct from [`Internal`], which indicates a
+    /// daemon-side failure.
+    InvalidRequest,
     /// Anything not captured by a more specific code. Default.
     #[default]
     Internal,
@@ -1071,6 +1087,71 @@ impl ClientMessage {
         .collect()
     }
 
+    /// Snake-case `type` tag for a specific variant.
+    ///
+    /// Exists primarily as a drift guard: the exhaustive match below
+    /// fails to compile when a new [`ClientMessage`] variant is added
+    /// without acknowledgement, and the paired
+    /// `supported_messages_covers_all_variants` test then enforces
+    /// that the new tag also appears in
+    /// [`ClientMessage::supported_messages`].
+    ///
+    /// Update [`ClientMessage::supported_messages`] in lockstep with
+    /// the arms here.
+    pub fn variant_tag(&self) -> &'static str {
+        match self {
+            ClientMessage::Manifest(_) => "manifest",
+            ClientMessage::Delta { .. } => "delta",
+            ClientMessage::QueryDefinition { .. } => "query_definition",
+            ClientMessage::QueryReferences { .. } => "query_references",
+            ClientMessage::QueryHover { .. } => "query_hover",
+            ClientMessage::QueryBlastRadius { .. } => "query_blast_radius",
+            ClientMessage::QueryWorkspaceSymbols { .. } => "query_workspace_symbols",
+            ClientMessage::QueryDocumentSymbols { .. } => "query_document_symbols",
+            ClientMessage::QueryDeadSymbols { .. } => "query_dead_symbols",
+            ClientMessage::AnnotationSet { .. } => "annotation_set",
+            ClientMessage::AnnotationGet { .. } => "annotation_get",
+            ClientMessage::AnnotationList { .. } => "annotation_list",
+            ClientMessage::AnnotationWorkspaceList { .. } => "annotation_workspace_list",
+            ClientMessage::BatchQuery { .. } => "batch_query",
+            ClientMessage::Batch { .. } => "batch",
+            ClientMessage::SimilarSymbols { .. } => "similar_symbols",
+            ClientMessage::QueryStaleFiles { .. } => "query_stale_files",
+            ClientMessage::LoadSlice { .. } => "load_slice",
+            ClientMessage::EmbeddingBatch { .. } => "embedding_batch",
+            ClientMessage::QueryIndexStatus => "query_index_status",
+            ClientMessage::QueryFileStatus { .. } => "query_file_status",
+            ClientMessage::QueryNearest { .. } => "query_nearest",
+            ClientMessage::QueryNearestByText { .. } => "query_nearest_by_text",
+            ClientMessage::BatchQueryNearestByText { .. } => "batch_query_nearest_by_text",
+            ClientMessage::QueryNearestBySymbol { .. } => "query_nearest_by_symbol",
+            ClientMessage::BatchAnnotationGet { .. } => "batch_annotation_get",
+            ClientMessage::Handshake { .. } => "handshake",
+            ClientMessage::ReindexFiles { .. } => "reindex_files",
+            ClientMessage::Similarity { .. } => "similarity",
+            ClientMessage::QueryExpansion { .. } => "query_expansion",
+            ClientMessage::Cluster { .. } => "cluster",
+            ClientMessage::ExportEmbeddings { .. } => "export_embeddings",
+            ClientMessage::QueryNearestByContrast { .. } => "query_nearest_by_contrast",
+            ClientMessage::QueryOutliers { .. } => "query_outliers",
+            ClientMessage::QuerySemanticDrift { .. } => "query_semantic_drift",
+            ClientMessage::SimilarityMatrix { .. } => "similarity_matrix",
+            ClientMessage::FindSemanticCounterpart { .. } => "find_semantic_counterpart",
+            ClientMessage::QueryCoverage { .. } => "query_coverage",
+            ClientMessage::FindBoundaries { .. } => "find_boundaries",
+            ClientMessage::SemanticDiff { .. } => "semantic_diff",
+            ClientMessage::QueryNearestInStore { .. } => "query_nearest_in_store",
+            ClientMessage::QueryNoveltyScore { .. } => "query_novelty_score",
+            ClientMessage::ExtractTerminology { .. } => "extract_terminology",
+            ClientMessage::PruneDeleted => "prune_deleted",
+            ClientMessage::GetCentroid { .. } => "get_centroid",
+            ClientMessage::QueryStaleEmbeddings { .. } => "query_stale_embeddings",
+            ClientMessage::ExplainMatch { .. } => "explain_match",
+            ClientMessage::EmbedText { .. } => "embed_text",
+            ClientMessage::StreamContext { .. } => "stream_context",
+        }
+    }
+
     /// Returns `true` for any message that may appear inside a [`ClientMessage::Batch`].
     /// A `Batch` itself is excluded to prevent nesting. `LoadSlice` is also excluded
     /// because it requires mutable database access outside the read-only batch lock.
@@ -1191,6 +1272,240 @@ mod tests {
         assert!(supported_messages.contains(&"stream_context".to_string()));
     }
 
+    /// Drift guard: every tag produced by [`ClientMessage::variant_tag`]
+    /// must also appear in [`ClientMessage::supported_messages`], and
+    /// the two lists must be the same size. Combined with the
+    /// compile-time exhaustiveness of `variant_tag`'s match, this
+    /// prevents a new [`ClientMessage`] variant from being added
+    /// without being advertised in the handshake capability list.
+    #[test]
+    fn supported_messages_covers_all_variants() {
+        // One representative instance per variant. Payloads are the
+        // cheapest legal construction — we only exercise `variant_tag`,
+        // not behavior.
+        let samples: Vec<ClientMessage> = vec![
+            ClientMessage::Manifest(crate::daemon::manifest::ManifestRequest {
+                repo_root: String::new(),
+                merkle_root: String::new(),
+                dep_tree_hash: String::new(),
+                lip_version: String::new(),
+            }),
+            ClientMessage::Delta {
+                seq: 0,
+                action: crate::schema::Action::Upsert,
+                document: crate::schema::OwnedDocument {
+                    uri: String::new(),
+                    content_hash: String::new(),
+                    language: String::new(),
+                    occurrences: vec![],
+                    symbols: vec![],
+                    merkle_path: String::new(),
+                    edges: vec![],
+                    source_text: None,
+                },
+            },
+            ClientMessage::QueryDefinition {
+                uri: String::new(),
+                line: 0,
+                col: 0,
+            },
+            ClientMessage::QueryReferences {
+                symbol_uri: String::new(),
+                limit: None,
+            },
+            ClientMessage::QueryHover {
+                uri: String::new(),
+                line: 0,
+                col: 0,
+            },
+            ClientMessage::QueryBlastRadius {
+                symbol_uri: String::new(),
+            },
+            ClientMessage::QueryWorkspaceSymbols {
+                query: String::new(),
+                limit: None,
+            },
+            ClientMessage::QueryDocumentSymbols {
+                uri: String::new(),
+            },
+            ClientMessage::QueryDeadSymbols { limit: None },
+            ClientMessage::AnnotationSet {
+                symbol_uri: String::new(),
+                key: String::new(),
+                value: String::new(),
+                author_id: String::new(),
+            },
+            ClientMessage::AnnotationGet {
+                symbol_uri: String::new(),
+                key: String::new(),
+            },
+            ClientMessage::AnnotationList {
+                symbol_uri: String::new(),
+            },
+            ClientMessage::AnnotationWorkspaceList {
+                key_prefix: String::new(),
+            },
+            ClientMessage::BatchQuery { queries: vec![] },
+            ClientMessage::Batch { requests: vec![] },
+            ClientMessage::SimilarSymbols {
+                query: String::new(),
+                limit: 0,
+            },
+            ClientMessage::QueryStaleFiles { files: vec![] },
+            ClientMessage::LoadSlice {
+                slice: crate::schema::OwnedDependencySlice {
+                    manager: String::new(),
+                    package_name: String::new(),
+                    version: String::new(),
+                    package_hash: String::new(),
+                    content_hash: String::new(),
+                    symbols: vec![],
+                    slice_url: String::new(),
+                    built_at_ms: 0,
+                },
+            },
+            ClientMessage::EmbeddingBatch {
+                uris: vec![],
+                model: None,
+            },
+            ClientMessage::QueryIndexStatus,
+            ClientMessage::QueryFileStatus { uri: String::new() },
+            ClientMessage::QueryNearest {
+                uri: String::new(),
+                top_k: 0,
+                filter: None,
+                min_score: None,
+            },
+            ClientMessage::QueryNearestByText {
+                text: String::new(),
+                top_k: 0,
+                model: None,
+                filter: None,
+                min_score: None,
+            },
+            ClientMessage::BatchQueryNearestByText {
+                queries: vec![],
+                top_k: 0,
+                model: None,
+                filter: None,
+                min_score: None,
+            },
+            ClientMessage::QueryNearestBySymbol {
+                symbol_uri: String::new(),
+                top_k: 0,
+                model: None,
+            },
+            ClientMessage::BatchAnnotationGet {
+                uris: vec![],
+                key: String::new(),
+            },
+            ClientMessage::Handshake {
+                client_version: None,
+            },
+            ClientMessage::ReindexFiles { uris: vec![] },
+            ClientMessage::Similarity {
+                uri_a: String::new(),
+                uri_b: String::new(),
+            },
+            ClientMessage::QueryExpansion {
+                query: String::new(),
+                top_k: 0,
+                model: None,
+            },
+            ClientMessage::Cluster {
+                uris: vec![],
+                radius: 0.0,
+            },
+            ClientMessage::ExportEmbeddings { uris: vec![] },
+            ClientMessage::QueryNearestByContrast {
+                like_uri: String::new(),
+                unlike_uri: String::new(),
+                top_k: 0,
+                filter: None,
+                min_score: None,
+            },
+            ClientMessage::QueryOutliers {
+                uris: vec![],
+                top_k: 0,
+            },
+            ClientMessage::QuerySemanticDrift {
+                uri_a: String::new(),
+                uri_b: String::new(),
+            },
+            ClientMessage::SimilarityMatrix { uris: vec![] },
+            ClientMessage::FindSemanticCounterpart {
+                uri: String::new(),
+                candidates: vec![],
+                top_k: 0,
+                filter: None,
+                min_score: None,
+            },
+            ClientMessage::QueryCoverage {
+                root: String::new(),
+            },
+            ClientMessage::FindBoundaries {
+                uri: String::new(),
+                chunk_lines: 0,
+                threshold: 0.0,
+                model: None,
+            },
+            ClientMessage::SemanticDiff {
+                content_a: String::new(),
+                content_b: String::new(),
+                top_k: 0,
+                model: None,
+            },
+            ClientMessage::QueryNearestInStore {
+                uri: String::new(),
+                store: std::collections::HashMap::new(),
+                top_k: 0,
+                filter: None,
+                min_score: None,
+            },
+            ClientMessage::QueryNoveltyScore { uris: vec![] },
+            ClientMessage::ExtractTerminology {
+                uris: vec![],
+                top_k: 0,
+            },
+            ClientMessage::PruneDeleted,
+            ClientMessage::GetCentroid { uris: vec![] },
+            ClientMessage::QueryStaleEmbeddings {
+                root: String::new(),
+            },
+            ClientMessage::ExplainMatch {
+                query: String::new(),
+                result_uri: String::new(),
+                top_k: 0,
+                chunk_lines: 0,
+                model: None,
+            },
+            ClientMessage::EmbedText {
+                text: String::new(),
+                model: None,
+            },
+            ClientMessage::StreamContext {
+                file_uri: String::new(),
+                cursor_position: crate::schema::OwnedRange::default(),
+                max_tokens: 0,
+                model: None,
+            },
+        ];
+
+        let supported = ClientMessage::supported_messages();
+        for m in &samples {
+            let tag = m.variant_tag();
+            assert!(
+                supported.iter().any(|s| s == tag),
+                "variant tag {tag:?} missing from supported_messages()"
+            );
+        }
+        assert_eq!(
+            samples.len(),
+            supported.len(),
+            "variant count drifted from supported_messages() length"
+        );
+    }
+
     #[test]
     fn embed_text_request_round_trips() {
         let msg = ClientMessage::EmbedText {

From 713952668223633c72221a05ac7044343abf61bd Mon Sep 17 00:00:00 2001
From: Lisa <lisa.welsch1985@gmail.com>
Date: Wed, 15 Apr 2026 14:19:33 +0200
Subject: [PATCH 09/12] feat(2.1): Tier 3 provenance via RegisterTier3Source
 (#14)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the silent-staleness footgun on Tier 3 (SCIP) imports. The
daemon previously had no way to report *what* produced imported
symbols or *when* they landed, so stale Tier 3 data returned
confidently-wrong results with no observable signal.

New wire surface (additive, backwards-compatible):
- `Tier3Source { source_id, tool_name, tool_version, project_root,
  imported_at_ms }` — provenance record.
- `ClientMessage::RegisterTier3Source { source }` — idempotent
  registration; re-registering the same `source_id` refreshes
  `imported_at_ms` in place. Ack'd with `DeltaAck`. Rejected inside
  `BatchQuery` (mutation).
- `IndexStatusResult.tier3_sources: Vec<Tier3Source>` — sorted by
  `source_id`. `#[serde(default)]`, so older daemons yield an empty
  list to newer clients.

CLI wiring:
- `lip import --push-to-daemon` now extracts SCIP `Metadata.tool_info`
  + `project_root` and registers before streaming deltas. `source_id`
  = `sha256(tool_name + ":" + project_root)` so re-imports of the
  same source collapse to one entry.
- `lip-cli mcp` index-status text output now surfaces the tier3 list.

The daemon deliberately does no staleness detection. This is a
provenance primitive, not a policy: clients decide what "stale"
means (time threshold, commit drift vs. HEAD, etc.). The 2.1 goal
is visibility, not auto-reindex.

Tests: +4 (round-trip, missing-field fallback, sort order, re-reg
overwrite). Drift-guard sample list updated for the new variant.

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                           |   1 +
 bindings/rust/src/daemon/session.rs    |  19 +++++
 bindings/rust/src/query_graph/db.rs    |  77 +++++++++++++++++
 bindings/rust/src/query_graph/mod.rs   |   2 +-
 bindings/rust/src/query_graph/types.rs | 109 +++++++++++++++++++++++++
 tools/lip-cli/src/cmd/import.rs        |  75 ++++++++++++++++-
 tools/lip-cli/src/cmd/mcp.rs           |  17 +++-
 7 files changed, 297 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1cee93c..0fcfadb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,7 @@ All notable changes to this project are documented here.
 **New primitives**
 
 - **`EmbedText { text, model? }`** — embed an arbitrary text string and return the raw vector. Closes the gap left by `EmbeddingBatch` (URI-only) and `QueryNearestByText` (embeds internally but discards the vector). Callers re-ranking with their own scoring (centroid arithmetic, federated nearest-neighbour, lexical-then-semantic re-rank) get the embedding directly instead of building a centroid out of nearest-neighbour seeds. Returns `EmbedTextResult { vector: Vec<f32>, embedding_model: String }`. Not permitted inside `BatchQuery` (requires async HTTP).
+- **`RegisterTier3Source { source: Tier3Source }`** + **`IndexStatusResult.tier3_sources`** — expose provenance for Tier 3 ingestion batches (SCIP imports). `Tier3Source { source_id, tool_name, tool_version, project_root, imported_at_ms }` records *what* producer generated the symbols and *when* the daemon accepted them. Re-registering the same `source_id` overwrites in place, refreshing `imported_at_ms`. The daemon deliberately does no staleness detection: stale Tier 3 symbols remain in the graph at their original confidence until the caller re-imports. Surfacing provenance lets clients decide when to warn a user that imported data has aged (e.g. `scip-rust imported 3 days ago`). `lip import --push-to-daemon` now sends this before streaming SCIP deltas, with `source_id = sha256(tool_name + ":" + project_root)`. `IndexStatusResult.tier3_sources` is `#[serde(default)]`; older daemons yield an empty vector. Ack'd with `DeltaAck`. Not permitted inside `BatchQuery` (mutation).
 
 **Forward-compat & capability discovery**
 
diff --git a/bindings/rust/src/daemon/session.rs b/bindings/rust/src/daemon/session.rs
index 1223f52..6515f01 100644
--- a/bindings/rust/src/daemon/session.rs
+++ b/bindings/rust/src/daemon/session.rs
@@ -607,6 +607,7 @@ impl Session {
                     .map(|c| c.default_model().to_owned());
                 let models_in_index = db.file_embedding_model_names();
                 let mixed_models = models_in_index.len() > 1;
+                let tier3_sources = db.tier3_sources();
                 ServerMessage::IndexStatusResult {
                     indexed_files,
                     pending_embedding_files: pending,
@@ -614,6 +615,7 @@ impl Session {
                     embedding_model,
                     mixed_models,
                     models_in_index,
+                    tier3_sources,
                 }
             }
 
@@ -1602,6 +1604,17 @@ impl Session {
                     .into(),
                 code: ErrorCode::InvalidRequest,
             },
+
+            // ── v2.1: Tier 3 provenance registration ──────────────────────
+            ClientMessage::RegisterTier3Source { source } => {
+                let mut db = self.db.lock().await;
+                db.register_tier3_source(source);
+                ServerMessage::DeltaAck {
+                    seq: 0,
+                    accepted: true,
+                    error: None,
+                }
+            }
         }
     }
 
@@ -1961,6 +1974,7 @@ fn process_query_sync(
             let (indexed_files, pending, last_ms) = db.index_status();
             let models_in_index = db.file_embedding_model_names();
             let mixed_models = models_in_index.len() > 1;
+            let tier3_sources = db.tier3_sources();
             ok(ServerMessage::IndexStatusResult {
                 indexed_files,
                 pending_embedding_files: pending,
@@ -1968,6 +1982,7 @@ fn process_query_sync(
                 embedding_model: None, // no client reference available in sync context
                 mixed_models,
                 models_in_index,
+                tier3_sources,
             })
         }
 
@@ -2354,6 +2369,10 @@ fn process_query_sync(
         ClientMessage::EmbedText { .. } => {
             err("EmbedText requires async HTTP; not permitted in BatchQuery")
         }
+
+        ClientMessage::RegisterTier3Source { .. } => {
+            err("RegisterTier3Source is a mutation; not permitted in BatchQuery")
+        }
     }
 }
 
diff --git a/bindings/rust/src/query_graph/db.rs b/bindings/rust/src/query_graph/db.rs
index 25632d4..66add73 100644
--- a/bindings/rust/src/query_graph/db.rs
+++ b/bindings/rust/src/query_graph/db.rs
@@ -170,6 +170,11 @@ pub struct LipDatabase {
     symbol_embedding_models: HashMap<String, String>,
     /// Unix timestamps (ms) recording when each URI was last upserted.
     file_indexed_at: HashMap<String, i64>,
+    /// Provenance for Tier 3 ingestion batches (typically SCIP imports),
+    /// keyed by caller-supplied `source_id`. Surfaced through
+    /// `QueryIndexStatus` so clients can implement their own staleness
+    /// policy; the daemon never reasons about freshness itself.
+    tier3_sources: HashMap<String, crate::query_graph::types::Tier3Source>,
 }
 
 impl LipDatabase {
@@ -196,9 +201,28 @@ impl LipDatabase {
             symbol_embeddings: HashMap::new(),
             symbol_embedding_models: HashMap::new(),
             file_indexed_at: HashMap::new(),
+            tier3_sources: HashMap::new(),
         }
     }
 
+    /// Record (or refresh) provenance for a Tier 3 ingestion batch.
+    /// Re-registering the same `source_id` overwrites the prior entry,
+    /// which is how clients refresh `imported_at_ms` after a re-import.
+    pub fn register_tier3_source(
+        &mut self,
+        source: crate::query_graph::types::Tier3Source,
+    ) {
+        self.tier3_sources.insert(source.source_id.clone(), source);
+    }
+
+    /// All currently-registered Tier 3 provenance records, sorted by
+    /// `source_id` for deterministic output.
+    pub fn tier3_sources(&self) -> Vec<crate::query_graph::types::Tier3Source> {
+        let mut out: Vec<_> = self.tier3_sources.values().cloned().collect();
+        out.sort_by(|a, b| a.source_id.cmp(&b.source_id));
+        out
+    }
+
     // ── Mutations ─────────────────────────────────────────────────────────
 
     /// Register or update a file. Bumps the global revision and invalidates
@@ -2861,6 +2885,59 @@ impl Greeter {
         );
     }
 
+    // ── tier3 provenance ──────────────────────────────────────────────────
+
+    #[test]
+    fn tier3_sources_sorted_by_source_id() {
+        use crate::query_graph::types::Tier3Source;
+        let mut db = LipDatabase::new();
+        db.register_tier3_source(Tier3Source {
+            source_id: "b".into(),
+            tool_name: "scip-typescript".into(),
+            tool_version: "0.3.0".into(),
+            project_root: "file:///b".into(),
+            imported_at_ms: 2,
+        });
+        db.register_tier3_source(Tier3Source {
+            source_id: "a".into(),
+            tool_name: "scip-rust".into(),
+            tool_version: "0.3.0".into(),
+            project_root: "file:///a".into(),
+            imported_at_ms: 1,
+        });
+        let got = db.tier3_sources();
+        assert_eq!(got.len(), 2);
+        assert_eq!(got[0].source_id, "a");
+        assert_eq!(got[1].source_id, "b");
+    }
+
+    /// Re-registering the same `source_id` must overwrite the prior
+    /// record in place, refreshing `imported_at_ms`. This is the
+    /// mechanism clients rely on to mark a fresh import.
+    #[test]
+    fn tier3_reregistration_overwrites_in_place() {
+        use crate::query_graph::types::Tier3Source;
+        let mut db = LipDatabase::new();
+        db.register_tier3_source(Tier3Source {
+            source_id: "same".into(),
+            tool_name: "scip-rust".into(),
+            tool_version: "0.3.0".into(),
+            project_root: "file:///r".into(),
+            imported_at_ms: 1,
+        });
+        db.register_tier3_source(Tier3Source {
+            source_id: "same".into(),
+            tool_name: "scip-rust".into(),
+            tool_version: "0.4.0".into(),
+            project_root: "file:///r".into(),
+            imported_at_ms: 99,
+        });
+        let got = db.tier3_sources();
+        assert_eq!(got.len(), 1, "re-registration must not grow the list");
+        assert_eq!(got[0].tool_version, "0.4.0");
+        assert_eq!(got[0].imported_at_ms, 99);
+    }
+
     // ── symbol_embeddings / nearest_symbol_by_vector ──────────────────────
 
     #[test]
diff --git a/bindings/rust/src/query_graph/mod.rs b/bindings/rust/src/query_graph/mod.rs
index b9e5cea..d3b21f8 100644
--- a/bindings/rust/src/query_graph/mod.rs
+++ b/bindings/rust/src/query_graph/mod.rs
@@ -40,5 +40,5 @@ pub mod types;
 pub use db::LipDatabase;
 pub use types::{
     ApiSurface, BatchQueryResult, BlastRadiusResult, ClientMessage, ErrorCode, ImpactItem,
-    RiskLevel, ServerMessage, SimilarSymbol,
+    RiskLevel, ServerMessage, SimilarSymbol, Tier3Source,
 };
diff --git a/bindings/rust/src/query_graph/types.rs b/bindings/rust/src/query_graph/types.rs
index 2be60ba..c78abc6 100644
--- a/bindings/rust/src/query_graph/types.rs
+++ b/bindings/rust/src/query_graph/types.rs
@@ -272,6 +272,13 @@ pub enum ServerMessage {
         mixed_models: bool,
         /// Distinct model names present across all stored file embeddings, sorted.
         models_in_index: Vec<String>,
+        /// Provenance for every Tier 3 ingestion source registered on this
+        /// daemon, sorted by `source_id`. Added in v2.1 to let clients
+        /// surface "SCIP imported N hours ago" warnings without the daemon
+        /// taking a position on what "stale" means. `#[serde(default)]`;
+        /// older daemons return an empty vector.
+        #[serde(default)]
+        tier3_sources: Vec<Tier3Source>,
     },
     /// Response to [`ClientMessage::QueryFileStatus`].
     FileStatusResult {
@@ -511,6 +518,37 @@ pub enum ServerMessage {
     },
 }
 
+/// Provenance record for a Tier 3 ingestion source (typically a SCIP
+/// import). Exposes *what* produced the imported symbols and *when* —
+/// nothing about whether the source repo has since changed. Staleness
+/// policy is left to the caller: compare `imported_at_ms` against a
+/// freshness threshold, or pin `project_root` externally to a commit
+/// hash out-of-band. The daemon deliberately does no detection of its
+/// own; stale Tier 3 symbols live in the graph at their original
+/// confidence until the source is re-imported.
+///
+/// Returned inside [`ServerMessage::IndexStatusResult::tier3_sources`].
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct Tier3Source {
+    /// Caller-supplied stable identifier (e.g. `sha256("scip-rust:/repo")`).
+    /// Re-registering the same `source_id` overwrites the prior record,
+    /// which is the intended mechanism for refreshing `imported_at_ms`
+    /// after a re-import.
+    pub source_id: String,
+    /// Producer name from SCIP `Metadata.tool_info.name` (e.g.
+    /// `"scip-rust"`). Empty when the import path had no metadata.
+    pub tool_name: String,
+    /// Producer version from SCIP `Metadata.tool_info.version`.
+    pub tool_version: String,
+    /// SCIP `Metadata.project_root` — a `file://` URL identifying the
+    /// source tree the producer indexed. Clients that want commit-level
+    /// staleness can resolve this to a working tree and compare HEAD.
+    pub project_root: String,
+    /// Unix timestamp (ms) when the daemon accepted the registration.
+    /// Re-registration updates this in place.
+    pub imported_at_ms: i64,
+}
+
 /// Stable, machine-readable category for [`ServerMessage::Error`].
 ///
 /// Clients branch on this field instead of string-matching the free-form
@@ -1021,6 +1059,23 @@ pub enum ClientMessage {
         #[serde(default)]
         model: Option<String>,
     },
+
+    /// Record provenance for a Tier 3 ingestion batch. Typically called
+    /// once by `lip import --push-to-daemon` before streaming SCIP
+    /// `Delta` messages, so `QueryIndexStatus` can later report which
+    /// producer generated the imported symbols and when.
+    ///
+    /// Idempotent: re-registering the same `source_id` overwrites the
+    /// previous record, refreshing `imported_at_ms` to the new
+    /// import time. Acknowledged with `DeltaAck`.
+    ///
+    /// The daemon does *not* infer freshness from this record — stale
+    /// Tier 3 symbols remain in the graph at their original confidence
+    /// until the caller re-imports. Surfacing the provenance lets
+    /// clients decide when to warn a user that imported data has aged.
+    RegisterTier3Source {
+        source: Tier3Source,
+    },
 }
 
 impl ClientMessage {
@@ -1081,6 +1136,7 @@ impl ClientMessage {
             "explain_match",
             "embed_text",
             "stream_context",
+            "register_tier3_source",
         ]
         .iter()
         .map(|s| (*s).to_owned())
@@ -1149,6 +1205,7 @@ impl ClientMessage {
             ClientMessage::ExplainMatch { .. } => "explain_match",
             ClientMessage::EmbedText { .. } => "embed_text",
             ClientMessage::StreamContext { .. } => "stream_context",
+            ClientMessage::RegisterTier3Source { .. } => "register_tier3_source",
         }
     }
 
@@ -1272,6 +1329,49 @@ mod tests {
         assert!(supported_messages.contains(&"stream_context".to_string()));
     }
 
+    #[test]
+    fn register_tier3_source_round_trips() {
+        let msg = ClientMessage::RegisterTier3Source {
+            source: Tier3Source {
+                source_id: "sha256:abc".into(),
+                tool_name: "scip-rust".into(),
+                tool_version: "0.3.1".into(),
+                project_root: "file:///repo".into(),
+                imported_at_ms: 1_700_000_000_000,
+            },
+        };
+        let rt = round_trip_client(&msg);
+        let ClientMessage::RegisterTier3Source { source } = rt else {
+            panic!("wrong variant");
+        };
+        assert_eq!(source.source_id, "sha256:abc");
+        assert_eq!(source.tool_name, "scip-rust");
+        assert_eq!(source.tool_version, "0.3.1");
+        assert_eq!(source.project_root, "file:///repo");
+        assert_eq!(source.imported_at_ms, 1_700_000_000_000);
+    }
+
+    /// Older daemons (pre-v2.1) will serialise `IndexStatusResult`
+    /// without a `tier3_sources` field; newer deserialisers must
+    /// treat that as an empty list, not a parse failure.
+    #[test]
+    fn index_status_result_accepts_missing_tier3_sources() {
+        let legacy = serde_json::json!({
+            "type": "index_status_result",
+            "indexed_files": 7,
+            "pending_embedding_files": 0,
+            "last_updated_ms": 123,
+            "embedding_model": null,
+            "mixed_models": false,
+            "models_in_index": []
+        });
+        let parsed: ServerMessage = serde_json::from_value(legacy).unwrap();
+        let ServerMessage::IndexStatusResult { tier3_sources, .. } = parsed else {
+            panic!("wrong variant");
+        };
+        assert!(tier3_sources.is_empty());
+    }
+
     /// Drift guard: every tag produced by [`ClientMessage::variant_tag`]
     /// must also appear in [`ClientMessage::supported_messages`], and
     /// the two lists must be the same size. Combined with the
@@ -1489,6 +1589,15 @@ mod tests {
                 max_tokens: 0,
                 model: None,
             },
+            ClientMessage::RegisterTier3Source {
+                source: Tier3Source {
+                    source_id: String::new(),
+                    tool_name: String::new(),
+                    tool_version: String::new(),
+                    project_root: String::new(),
+                    imported_at_ms: 0,
+                },
+            },
         ];
 
         let supported = ClientMessage::supported_messages();
diff --git a/tools/lip-cli/src/cmd/import.rs b/tools/lip-cli/src/cmd/import.rs
index 4898b46..9779420 100644
--- a/tools/lip-cli/src/cmd/import.rs
+++ b/tools/lip-cli/src/cmd/import.rs
@@ -5,7 +5,7 @@ use prost::Message;
 use tokio::io::{AsyncReadExt, AsyncWriteExt};
 use tokio::net::UnixStream;
 
-use lip::query_graph::{ClientMessage, ServerMessage};
+use lip::query_graph::{ClientMessage, ServerMessage, Tier3Source};
 use lip::schema::{
     sha256_hex, Action, OwnedDelta, OwnedDocument, OwnedEventStream, OwnedOccurrence, OwnedRange,
     OwnedSymbolInfo, Role, SymbolKind,
@@ -61,6 +61,12 @@ pub async fn run(args: ImportArgs) -> anyhow::Result<()> {
         args.scip_file.display()
     );
 
+    // Capture Tier 3 provenance before consuming `index.documents`.
+    // `project_root` is a file:// URL identifying the source tree the
+    // producer indexed; clients can later resolve it to a working tree
+    // to compare HEAD against `imported_at_ms` for staleness.
+    let tier3_source = build_tier3_source(&index, &args.scip_file);
+
     let confidence = args.confidence;
     let mut deltas: Vec<OwnedDelta> = index
         .documents
@@ -100,6 +106,34 @@ pub async fn run(args: ImportArgs) -> anyhow::Result<()> {
         let mut stream = UnixStream::connect(&socket_path).await.map_err(|e| {
             anyhow::anyhow!("cannot connect to daemon at {}: {e}", socket_path.display())
         })?;
+
+        // Register provenance before streaming deltas so the daemon can
+        // timestamp the import and expose the record via `QueryIndexStatus`.
+        // Older daemons that predate `register_tier3_source` will reply
+        // with `UnknownMessage`; we tolerate that and proceed — the deltas
+        // still land, the provenance is just unavailable.
+        let reg_msg = ClientMessage::RegisterTier3Source {
+            source: tier3_source,
+        };
+        let reg_body = serde_json::to_vec(&reg_msg)?;
+        stream.write_all(&(reg_body.len() as u32).to_be_bytes()).await?;
+        stream.write_all(&reg_body).await?;
+        let mut reg_len = [0u8; 4];
+        stream.read_exact(&mut reg_len).await?;
+        let reg_resp_len = u32::from_be_bytes(reg_len) as usize;
+        let mut reg_resp_bytes = vec![0u8; reg_resp_len];
+        stream.read_exact(&mut reg_resp_bytes).await?;
+        // We do not fail on UnknownMessage — that only means the daemon
+        // is pre-v2.1. We do surface a genuine DeltaAck rejection.
+        if let Ok(ServerMessage::DeltaAck { accepted: false, error, .. }) =
+            serde_json::from_slice::<ServerMessage>(&reg_resp_bytes)
+        {
+            eprintln!(
+                "warning: daemon rejected tier3 provenance registration: {}",
+                error.as_deref().unwrap_or("?")
+            );
+        }
+
         let total = deltas.len();
         for (seq, delta) in deltas.into_iter().enumerate() {
             let Some(doc) = delta.document else { continue };
@@ -154,6 +188,45 @@ pub async fn run(args: ImportArgs) -> anyhow::Result<()> {
     Ok(())
 }
 
+/// Build a Tier 3 provenance record from a SCIP index.
+///
+/// `source_id` is derived from producer name + `project_root` (or the
+/// .scip filename when metadata is absent), so re-imports of the same
+/// source refresh the record in place rather than growing the list.
+fn build_tier3_source(index: &scip::Index, scip_path: &std::path::Path) -> Tier3Source {
+    let (tool_name, tool_version, project_root) = match index.metadata.as_ref() {
+        Some(md) => {
+            let (tn, tv) = md
+                .tool_info
+                .as_ref()
+                .map(|ti| (ti.name.clone(), ti.version.clone()))
+                .unwrap_or_default();
+            (tn, tv, md.project_root.clone())
+        }
+        None => (String::new(), String::new(), String::new()),
+    };
+
+    let fingerprint = if project_root.is_empty() {
+        scip_path.display().to_string()
+    } else {
+        project_root.clone()
+    };
+    let source_id = sha256_hex(format!("{tool_name}:{fingerprint}").as_bytes());
+
+    let imported_at_ms = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)
+        .map(|d| d.as_millis() as i64)
+        .unwrap_or(0);
+
+    Tier3Source {
+        source_id,
+        tool_name,
+        tool_version,
+        project_root,
+        imported_at_ms,
+    }
+}
+
 // ─── Conversion helpers ───────────────────────────────────────────────────────
 
 fn convert_document(doc: scip::Document, confidence: u8) -> OwnedDelta {
diff --git a/tools/lip-cli/src/cmd/mcp.rs b/tools/lip-cli/src/cmd/mcp.rs
index 4c2bf03..3f43472 100644
--- a/tools/lip-cli/src/cmd/mcp.rs
+++ b/tools/lip-cli/src/cmd/mcp.rs
@@ -535,6 +535,7 @@ fn format_response(tool: &str, msg: &ServerMessage) -> String {
             embedding_model,
             mixed_models,
             models_in_index,
+            tier3_sources,
         } => {
             let last = last_updated_ms
                 .map(|ms| format!("  last_updated={ms}ms"))
@@ -548,7 +549,21 @@ fn format_response(tool: &str, msg: &ServerMessage) -> String {
             } else {
                 String::new()
             };
-            format!("indexed={indexed_files}  pending_embeddings={pending_embedding_files}{last}{model}{mixed}")
+            let tier3 = if tier3_sources.is_empty() {
+                String::new()
+            } else {
+                let parts: Vec<String> = tier3_sources
+                    .iter()
+                    .map(|s| {
+                        format!(
+                            "{}@{}/{} imported_at={}ms",
+                            s.tool_name, s.tool_version, s.source_id, s.imported_at_ms
+                        )
+                    })
+                    .collect();
+                format!("  tier3=[{}]", parts.join(", "))
+            };
+            format!("indexed={indexed_files}  pending_embeddings={pending_embedding_files}{last}{model}{mixed}{tier3}")
         }
         ServerMessage::FileStatusResult {
             uri,

From f9be039cb9c256917497eeb4dfd548d0d04af8d3 Mon Sep 17 00:00:00 2001
From: Lisa <lisa.welsch1985@gmail.com>
Date: Wed, 15 Apr 2026 17:10:33 +0200
Subject: [PATCH 10/12] feat(2.1): lip import --no-provenance opt-out flag
 (#15)

Ephemeral or test imports can skip `RegisterTier3Source` so they
don't pollute a long-lived daemon's `tier3_sources` list. No effect
on the default EventStream-JSON output path, where provenance was
never sent anyway.

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                    |  1 +
 tools/lip-cli/src/cmd/import.rs | 66 ++++++++++++++++++++++-----------
 2 files changed, 46 insertions(+), 21 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0fcfadb..0bd2eb7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,7 @@ All notable changes to this project are documented here.
 
 - **`EmbedText { text, model? }`** — embed an arbitrary text string and return the raw vector. Closes the gap left by `EmbeddingBatch` (URI-only) and `QueryNearestByText` (embeds internally but discards the vector). Callers re-ranking with their own scoring (centroid arithmetic, federated nearest-neighbour, lexical-then-semantic re-rank) get the embedding directly instead of building a centroid out of nearest-neighbour seeds. Returns `EmbedTextResult { vector: Vec<f32>, embedding_model: String }`. Not permitted inside `BatchQuery` (requires async HTTP).
 - **`RegisterTier3Source { source: Tier3Source }`** + **`IndexStatusResult.tier3_sources`** — expose provenance for Tier 3 ingestion batches (SCIP imports). `Tier3Source { source_id, tool_name, tool_version, project_root, imported_at_ms }` records *what* producer generated the symbols and *when* the daemon accepted them. Re-registering the same `source_id` overwrites in place, refreshing `imported_at_ms`. The daemon deliberately does no staleness detection: stale Tier 3 symbols remain in the graph at their original confidence until the caller re-imports. Surfacing provenance lets clients decide when to warn a user that imported data has aged (e.g. `scip-rust imported 3 days ago`). `lip import --push-to-daemon` now sends this before streaming SCIP deltas, with `source_id = sha256(tool_name + ":" + project_root)`. `IndexStatusResult.tier3_sources` is `#[serde(default)]`; older daemons yield an empty vector. Ack'd with `DeltaAck`. Not permitted inside `BatchQuery` (mutation).
+- **`lip import --no-provenance`** — opt out of Tier 3 provenance registration for ephemeral or test imports that should not pollute a long-lived daemon's `tier3_sources` list. No effect on the default EventStream-JSON output path.
 
 **Forward-compat & capability discovery**
 
diff --git a/tools/lip-cli/src/cmd/import.rs b/tools/lip-cli/src/cmd/import.rs
index 9779420..6606b8e 100644
--- a/tools/lip-cli/src/cmd/import.rs
+++ b/tools/lip-cli/src/cmd/import.rs
@@ -44,6 +44,17 @@ pub struct ImportArgs {
     /// Default: 90 (compiler-verified, not locally re-checked).
     #[arg(long, default_value_t = 90)]
     pub confidence: u8,
+
+    /// Skip Tier 3 provenance registration on the daemon.
+    ///
+    /// By default `--push-to-daemon` sends a `RegisterTier3Source`
+    /// message before streaming deltas so `QueryIndexStatus` reports
+    /// who produced the imported symbols and when. Use this flag for
+    /// ephemeral or test imports whose provenance should not pollute
+    /// a long-lived daemon's status output. No effect on the default
+    /// EventStream-JSON output path.
+    #[arg(long)]
+    pub no_provenance: bool,
 }
 
 pub async fn run(args: ImportArgs) -> anyhow::Result<()> {
@@ -65,7 +76,15 @@ pub async fn run(args: ImportArgs) -> anyhow::Result<()> {
     // `project_root` is a file:// URL identifying the source tree the
     // producer indexed; clients can later resolve it to a working tree
     // to compare HEAD against `imported_at_ms` for staleness.
-    let tier3_source = build_tier3_source(&index, &args.scip_file);
+    //
+    // Skipped when `--no-provenance` is set — ephemeral/test imports
+    // opt out of registering so they do not pollute a long-lived
+    // daemon's `tier3_sources` list.
+    let tier3_source = if args.no_provenance {
+        None
+    } else {
+        Some(build_tier3_source(&index, &args.scip_file))
+    };
 
     let confidence = args.confidence;
     let mut deltas: Vec<OwnedDelta> = index
@@ -112,26 +131,31 @@ pub async fn run(args: ImportArgs) -> anyhow::Result<()> {
         // Older daemons that predate `register_tier3_source` will reply
         // with `UnknownMessage`; we tolerate that and proceed — the deltas
         // still land, the provenance is just unavailable.
-        let reg_msg = ClientMessage::RegisterTier3Source {
-            source: tier3_source,
-        };
-        let reg_body = serde_json::to_vec(&reg_msg)?;
-        stream.write_all(&(reg_body.len() as u32).to_be_bytes()).await?;
-        stream.write_all(&reg_body).await?;
-        let mut reg_len = [0u8; 4];
-        stream.read_exact(&mut reg_len).await?;
-        let reg_resp_len = u32::from_be_bytes(reg_len) as usize;
-        let mut reg_resp_bytes = vec![0u8; reg_resp_len];
-        stream.read_exact(&mut reg_resp_bytes).await?;
-        // We do not fail on UnknownMessage — that only means the daemon
-        // is pre-v2.1. We do surface a genuine DeltaAck rejection.
-        if let Ok(ServerMessage::DeltaAck { accepted: false, error, .. }) =
-            serde_json::from_slice::<ServerMessage>(&reg_resp_bytes)
-        {
-            eprintln!(
-                "warning: daemon rejected tier3 provenance registration: {}",
-                error.as_deref().unwrap_or("?")
-            );
+        if let Some(source) = tier3_source {
+            let reg_msg = ClientMessage::RegisterTier3Source { source };
+            let reg_body = serde_json::to_vec(&reg_msg)?;
+            stream.write_all(&(reg_body.len() as u32).to_be_bytes()).await?;
+            stream.write_all(&reg_body).await?;
+            let mut reg_len = [0u8; 4];
+            stream.read_exact(&mut reg_len).await?;
+            let reg_resp_len = u32::from_be_bytes(reg_len) as usize;
+            let mut reg_resp_bytes = vec![0u8; reg_resp_len];
+            stream.read_exact(&mut reg_resp_bytes).await?;
+            // We do not fail on UnknownMessage — that only means the daemon
+            // is pre-v2.1. We do surface a genuine DeltaAck rejection.
+            if let Ok(ServerMessage::DeltaAck {
+                accepted: false,
+                error,
+                ..
+            }) = serde_json::from_slice::<ServerMessage>(&reg_resp_bytes)
+            {
+                eprintln!(
+                    "warning: daemon rejected tier3 provenance registration: {}",
+                    error.as_deref().unwrap_or("?")
+                );
+            }
+        } else {
+            eprintln!("provenance registration skipped (--no-provenance)");
         }
 
         let total = deltas.len();

From 8351cfa93a5f8f28042555f7fc6bfc7ebc3e8378 Mon Sep 17 00:00:00 2001
From: Lisa <lisa.welsch1985@gmail.com>
Date: Wed, 15 Apr 2026 19:11:20 +0200
Subject: [PATCH 11/12] fix(2.1): wire ErrorCode::UnknownModel; pin
 QueryExpansion contract (#16)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ship-blockers flagged in the 2.1.0 review.

**1. ErrorCode::UnknownModel is now actually reachable.**

The embedding HTTP client previously collapsed every endpoint error
into a generic anyhow::Error, forcing the session layer to tag all
HTTP failures as ErrorCode::Internal. The UnknownModel code was
defined and documented but unreachable — exactly the conflation the
error-code split was meant to end.

New `EmbedError` enum with four variants: UnknownModel, Transport,
Protocol, Http. Classification lives in
`daemon/embedding.rs::classify_http_error`:

- 404 → UnknownModel (OpenAI/Ollama/most compatible backends)
- 4xx with `model_not_found`, `"unknown model"`, or
  `"model … not found/invalid/unsupported"` in the body → UnknownModel
- Auth (401), rate-limit (429), and 5xx → Http → Internal at the wire
- Parse / count-mismatch → Protocol → Internal

Conservative on purpose — mentioning "model" alone (e.g. in a token
payload or a parameter error) does not flip it to UnknownModel.

Session.rs routes classification via `embed_error_response(e)` at
all 10 call sites, replacing the hand-rolled `format!("embedding
failed: {e}")` + `ErrorCode::Internal` blocks.

**2. QueryExpansion handler contract pinned by a db-level test.**

Extracted the post-embedding ranking into
`LipDatabase::query_expansion_terms(query_vec, actual_model, top_k)`.
The session handler is now a single call, and the db method is
covered by `query_expansion_terms_rejects_cross_model_scoring` which
puts a matching-model symbol and a cross-model symbol at identical
vectors and asserts only the matching-model term appears. A
regression that silently drops the model filter would flip the
assertion.

Tests: +7 classifier cases, +1 handler-contract test (284 lib tests
pass; was 276).

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                          |   3 +-
 bindings/rust/src/daemon/embedding.rs | 154 ++++++++++++++++++++++++--
 bindings/rust/src/daemon/session.rs   |  88 +++++----------
 bindings/rust/src/query_graph/db.rs   |  79 +++++++++++++
 4 files changed, 251 insertions(+), 73 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0bd2eb7..11146d9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,7 +31,7 @@ All notable changes to this project are documented here.
 - **`ErrorCode`** enum — small, stable set: `unknown_message_type`, `unknown_model`, `embedding_not_configured`, `no_embedding`, `cursor_out_of_range`, `index_locked`, `invalid_request`, `internal` (default). Adding a code is non-breaking; renaming or removing one is breaking.
   - `embedding_not_configured` — daemon has no embedding service (`LIP_EMBEDDING_URL` unset).
   - `no_embedding` — URI has no cached embedding yet; call `EmbeddingBatch` first.
-  - `unknown_model` — caller asked for a model the daemon does not recognize.
+  - `unknown_model` — the embedding endpoint rejected the requested model. Emitted by the daemon when the HTTP backend returns 404 or a 4xx body matching `model_not_found` / `"unknown model"` / `"model … not found/invalid/unsupported"`. Transport, rate-limit, and auth errors stay on `internal` — retrying with the same model only makes sense after a real config change. Classification lives in `daemon/embedding.rs::classify_http_error`.
   - `invalid_request` — request was well-formed on the wire but used incorrectly (e.g. nested `Batch`, or `StreamContext` inside a `Batch`). Distinct from `internal` so clients can avoid retry loops on caller-side mistakes.
 
 **Drift guard**
@@ -40,6 +40,7 @@ All notable changes to this project are documented here.
 
 ### Fixed
 
+- **`QueryExpansion` handler contract pinned by a db-level test.** The post-embedding ranking is now encapsulated in `LipDatabase::query_expansion_terms(query_vec, actual_model, top_k)`, which the handler calls in one line. A regression that drops the model filter would cause `query_expansion_terms_rejects_cross_model_scoring` (db.rs) to fail, closing the earlier gap where the fix shipped without a paired assertion.
 - **`QueryExpansion` now honors the caller's model pin.** Previously the handler embedded the query with the requested model but then ranked candidates across *all* stored symbol embeddings regardless of which model produced them — cross-model cosine scores are not meaningful, so the returned "expansion terms" were effectively noise whenever the index held mixed-model vectors. Handler now captures the actual model returned by `embed_texts` and passes it through a new `model_filter: Option<&str>` parameter on `LipDatabase::nearest_symbol_by_vector`, restricting candidates to symbols embedded with the same model. `SimilarSymbols` (which resolves from a URI's own cached embedding) keeps the old unfiltered behavior by passing `None`.
 
 ---
diff --git a/bindings/rust/src/daemon/embedding.rs b/bindings/rust/src/daemon/embedding.rs
index 44aa7ff..cfb01a9 100644
--- a/bindings/rust/src/daemon/embedding.rs
+++ b/bindings/rust/src/daemon/embedding.rs
@@ -10,9 +10,79 @@
 //! When `LIP_EMBEDDING_URL` is unset, [`EmbeddingClient::from_env`] returns `None`
 //! and all embedding requests return a sensible error to the caller.
 
-use anyhow::Context;
 use serde::{Deserialize, Serialize};
 
+/// Classified failure from the embedding HTTP endpoint.
+///
+/// The variants map directly to [`crate::query_graph::ErrorCode`]
+/// categories so the daemon can propagate a precise classification to
+/// clients instead of collapsing every endpoint failure into `Internal`.
+/// Callers that only need a display string should use the `Display` impl.
+#[derive(Debug)]
+pub enum EmbedError {
+    /// The endpoint rejected the requested model name — either 404, or
+    /// a 4xx whose body names the model. Maps to `ErrorCode::UnknownModel`.
+    /// Retrying with the same model is pointless.
+    UnknownModel(String),
+    /// HTTP transport failure, timeout, or TLS error. Maps to
+    /// `ErrorCode::Internal`. Retry is often safe.
+    Transport(String),
+    /// The endpoint returned a response we could not parse, or the
+    /// vector count did not match the input count. Maps to
+    /// `ErrorCode::Internal`. Indicates a backend misconfiguration.
+    Protocol(String),
+    /// Non-2xx status that does not clearly match any of the above.
+    /// Maps to `ErrorCode::Internal`.
+    Http(String),
+}
+
+impl std::fmt::Display for EmbedError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            EmbedError::UnknownModel(m)
+            | EmbedError::Transport(m)
+            | EmbedError::Protocol(m)
+            | EmbedError::Http(m) => f.write_str(m),
+        }
+    }
+}
+
+impl std::error::Error for EmbedError {}
+
+/// Classify an embedding endpoint's non-2xx response into the narrowest
+/// applicable [`EmbedError`] variant.
+///
+/// Heuristic: 404 is always an unknown-model signal (OpenAI, Ollama, and
+/// most compatible backends 404 on an unrecognised model). Other 4xx are
+/// classified as `UnknownModel` only when the body mentions the model —
+/// OpenAI-compatible errors typically carry `"code":"model_not_found"`
+/// or a message containing `"model"` for this case. Everything else
+/// (5xx, 4xx without model keyword) falls through to `Http`.
+fn classify_http_error(status: reqwest::StatusCode, body: &str) -> EmbedError {
+    let msg = format!("embedding endpoint returned {status}: {body}");
+    if status == reqwest::StatusCode::NOT_FOUND {
+        return EmbedError::UnknownModel(msg);
+    }
+    if status.is_client_error() {
+        let lower = body.to_ascii_lowercase();
+        if lower.contains("model_not_found") || lower.contains("unknown model") {
+            return EmbedError::UnknownModel(msg);
+        }
+        // Conservative: generic 4xx with "model" mention, treat as model issue
+        // only when combined with a "not found" / "invalid" / "unsupported" hint,
+        // to avoid misclassifying auth / rate-limit errors.
+        let looks_model_shaped = lower.contains("model")
+            && (lower.contains("not found")
+                || lower.contains("invalid")
+                || lower.contains("unsupported")
+                || lower.contains("does not exist"));
+        if looks_model_shaped {
+            return EmbedError::UnknownModel(msg);
+        }
+    }
+    EmbedError::Http(msg)
+}
+
 /// Thin client around a single OpenAI-compatible embedding endpoint.
 pub struct EmbeddingClient {
     url: String,
@@ -73,12 +143,14 @@ impl EmbeddingClient {
     ///
     /// # Errors
     ///
-    /// Propagates HTTP, serialisation, and API errors.
+    /// Returns an [`EmbedError`] classified so the daemon can map directly
+    /// to a [`crate::query_graph::ErrorCode`] without inspecting the
+    /// message string.
     pub async fn embed_texts(
         &self,
         texts: &[String],
         model_override: Option<&str>,
-    ) -> anyhow::Result<(Vec<Vec<f32>>, String)> {
+    ) -> Result<(Vec<Vec<f32>>, String), EmbedError> {
         if texts.is_empty() {
             return Ok((vec![], self.default_model.clone()));
         }
@@ -93,31 +165,89 @@ impl EmbeddingClient {
             .json(&body)
             .send()
             .await
-            .context("embedding HTTP request failed")?;
+            .map_err(|e| EmbedError::Transport(format!("embedding HTTP request failed: {e}")))?;
 
         if !resp.status().is_success() {
             let status = resp.status();
             let text = resp.text().await.unwrap_or_default();
-            anyhow::bail!("embedding endpoint returned {status}: {text}");
+            return Err(classify_http_error(status, &text));
         }
 
         let parsed: EmbedResponse = resp
             .json()
             .await
-            .context("failed to parse embedding response")?;
+            .map_err(|e| EmbedError::Protocol(format!("failed to parse embedding response: {e}")))?;
 
         // Re-order by index field to match the input order.
         let mut data = parsed.data;
         data.sort_by_key(|d| d.index);
 
-        anyhow::ensure!(
-            data.len() == texts.len(),
-            "embedding endpoint returned {} vectors for {} inputs",
-            data.len(),
-            texts.len()
-        );
+        if data.len() != texts.len() {
+            return Err(EmbedError::Protocol(format!(
+                "embedding endpoint returned {} vectors for {} inputs",
+                data.len(),
+                texts.len()
+            )));
+        }
 
         let vectors = data.into_iter().map(|d| d.embedding).collect();
         Ok((vectors, parsed.model))
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use reqwest::StatusCode;
+
+    #[test]
+    fn classify_404_is_unknown_model() {
+        let e = classify_http_error(StatusCode::NOT_FOUND, "model not found");
+        assert!(matches!(e, EmbedError::UnknownModel(_)));
+    }
+
+    #[test]
+    fn classify_openai_model_not_found_code() {
+        // OpenAI API shape.
+        let body = r#"{"error":{"code":"model_not_found","message":"The model 'foo' does not exist"}}"#;
+        let e = classify_http_error(StatusCode::BAD_REQUEST, body);
+        assert!(matches!(e, EmbedError::UnknownModel(_)));
+    }
+
+    #[test]
+    fn classify_ollama_model_unknown() {
+        let body = r#"{"error":"model 'nomic-embed-text' not found, try pulling it first"}"#;
+        let e = classify_http_error(StatusCode::NOT_FOUND, body);
+        assert!(matches!(e, EmbedError::UnknownModel(_)));
+    }
+
+    #[test]
+    fn classify_auth_error_stays_http() {
+        // 401 unauthorized must not be misclassified as UnknownModel just
+        // because a token payload might mention "model".
+        let body = "Unauthorized";
+        let e = classify_http_error(StatusCode::UNAUTHORIZED, body);
+        assert!(matches!(e, EmbedError::Http(_)));
+    }
+
+    #[test]
+    fn classify_rate_limit_stays_http() {
+        let e = classify_http_error(StatusCode::TOO_MANY_REQUESTS, "rate limit");
+        assert!(matches!(e, EmbedError::Http(_)));
+    }
+
+    #[test]
+    fn classify_5xx_stays_http() {
+        let e = classify_http_error(StatusCode::INTERNAL_SERVER_ERROR, "backend died");
+        assert!(matches!(e, EmbedError::Http(_)));
+    }
+
+    #[test]
+    fn classify_4xx_mentioning_model_without_not_found_keyword_stays_http() {
+        // "model temperature too high" would mention "model" but is not
+        // an unknown-model signal. Conservative classifier keeps it Http.
+        let body = "model temperature parameter rejected";
+        let e = classify_http_error(StatusCode::BAD_REQUEST, body);
+        assert!(matches!(e, EmbedError::Http(_)));
+    }
+}
diff --git a/bindings/rust/src/daemon/session.rs b/bindings/rust/src/daemon/session.rs
index 6515f01..32be340 100644
--- a/bindings/rust/src/daemon/session.rs
+++ b/bindings/rust/src/daemon/session.rs
@@ -9,7 +9,7 @@ use tracing::{debug, error, info, warn};
 use crate::query_graph::{BatchQueryResult, ClientMessage, ErrorCode, LipDatabase, ServerMessage};
 use crate::schema::{Action, IndexingState, OwnedAnnotationEntry, OwnedRange};
 
-use super::embedding::EmbeddingClient;
+use super::embedding::{EmbedError, EmbeddingClient};
 use super::journal::{Journal, JournalEntry};
 use super::manifest::ManifestResponse;
 use super::tier2_manager::VerificationJob;
@@ -19,6 +19,22 @@ use super::watcher::{uri_to_path, FileWatcherHandle};
 /// Clients can detect drift by comparing against this value in `HandshakeResult`.
 const PROTOCOL_VERSION: u32 = 2;
 
+/// Convert a classified [`EmbedError`] into the appropriate wire-level
+/// error response. Centralises the mapping so every embedding call site
+/// reports the same [`ErrorCode`] category for the same failure mode.
+fn embed_error_response(e: EmbedError) -> ServerMessage {
+    let code = match e {
+        EmbedError::UnknownModel(_) => ErrorCode::UnknownModel,
+        EmbedError::Transport(_) | EmbedError::Protocol(_) | EmbedError::Http(_) => {
+            ErrorCode::Internal
+        }
+    };
+    ServerMessage::Error {
+        message: format!("embedding failed: {e}"),
+        code,
+    }
+}
+
 /// Per-connection session state.
 pub struct Session {
     pub db: Arc<Mutex<LipDatabase>>,
@@ -534,10 +550,7 @@ impl Session {
                     match client.embed_texts(&miss_texts, model.as_deref()).await {
                         Ok(r) => r,
                         Err(e) => {
-                            return ServerMessage::Error {
-                                message: format!("embedding failed: {e}"),
-                                code: ErrorCode::Internal,
-                            }
+                            return embed_error_response(e)
                         }
                     }
                 };
@@ -673,10 +686,7 @@ impl Session {
                 let (mut vecs, _) = match client.embed_texts(&texts, model.as_deref()).await {
                     Ok(r) => r,
                     Err(e) => {
-                        return ServerMessage::Error {
-                            message: format!("embedding failed: {e}"),
-                            code: ErrorCode::Internal,
-                        }
+                        return embed_error_response(e)
                     }
                 };
                 let query_vec = vecs.pop().unwrap_or_default();
@@ -704,10 +714,7 @@ impl Session {
                 let (vecs, _) = match client.embed_texts(&queries, model.as_deref()).await {
                     Ok(r) => r,
                     Err(e) => {
-                        return ServerMessage::Error {
-                            message: format!("embedding failed: {e}"),
-                            code: ErrorCode::Internal,
-                        }
+                        return embed_error_response(e)
                     }
                 };
                 let db = self.db.lock().await;
@@ -766,10 +773,7 @@ impl Session {
                         match client.embed_texts(&texts, model.as_deref()).await {
                             Ok(r) => r,
                             Err(e) => {
-                                return ServerMessage::Error {
-                                    message: format!("embedding failed: {e}"),
-                                    code: ErrorCode::Internal,
-                                }
+                                return embed_error_response(e)
                             }
                         };
                     let v = vecs.pop().unwrap_or_default();
@@ -889,33 +893,12 @@ impl Session {
                     match client.embed_texts(&[query], model.as_deref()).await {
                         Ok(r) => r,
                         Err(e) => {
-                            return ServerMessage::Error {
-                                message: format!("embedding failed: {e}"),
-                                code: ErrorCode::Internal,
-                            }
+                            return embed_error_response(e)
                         }
                     };
                 let query_vec = vecs.pop().unwrap_or_default();
                 let mut db = self.db.lock().await;
-                // Pin the search to symbols embedded with the same model that
-                // produced `query_vec`. Cross-model cosine scores are
-                // meaningless, so silently mixing them gives the caller noisy
-                // "expansion terms" that rank random symbols highest.
-                let hits =
-                    db.nearest_symbol_by_vector(&query_vec, top_k, None, Some(&actual_model));
-                // Resolve display names; fall back to URI fragment.
-                let mut terms = Vec::with_capacity(hits.len());
-                for item in hits {
-                    let name = match db.symbol_by_uri(&item.uri) {
-                        Some(s) => s.display_name.clone(),
-                        None => item
-                            .uri
-                            .rfind('#')
-                            .map(|i| item.uri[i + 1..].to_owned())
-                            .unwrap_or(item.uri.clone()),
-                    };
-                    terms.push(name);
-                }
+                let terms = db.query_expansion_terms(&query_vec, &actual_model, top_k);
                 ServerMessage::QueryExpansionResult { terms }
             }
 
@@ -1210,10 +1193,7 @@ impl Session {
                 let (vecs, _) = match client.embed_texts(&chunks, model.as_deref()).await {
                     Ok(r) => r,
                     Err(e) => {
-                        return ServerMessage::Error {
-                            message: format!("embedding failed: {e}"),
-                            code: ErrorCode::Internal,
-                        }
+                        return embed_error_response(e)
                     }
                 };
                 let mut boundaries = Vec::new();
@@ -1261,10 +1241,7 @@ impl Session {
                 {
                     Ok(r) => r,
                     Err(e) => {
-                        return ServerMessage::Error {
-                            message: format!("embedding failed: {e}"),
-                            code: ErrorCode::Internal,
-                        }
+                        return embed_error_response(e)
                     }
                 };
                 if vecs.len() < 2 {
@@ -1475,10 +1452,7 @@ impl Session {
                         match client.embed_texts(&texts, model.as_deref()).await {
                             Ok((mut vecs, m)) => (vecs.pop().unwrap_or_default(), m),
                             Err(e) => {
-                                return ServerMessage::Error {
-                                    message: format!("embedding failed: {e}"),
-                                    code: ErrorCode::Internal,
-                                }
+                                return embed_error_response(e)
                             }
                         }
                     }
@@ -1529,10 +1503,7 @@ impl Session {
                     match client.embed_texts(&chunk_texts, model.as_deref()).await {
                         Ok(r) => r,
                         Err(e) => {
-                            return ServerMessage::Error {
-                                message: format!("embedding failed: {e}"),
-                                code: ErrorCode::Internal,
-                            }
+                            return embed_error_response(e)
                         }
                     };
                 let _ = chunk_model; // we report query_model, not per-chunk model
@@ -1588,10 +1559,7 @@ impl Session {
                         vector: vecs.pop().unwrap_or_default(),
                         embedding_model: used_model,
                     },
-                    Err(e) => ServerMessage::Error {
-                        message: format!("embedding failed: {e}"),
-                        code: ErrorCode::Internal,
-                    },
+                    Err(e) => embed_error_response(e),
                 }
             }
 
diff --git a/bindings/rust/src/query_graph/db.rs b/bindings/rust/src/query_graph/db.rs
index 66add73..5322add 100644
--- a/bindings/rust/src/query_graph/db.rs
+++ b/bindings/rust/src/query_graph/db.rs
@@ -926,6 +926,35 @@ impl LipDatabase {
         names
     }
 
+    /// Rank symbols semantically related to `query_vec` (produced by
+    /// `actual_model`) and return their display names as query-expansion
+    /// terms.
+    ///
+    /// Encapsulates the post-embedding work of the `QueryExpansion`
+    /// handler so the daemon-side wiring (filter results to symbols
+    /// embedded with `actual_model`, then resolve display names) is
+    /// pinned by a db-level test and cannot silently regress in the
+    /// session handler. Cross-model cosine scores are meaningless —
+    /// mixing them would rank random symbols highest.
+    pub fn query_expansion_terms(
+        &mut self,
+        query_vec: &[f32],
+        actual_model: &str,
+        top_k: usize,
+    ) -> Vec<String> {
+        let hits = self.nearest_symbol_by_vector(query_vec, top_k, None, Some(actual_model));
+        let uris: Vec<String> = hits.into_iter().map(|item| item.uri).collect();
+        uris.into_iter()
+            .map(|uri| match self.symbol_by_uri(&uri) {
+                Some(s) => s.display_name,
+                None => uri
+                    .rfind('#')
+                    .map(|i| uri[i + 1..].to_owned())
+                    .unwrap_or(uri),
+            })
+            .collect()
+    }
+
     /// Find the `top_k` symbols whose embedding is most similar (cosine) to `query_vec`.
     ///
     /// Mirrors `nearest_by_vector` but operates over `symbol_embeddings`.
@@ -3012,6 +3041,56 @@ impl Greeter {
         assert_eq!(pinned[0].uri, "lip://local/f.rs#alpha");
     }
 
+    /// Pins the `QueryExpansion` handler contract: when the embedding
+    /// service returns model X for the query, the subsequent ranking
+    /// must be restricted to symbols embedded with model X.
+    ///
+    /// This test mirrors the exact call the session handler makes
+    /// (see `session.rs::ClientMessage::QueryExpansion`). A regression
+    /// that passes `None` for the model filter — which would silently
+    /// re-introduce cross-model cosine scoring — would cause
+    /// `cross-model-vector` to appear in the expansion terms and fail
+    /// this assertion.
+    #[test]
+    fn query_expansion_terms_rejects_cross_model_scoring() {
+        let mut db = LipDatabase::new();
+
+        // Two symbols in different models, both aligned with the query
+        // vector. Naive (unfiltered) cosine would rank both highly.
+        let f_uri = "file:///src/f.rs".to_owned();
+        db.upsert_file(
+            f_uri.clone(),
+            "fn matching_model() {}\nfn cross_model_vector() {}".into(),
+            "rust".into(),
+        );
+        db.set_symbol_embedding(
+            "lip://local/f.rs#matching_model",
+            vec![1.0, 0.0],
+            "model-a",
+        );
+        db.set_symbol_embedding(
+            "lip://local/f.rs#cross_model_vector",
+            vec![1.0, 0.0],
+            "model-b",
+        );
+
+        let query_vec = vec![1.0_f32, 0.0];
+
+        // The embedding service would have returned "model-a" for the
+        // query. Handler passes that through.
+        let terms = db.query_expansion_terms(&query_vec, "model-a", 5);
+
+        assert!(
+            terms.iter().any(|t| t.contains("matching_model")),
+            "same-model term must appear: got {terms:?}"
+        );
+        assert!(
+            !terms.iter().any(|t| t.contains("cross_model_vector")),
+            "cross-model term must NOT appear — indicates the filter was \
+             bypassed: got {terms:?}"
+        );
+    }
+
     // ── outliers ──────────────────────────────────────────────────────────
 
     #[test]

From 9b1732bb996511e01d2ea658d8dd230a4ce6e027 Mon Sep 17 00:00:00 2001
From: Lisa <lisa.welsch1985@gmail.com>
Date: Wed, 15 Apr 2026 20:44:24 +0200
Subject: [PATCH 12/12] test(watcher): poll instead of fixed sleep to reduce CI
 flakes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

FSEvents / inotify latency varies under load; a fixed 3s sleep was
marginal on slow runners. Poll up to 15s with 100ms ticks instead —
fast in the happy case, robust when the runner is loaded.
---
 bindings/rust/src/daemon/watcher.rs | 31 +++++++++++++++++++----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/bindings/rust/src/daemon/watcher.rs b/bindings/rust/src/daemon/watcher.rs
index 7e5f647..c831335 100644
--- a/bindings/rust/src/daemon/watcher.rs
+++ b/bindings/rust/src/daemon/watcher.rs
@@ -314,16 +314,25 @@ mod tests {
             write!(f, "fn updated() {{}}").unwrap();
         }
 
-        // Wait for the watcher to pick up the change.
-        // FSEvents on macOS batches events; allow up to 3 seconds.
-        tokio::time::sleep(Duration::from_millis(3000)).await;
-
-        // The db should now contain the new text.
-        let db_guard = db.lock().await;
-        assert_eq!(
-            db_guard.file_text(&uri),
-            Some("fn updated() {}"),
-            "watcher should have updated db with new file content"
-        );
+        // Poll for the watcher to pick up the change.
+        // FSEvents on macOS batches events and latency varies widely
+        // under load — poll up to 15s rather than a fixed sleep to
+        // avoid flaking CI on slow runners.
+        let deadline = std::time::Instant::now() + Duration::from_secs(15);
+        loop {
+            if db.lock().await.file_text(&uri).map(str::to_owned)
+                == Some("fn updated() {}".to_owned())
+            {
+                break;
+            }
+            if std::time::Instant::now() >= deadline {
+                let last = db.lock().await.file_text(&uri).map(str::to_owned);
+                panic!(
+                    "watcher should have updated db with new file content within 15s; \
+                     last observed text was {last:?}"
+                );
+            }
+            tokio::time::sleep(Duration::from_millis(100)).await;
+        }
     }
 }