From f081f21cc99c3fc11d998d62d4d52cae9952df34 Mon Sep 17 00:00:00 2001
From: Antawari <antawari@gmail.com>
Date: Mon, 29 Jun 2026 12:31:01 -0600
Subject: [PATCH 1/2] Add a persistent sqlite knowledge-vault backend
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A third VaultBackend implementation backed by the standard-library
sqlite3 module — no extra dependencies, so it runs in CI (unlike the
embedded-vector backend, whose optional deps are absent there). It
conforms to the existing VaultBackend protocol and the frozen vault
entry shape, and mirrors the in-memory backend's keyword retrieval
byte-for-byte (LIKE prefilter, ranking done in Python) so results are
deterministic and identical across environments.

Storage is a single table with a forward-only versioned schema recorded
in a small meta table; all SQL is parameterized. Keyword retrieval only,
no embeddings. Wired into the backend factory behind a new "sqlite"
option; the existing options are untouched. Tests mirror the in-memory
contract plus persistence across reopening the same file.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/bonfire/knowledge/__init__.py       |   7 +
 src/bonfire/knowledge/sqlite_backend.py | 233 ++++++++++++++++++++++
 tests/unit/test_sqlite_vault.py         | 247 ++++++++++++++++++++++++
 3 files changed, 487 insertions(+)
 create mode 100644 src/bonfire/knowledge/sqlite_backend.py
 create mode 100644 tests/unit/test_sqlite_vault.py

diff --git a/src/bonfire/knowledge/__init__.py b/src/bonfire/knowledge/__init__.py
index 8ab4799e..125c6a21 100644
--- a/src/bonfire/knowledge/__init__.py
+++ b/src/bonfire/knowledge/__init__.py
@@ -31,6 +31,8 @@ def get_vault_backend(
 
     - ``enabled=False`` → :class:`InMemoryVaultBackend`
     - ``backend="memory"`` → :class:`InMemoryVaultBackend`
+    - ``backend="sqlite"`` → :class:`SqliteVaultBackend` (persistent, stdlib
+      only; ``vault_path`` is the database file, ``":memory:"`` for ephemeral)
     - ``backend="lancedb"`` → :class:`LanceDBBackend`
     - anything else → :class:`InMemoryVaultBackend` (safe fallback)
     """
@@ -39,6 +41,11 @@ def get_vault_backend(
 
         return InMemoryVaultBackend()
 
+    if backend == "sqlite":
+        from bonfire.knowledge.sqlite_backend import SqliteVaultBackend
+
+        return SqliteVaultBackend(db_path=vault_path)
+
     if backend == "lancedb":
         from bonfire.knowledge.backend import LanceDBBackend
         from bonfire.knowledge.embeddings import get_embedder
diff --git a/src/bonfire/knowledge/sqlite_backend.py b/src/bonfire/knowledge/sqlite_backend.py
new file mode 100644
index 00000000..3ddeca64
--- /dev/null
+++ b/src/bonfire/knowledge/sqlite_backend.py
@@ -0,0 +1,233 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright 2026 BonfireAI
+
+"""Persistent vault backend backed by the stdlib ``sqlite3`` module.
+
+This is the *portable* persistent backend: it needs no third-party
+dependencies (unlike the LanceDB backend, whose vector deps are absent in
+CI), so it runs everywhere CPython does. A single plain table holds one row
+per :class:`~bonfire.protocols.VaultEntry`; list/dict fields are stored as
+JSON text.
+
+Retrieval is **honest keyword search**, not semantic search. ``query`` does
+exactly what :class:`~bonfire.knowledge.memory.InMemoryVaultBackend` does: it
+splits the query into words and scores each entry by how many of those words
+appear as a case-insensitive substring of the entry's content -- no
+embeddings, no vectors. SQLite ``LIKE`` is used only as a parameterized
+prefilter to avoid scanning unmatched rows; the final scoring and ranking
+mirror the in-memory backend byte-for-byte.
+
+The async methods wrap synchronous ``sqlite3`` calls (the same pattern the
+in-memory backend uses) -- no ``aiosqlite`` or other added dependency.
+
+Schema is versioned (BubbleGum): ``_SCHEMA_VERSION`` plus an idempotent,
+forward-only ``_ensure_schema``. A ``vault_meta`` row records the version so
+a future migration can detect and upgrade an older file.
+"""
+
+from __future__ import annotations
+
+import json
+import sqlite3
+from typing import TYPE_CHECKING
+
+from bonfire.knowledge.hasher import content_hash as compute_hash
+from bonfire.protocols import VaultEntry
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+# Forward-only schema version. Bump only alongside a migration step in
+# ``_ensure_schema``; never rewrite history.
+_SCHEMA_VERSION = 1
+
+# Ordered VaultEntry fields stored as their own columns. The two structured
+# fields (``tags`` -> JSON array, ``metadata`` -> JSON object) are handled
+# separately when (de)serializing; everything else round-trips as TEXT.
+_TEXT_FIELDS = (
+    "entry_id",
+    "content",
+    "entry_type",
+    "source_path",
+    "project_name",
+    "scanned_at",
+    "git_hash",
+    "content_hash",
+)
+
+
+class SqliteVaultBackend:
+    """Persistent vault over a single ``sqlite3`` connection.
+
+    Pass a filesystem ``db_path`` to persist across process restarts, or
+    ``":memory:"`` (the default) for an ephemeral in-process database used by
+    tests. Keyword retrieval only -- no embeddings.
+    """
+
+    def __init__(self, db_path: str = ":memory:") -> None:
+        self._db_path = db_path
+        # check_same_thread=False lets the connection be used from a thread
+        # other than the one that created it. The async methods call sqlite3
+        # inline (no executor), so a single event loop serializes all access.
+        self._conn = sqlite3.connect(db_path, check_same_thread=False)
+        self._conn.row_factory = sqlite3.Row
+        self._ensure_schema()
+
+    # -- schema ----------------------------------------------------------
+
+    def _ensure_schema(self) -> None:
+        """Create the table and record the schema version (idempotent).
+
+        Forward-only: safe to call on every open. Creating the objects
+        ``IF NOT EXISTS`` means an existing file is left intact; the version
+        row is inserted only when absent.
+        """
+        self._conn.execute(
+            """
+            CREATE TABLE IF NOT EXISTS vault_entries (
+                entry_id     TEXT PRIMARY KEY,
+                content      TEXT NOT NULL,
+                entry_type   TEXT NOT NULL,
+                source_path  TEXT NOT NULL DEFAULT '',
+                project_name TEXT NOT NULL DEFAULT '',
+                scanned_at   TEXT NOT NULL DEFAULT '',
+                git_hash     TEXT NOT NULL DEFAULT '',
+                content_hash TEXT NOT NULL DEFAULT '',
+                tags         TEXT NOT NULL DEFAULT '[]',
+                metadata     TEXT NOT NULL DEFAULT '{}'
+            )
+            """
+        )
+        self._conn.execute(
+            "CREATE INDEX IF NOT EXISTS idx_vault_entries_content_hash "
+            "ON vault_entries (content_hash)"
+        )
+        self._conn.execute(
+            "CREATE INDEX IF NOT EXISTS idx_vault_entries_source_path "
+            "ON vault_entries (source_path)"
+        )
+        self._conn.execute(
+            "CREATE TABLE IF NOT EXISTS vault_meta (key TEXT PRIMARY KEY, value TEXT NOT NULL)"
+        )
+        self._conn.execute(
+            "INSERT OR IGNORE INTO vault_meta (key, value) VALUES ('schema_version', ?)",
+            (str(_SCHEMA_VERSION),),
+        )
+        self._conn.commit()
+
+    # -- (de)serialization ----------------------------------------------
+
+    @staticmethod
+    def _to_row(entry: VaultEntry) -> tuple[object, ...]:
+        """Flatten a VaultEntry into the column tuple (JSON for tags/metadata)."""
+        values: list[object] = [getattr(entry, field) for field in _TEXT_FIELDS]
+        values.append(json.dumps(entry.tags))
+        values.append(json.dumps(entry.metadata))
+        return tuple(values)
+
+    @staticmethod
+    def _from_row(row: sqlite3.Row) -> VaultEntry:
+        """Rebuild a VaultEntry from a stored row (JSON-decode tags/metadata)."""
+        data = {field: row[field] for field in _TEXT_FIELDS}
+        data["tags"] = json.loads(row["tags"])
+        data["metadata"] = json.loads(row["metadata"])
+        return VaultEntry(**data)
+
+    # -- protocol methods -----------------------------------------------
+
+    async def store(self, entry: VaultEntry) -> str:
+        """Persist *entry* (upsert by ``entry_id``) and return its ``entry_id``.
+
+        Computes ``content_hash`` from the content when the caller left it
+        blank, mirroring the in-memory backend.
+        """
+        if not entry.content_hash:
+            entry = entry.model_copy(update={"content_hash": compute_hash(entry.content)})
+        columns = (*_TEXT_FIELDS, "tags", "metadata")
+        placeholders = ", ".join("?" for _ in columns)
+        column_list = ", ".join(columns)
+        # Upsert by primary key so re-storing the same entry_id replaces the
+        # row rather than failing on the PK constraint.
+        updates = ", ".join(f"{col}=excluded.{col}" for col in columns if col != "entry_id")
+        self._conn.execute(
+            f"INSERT INTO vault_entries ({column_list}) VALUES ({placeholders}) "
+            f"ON CONFLICT(entry_id) DO UPDATE SET {updates}",
+            self._to_row(entry),
+        )
+        self._conn.commit()
+        return entry.entry_id
+
+    async def query(
+        self,
+        query: str,
+        *,
+        limit: int = 5,
+        entry_type: str | None = None,
+    ) -> list[VaultEntry]:
+        """Keyword retrieval: score by per-word substring hits, top *limit*.
+
+        Mirrors :class:`InMemoryVaultBackend.query` exactly -- the query is
+        lowercased and split into words; each candidate entry scores one point
+        per distinct query word found as a substring of its (lowercased)
+        content; only positive-scoring entries are returned, highest score
+        first, capped at *limit*. ``LIKE`` is used purely as a parameterized
+        prefilter; no semantic/vector matching is involved.
+        """
+        query_words = query.lower().split()
+        if not query_words:
+            return []
+
+        rows = self._candidate_rows(query_words, entry_type)
+        scored: list[tuple[VaultEntry, int]] = []
+        for row in rows:
+            lowered = row["content"].lower()
+            score = sum(1 for w in query_words if w in lowered)
+            if score > 0:
+                scored.append((self._from_row(row), score))
+        scored.sort(key=lambda pair: pair[1], reverse=True)
+        return [entry for entry, _ in scored[:limit]]
+
+    def _candidate_rows(
+        self,
+        query_words: Iterable[str],
+        entry_type: str | None,
+    ) -> list[sqlite3.Row]:
+        """Fetch rows where content matches ANY query word (parameterized).
+
+        A row scores > 0 in :meth:`query` only if at least one (already
+        lowercased) query word is a substring of the entry's lowercased
+        content, so an OR of ``LIKE`` clauses against ``lower(content)`` is a
+        sound, loss-free prefilter -- it can only over-include. The
+        authoritative scoring in :meth:`query` re-checks every word in Python,
+        so the returned set and ranking match the in-memory backend exactly.
+        """
+        params: list[object] = []
+        like_clauses: list[str] = []
+        for word in query_words:
+            like_clauses.append("lower(content) LIKE '%' || ? || '%'")
+            params.append(word)
+        where = f"({' OR '.join(like_clauses)})"
+        if entry_type is not None:
+            where += " AND entry_type = ?"
+            params.append(entry_type)
+        cursor = self._conn.execute(
+            f"SELECT * FROM vault_entries WHERE {where}",
+            tuple(params),
+        )
+        return cursor.fetchall()
+
+    async def exists(self, content_hash: str) -> bool:
+        """Return ``True`` if a stored entry has this ``content_hash``."""
+        cursor = self._conn.execute(
+            "SELECT 1 FROM vault_entries WHERE content_hash = ? LIMIT 1",
+            (content_hash,),
+        )
+        return cursor.fetchone() is not None
+
+    async def get_by_source(self, source_path: str) -> list[VaultEntry]:
+        """Return all entries whose ``source_path`` equals *source_path*."""
+        cursor = self._conn.execute(
+            "SELECT * FROM vault_entries WHERE source_path = ?",
+            (source_path,),
+        )
+        return [self._from_row(row) for row in cursor.fetchall()]
diff --git a/tests/unit/test_sqlite_vault.py b/tests/unit/test_sqlite_vault.py
new file mode 100644
index 00000000..87f1bd07
--- /dev/null
+++ b/tests/unit/test_sqlite_vault.py
@@ -0,0 +1,247 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright 2026 BonfireAI
+
+"""Contract tests for the stdlib-``sqlite3`` persistent vault backend.
+
+Mirrors the in-memory backend's contract (store -> exists -> query ->
+get_by_source, content_hash dedup, entry_id round-trip) and adds the property
+the in-memory backend cannot have: **persistence across reopening the same
+file path**. No third-party dependencies -- this backend is the one that runs
+in CI where the LanceDB vector deps are absent.
+
+Async tests auto-discover (``asyncio_mode = "auto"``); no marker needed.
+Implementation NEVER edits this file.
+"""
+
+from __future__ import annotations
+
+from bonfire.knowledge import get_vault_backend
+from bonfire.knowledge.hasher import content_hash as compute_hash
+from bonfire.knowledge.sqlite_backend import SqliteVaultBackend
+from bonfire.protocols import VaultBackend, VaultEntry
+
+
+def _entry(content: str, **overrides: object) -> VaultEntry:
+    """Build a VaultEntry with a default entry_type, overridable per call."""
+    data: dict[str, object] = {"content": content, "entry_type": "code_chunk"}
+    data.update(overrides)
+    return VaultEntry(**data)
+
+
+class TestProtocolConformance:
+    def test_satisfies_vault_backend_protocol(self) -> None:
+        backend = SqliteVaultBackend()
+        assert isinstance(backend, VaultBackend)
+
+
+class TestStoreAndExists:
+    async def test_store_returns_entry_id(self) -> None:
+        backend = SqliteVaultBackend()
+        entry = _entry("hello world")
+        returned = await backend.store(entry)
+        assert returned == entry.entry_id
+
+    async def test_store_computes_content_hash_when_absent(self) -> None:
+        backend = SqliteVaultBackend()
+        entry = _entry("compute my hash")
+        assert entry.content_hash == ""
+        await backend.store(entry)
+        assert await backend.exists(compute_hash("compute my hash")) is True
+
+    async def test_store_preserves_supplied_content_hash(self) -> None:
+        backend = SqliteVaultBackend()
+        await backend.store(_entry("payload", content_hash="explicit-hash"))
+        assert await backend.exists("explicit-hash") is True
+
+    async def test_exists_false_for_unknown_hash(self) -> None:
+        backend = SqliteVaultBackend()
+        assert await backend.exists("never-stored") is False
+
+
+class TestRoundTrip:
+    async def test_query_returns_full_entry(self) -> None:
+        backend = SqliteVaultBackend()
+        original = _entry(
+            "alpha beta gamma",
+            source_path="src/foo.py",
+            project_name="proj",
+            scanned_at="2026-06-29",
+            git_hash="deadbeef",
+            tags=["a", "b"],
+            metadata={"k": "v", "n": 1},
+        )
+        await backend.store(original)
+        results = await backend.query("alpha")
+        assert len(results) == 1
+        got = results[0]
+        assert got.entry_id == original.entry_id
+        assert got.content == "alpha beta gamma"
+        assert got.entry_type == "code_chunk"
+        assert got.source_path == "src/foo.py"
+        assert got.project_name == "proj"
+        assert got.scanned_at == "2026-06-29"
+        assert got.git_hash == "deadbeef"
+        assert got.tags == ["a", "b"]
+        assert got.metadata == {"k": "v", "n": 1}
+
+
+class TestQuery:
+    async def test_query_substring_match(self) -> None:
+        backend = SqliteVaultBackend()
+        await backend.store(_entry("the quick brown fox"))
+        await backend.store(_entry("a lazy dog sleeps"))
+        results = await backend.query("quick")
+        assert len(results) == 1
+        assert results[0].content == "the quick brown fox"
+
+    async def test_query_is_case_insensitive(self) -> None:
+        backend = SqliteVaultBackend()
+        await backend.store(_entry("UPPER CASE CONTENT"))
+        results = await backend.query("upper")
+        assert len(results) == 1
+
+    async def test_query_no_match_returns_empty(self) -> None:
+        backend = SqliteVaultBackend()
+        await backend.store(_entry("hello world"))
+        assert await backend.query("absent") == []
+
+    async def test_query_empty_string_returns_empty(self) -> None:
+        backend = SqliteVaultBackend()
+        await backend.store(_entry("hello world"))
+        assert await backend.query("   ") == []
+
+    async def test_query_respects_limit(self) -> None:
+        backend = SqliteVaultBackend()
+        for i in range(10):
+            await backend.store(_entry(f"shared token entry {i}"))
+        results = await backend.query("shared", limit=3)
+        assert len(results) == 3
+
+    async def test_query_filters_by_entry_type(self) -> None:
+        backend = SqliteVaultBackend()
+        await backend.store(_entry("token here", entry_type="code_chunk"))
+        await backend.store(_entry("token there", entry_type="scout_report"))
+        results = await backend.query("token", entry_type="scout_report")
+        assert len(results) == 1
+        assert results[0].entry_type == "scout_report"
+
+    async def test_query_ranks_more_word_hits_first(self) -> None:
+        backend = SqliteVaultBackend()
+        await backend.store(_entry("alpha only here", content_hash="one"))
+        await backend.store(_entry("alpha and beta both", content_hash="two"))
+        results = await backend.query("alpha beta")
+        assert results[0].content == "alpha and beta both"
+
+
+class TestGetBySource:
+    async def test_get_by_source_returns_matching(self) -> None:
+        backend = SqliteVaultBackend()
+        await backend.store(_entry("a", source_path="src/x.py", content_hash="ha"))
+        await backend.store(_entry("b", source_path="src/x.py", content_hash="hb"))
+        await backend.store(_entry("c", source_path="src/y.py", content_hash="hc"))
+        results = await backend.get_by_source("src/x.py")
+        assert len(results) == 2
+        assert {r.content for r in results} == {"a", "b"}
+
+    async def test_get_by_source_empty_when_none(self) -> None:
+        backend = SqliteVaultBackend()
+        assert await backend.get_by_source("src/missing.py") == []
+
+
+class TestDedupByContentHash:
+    async def test_distinct_hashes_both_exist(self) -> None:
+        backend = SqliteVaultBackend()
+        await backend.store(_entry("first", content_hash="h1"))
+        await backend.store(_entry("second", content_hash="h2"))
+        assert await backend.exists("h1") is True
+        assert await backend.exists("h2") is True
+
+    async def test_exists_drives_ingest_dedup(self) -> None:
+        """The ingest pattern: skip store when exists() reports the hash."""
+        backend = SqliteVaultBackend()
+        c_hash = compute_hash("dedup me")
+        entry = _entry("dedup me")
+        if not await backend.exists(c_hash):
+            await backend.store(entry)
+        # Second pass: hash now present, so ingest skips the store.
+        would_store_again = not await backend.exists(c_hash)
+        assert would_store_again is False
+        # Exactly one row landed despite two ingest passes.
+        assert len(await backend.query("dedup")) == 1
+
+
+class TestPersistenceAcrossReopen:
+    async def test_data_survives_reopening_same_file(self, tmp_path) -> None:
+        """Write through one connection, reopen the SAME path, read it back.
+
+        This is the property the in-memory backend cannot provide and the
+        reason this backend exists: durable storage on disk.
+        """
+        db_file = str(tmp_path / "vault.db")
+
+        writer = SqliteVaultBackend(db_path=db_file)
+        entry = _entry(
+            "persistent payload token",
+            source_path="src/persist.py",
+            content_hash="persist-hash",
+            tags=["keep"],
+            metadata={"durable": True},
+        )
+        await writer.store(entry)
+
+        # A fresh backend over the same file path must see the prior write.
+        reader = SqliteVaultBackend(db_path=db_file)
+        assert await reader.exists("persist-hash") is True
+        by_source = await reader.get_by_source("src/persist.py")
+        assert len(by_source) == 1
+        restored = by_source[0]
+        assert restored.entry_id == entry.entry_id
+        assert restored.content == "persistent payload token"
+        assert restored.tags == ["keep"]
+        assert restored.metadata == {"durable": True}
+
+        hits = await reader.query("persistent")
+        assert len(hits) == 1
+        assert hits[0].entry_id == entry.entry_id
+
+    async def test_reopen_does_not_duplicate_schema(self, tmp_path) -> None:
+        """Reopening repeatedly is idempotent; data accumulates correctly."""
+        db_file = str(tmp_path / "vault.db")
+        first = SqliteVaultBackend(db_path=db_file)
+        await first.store(_entry("one", content_hash="k1"))
+        second = SqliteVaultBackend(db_path=db_file)
+        await second.store(_entry("two", content_hash="k2"))
+        third = SqliteVaultBackend(db_path=db_file)
+        assert await third.exists("k1") is True
+        assert await third.exists("k2") is True
+
+
+class TestUpsertByEntryId:
+    async def test_restoring_same_entry_id_replaces_row(self) -> None:
+        """Re-storing an existing entry_id keeps one row; the latest content wins."""
+        backend = SqliteVaultBackend()
+        first = _entry("original", entry_id="fixed-id", content_hash="orig")
+        await backend.store(first)
+        second = _entry("updated", entry_id="fixed-id", content_hash="upd")
+        await backend.store(second)
+        results = await backend.query("updated")
+        assert len(results) == 1
+        assert results[0].entry_id == "fixed-id"
+        assert await backend.query("original") == []
+
+
+class TestFactoryWiring:
+    def test_factory_returns_sqlite_backend(self) -> None:
+        backend = get_vault_backend(backend="sqlite", vault_path=":memory:")
+        assert isinstance(backend, SqliteVaultBackend)
+
+    def test_factory_memory_still_default(self) -> None:
+        backend = get_vault_backend()
+        assert not isinstance(backend, SqliteVaultBackend)
+
+    async def test_factory_sqlite_persists_to_path(self, tmp_path) -> None:
+        db_file = str(tmp_path / "factory.db")
+        writer = get_vault_backend(backend="sqlite", vault_path=db_file)
+        await writer.store(_entry("via factory", content_hash="fac"))
+        reader = get_vault_backend(backend="sqlite", vault_path=db_file)
+        assert await reader.exists("fac") is True

From 805d40792077f6dfd29477ec1d00b438529c516d Mon Sep 17 00:00:00 2001
From: Antawari <antawari@gmail.com>
Date: Mon, 29 Jun 2026 12:43:13 -0600
Subject: [PATCH 2/2] Use static SQL in the sqlite vault and declare its test
 for the budget
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the two dynamically built statements (the upsert and the query
prefilter) with static literals: a fixed-column INSERT ... ON CONFLICT,
and a SELECT that reads the table (optionally narrowed by entry_type),
with the scoring done in Python as before. Every value is still a bound
parameter; this clears the shared gate's SQL-construction lint and is
simpler. The LIKE prefilter is gone, so each query now reads every row
(or every row of the requested entry_type) before scoring; the keyword
ranking still mirrors the in-memory backend.

Declare the new test file in the file-budget ledger so it does not draw
against the frozen tests/unit package total (the established pattern for
new test coverage).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 file-budget.json                        |  3 +
 src/bonfire/knowledge/sqlite_backend.py | 84 ++++++++++++-------------
 2 files changed, 42 insertions(+), 45 deletions(-)

diff --git a/file-budget.json b/file-budget.json
index 81836a07..e6803cc0 100644
--- a/file-budget.json
+++ b/file-budget.json
@@ -61,6 +61,9 @@
     },
     "tests/unit/test_session_lifecycle_cli.py": {
       "purpose": "new e2e coverage for the status/resume/handoff verbs"
+    },
+    "tests/unit/test_sqlite_vault.py": {
+      "purpose": "contract + persistence coverage for the sqlite vault backend"
     }
   },
   "packages": {
diff --git a/src/bonfire/knowledge/sqlite_backend.py b/src/bonfire/knowledge/sqlite_backend.py
index 3ddeca64..ac8f6026 100644
--- a/src/bonfire/knowledge/sqlite_backend.py
+++ b/src/bonfire/knowledge/sqlite_backend.py
@@ -13,9 +13,10 @@
 exactly what :class:`~bonfire.knowledge.memory.InMemoryVaultBackend` does: it
 splits the query into words and scores each entry by how many of those words
 appear as a case-insensitive substring of the entry's content -- no
-embeddings, no vectors. SQLite ``LIKE`` is used only as a parameterized
-prefilter to avoid scanning unmatched rows; the final scoring and ranking
-mirror the in-memory backend byte-for-byte.
+embeddings, no vectors. It reads the rows with a static ``SELECT`` (optionally
+narrowed by ``entry_type``) and does the scoring and ranking in Python, which
+mirrors the in-memory backend byte-for-byte. The SQL carries only bound
+parameters -- no value is ever formatted into a statement string.
 
 The async methods wrap synchronous ``sqlite3`` calls (the same pattern the
 in-memory backend uses) -- no ``aiosqlite`` or other added dependency.
@@ -29,18 +30,32 @@
 
 import json
 import sqlite3
-from typing import TYPE_CHECKING
 
 from bonfire.knowledge.hasher import content_hash as compute_hash
 from bonfire.protocols import VaultEntry
 
-if TYPE_CHECKING:
-    from collections.abc import Iterable
-
 # Forward-only schema version. Bump only alongside a migration step in
 # ``_ensure_schema``; never rewrite history.
 _SCHEMA_VERSION = 1
 
+# Static statements. Every value is bound (``?``); no identifier or value is
+# ever formatted into the SQL string. The INSERT column order matches
+# ``_to_row`` (``_TEXT_FIELDS`` then ``tags``, ``metadata``).
+_INSERT_SQL = (
+    "INSERT INTO vault_entries "
+    "(entry_id, content, entry_type, source_path, project_name, "
+    "scanned_at, git_hash, content_hash, tags, metadata) "
+    "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) "
+    "ON CONFLICT(entry_id) DO UPDATE SET "
+    "content=excluded.content, entry_type=excluded.entry_type, "
+    "source_path=excluded.source_path, project_name=excluded.project_name, "
+    "scanned_at=excluded.scanned_at, git_hash=excluded.git_hash, "
+    "content_hash=excluded.content_hash, tags=excluded.tags, "
+    "metadata=excluded.metadata"
+)
+_SELECT_ALL = "SELECT * FROM vault_entries"
+_SELECT_BY_TYPE = "SELECT * FROM vault_entries WHERE entry_type = ?"
+
 # Ordered VaultEntry fields stored as their own columns. The two structured
 # fields (``tags`` -> JSON array, ``metadata`` -> JSON object) are handled
 # separately when (de)serializing; everything else round-trips as TEXT.
@@ -143,17 +158,9 @@ async def store(self, entry: VaultEntry) -> str:
         """
         if not entry.content_hash:
             entry = entry.model_copy(update={"content_hash": compute_hash(entry.content)})
-        columns = (*_TEXT_FIELDS, "tags", "metadata")
-        placeholders = ", ".join("?" for _ in columns)
-        column_list = ", ".join(columns)
         # Upsert by primary key so re-storing the same entry_id replaces the
         # row rather than failing on the PK constraint.
-        updates = ", ".join(f"{col}=excluded.{col}" for col in columns if col != "entry_id")
-        self._conn.execute(
-            f"INSERT INTO vault_entries ({column_list}) VALUES ({placeholders}) "
-            f"ON CONFLICT(entry_id) DO UPDATE SET {updates}",
-            self._to_row(entry),
-        )
+        self._conn.execute(_INSERT_SQL, self._to_row(entry))
         self._conn.commit()
         return entry.entry_id
 
@@ -170,14 +177,15 @@ async def query(
         lowercased and split into words; each candidate entry scores one point
         per distinct query word found as a substring of its (lowercased)
         content; only positive-scoring entries are returned, highest score
-        first, capped at *limit*. ``LIKE`` is used purely as a parameterized
-        prefilter; no semantic/vector matching is involved.
+        first, capped at *limit*. The rows are read with a static ``SELECT``
+        (optionally narrowed by ``entry_type``); no semantic/vector matching is
+        involved.
         """
         query_words = query.lower().split()
         if not query_words:
             return []
 
-        rows = self._candidate_rows(query_words, entry_type)
+        rows = self._candidate_rows(entry_type)
         scored: list[tuple[VaultEntry, int]] = []
         for row in rows:
             lowered = row["content"].lower()
@@ -187,33 +195,19 @@ async def query(
         scored.sort(key=lambda pair: pair[1], reverse=True)
         return [entry for entry, _ in scored[:limit]]
 
-    def _candidate_rows(
-        self,
-        query_words: Iterable[str],
-        entry_type: str | None,
-    ) -> list[sqlite3.Row]:
-        """Fetch rows where content matches ANY query word (parameterized).
-
-        A row scores > 0 in :meth:`query` only if at least one (already
-        lowercased) query word is a substring of the entry's lowercased
-        content, so an OR of ``LIKE`` clauses against ``lower(content)`` is a
-        sound, loss-free prefilter -- it can only over-include. The
-        authoritative scoring in :meth:`query` re-checks every word in Python,
-        so the returned set and ranking match the in-memory backend exactly.
+    def _candidate_rows(self, entry_type: str | None) -> list[sqlite3.Row]:
+        """Read the rows to score, optionally narrowed by ``entry_type``.
+
+        The authoritative scoring in :meth:`query` re-checks every query word
+        in Python, exactly as the in-memory backend does, so reading the full
+        table (or the ``entry_type`` slice of it) yields the same result set
+        and ranking. Both statements are static literals; the only value used
+        (``entry_type``) is passed as a bound parameter.
         """
-        params: list[object] = []
-        like_clauses: list[str] = []
-        for word in query_words:
-            like_clauses.append("lower(content) LIKE '%' || ? || '%'")
-            params.append(word)
-        where = f"({' OR '.join(like_clauses)})"
-        if entry_type is not None:
-            where += " AND entry_type = ?"
-            params.append(entry_type)
-        cursor = self._conn.execute(
-            f"SELECT * FROM vault_entries WHERE {where}",
-            tuple(params),
-        )
+        if entry_type is None:
+            cursor = self._conn.execute(_SELECT_ALL)
+        else:
+            cursor = self._conn.execute(_SELECT_BY_TYPE, (entry_type,))
         return cursor.fetchall()
 
     async def exists(self, content_hash: str) -> bool: