diff --git a/__tests__/embeddings.test.ts b/__tests__/embeddings.test.ts
index 0bfb3cd7..216e4a08 100644
--- a/__tests__/embeddings.test.ts
+++ b/__tests__/embeddings.test.ts
@@ -19,6 +19,8 @@ import {
   cosineNormalised,
   reciprocalRankFusion,
   topKByCosine,
+  topKByCosineMatrix,
+  EmbeddingCache,
 } from '../src/llm/embeddings';
 
 const EMBED_DIM = 8;
@@ -157,6 +159,84 @@ describe('embedding helpers', () => {
     const sorted = [...fused.entries()].sort((a, b) => b[1] - a[1]).map(([id]) => id);
     expect(sorted[0]).toBe('y');
   });
+
+  it('topKByCosineMatrix matches topKByCosine on the same data', () => {
+    const query = l2(Float32Array.from([1, 0, 0, 0, 0, 0, 0, 0]));
+    const vecs = [
+      { id: 'a', v: l2(Float32Array.from([0.9, 0.1, 0, 0, 0, 0, 0, 0])) },
+      { id: 'b', v: l2(Float32Array.from([0, 1, 0, 0, 0, 0, 0, 0])) },
+      { id: 'c', v: l2(Float32Array.from([0.5, 0.5, 0, 0, 0, 0, 0, 0])) },
+    ];
+    const candidates = vecs.map((e) => ({ nodeId: e.id, embedding: vectorToBytes(e.v) }));
+    const matrix = new Float32Array(vecs.length * EMBED_DIM);
+    const ids = vecs.map((e) => e.id);
+    for (let i = 0; i < vecs.length; i++) matrix.set(vecs[i]!.v, i * EMBED_DIM);
+
+    const a = topKByCosine(query, candidates, 3).map((h) => h.nodeId);
+    const b = topKByCosineMatrix(query, matrix, ids, EMBED_DIM, 3).map((h) => h.nodeId);
+    expect(b).toEqual(a);
+  });
+
+  it('EmbeddingCache returns the same result on hit and miss; invalidate forces refetch', () => {
+    let fetchCalls = 0;
+    const v = vectorToBytes(l2(Float32Array.from([1, 0, 0, 0, 0, 0, 0, 0])));
+    const fetcher = {
+      getAllEmbeddings: (_model: string) => {
+        fetchCalls++;
+        return [{ nodeId: 'a', embedding: v }];
+      },
+    };
+
+    const cache = new EmbeddingCache();
+    const r1 = cache.get(fetcher, 'm');
+    const r2 = cache.get(fetcher, 'm');
+    expect(fetchCalls).toBe(1);
+    expect(r1).toBe(r2);
+    expect(r1.ids).toEqual(['a']);
+    expect(r1.dim).toBe(EMBED_DIM);
+
+    cache.invalidate();
+    cache.get(fetcher, 'm');
+    expect(fetchCalls).toBe(2);
+
+    // Switching models also forces a refetch.
+    cache.get(fetcher, 'other-model');
+    expect(fetchCalls).toBe(3);
+  });
+
+  it('EmbeddingCache skips rows whose dimension does not match the first row', () => {
+    const v3 = vectorToBytes(l2(Float32Array.from([1, 0, 0, 0, 0, 0, 0, 0])));
+    // Different shape: 4-dim vector. Should be skipped.
+    const v4 = Buffer.from(new Float32Array([1, 0, 0, 0]).buffer);
+    const fetcher = {
+      getAllEmbeddings: (_model: string) => [
+        { nodeId: 'good', embedding: v3 },
+        { nodeId: 'bad', embedding: v4 },
+        { nodeId: 'good2', embedding: v3 },
+      ],
+    };
+    const cache = new EmbeddingCache();
+    const r = cache.get(fetcher, 'm');
+    expect(r.ids).toEqual(['good', 'good2']);
+    expect(r.matrix.length).toBe(2 * EMBED_DIM);
+    expect(r.dim).toBe(EMBED_DIM);
+  });
+
+  it('EmbeddingCache returns an empty result without calling the fetcher again on hit', () => {
+    let fetchCalls = 0;
+    const fetcher = {
+      getAllEmbeddings: (_model: string) => {
+        fetchCalls++;
+        return [];
+      },
+    };
+    const cache = new EmbeddingCache();
+    const r = cache.get(fetcher, 'm');
+    expect(r.ids).toEqual([]);
+    expect(r.dim).toBe(0);
+    cache.get(fetcher, 'm');
+    expect(fetchCalls).toBe(1);
+  });
 });
 
 describe('CodeGraph hybrid search & similar', () => {
diff --git a/__tests__/foundation.test.ts b/__tests__/foundation.test.ts
index 71894cdc..bd6e957d 100644
--- a/__tests__/foundation.test.ts
+++ b/__tests__/foundation.test.ts
@@ -305,7 +305,7 @@ describe('Database Connection', () => {
 
     const version = db.getSchemaVersion();
     expect(version).not.toBeNull();
-    expect(version?.version).toBe(14);
+    expect(version?.version).toBe(16);
 
     db.close();
   });
diff --git a/__tests__/migrations-015-016.test.ts b/__tests__/migrations-015-016.test.ts
new file mode 100644
index 00000000..b71968fe
--- /dev/null
+++ b/__tests__/migrations-015-016.test.ts
@@ -0,0 +1,148 @@
+/**
+ * Migration 015 (drop idx_co_changes_a) and 016 (split embeddings).
+ *
+ * - 015 verifies the redundant `idx_co_changes_a` index is absent
+ *   on a fresh DB while `idx_co_changes_b` is kept; the wider PK
+ *   still covers `WHERE file_a = ?` lookups.
+ * - 016 verifies embeddings move from `symbol_summaries.embedding`
+ *   into a dedicated `symbol_embeddings` table, the old columns
+ *   are dropped, and existing data is preserved verbatim.
+ */
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { createDatabase } from '../src/db/sqlite-adapter';
+import { runMigrations, getCurrentVersion } from '../src/db/migrations';
+import { DatabaseConnection } from '../src/db';
+
+function tempDir(): string {
+  return fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-mig-015-016-'));
+}
+
+function cleanup(dir: string): void {
+  if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
+}
+
+describe('Migration 015 — drop idx_co_changes_a', () => {
+  let dir: string;
+  beforeEach(() => { dir = tempDir(); });
+  afterEach(() => cleanup(dir));
+
+  it('fresh DB does not contain idx_co_changes_a, but keeps idx_co_changes_b', () => {
+    const dbPath = path.join(dir, 'fresh.db');
+    const db = DatabaseConnection.initialize(dbPath);
+    try {
+      const indexes = db.getDb()
+        .prepare("SELECT name FROM sqlite_master WHERE type = 'index' AND tbl_name = 'co_changes'")
+        .all() as Array<{ name: string }>;
+      const names = indexes.map((r) => r.name);
+      expect(names).not.toContain('idx_co_changes_a');
+      expect(names).toContain('idx_co_changes_b');
+    } finally {
+      db.close();
+    }
+  });
+});
+
+describe('Migration 016 — split embeddings into symbol_embeddings table', () => {
+  let dir: string;
+  beforeEach(() => { dir = tempDir(); });
+  afterEach(() => cleanup(dir));
+
+  it('moves existing embedding rows; drops the inline columns', () => {
+    const dbPath = path.join(dir, 'upgrade.db');
+    const adapter = createDatabase(dbPath);
+    try {
+      // Simulate a v14 database: just enough of the relevant schema.
+      adapter.exec(`
+        CREATE TABLE nodes (id TEXT PRIMARY KEY);
+        INSERT INTO nodes (id) VALUES ('n1'), ('n2'), ('n3');
+        CREATE TABLE symbol_summaries (
+          node_id TEXT PRIMARY KEY,
+          content_hash TEXT NOT NULL,
+          summary TEXT NOT NULL,
+          model TEXT NOT NULL,
+          generated_at INTEGER NOT NULL,
+          embedding BLOB,
+          embedding_model TEXT,
+          role TEXT,
+          role_model TEXT,
+          FOREIGN KEY (node_id) REFERENCES nodes(id) ON DELETE CASCADE
+        );
+        CREATE INDEX idx_summaries_embedding_model ON symbol_summaries(embedding_model);
+        CREATE TABLE schema_versions (
+          version INTEGER PRIMARY KEY,
+          applied_at INTEGER NOT NULL,
+          description TEXT
+        );
+        INSERT INTO schema_versions (version, applied_at, description) VALUES (14, 0, 'v14');
+      `);
+
+      // n1 has both summary and embedding; n2 has summary only;
+      // n3 has summary + embedding from a stale model. Every row with a
+      // non-NULL embedding and embedding_model is copied, so n1 and n3 move.
+      const buf1 = Buffer.from(new Float32Array([1, 0, 0]).buffer);
+      const buf3 = Buffer.from(new Float32Array([0, 1, 0]).buffer);
+      adapter.prepare(`
+        INSERT INTO symbol_summaries
+          (node_id, content_hash, summary, model, generated_at, embedding, embedding_model)
+        VALUES
+          ('n1', 'h1', 's1', 'chat-m', 100, ?, 'embed-m'),
+          ('n2', 'h2', 's2', 'chat-m', 100, NULL, NULL),
+          ('n3', 'h3', 's3', 'chat-m', 100, ?, 'old-embed-m')
+      `).run(buf1, buf3);
+
+      runMigrations(adapter, getCurrentVersion(adapter));
+
+      // Old columns gone
+      const cols = adapter.prepare("PRAGMA table_info('symbol_summaries')").all() as Array<{ name: string }>;
+      const colNames = cols.map((c) => c.name);
+      expect(colNames).not.toContain('embedding');
+      expect(colNames).not.toContain('embedding_model');
+
+      // New table has the rows that had both embedding and embedding_model set
+      const moved = adapter
+        .prepare('SELECT node_id, embedding_model FROM symbol_embeddings ORDER BY node_id')
+        .all() as Array<{ node_id: string; embedding_model: string }>;
+      expect(moved).toEqual([
+        { node_id: 'n1', embedding_model: 'embed-m' },
+        { node_id: 'n3', embedding_model: 'old-embed-m' },
+      ]);
+
+      // Embedding bytes preserved verbatim for n1
+      const n1 = adapter
+        .prepare('SELECT embedding FROM symbol_embeddings WHERE node_id = ?')
+        .get('n1') as { embedding: Buffer };
+      expect(Buffer.from(n1.embedding).equals(buf1)).toBe(true);
+
+      // Index on the new table
+      const idx = adapter
+        .prepare("SELECT name FROM sqlite_master WHERE type = 'index' AND tbl_name = 'symbol_embeddings'")
+        .all() as Array<{ name: string }>;
+      expect(idx.map((r) => r.name)).toContain('idx_embeddings_model');
+      expect(getCurrentVersion(adapter)).toBeGreaterThanOrEqual(16);
+    } finally {
+      adapter.close(); // always release the handle so afterEach can remove the temp dir
+    }
+  });
+
+  it('fresh DB has symbol_embeddings table and no embedding columns on symbol_summaries', () => {
+    const db = DatabaseConnection.initialize(path.join(dir, 'fresh.db'));
+    try {
+      const cols = db.getDb()
+        .prepare("PRAGMA table_info('symbol_summaries')")
+        .all() as Array<{ name: string }>;
+      const colNames = cols.map((c) => c.name);
+      expect(colNames).not.toContain('embedding');
+      expect(colNames).not.toContain('embedding_model');
+
+      const tables = db.getDb()
+        .prepare("SELECT name FROM sqlite_master WHERE type = 'table' AND name = 'symbol_embeddings'")
+        .all() as Array<{ name: string }>;
+      expect(tables.length).toBe(1);
+    } finally {
+      db.close();
+    }
+  });
+});
diff --git a/__tests__/pr19-improvements.test.ts b/__tests__/pr19-improvements.test.ts
index 073dd855..9f9ddc38 100644
--- a/__tests__/pr19-improvements.test.ts
+++ b/__tests__/pr19-improvements.test.ts
@@ -299,7 +299,7 @@ describe('Best-Candidate Resolution', () => {
 describe('Schema v2 Migration', () => {
   it.skipIf(!HAS_SQLITE)('should have correct current schema version', async () => {
     const { CURRENT_SCHEMA_VERSION } = await import('../src/db/migrations');
-    expect(CURRENT_SCHEMA_VERSION).toBe(14);
+    expect(CURRENT_SCHEMA_VERSION).toBe(16);
   });
 
   it.skipIf(!HAS_SQLITE)('should have migration for version 2', async () => {
diff --git a/scripts/spikes/spike-embedding-split.mjs b/scripts/spikes/spike-embedding-split.mjs
new file mode 100644
index 00000000..2c70ccd9
--- /dev/null
+++ b/scripts/spikes/spike-embedding-split.mjs
@@ -0,0 +1,201 @@
+#!/usr/bin/env node
+/**
+ * Spikes G and H: embedding storage layout + in-memory cache.
+ *
+ * G. Storage split: keep embeddings INLINE on `symbol_summaries`
+ *    vs SPLIT into a dedicated `symbol_embeddings` table. Measure
+ *    summary-only scan latency (the common path) and summary +
+ *    embedding scan latency (the rare path).
+ *
+ * H. In-memory similarity cache: cold-from-SQLite per query vs
+ *    pre-decoded Float32Array matrix. Measure top-K cosine search
+ *    latency.
+ *
+ * Synthesises 50K symbol_summaries + 768-dim embeddings to mirror
+ * a realistic mid-size codebase. Codegraph's own DB at ~2K nodes
+ * is too small to surface differences.
+ */
+import Database from 'better-sqlite3';
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+
+const NODES = 50_000;
+const EMBED_DIM = 768;
+const EMBED_COUNT = NODES;
+
+function ms(start) { return Number(process.hrtime.bigint() - start) / 1_000_000; } // elapsed ms since `start` (a process.hrtime.bigint() nanosecond reading)
+function fmt(n) { return n < 10 ? n.toFixed(2) : n.toFixed(0); } // two decimals below 10, whole numbers otherwise
+
+console.log('\n=== Spike: embedding storage + in-memory cache ===\n');
+console.log(`Synthesizing ${EMBED_COUNT.toLocaleString()} summaries + ${EMBED_DIM}d embeddings...`);
+
+// ============================================================================
+// Spike G: inline vs split
+// ============================================================================
+console.log('\n--- Spike G: storage layout (inline vs split) ---\n');
+
+function buildEmbedDb({ split }) { // builds a temp DB in the split (two-table) or inline (one-table) layout; returns { db, dbPath, size }
+  const dbPath = path.join(os.tmpdir(), `spike-embed-${Date.now()}-${Math.random()}.db`); // timestamp + random suffix keeps the two DBs apart
+  const db = new Database(dbPath);
+  db.pragma('journal_mode = WAL');
+  db.pragma('synchronous = NORMAL');
+  db.pragma('cache_size = -64000');
+  if (split) {
+    db.exec(`
+      CREATE TABLE summaries (
+        node_id TEXT PRIMARY KEY, summary TEXT NOT NULL,
+        model TEXT NOT NULL, generated_at INTEGER NOT NULL,
+        role TEXT, role_model TEXT
+      );
+      CREATE TABLE embeddings (
+        node_id TEXT PRIMARY KEY,
+        embedding BLOB NOT NULL,
+        embedding_model TEXT NOT NULL
+      );
+    `);
+  } else {
+    db.exec(`
+      CREATE TABLE summaries (
+        node_id TEXT PRIMARY KEY, summary TEXT NOT NULL,
+        model TEXT NOT NULL, generated_at INTEGER NOT NULL,
+        embedding BLOB, embedding_model TEXT,
+        role TEXT, role_model TEXT
+      );
+    `);
+  }
+  const sample = 'A typical one-line summary describing what this function does, with reasonable length.';
+  const buf = Buffer.alloc(EMBED_DIM * 4); // 4 bytes per float32 component
+  for (let i = 0; i < EMBED_DIM; i++) buf.writeFloatLE(Math.random() * 0.1, i * 4); // one random vector, reused as the embedding for every row
+
+  if (split) {
+    const insS = db.prepare('INSERT INTO summaries (node_id, summary, model, generated_at, role) VALUES (?, ?, ?, ?, ?)');
+    const insE = db.prepare('INSERT INTO embeddings (node_id, embedding, embedding_model) VALUES (?, ?, ?)');
+    db.transaction(() => { // all inserts run inside a single transaction
+      for (let i = 0; i < EMBED_COUNT; i++) {
+        insS.run(`n${i}`, sample, 'qwen2.5-coder', Date.now(), 'business_logic');
+        insE.run(`n${i}`, buf, 'nomic-embed-text');
+      }
+    })();
+  } else {
+    const ins = db.prepare(`
+      INSERT INTO summaries (node_id, summary, model, generated_at, embedding, embedding_model, role)
+      VALUES (?, ?, ?, ?, ?, ?, ?)
+    `);
+    db.transaction(() => { // all inserts run inside a single transaction
+      for (let i = 0; i < EMBED_COUNT; i++) {
+        ins.run(`n${i}`, sample, 'qwen2.5-coder', Date.now(), buf, 'nomic-embed-text', 'business_logic');
+      }
+    })();
+  }
+
+  return { db, dbPath, size: fs.statSync(dbPath).size }; // size covers the main DB file only, sampled right after the inserts
+}
+
+const inline = buildEmbedDb({ split: false });
+const splitT = buildEmbedDb({ split: true });
+
+console.log(`  inline DB: ${(inline.size / 1024 / 1024).toFixed(1)} MB`);
+console.log(`  split  DB: ${(splitT.size / 1024 / 1024).toFixed(1)} MB`);
+
+function timeQuery(db, label, sql, params = []) { // runs `sql` N times, logs and returns the mean latency in ms
+  const N = 50;
+  const stmt = db.prepare(sql); // prepared once, before the timer starts
+  const t = process.hrtime.bigint();
+  for (let i = 0; i < N; i++) stmt.all(...params); // result rows are fetched and discarded on every iteration
+  const avg = ms(t) / N;
+  console.log(`  ${label}: ${fmt(avg)}ms avg over ${N} queries`);
+  return avg;
+}
+console.log('\n  Test: scan summaries by role (common path — embedding bytes are dead weight in inline)');
+const inlineNoEmb = timeQuery(
+  inline.db,
+  'inline',
+  `SELECT node_id, summary FROM summaries WHERE role = ?`,
+  ['business_logic']
+);
+const splitNoEmb = timeQuery(
+  splitT.db,
+  'split ',
+  `SELECT node_id, summary FROM summaries WHERE role = ?`,
+  ['business_logic']
+);
+console.log(`  Δ summary-only: split is ${(inlineNoEmb / splitNoEmb).toFixed(2)}× faster`);
+
+console.log('\n  Test: scan summaries WITH embedding (rare path — split pays a JOIN)');
+const inlineWithEmb = timeQuery(
+  inline.db,
+  'inline (single table)   ',
+  `SELECT node_id, summary, embedding FROM summaries`
+);
+const splitWithEmb = timeQuery(
+  splitT.db,
+  'split  (join required)  ',
+  `SELECT s.node_id, s.summary, e.embedding FROM summaries s JOIN embeddings e ON e.node_id = s.node_id`
+);
+console.log(`  Δ summary+embedding: ${(splitWithEmb / inlineWithEmb).toFixed(2)}× cost penalty for split (>1 = split slower)`);
+
+// ============================================================================
+// Spike H: in-memory cache
+// ============================================================================
+console.log('\n--- Spike H: in-memory embedding cache ---\n');
+
+const QUERIES = 20;
+const TOP_K = 10;
+
+const queryVec = new Float32Array(EMBED_DIM);
+for (let i = 0; i < EMBED_DIM; i++) queryVec[i] = Math.random();
+
+function cosine(a, b) { // plain dot product over a.length components; equals cosine similarity only when a and b are L2-normalised
+  let s = 0;
+  for (let i = 0; i < a.length; i++) s += a[i] * b[i];
+  return s;
+}
+
+function bytesToVec(buf) { // zero-copy Float32Array view over the Buffer's backing ArrayBuffer
+  return new Float32Array(buf.buffer, buf.byteOffset, buf.byteLength / 4); // NOTE(review): throws RangeError unless byteOffset is 4-byte aligned — confirm for driver-returned BLOBs
+}
+
+const coldStmt = inline.db.prepare('SELECT node_id, embedding FROM summaries');
+let t0 = process.hrtime.bigint();
+for (let q = 0; q < QUERIES; q++) {
+  const rows = coldStmt.all();
+  const scores = [];
+  for (const r of rows) {
+    const v = bytesToVec(r.embedding);
+    scores.push({ id: r.node_id, score: cosine(queryVec, v) });
+  }
+  scores.sort((a, b) => b.score - a.score);
+  scores.slice(0, TOP_K);
+}
+const coldMs = ms(t0) / QUERIES;
+console.log(`  cold (per-query SQLite fetch + decode): ${fmt(coldMs)}ms avg over ${QUERIES} queries`);
+
+const ids = [];
+const matrix = new Float32Array(EMBED_COUNT * EMBED_DIM);
+let row = 0;
+for (const r of coldStmt.all()) {
+  ids.push(r.node_id);
+  matrix.set(bytesToVec(r.embedding), row * EMBED_DIM);
+  row++;
+}
+let t1 = process.hrtime.bigint();
+for (let q = 0; q < QUERIES; q++) {
+  const scores = [];
+  for (let i = 0; i < EMBED_COUNT; i++) {
+    let s = 0;
+    const off = i * EMBED_DIM;
+    for (let d = 0; d < EMBED_DIM; d++) s += matrix[off + d] * queryVec[d];
+    scores.push({ id: ids[i], score: s });
+  }
+  scores.sort((a, b) => b.score - a.score);
+  scores.slice(0, TOP_K);
+}
+const warmMs = ms(t1) / QUERIES;
+console.log(`  warm (in-memory Float32Array matrix)  : ${fmt(warmMs)}ms avg over ${QUERIES} queries`);
+console.log(`  Δ similarity search: ${(coldMs / warmMs).toFixed(1)}× speedup with in-memory cache`);
+
+inline.db.close(); splitT.db.close();
+fs.unlinkSync(inline.dbPath); fs.unlinkSync(splitT.dbPath);
+
+console.log('\n=== Done ===\n');
diff --git a/src/db/migrations/015-prune-co-changes-index.ts b/src/db/migrations/015-prune-co-changes-index.ts
new file mode 100644
index 00000000..9185e213
--- /dev/null
+++ b/src/db/migrations/015-prune-co-changes-index.ts
@@ -0,0 +1,21 @@
+import type { MigrationModule } from './types';
+
+/**
+ * Drop `idx_co_changes_a` — fully covered by the `(file_a, file_b)`
+ * primary key index on `co_changes` via SQLite's left-prefix scan.
+ *
+ * `idx_co_changes_b` (on `file_b` alone) is kept: the PK leads with
+ * `file_a`, so it cannot serve `WHERE file_b = ?` lookups.
+ *
+ * See `scripts/spikes/spike-edge-indexes.mjs` for the analogous
+ * measurement on the `edges` table; the same left-prefix-scan
+ * argument applies here.
+ */
+export const MIGRATION: MigrationModule = {
+  description: 'Drop redundant idx_co_changes_a index',
+  up: (db) => {
+    db.exec(`
+      DROP INDEX IF EXISTS idx_co_changes_a;
+    `);
+  },
+};
diff --git a/src/db/migrations/016-split-symbol-embeddings.ts b/src/db/migrations/016-split-symbol-embeddings.ts
new file mode 100644
index 00000000..fb23edb7
--- /dev/null
+++ b/src/db/migrations/016-split-symbol-embeddings.ts
@@ -0,0 +1,54 @@
+import type { MigrationModule } from './types';
+
+/**
+ * Split symbol embeddings out of `symbol_summaries` into a dedicated
+ * `symbol_embeddings` table.
+ *
+ * Why: every common-path query against `symbol_summaries` (FTS-anchor
+ * lookups, role filters, content-hash freshness checks) was paying
+ * to skip past a 768-dim Float32 BLOB on the same page chain, even
+ * though almost no query needs the embedding bytes. Spike measurement
+ * on a 50K-summary synthetic DB showed a 3.34× slowdown on summary-
+ * only scans for the inline layout vs. a separate table, with only
+ * an ~11% penalty on the rare summary+embedding scan path.
+ *
+ * The split moves embeddings to their own page chain, leaving
+ * `symbol_summaries` row pages dense with the small text/metadata
+ * fields that matter for the hot read paths.
+ *
+ * See `scripts/spikes/spike-embedding-split.mjs` for the reproducer.
+ *
+ * Migration shape:
+ *   1. Create `symbol_embeddings` (node_id PK, embedding BLOB,
+ *      embedding_model TEXT).
+ *   2. Copy rows with non-NULL `embedding` and `embedding_model` over.
+ *   3. Drop the now-orphaned columns + their index from
+ *      `symbol_summaries`.
+ *
+ * Requires SQLite 3.35+ for `ALTER TABLE DROP COLUMN`. Codegraph's
+ * native (better-sqlite3) and WASM (node-sqlite3-wasm) backends both
+ * ship with newer versions, so this is safe.
+ */
+export const MIGRATION: MigrationModule = {
+  description: 'Split symbol embeddings into dedicated symbol_embeddings table',
+  up: (db) => {
+    db.exec(`
+      CREATE TABLE IF NOT EXISTS symbol_embeddings (
+        node_id TEXT PRIMARY KEY,
+        embedding BLOB NOT NULL,
+        embedding_model TEXT NOT NULL,
+        FOREIGN KEY (node_id) REFERENCES symbol_summaries(node_id) ON DELETE CASCADE
+      );
+      CREATE INDEX IF NOT EXISTS idx_embeddings_model ON symbol_embeddings(embedding_model);
+
+      INSERT OR IGNORE INTO symbol_embeddings (node_id, embedding, embedding_model)
+        SELECT node_id, embedding, embedding_model
+        FROM symbol_summaries
+        WHERE embedding IS NOT NULL AND embedding_model IS NOT NULL;
+
+      DROP INDEX IF EXISTS idx_summaries_embedding_model;
+      ALTER TABLE symbol_summaries DROP COLUMN embedding;
+      ALTER TABLE symbol_summaries DROP COLUMN embedding_model;
+    `);
+  },
+};
diff --git a/src/db/migrations/index.ts b/src/db/migrations/index.ts
index 7e95993f..1f3deda2 100644
--- a/src/db/migrations/index.ts
+++ b/src/db/migrations/index.ts
@@ -37,6 +37,8 @@ import { MIGRATION as MIG_011 } from './011-symbol-summaries';
 import { MIGRATION as MIG_012 } from './012-summary-embeddings';
 import { MIGRATION as MIG_013 } from './013-directory-summaries';
 import { MIGRATION as MIG_014 } from './014-summary-roles';
+import { MIGRATION as MIG_015 } from './015-prune-co-changes-index';
+import { MIGRATION as MIG_016 } from './016-split-symbol-embeddings';
 
 interface ModuleRef {
   /**
@@ -70,6 +72,8 @@ const REGISTERED_MODULES: readonly ModuleRef[] = [
   { filename: '012-summary-embeddings.ts', module: MIG_012 },
   { filename: '013-directory-summaries.ts', module: MIG_013 },
   { filename: '014-summary-roles.ts', module: MIG_014 },
+  { filename: '015-prune-co-changes-index.ts', module: MIG_015 },
+  { filename: '016-split-symbol-embeddings.ts', module: MIG_016 },
 ];
 
 /** Strict 3-digit prefix on each migration filename. */
diff --git a/src/db/queries.ts b/src/db/queries.ts
index 44a7770b..da65828b 100644
--- a/src/db/queries.ts
+++ b/src/db/queries.ts
@@ -1426,6 +1426,7 @@ export class QueryBuilder {
       this.db.exec('DELETE FROM nodes');
       this.db.exec('DELETE FROM files');
       this.db.exec('DELETE FROM co_changes');
+      this.db.exec('DELETE FROM symbol_embeddings');
       this.db.exec('DELETE FROM symbol_summaries');
       this.db.exec('DELETE FROM directory_summaries');
     })();
@@ -1960,6 +1961,7 @@ export class QueryBuilder {
   clearCoChanges(): void {
     this.db.transaction(() => {
       this.db.exec('DELETE FROM co_changes');
+      this.db.exec('DELETE FROM symbol_embeddings');
       this.db.exec('DELETE FROM symbol_summaries');
       this.db.exec('DELETE FROM directory_summaries');
       this.db.exec('UPDATE files SET commit_count = 0');
@@ -2099,9 +2101,9 @@ export class QueryBuilder {
         `SELECT s.node_id AS node_id, n.name AS name, n.signature AS signature, s.summary AS summary
          FROM symbol_summaries s
          JOIN nodes n ON n.id = s.node_id
-         WHERE s.embedding IS NULL
-            OR s.embedding_model IS NULL
-            OR s.embedding_model != ?`
+         LEFT JOIN symbol_embeddings e ON e.node_id = s.node_id
+         WHERE e.embedding_model IS NULL
+            OR e.embedding_model != ?`
       )
       .all(embeddingModel) as Array<{
       node_id: string;
@@ -2127,8 +2129,8 @@ export class QueryBuilder {
   ): Array<{ nodeId: string; embedding: Buffer }> {
     const rows = this.db
       .prepare(
-        `SELECT node_id, embedding FROM symbol_summaries
-         WHERE embedding IS NOT NULL AND embedding_model = ?`
+        `SELECT node_id, embedding FROM symbol_embeddings
+         WHERE embedding_model = ?`
       )
       .all(embeddingModel) as Array<{ node_id: string; embedding: Buffer }>;
     return rows.map((r) => ({ nodeId: r.node_id, embedding: r.embedding }));
@@ -2141,11 +2143,13 @@ export class QueryBuilder {
   upsertSymbolEmbedding(nodeId: string, embedding: Buffer | Uint8Array, model: string): void {
     this.db
       .prepare(
-        `UPDATE symbol_summaries
-         SET embedding = ?, embedding_model = ?
-         WHERE node_id = ?`
+        `INSERT INTO symbol_embeddings (node_id, embedding, embedding_model)
+         VALUES (?, ?, ?)
+         ON CONFLICT(node_id) DO UPDATE SET
+           embedding = excluded.embedding,
+           embedding_model = excluded.embedding_model`
       )
-      .run(embedding, model, nodeId);
+      .run(nodeId, embedding, model);
   }
 
   // ==========================================================================
diff --git a/src/db/schema.sql b/src/db/schema.sql
index d8d5098f..45030998 100644
--- a/src/db/schema.sql
+++ b/src/db/schema.sql
@@ -86,8 +86,7 @@ CREATE TABLE IF NOT EXISTS co_changes (
     PRIMARY KEY (file_a, file_b),
     CHECK (file_a < file_b)
 );
-CREATE INDEX IF NOT EXISTS idx_co_changes_a ON co_changes(file_a);
-CREATE INDEX IF NOT EXISTS idx_co_changes_b ON co_changes(file_b);
+-- The remaining co-change index (on file_b) is declared below in the indexes section.
 
 -- Unresolved References: References that need resolution after full indexing
 CREATE TABLE IF NOT EXISTS unresolved_refs (
@@ -175,8 +174,8 @@ CREATE INDEX IF NOT EXISTS idx_files_modified_at ON files(modified_at);
 CREATE INDEX IF NOT EXISTS idx_files_commit_count ON files(commit_count DESC);
 CREATE INDEX IF NOT EXISTS idx_files_last_touched ON files(last_touched_ts DESC);
 
--- Co-change indexes (one per side so we can look up either direction efficiently)
-CREATE INDEX IF NOT EXISTS idx_co_changes_a ON co_changes(file_a);
+-- Co-change index for file_b lookups (file_a is covered by the
+-- (file_a, file_b) PK above).
 CREATE INDEX IF NOT EXISTS idx_co_changes_b ON co_changes(file_b);
 
 -- Unresolved refs indexes
@@ -260,10 +259,6 @@ CREATE TABLE IF NOT EXISTS symbol_summaries (
     summary TEXT NOT NULL,
     model TEXT NOT NULL,
     generated_at INTEGER NOT NULL,
-    -- Embeddings of the summary text for semantic search. Float32Array
-    -- bytes (LE), L2-normalised so dot product == cosine similarity.
-    embedding BLOB,
-    embedding_model TEXT,
     -- Role classification (api_endpoint | business_logic | data_model |
     -- util | framework_glue | test_helper | unknown).
     role TEXT,
@@ -271,9 +266,20 @@ CREATE TABLE IF NOT EXISTS symbol_summaries (
     FOREIGN KEY (node_id) REFERENCES nodes(id) ON DELETE CASCADE
 );
 CREATE INDEX IF NOT EXISTS idx_summaries_model ON symbol_summaries(model);
-CREATE INDEX IF NOT EXISTS idx_summaries_embedding_model ON symbol_summaries(embedding_model);
 CREATE INDEX IF NOT EXISTS idx_summaries_role ON symbol_summaries(role);
 
+-- Embeddings live in their own table so common-path summary scans
+-- (FTS-anchor lookups, role filters, freshness checks) don't drag
+-- the 768-dim Float32 BLOB along their page chain. Bytes are LE
+-- Float32Array, L2-normalised so dot product == cosine similarity.
+CREATE TABLE IF NOT EXISTS symbol_embeddings (
+    node_id TEXT PRIMARY KEY,
+    embedding BLOB NOT NULL,
+    embedding_model TEXT NOT NULL,
+    FOREIGN KEY (node_id) REFERENCES symbol_summaries(node_id) ON DELETE CASCADE
+);
+CREATE INDEX IF NOT EXISTS idx_embeddings_model ON symbol_embeddings(embedding_model);
+
 -- Directory-level LLM summaries: one paragraph synthesised from the
 -- symbol summaries inside the directory.
 CREATE TABLE IF NOT EXISTS directory_summaries (
diff --git a/src/index.ts b/src/index.ts
index 2663cf59..c6f50cee 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -57,7 +57,7 @@ import {
 import { buildReviewContext, ReviewContext, ReviewContextOptions } from './review';
 import { LlmClient, LlmEndpointConfig } from './llm/client';
 import { summarizeAll, SUMMARIZABLE_KINDS } from './llm/summarizer';
-import { embedAllSummaries } from './llm/embeddings';
+import { embedAllSummaries, EmbeddingCache } from './llm/embeddings';
 import { askWithCandidates, AskOptions, AskResult } from './llm/ask';
 import { summarizeAllDirectories } from './llm/dir-summarizer';
 import { classifyAllRoles, RoleLabel } from './llm/classifier';
@@ -190,6 +190,11 @@ export class CodeGraph {
   // probing localhost on every sync.
   private detectedLlmConfig: LlmEndpointConfig | null | undefined = undefined;
 
+  // In-memory embedding cache for similarity search. Avoids re-fetching
+  // and re-decoding Float32Array views from SQLite on every query.
+  // Invalidated whenever the underlying embeddings table changes.
+  private embeddingCache = new EmbeddingCache();
+
   private constructor(
     db: DatabaseConnection,
     queries: QueryBuilder,
@@ -475,6 +480,14 @@ export class CodeGraph {
       }
     });
 
+    // The set of embeddings the cache was built from is now stale —
+    // any new symbols extracted in this pass will gain embeddings as
+    // background summarisation runs. Drop the cache so the next
+    // similarity query rebuilds from SQLite.
+    if (result.success && result.filesIndexed > 0) {
+      this.embeddingCache.invalidate();
+    }
+
     // Fire-and-forget background summarisation. Skipped silently when
     // no LLM is configured AND none is auto-detectable on localhost.
     if (result.success && result.filesIndexed > 0 && options.summarize !== false) {
@@ -584,6 +597,13 @@ export class CodeGraph {
       }
     });
 
+    // Drop the embedding cache if anything actually moved. New
+    // embeddings for added/modified files will be regenerated by the
+    // background summarisation pass below.
+    if (result.filesAdded > 0 || result.filesModified > 0 || result.filesRemoved > 0) {
+      this.embeddingCache.invalidate();
+    }
+
     // Fire-and-forget background summarisation when files actually
     // changed. No-op on cold sync where nothing was added/modified.
     if ((result.filesAdded > 0 || result.filesModified > 0) && options.summarize !== false) {
@@ -888,6 +908,11 @@ export class CodeGraph {
             errors: eResult.errors,
             durationMs: eResult.durationMs,
           });
+          // Wrote new vectors — drop the in-memory matrix so the
+          // next similarity query picks them up.
+          if (eResult.generated > 0) {
+            this.embeddingCache.invalidate();
+          }
         }
 
         // Phase-3: roll the symbol summaries up into one paragraph per
@@ -1325,13 +1350,19 @@ export class CodeGraph {
       return ftsResults.slice(0, limit);
     }
 
-    const allEmbeddings = this.queries.getAllEmbeddings(llmConfig.embeddingModel);
-    if (allEmbeddings.length === 0) {
+    const cached = this.embeddingCache.get(this.queries, llmConfig.embeddingModel);
+    if (cached.ids.length === 0) {
       return ftsResults.slice(0, limit);
     }
 
-    const { topKByCosine, reciprocalRankFusion } = await import('./llm/embeddings');
-    const semanticHits = topKByCosine(queryVec, allEmbeddings, Math.max(50, limit * 3));
+    const { topKByCosineMatrix, reciprocalRankFusion } = await import('./llm/embeddings');
+    const semanticHits = topKByCosineMatrix(
+      queryVec,
+      cached.matrix,
+      cached.ids,
+      cached.dim,
+      Math.max(50, limit * 3)
+    );
 
     // Build the two ranking lists for RRF, both keyed by node id.
     const ftsRanked = ftsResults.map((r) => ({ id: r.node.id }));
@@ -1376,15 +1407,21 @@ export class CodeGraph {
     const sourceNode = this.queries.getNodeById(nodeId);
     if (!sourceNode) return [];
 
-    const all = this.queries.getAllEmbeddings(llmConfig.embeddingModel);
-    const sourceRow = all.find((r) => r.nodeId === nodeId);
-    if (!sourceRow) return [];
+    const cached = this.embeddingCache.get(this.queries, llmConfig.embeddingModel);
+    if (cached.ids.length === 0) return [];
+    const sourceIdx = cached.ids.indexOf(nodeId);
+    if (sourceIdx < 0) return [];
 
-    const { bytesToVector, topKByCosine } = await import('./llm/embeddings');
-    const sourceVec = bytesToVector(sourceRow.embedding);
+    const { topKByCosineMatrix } = await import('./llm/embeddings');
+    // Slice the source row out of the flat matrix to use as the query.
+    const sourceVec = cached.matrix.slice(
+      sourceIdx * cached.dim,
+      (sourceIdx + 1) * cached.dim
+    );
     // Skip the source itself by filtering after top-k (cheap with a
     // small post-filter; a larger k+1 lets us guarantee `limit` survivors).
-    const hits = topKByCosine(sourceVec, all, limit + 1).filter((h) => h.nodeId !== nodeId);
+    const hits = topKByCosineMatrix(sourceVec, cached.matrix, cached.ids, cached.dim, limit + 1)
+      .filter((h) => h.nodeId !== nodeId);
 
     const out: SearchResult[] = [];
     for (const hit of hits) {
@@ -1702,6 +1739,7 @@ export class CodeGraph {
    */
   clear(): void {
     this.queries.clear();
+    this.embeddingCache.invalidate();
   }
 
   /**
diff --git a/src/llm/embeddings.ts b/src/llm/embeddings.ts
index 63397e3f..caef5d32 100644
--- a/src/llm/embeddings.ts
+++ b/src/llm/embeddings.ts
@@ -7,8 +7,10 @@
  * way the chat model is — see `detect.ts`.
  *
  * Storage shape: 768-dim (or whatever the model emits) Float32 bytes
- * stored as a BLOB on `symbol_summaries`. L2-normalised at write time
- * so the search-side cosine similarity is a pure dot product.
+ * stored as a BLOB on `symbol_embeddings` (a separate table from
+ * `symbol_summaries` so common-path summary scans don't drag the
+ * BLOB along their page chain). L2-normalised at write time so the
+ * search-side cosine similarity is a pure dot product.
  *
  * No native deps, no in-process inference. The original embeddings
  * removal in #87 was about WASM Zone OOM crashes; this design routes
@@ -182,6 +184,106 @@ export function topKByCosine(
   return heap.sort((a, b) => b.score - a.score);
 }
 
+/**
+ * Top-K cosine search over a flat decoded matrix. Used by the
+ * EmbeddingCache to avoid per-query SQLite fetch + Float32Array
+ * decode. The matrix is `ids.length * dim` floats laid out row-major
+ * (row i for `ids[i]` starts at offset `i * dim`). Returns hits sorted
+ * by descending score; a non-positive `k` yields an empty list.
+ */
+export function topKByCosineMatrix(
+  query: Float32Array,
+  matrix: Float32Array,
+  ids: ReadonlyArray<string>,
+  dim: number,
+  k: number
+): SemanticHit[] {
+  // Without this guard `heap[0]` is undefined below and the scan throws.
+  if (!(k > 0)) return [];
+  const heap: SemanticHit[] = [];
+  const qLen = Math.min(query.length, dim);
+  for (let i = 0; i < ids.length; i++) {
+    const off = i * dim;
+    let score = 0;
+    for (let d = 0; d < qLen; d++) score += matrix[off + d]! * query[d]!;
+    // `heap` is kept ascending, so heap[0] is the weakest retained hit.
+    if (heap.length >= k && !(score > heap[0]!.score)) continue;
+    if (heap.length < k) heap.push({ nodeId: ids[i]!, score });
+    else heap[0] = { nodeId: ids[i]!, score };
+    heap.sort((a, b) => a.score - b.score);
+  }
+  return heap.sort((a, b) => b.score - a.score);
+}
+
+/**
+ * Decoded snapshot handed out by `EmbeddingCache`: every embedding
+ * for one model packed into a flat row-major `Float32Array` matrix,
+ * where row `i` (offset `i * dim`) belongs to `ids[i]`. Avoids
+ * re-fetching from SQLite and re-decoding on every similarity query.
+ *
+ * Lifetime: the owning cache is instance-scoped (one per CodeGraph).
+ * Invalidated by:
+ *   - `indexAll` and `sync` finishing (new embeddings may exist).
+ *   - `clear()` / `clearCoChanges()` (the table was emptied).
+ *   - `embedAllSummaries()` finishing inside the same process.
+ *
+ * Best-effort: a stale snapshot is merely out of date, never wrong.
+ */
+export interface CachedEmbeddings {
+  matrix: Float32Array;
+  ids: string[];
+  dim: number;
+  model: string;
+}
+
+/** Source of raw embedding rows; `EmbeddingCache.get()` reads it on a miss. */
+export interface EmbeddingFetcher {
+  getAllEmbeddings(model: string): Array<{ nodeId: string; embedding: Buffer | Uint8Array }>;
+}
+
+export class EmbeddingCache {
+  private cached: CachedEmbeddings | null = null;
+
+  /**
+   * Look up the decoded embedding matrix for `model`. A hit returns
+   * the stored snapshot as-is; a miss (nothing cached, or cached for
+   * another model) pulls every row from `fetcher`, packs the rows
+   * into one flat matrix and remembers the result. The snapshot
+   * belongs to the cache, so callers must treat it as read-only.
+   */
+  get(fetcher: EmbeddingFetcher, model: string): CachedEmbeddings {
+    const current = this.cached;
+    if (current !== null && current.model === model) return current;
+
+    const rows = fetcher.getAllEmbeddings(model);
+    // The first row fixes the expected width; rows of any other width
+    // (e.g. leftovers from a model upgrade in flight) are dropped so
+    // that `ids[i]` always names row `i` of the packed matrix.
+    const dim = rows.length > 0 ? bytesToVector(rows[0]!.embedding).length : 0;
+    const ids: string[] = [];
+    const packed = new Float32Array(rows.length * dim);
+    for (const entry of rows) {
+      const vec = bytesToVector(entry.embedding);
+      if (vec.length === dim) {
+        packed.set(vec, ids.length * dim);
+        ids.push(entry.nodeId);
+      }
+    }
+
+    // Trim the unused tail only when some rows were skipped.
+    const matrix =
+      ids.length === rows.length ? packed : packed.slice(0, ids.length * dim);
+    const fresh: CachedEmbeddings = { matrix, ids, dim, model };
+    this.cached = fresh;
+    return fresh;
+  }
+
+  /** Forget the snapshot so the next `get()` reloads from the fetcher. */
+  invalidate(): void {
+    this.cached = null;
+  }
+}
+
 /**
  * Reciprocal Rank Fusion: combine FTS (lexical) and semantic rankings
  * into one score. Proven robust default for hybrid search.