diff --git a/CLAUDE.md b/CLAUDE.md
index 71a50c73..f91a3d20 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -98,7 +98,7 @@ SQLite database with:
### Supported Languages
-TypeScript, JavaScript, TSX, JSX, Svelte, Python, Go, Rust, Java, C, C++, C#, PHP, Ruby, Swift, Kotlin, Dart, Liquid, Pascal
+TypeScript, JavaScript, TSX, JSX, Svelte, Python, Go, Rust, Java, C, C++, C#, PHP, Ruby, Swift, Kotlin, Dart, Liquid, Pascal, R
### Node and Edge Types
diff --git a/__tests__/centrality.test.ts b/__tests__/centrality.test.ts
new file mode 100644
index 00000000..e45dc858
--- /dev/null
+++ b/__tests__/centrality.test.ts
@@ -0,0 +1,134 @@
+import { describe, it, expect } from 'vitest';
+import { computePageRank, PR_DAMPING, PR_ITERATIONS } from '../src/centrality';
+
+function asNodes(ids: string[]) {
+ return ids.map((id) => ({ id }));
+}
+
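+// Update rule the assertions below assume: standard PageRank with dangling
+// mass redistributed uniformly. This is a sketch; the real implementation
+// lives in src/centrality and may differ in detail.
+//   r'(v) = (1 - d)/N + d * (danglingMass/N + sum over u->v of r(u)/outdeg(u))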
+describe('computePageRank', () => {
+ it('returns empty result for an empty graph', () => {
+ const r = computePageRank([], []);
+ expect(r.scores.size).toBe(0);
+ expect(r.iterations).toBe(0);
+ });
+
+ it('assigns uniform rank to N isolated nodes', () => {
+ const r = computePageRank(asNodes(['a', 'b', 'c', 'd']), []);
+ expect(r.scores.size).toBe(4);
+ // 4 isolated nodes — all dangling — should each end up with 1/N.
+ for (const v of r.scores.values()) {
+ expect(v).toBeCloseTo(0.25, 6);
+ }
+ });
+
+ it('rewards being reached (sinks accumulate rank)', () => {
+ // a -> b -> c. c has no outgoing, so it accumulates the most.
+ const r = computePageRank(
+ asNodes(['a', 'b', 'c']),
+ [
+ { source: 'a', target: 'b' },
+ { source: 'b', target: 'c' },
+ ]
+ );
+ const a = r.scores.get('a')!;
+ const b = r.scores.get('b')!;
+ const c = r.scores.get('c')!;
+ expect(c).toBeGreaterThan(b);
+ expect(b).toBeGreaterThan(a);
+ });
+
+ it('star: hub ranks above all leaves; leaves are equal', () => {
+ const leaves = ['l1', 'l2', 'l3', 'l4', 'l5', 'l6', 'l7', 'l8', 'l9'];
+ const edges = leaves.map((l) => ({ source: l, target: 'hub' }));
+ const r = computePageRank(asNodes([...leaves, 'hub']), edges);
+ const hub = r.scores.get('hub')!;
+ for (const l of leaves) {
+ const lv = r.scores.get(l)!;
+ expect(hub).toBeGreaterThan(lv);
+ }
+ // Leaves are symmetric — should be within 1e-9.
+ const first = r.scores.get(leaves[0])!;
+ for (const l of leaves.slice(1)) {
+ expect(r.scores.get(l)!).toBeCloseTo(first, 9);
+ }
+ });
+
+ it('cycle: all nodes have approximately equal rank', () => {
+ const r = computePageRank(
+ asNodes(['a', 'b', 'c']),
+ [
+ { source: 'a', target: 'b' },
+ { source: 'b', target: 'c' },
+ { source: 'c', target: 'a' },
+ ]
+ );
+ const a = r.scores.get('a')!;
+ const b = r.scores.get('b')!;
+ const c = r.scores.get('c')!;
+ // Symmetric → all equal at convergence.
+ expect(a).toBeCloseTo(b, 6);
+ expect(b).toBeCloseTo(c, 6);
+ });
+
+ it('total rank sums to ~1 (mass is conserved)', () => {
+ const r = computePageRank(
+ asNodes(['a', 'b', 'c', 'd', 'e']),
+ [
+ { source: 'a', target: 'b' },
+ { source: 'b', target: 'c' },
+ { source: 'd', target: 'c' },
+ { source: 'e', target: 'd' },
+ { source: 'a', target: 'e' },
+ ]
+ );
+ let sum = 0;
+ for (const v of r.scores.values()) sum += v;
+ expect(sum).toBeCloseTo(1, 6);
+ });
+
+ it('preserves mass across two disconnected components', () => {
+ const r = computePageRank(
+ asNodes(['a', 'b', 'c', 'd']),
+ [
+ { source: 'a', target: 'b' },
+ { source: 'c', target: 'd' },
+ ]
+ );
+ let sum = 0;
+ for (const v of r.scores.values()) sum += v;
+ expect(sum).toBeCloseTo(1, 6);
+ // Within each component, the sink ranks above the source.
+ expect(r.scores.get('b')!).toBeGreaterThan(r.scores.get('a')!);
+ expect(r.scores.get('d')!).toBeGreaterThan(r.scores.get('c')!);
+ });
+
+ it('drops edges referencing unknown nodes', () => {
+ // 'ghost' is not in the node set — that edge should be ignored,
+ // not crash and not pollute scores.
+ const r = computePageRank(
+ asNodes(['a', 'b']),
+ [
+ { source: 'a', target: 'b' },
+ { source: 'a', target: 'ghost' },
+ { source: 'ghost', target: 'b' },
+ ]
+ );
+ expect(r.scores.size).toBe(2);
+ expect(r.scores.get('b')!).toBeGreaterThan(r.scores.get('a')!);
+ let sum = 0;
+ for (const v of r.scores.values()) sum += v;
+ expect(sum).toBeCloseTo(1, 6);
+ });
+
+ it('reports iteration count and duration', () => {
+ const r = computePageRank(asNodes(['a', 'b']), [{ source: 'a', target: 'b' }]);
+ expect(r.iterations).toBe(PR_ITERATIONS);
+ expect(r.durationMs).toBeGreaterThanOrEqual(0);
+ });
+
+ it('damping constant is the textbook 0.85', () => {
+ // Sentinel — protects against accidental tuning that would invalidate
+ // the spike findings the PR was justified on.
+ expect(PR_DAMPING).toBe(0.85);
+ });
+});
diff --git a/__tests__/churn.test.ts b/__tests__/churn.test.ts
new file mode 100644
index 00000000..fbe279f6
--- /dev/null
+++ b/__tests__/churn.test.ts
@@ -0,0 +1,208 @@
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import { execFileSync } from 'child_process';
+import {
+ mineChurn,
+ getGitHead,
+ readFileLoc,
+ MAX_FILES_PER_COMMIT,
+ LAST_MINED_CHURN_HEAD_KEY,
+} from '../src/churn';
+
+let HAS_GIT = true;
+try {
+ execFileSync('git', ['--version'], { stdio: 'ignore' });
+} catch {
+ HAS_GIT = false;
+}
+
+let tempDir: string;
+
+function git(...args: string[]): string {
+ return execFileSync('git', args, {
+ cwd: tempDir,
+ encoding: 'utf-8',
+ env: {
+ ...process.env,
+ GIT_AUTHOR_NAME: 'Test',
+ GIT_AUTHOR_EMAIL: 'test@example.com',
+ GIT_COMMITTER_NAME: 'Test',
+ GIT_COMMITTER_EMAIL: 'test@example.com',
+ GIT_AUTHOR_DATE: process.env.GIT_AUTHOR_DATE,
+ GIT_COMMITTER_DATE: process.env.GIT_COMMITTER_DATE,
+ },
+ stdio: ['pipe', 'pipe', 'pipe'],
+ }).trim();
+}
+
+function commitAt(date: string, paths: string[], content?: string) {
+ for (const p of paths) {
+ const abs = path.join(tempDir, p);
+ fs.mkdirSync(path.dirname(abs), { recursive: true });
+ fs.writeFileSync(abs, content ?? `data for ${p} at ${date}\n`);
+ }
+ git('add', ...paths);
+ // Pin both author and committer dates so timestamps are deterministic.
+ process.env.GIT_AUTHOR_DATE = date;
+ process.env.GIT_COMMITTER_DATE = date;
+ git('commit', '-m', `commit at ${date}`);
+ delete process.env.GIT_AUTHOR_DATE;
+ delete process.env.GIT_COMMITTER_DATE;
+}
+
+beforeEach(() => {
+ tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-churn-'));
+ if (HAS_GIT) {
+ git('init', '-q', '-b', 'main');
+ git('config', 'commit.gpgsign', 'false');
+ }
+});
+
+afterEach(() => {
+ delete process.env.GIT_AUTHOR_DATE;
+ delete process.env.GIT_COMMITTER_DATE;
+ fs.rmSync(tempDir, { recursive: true, force: true });
+});
+
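+// Note: mineChurn presumably shells out to git log with NUL-delimited output
+// (something like `git log --name-only -z`; exact flags assumed, not verified
+// here), which is why the spaces/unicode path test below expects no quoting
+// problems.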
+describe.skipIf(!HAS_GIT)('mineChurn', () => {
+ it('returns empty + null head when not in a git repo', () => {
+ const nonGit = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-nogit-'));
+ try {
+ const r = mineChurn(nonGit, new Set(['foo.ts']), null);
+ expect(r.currentHead).toBeNull();
+ expect(r.deltas.size).toBe(0);
+ expect(r.needsFullRescan).toBe(false);
+ } finally {
+ fs.rmSync(nonGit, { recursive: true, force: true });
+ }
+ });
+
+ it('counts commits per indexed file, ignores files not in index', () => {
+ commitAt('2025-01-01T00:00:00', ['a.ts', 'b.ts']);
+ commitAt('2025-01-02T00:00:00', ['a.ts']);
+ commitAt('2025-01-03T00:00:00', ['a.ts', 'b.ts', 'c.ts']);
+
+ const r = mineChurn(tempDir, new Set(['a.ts', 'b.ts']), null);
+ expect(r.deltas.get('a.ts')?.commitCountDelta).toBe(3);
+ expect(r.deltas.get('b.ts')?.commitCountDelta).toBe(2);
+ expect(r.deltas.has('c.ts')).toBe(false);
+ });
+
+ it('records first-seen / last-touched as min/max of commit timestamps', () => {
+ commitAt('2025-01-01T00:00:00Z', ['a.ts']);
+ commitAt('2025-06-01T00:00:00Z', ['a.ts']);
+ commitAt('2025-12-01T00:00:00Z', ['a.ts']);
+
+ const r = mineChurn(tempDir, new Set(['a.ts']), null);
+ const d = r.deltas.get('a.ts')!;
+ // 2025-01-01 UTC = 1735689600
+ expect(d.firstSeenTs).toBe(1735689600);
+ // 2025-12-01 UTC = 1764547200
+ expect(d.lastTouchedTs).toBe(1764547200);
+ });
+
+ it('skips commits touching more than MAX_FILES_PER_COMMIT files', () => {
+ const bigBatch: string[] = [];
+ for (let i = 0; i < MAX_FILES_PER_COMMIT + 1; i++) bigBatch.push(`f${i}.ts`);
+ commitAt('2025-01-01T00:00:00Z', bigBatch);
+ // Then a normal commit on one of the same files.
+ commitAt('2025-02-01T00:00:00Z', ['f0.ts']);
+
+ const r = mineChurn(tempDir, new Set(bigBatch), null);
+ // First commit was skipped; only the second one should count.
+ expect(r.deltas.get('f0.ts')?.commitCountDelta).toBe(1);
+ // Files only seen in the skipped commit produce no delta at all.
+ expect(r.deltas.has('f50.ts')).toBe(false);
+ });
+
+ it('incremental mining returns only commits since the given sha', () => {
+ commitAt('2025-01-01T00:00:00Z', ['a.ts']);
+ const sha1 = getGitHead(tempDir)!;
+ commitAt('2025-01-02T00:00:00Z', ['a.ts']);
+ commitAt('2025-01-03T00:00:00Z', ['a.ts']);
+
+ const incr = mineChurn(tempDir, new Set(['a.ts']), sha1);
+ // Only the two commits *after* sha1 should be counted.
+ expect(incr.deltas.get('a.ts')?.commitCountDelta).toBe(2);
+ expect(incr.needsFullRescan).toBe(false);
+ });
+
+ it('returns needsFullRescan=true when sinceSha is unreachable', () => {
+ commitAt('2025-01-01T00:00:00Z', ['a.ts']);
+ const fakeSha = '0'.repeat(40);
+ const r = mineChurn(tempDir, new Set(['a.ts']), fakeSha);
+ expect(r.needsFullRescan).toBe(true);
+ expect(r.deltas.size).toBe(0);
+ expect(r.currentHead).not.toBeNull();
+ });
+
+ it('returns empty deltas when sinceSha equals current head (no-op)', () => {
+ commitAt('2025-01-01T00:00:00Z', ['a.ts']);
+ const head = getGitHead(tempDir)!;
+ const r = mineChurn(tempDir, new Set(['a.ts']), head);
+ expect(r.currentHead).toBe(head);
+ expect(r.deltas.size).toBe(0);
+ expect(r.needsFullRescan).toBe(false);
+ });
+
+ it('handles paths with spaces and unicode safely (NUL-delimited)', () => {
+ commitAt('2025-01-01T00:00:00Z', ['name with space.ts']);
+ commitAt('2025-01-02T00:00:00Z', ['ünïcødë.ts']);
+
+ const r = mineChurn(
+ tempDir,
+ new Set(['name with space.ts', 'ünïcødë.ts']),
+ null
+ );
+ expect(r.deltas.get('name with space.ts')?.commitCountDelta).toBe(1);
+ expect(r.deltas.get('ünïcødë.ts')?.commitCountDelta).toBe(1);
+ });
+
+ it('LAST_MINED_CHURN_HEAD_KEY is stable (used as project_metadata key)', () => {
+ expect(LAST_MINED_CHURN_HEAD_KEY).toBe('last_mined_churn_head');
+ });
+});
+
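+// LOC rule the cases below pin down (a sketch of the assumed counting logic):
+//   loc = (content.match(/\n/g)?.length ?? 0)
+//       + (content.length > 0 && !content.endsWith('\n') ? 1 : 0)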
+describe('readFileLoc', () => {
+ it('returns 0 for an empty file', () => {
+ const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-loc-'));
+ try {
+ const f = path.join(dir, 'empty.txt');
+ fs.writeFileSync(f, '');
+ expect(readFileLoc(dir, 'empty.txt')).toBe(0);
+ } finally {
+ fs.rmSync(dir, { recursive: true, force: true });
+ }
+ });
+
+ it('counts newline-terminated lines', () => {
+ const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-loc-'));
+ try {
+ fs.writeFileSync(path.join(dir, 'x.txt'), 'a\nb\nc\n');
+ expect(readFileLoc(dir, 'x.txt')).toBe(3);
+ } finally {
+ fs.rmSync(dir, { recursive: true, force: true });
+ }
+ });
+
+ it('counts a final no-newline chunk as one extra line', () => {
+ const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-loc-'));
+ try {
+ fs.writeFileSync(path.join(dir, 'x.txt'), 'a\nb\nc');
+ expect(readFileLoc(dir, 'x.txt')).toBe(3);
+ } finally {
+ fs.rmSync(dir, { recursive: true, force: true });
+ }
+ });
+
+ it('returns 0 for a missing file (does not throw)', () => {
+ const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-loc-'));
+ try {
+ expect(readFileLoc(dir, 'no-such-file.txt')).toBe(0);
+ } finally {
+ fs.rmSync(dir, { recursive: true, force: true });
+ }
+ });
+});
diff --git a/__tests__/codegraphignore.test.ts b/__tests__/codegraphignore.test.ts
new file mode 100644
index 00000000..4d7e58c5
--- /dev/null
+++ b/__tests__/codegraphignore.test.ts
@@ -0,0 +1,168 @@
+/**
+ * .codegraphignore Tests
+ *
+ * Regression test for the bug where the .codegraphignore marker file was
+ * honored by the filesystem-walk fallback (`scanDirectoryWalk`) but
+ * silently ignored by the git fast path (`getGitVisibleFiles` and
+ * `getGitChangedFiles`). Same project gave different file sets depending
+ * on whether `.git` existed.
+ */
+
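+// Shape of the presumed fix (a sketch; the real logic lives in src/extraction):
+// after listing files via git (tracked + untracked, e.g. `git ls-files -co
+// --exclude-standard`), drop every path that has a `.codegraphignore` marker
+// in an ancestor directory, checked on the filesystem so untracked markers
+// still work. `ancestorDirsOf` below is a hypothetical helper:
+//   files.filter((f) => !ancestorDirsOf(f).some((d) =>
+//     fs.existsSync(path.join(root, d, '.codegraphignore'))));
+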
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { execFileSync } from 'child_process';
+import { scanDirectory } from '../src/extraction';
+import { DEFAULT_CONFIG, CodeGraphConfig } from '../src/types';
+import CodeGraph from '../src/index';
+
+function tempDir(prefix: string): string {
+ return fs.mkdtempSync(path.join(os.tmpdir(), prefix));
+}
+
+function git(cwd: string, ...args: string[]) {
+ execFileSync('git', args, { cwd, stdio: 'pipe' });
+}
+
+const config: CodeGraphConfig = {
+ ...DEFAULT_CONFIG,
+ include: ['**/*.ts'],
+ exclude: [],
+};
+
+describe('.codegraphignore marker (bug #3)', () => {
+ describe('git fast path', () => {
+ let dir: string;
+
+ beforeEach(() => {
+ dir = tempDir('codegraph-ignore-git-');
+ git(dir, 'init');
+ git(dir, 'config', 'user.email', 'test@test.com');
+ git(dir, 'config', 'user.name', 'Test');
+ // Pin branch name for determinism across git defaults
+ git(dir, 'symbolic-ref', 'HEAD', 'refs/heads/main');
+
+ fs.mkdirSync(path.join(dir, 'src'));
+ fs.mkdirSync(path.join(dir, 'vendor'));
+ fs.mkdirSync(path.join(dir, 'vendor', 'lib'));
+ fs.writeFileSync(path.join(dir, 'src', 'app.ts'), 'export const a = 1;');
+ fs.writeFileSync(path.join(dir, 'vendor', 'pkg.ts'), 'export const v = 1;');
+ fs.writeFileSync(path.join(dir, 'vendor', 'lib', 'sub.ts'), 'export const s = 1;');
+ // Mark vendor/ as ignored
+ fs.writeFileSync(path.join(dir, 'vendor', '.codegraphignore'), '');
+
+ git(dir, 'add', '-A');
+ git(dir, 'commit', '-m', 'initial');
+ });
+
+ afterEach(() => {
+ if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
+ });
+
+ it('scanDirectory honors .codegraphignore on the git fast path', () => {
+ const files = scanDirectory(dir, config);
+ expect(files).toContain('src/app.ts');
+ expect(files).not.toContain('vendor/pkg.ts');
+ expect(files).not.toContain('vendor/lib/sub.ts');
+ });
+
+ it('marker at project root excludes everything', () => {
+ fs.writeFileSync(path.join(dir, '.codegraphignore'), '');
+ // Need to add it to git so ls-files sees it (or rely on -o)
+ git(dir, 'add', '-A');
+ git(dir, 'commit', '-m', 'add root marker');
+ const files = scanDirectory(dir, config);
+ expect(files).toEqual([]);
+ });
+
+ it('marker in nested subdir does not affect siblings', () => {
+ // Add another sibling subdir without a marker
+ fs.mkdirSync(path.join(dir, 'libs'));
+ fs.writeFileSync(path.join(dir, 'libs', 'util.ts'), 'export const u = 1;');
+ git(dir, 'add', '-A');
+ git(dir, 'commit', '-m', 'add libs');
+
+ const files = scanDirectory(dir, config);
+ expect(files).toContain('src/app.ts');
+ expect(files).toContain('libs/util.ts');
+ expect(files).not.toContain('vendor/pkg.ts');
+ });
+
+ it('respects marker added after initial commit (untracked marker)', () => {
+ // The marker file itself need not be committed — it can be a local
+ // override. Add marker AFTER commit, do not commit it.
+ fs.mkdirSync(path.join(dir, 'generated'));
+ fs.writeFileSync(path.join(dir, 'generated', 'gen.ts'), 'export const g = 1;');
+ fs.writeFileSync(path.join(dir, 'generated', '.codegraphignore'), '');
+ // The .ts file is untracked but visible via `git ls-files -o`.
+ // The marker is also untracked — we still detect it via fs check.
+
+ const files = scanDirectory(dir, config);
+ expect(files).not.toContain('generated/gen.ts');
+ });
+ });
+
+ describe('parity with non-git fallback (filesystem walk)', () => {
+ let dir: string;
+
+ beforeEach(() => {
+ dir = tempDir('codegraph-ignore-walk-');
+ fs.mkdirSync(path.join(dir, 'src'));
+ fs.mkdirSync(path.join(dir, 'vendor'));
+ fs.writeFileSync(path.join(dir, 'src', 'app.ts'), 'export const a = 1;');
+ fs.writeFileSync(path.join(dir, 'vendor', 'pkg.ts'), 'export const v = 1;');
+ fs.writeFileSync(path.join(dir, 'vendor', '.codegraphignore'), '');
+ });
+
+ afterEach(() => {
+ if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
+ });
+
+ it('non-git project also honors the marker (sanity / pre-existing behavior)', () => {
+ const files = scanDirectory(dir, config);
+ expect(files).toContain('src/app.ts');
+ expect(files).not.toContain('vendor/pkg.ts');
+ });
+ });
+
+ describe('sync git path (getGitChangedFiles)', () => {
+ let dir: string;
+ let cg: CodeGraph;
+
+ beforeEach(async () => {
+ dir = tempDir('codegraph-ignore-sync-');
+ git(dir, 'init');
+ git(dir, 'config', 'user.email', 'test@test.com');
+ git(dir, 'config', 'user.name', 'Test');
+ git(dir, 'symbolic-ref', 'HEAD', 'refs/heads/main');
+
+ fs.mkdirSync(path.join(dir, 'src'));
+ fs.mkdirSync(path.join(dir, 'vendor'));
+ fs.writeFileSync(path.join(dir, 'src', 'app.ts'), 'export const a = 1;');
+ fs.writeFileSync(path.join(dir, 'vendor', '.codegraphignore'), '');
+
+ git(dir, 'add', '-A');
+ git(dir, 'commit', '-m', 'initial');
+
+ cg = CodeGraph.initSync(dir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+ });
+
+ afterEach(() => {
+ if (cg) cg.destroy();
+ if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
+ });
+
+ it('sync ignores changes inside marker dirs', async () => {
+ // Add a new file under vendor/ — should NOT be picked up by sync.
+ fs.writeFileSync(path.join(dir, 'vendor', 'leaked.ts'), 'export const x = 1;');
+ // Also add a real change to confirm sync still runs.
+ fs.writeFileSync(path.join(dir, 'src', 'app.ts'), 'export const a = 2;');
+
+ const result = await cg.sync();
+ expect(result.changedFilePaths).toContain('src/app.ts');
+ expect(result.changedFilePaths ?? []).not.toContain('vendor/leaked.ts');
+ });
+ });
+});
diff --git a/__tests__/config-refs.test.ts b/__tests__/config-refs.test.ts
new file mode 100644
index 00000000..ab1a63e4
--- /dev/null
+++ b/__tests__/config-refs.test.ts
@@ -0,0 +1,288 @@
+/**
+ * Config-refs tests: parser unit tests + end-to-end through CodeGraph.
+ */
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import { extractConfigRefs } from '../src/config-refs';
+import CodeGraph from '../src/index';
+
+let testDir: string;
+let cg: CodeGraph | null = null;
+
+function write(rel: string, content: string) {
+ const abs = path.join(testDir, rel);
+ fs.mkdirSync(path.dirname(abs), { recursive: true });
+ fs.writeFileSync(abs, content);
+}
+
+beforeEach(() => {
+ testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-config-'));
+});
+
+afterEach(() => {
+ if (cg) {
+ cg.destroy();
+ cg = null;
+ }
+ if (fs.existsSync(testDir)) fs.rmSync(testDir, { recursive: true, force: true });
+});
+
+// ============================================================================
+// Pure parser tests (no CodeGraph)
+// ============================================================================
+
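+// Pattern shape the TS/JS cases below assume (a sketch; the real per-language
+// PATTERNS table lives in src/config-refs and may differ):
+//   /process\.env(?:\.([A-Z][A-Z0-9_]*)\b|\[["']([A-Z][A-Z0-9_]*)["']\])/g
+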
+describe('extractConfigRefs', () => {
+ it('extracts process.env.X from TS', () => {
+ write('a.ts', `const port = process.env.OBSIDIAN_PORT;\n`);
+ const refs = extractConfigRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ expect(refs.length).toBe(1);
+ expect(refs[0]!.configKey).toBe('OBSIDIAN_PORT');
+ expect(refs[0]!.line).toBe(1);
+ });
+
+ it('extracts process.env["X"] from JS', () => {
+ write('a.js', `module.exports = { port: process.env["MY_KEY"] };\n`);
+ const refs = extractConfigRefs(testDir, [{ path: 'a.js', language: 'javascript' }], () => null);
+ expect(refs.map((r) => r.configKey)).toEqual(['MY_KEY']);
+ });
+
+ it('extracts os.getenv / os.environ from Python', () => {
+ write(
+ 'a.py',
+ [
+ `import os`,
+ `port = os.getenv("PYTHON_PORT")`,
+ `host = os.environ.get("PYTHON_HOST")`,
+ `path = os.environ["PYTHON_PATH"]`,
+ `name = getenv("PYTHON_NAME")`,
+ ].join('\n')
+ );
+ const refs = extractConfigRefs(testDir, [{ path: 'a.py', language: 'python' }], () => null);
+ expect(new Set(refs.map((r) => r.configKey))).toEqual(
+ new Set(['PYTHON_PORT', 'PYTHON_HOST', 'PYTHON_PATH', 'PYTHON_NAME'])
+ );
+ });
+
+ it('extracts os.Getenv / os.LookupEnv from Go', () => {
+ write(
+ 'a.go',
+ [
+ `package main`,
+ `import "os"`,
+ `var Port = os.Getenv("GO_PORT")`,
+ `var Host, _ = os.LookupEnv("GO_HOST")`,
+ ].join('\n')
+ );
+ const refs = extractConfigRefs(testDir, [{ path: 'a.go', language: 'go' }], () => null);
+ expect(new Set(refs.map((r) => r.configKey))).toEqual(new Set(['GO_PORT', 'GO_HOST']));
+ });
+
+ it('extracts ENV[...] / ENV.fetch from Ruby', () => {
+ write('a.rb', `port = ENV["RUBY_PORT"]\nhost = ENV.fetch("RUBY_HOST")\n`);
+ const refs = extractConfigRefs(testDir, [{ path: 'a.rb', language: 'ruby' }], () => null);
+ expect(new Set(refs.map((r) => r.configKey))).toEqual(new Set(['RUBY_PORT', 'RUBY_HOST']));
+ });
+
+ it('extracts env!/std::env::var from Rust', () => {
+ write(
+ 'a.rs',
+ [
+ `let port = env!("RUST_PORT");`,
+ `let host = std::env::var("RUST_HOST").unwrap();`,
+ ].join('\n')
+ );
+ const refs = extractConfigRefs(testDir, [{ path: 'a.rs', language: 'rust' }], () => null);
+ expect(new Set(refs.map((r) => r.configKey))).toEqual(new Set(['RUST_PORT', 'RUST_HOST']));
+ });
+
+ it('extracts System.getenv from Java/Kotlin', () => {
+ write('A.java', `String port = System.getenv("JAVA_PORT");\n`);
+ const refs = extractConfigRefs(testDir, [{ path: 'A.java', language: 'java' }], () => null);
+ expect(refs.map((r) => r.configKey)).toEqual(['JAVA_PORT']);
+ });
+
+  it('only matches UPPER_CASE keys (skips camelCase/lower-case identifiers)', () => {
+ write('a.ts', `const x = process.env.somethingDynamic;\nconst y = process.env.GOOD_KEY;\n`);
+ const refs = extractConfigRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ expect(refs.map((r) => r.configKey)).toEqual(['GOOD_KEY']);
+ });
+
+ it('skips files in unsupported languages without crashing', () => {
+ write('a.swift', `let port = ProcessInfo.processInfo.environment["SWIFT_PORT"]\n`);
+ const refs = extractConfigRefs(testDir, [{ path: 'a.swift', language: 'swift' }], () => null);
+ // Swift not in PATTERNS for v1.
+ expect(refs).toEqual([]);
+ });
+
+ it('captures the correct 1-indexed line number', () => {
+ write(
+ 'a.ts',
+ [
+ `// line 1`,
+ `// line 2`,
+ `const x = process.env.LINE_THREE_KEY;`,
+ `// line 4`,
+ `const y = process.env.LINE_FIVE_KEY;`,
+ ].join('\n')
+ );
+ const refs = extractConfigRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ expect(refs).toEqual([
+ expect.objectContaining({ configKey: 'LINE_THREE_KEY', line: 3 }),
+ expect.objectContaining({ configKey: 'LINE_FIVE_KEY', line: 5 }),
+ ]);
+ });
+
+ it('threads the resolveEnclosing closure correctly', () => {
+ write('a.ts', `const x = process.env.FOO;\n`);
+ const calls: Array<[string, number]> = [];
+ extractConfigRefs(
+ testDir,
+ [{ path: 'a.ts', language: 'typescript' }],
+ (filePath, line) => {
+ calls.push([filePath, line]);
+ return 'fake-node-id';
+ }
+ );
+ expect(calls).toEqual([['a.ts', 1]]);
+ });
+
+ it('survives a missing file (skips, no throw)', () => {
+ const refs = extractConfigRefs(
+ testDir,
+ [{ path: 'does-not-exist.ts', language: 'typescript' }],
+ () => null
+ );
+ expect(refs).toEqual([]);
+ });
+});
+
+// ============================================================================
+// End-to-end through CodeGraph
+// ============================================================================
+
+describe('CodeGraph config refs', () => {
+ it('persists env reads after indexAll and resolves enclosing function', async () => {
+ write(
+ 'src/server.ts',
+ [
+ `export function start() {`,
+ ` const port = process.env.OBSIDIAN_PORT ?? 8080;`,
+ ` return port;`,
+ `}`,
+ ``,
+ `export function getApiKey() {`,
+ ` return process.env.OBSIDIAN_API_KEY;`,
+ `}`,
+ ``,
+ `// top-level read`,
+ `export const HOST = process.env.OBSIDIAN_HOST;`,
+ ].join('\n')
+ );
+ cg = CodeGraph.initSync(testDir, {
+ config: { include: ['**/*.ts'], exclude: [] },
+ });
+ await cg.indexAll();
+
+ // All three keys should be visible.
+ const keys = cg.getConfigKeys({ configKind: 'env' });
+ expect(keys.map((k) => k.configKey).sort()).toEqual([
+ 'OBSIDIAN_API_KEY',
+ 'OBSIDIAN_HOST',
+ 'OBSIDIAN_PORT',
+ ]);
+
+ // The OBSIDIAN_PORT read should be attributed to `start`.
+ const portSites = cg.getConfigRefsByKey('OBSIDIAN_PORT');
+ expect(portSites.length).toBe(1);
+ expect(portSites[0]!.sourceName).toBe('start');
+
+ // The HOST read is at the top level — sourceName should be null.
+ const hostSites = cg.getConfigRefsByKey('OBSIDIAN_HOST');
+ expect(hostSites[0]!.sourceName).toBeNull();
+ });
+
+ it('reverse view: getConfigKeysForNode returns keys read by a function', async () => {
+ write(
+ 'src/a.ts',
+ [
+ `export function loadConfig() {`,
+ ` const a = process.env.KEY_A;`,
+ ` const b = process.env.KEY_B;`,
+ ` return { a, b };`,
+ `}`,
+ ].join('\n')
+ );
+ cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+
+ const node = cg.getNodesInFile('src/a.ts').find((n) => n.name === 'loadConfig')!;
+ const keys = cg.getConfigKeysForNode(node.id).map((r) => r.configKey).sort();
+ expect(keys).toEqual(['KEY_A', 'KEY_B']);
+ });
+
+ it('respects enableConfigRefs=false', async () => {
+ write('src/a.ts', `export const PORT = process.env.PORT;\n`);
+ cg = CodeGraph.initSync(testDir, {
+ config: { include: ['**/*.ts'], exclude: [], enableConfigRefs: false },
+ });
+ await cg.indexAll();
+ expect(cg.getConfigKeys()).toEqual([]);
+ });
+
+ it('incremental sync replaces refs for changed files only', async () => {
+ write('src/a.ts', `export const A = process.env.OLD_KEY;\n`);
+ write('src/b.ts', `export const B = process.env.UNCHANGED_KEY;\n`);
+ cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+ expect(cg.getConfigKeys().map((k) => k.configKey).sort()).toEqual([
+ 'OLD_KEY',
+ 'UNCHANGED_KEY',
+ ]);
+
+ // Edit only a.ts — UNCHANGED_KEY should still be there.
+ write('src/a.ts', `export const A = process.env.NEW_KEY;\n`);
+ await cg.sync();
+
+ const keys = cg.getConfigKeys().map((k) => k.configKey).sort();
+ expect(keys).toContain('NEW_KEY');
+ expect(keys).toContain('UNCHANGED_KEY');
+ expect(keys).not.toContain('OLD_KEY');
+ });
+
+ it('drops refs when a file is edited to remove its last env read', async () => {
+ // Regression for the empty-rows early-return data-corruption bug:
+ // applyConfigRefs([]) used to short-circuit without deleting the
+ // stale rows for the file. The sync path now explicitly invalidates
+ // rows for every changed file *before* extracting, regardless of
+ // whether the new content has any reads.
+ write('src/a.ts', `export const PORT = process.env.REMOVED_KEY;\n`);
+ cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+ expect(cg.getConfigKeys().some((k) => k.configKey === 'REMOVED_KEY')).toBe(true);
+
+ // Edit a.ts to remove the env read entirely (no remaining reads).
+ write('src/a.ts', `export const PORT = 8080; // no env read here\n`);
+ await cg.sync();
+
+ expect(cg.getConfigKeys().some((k) => k.configKey === 'REMOVED_KEY')).toBe(false);
+ });
+
+ it('drops refs for files removed between syncs', async () => {
+ write('src/a.ts', `export const A = process.env.GOING_AWAY;\n`);
+ cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+ expect(cg.getConfigKeys().some((k) => k.configKey === 'GOING_AWAY')).toBe(true);
+
+ fs.unlinkSync(path.join(testDir, 'src/a.ts'));
+ await cg.sync();
+
+ expect(cg.getConfigKeys().some((k) => k.configKey === 'GOING_AWAY')).toBe(false);
+ });
+
+ // (Removed: a defensive test for the v4-migration-collision bug class.
+ // With file-based migrations (NNN-name.ts), two PRs claiming the same
+ // version produces a filesystem-level conflict, so the silent skip the
+ // defensive guard protected against can no longer happen.)
+});
diff --git a/__tests__/context.test.ts b/__tests__/context.test.ts
index 52dae1fe..9a0614aa 100644
--- a/__tests__/context.test.ts
+++ b/__tests__/context.test.ts
@@ -210,6 +210,19 @@ export function validateEmail(email: string): boolean {
expect(result.nodes.size).toBeLessThanOrEqual(5);
});
+
+ it('should clamp absurd searchLimit/maxNodes values to safe upper bounds', async () => {
+ // Without clamping, the internal `findNodesByExactName` query would
+ // request `searchLimit * 5` rows — passing 1e9 here would blow out
+ // memory. The call should complete in normal time and not return more
+ // than the hard cap on maxNodes (1000).
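+    // (Presumed clamp shape, a sketch: maxNodes = Math.min(maxNodes, 1000),
+    // with searchLimit capped similarly before the `* 5` row fan-out.)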
+ const result = await cg.findRelevantContext('function', {
+ searchLimit: 1_000_000_000,
+ maxNodes: 1_000_000_000,
+ traversalDepth: 1_000,
+ });
+ expect(result.nodes.size).toBeLessThanOrEqual(1000);
+ });
});
describe('buildContext()', () => {
diff --git a/__tests__/db-perf.test.ts b/__tests__/db-perf.test.ts
new file mode 100644
index 00000000..256cf92c
--- /dev/null
+++ b/__tests__/db-perf.test.ts
@@ -0,0 +1,161 @@
+/**
+ * DB Performance / Correctness Tests
+ *
+ * Regression tests for three changes:
+ * 1. Batch `getNodesByIds` collapses graph-traversal N+1 reads.
+ * 2. `insertNode` invalidates the LRU cache so INSERT OR REPLACE
+ * doesn't serve a stale cached row on next `getNodeById`.
+ * 3. `runMaintenance` runs `PRAGMA optimize` + `wal_checkpoint(PASSIVE)`
+ * after indexAll/sync without throwing.
+ */
+
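+// Chunked-IN sketch of what the batch lookup presumably does internally
+// (the 500-row chunk size is exercised indirectly by the 1500-node test below;
+// the exact implementation may differ; chunksOf is a hypothetical helper):
+//   for (const chunk of chunksOf(ids, 500)) {
+//     const qs = chunk.map(() => '?').join(',');
+//     rows.push(...db.prepare(`SELECT * FROM nodes WHERE id IN (${qs})`).all(...chunk));
+//   }
+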
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { DatabaseConnection } from '../src/db';
+import { QueryBuilder } from '../src/db/queries';
+import { Node } from '../src/types';
+
+function makeNode(id: string, name = id): Node {
+ return {
+ id,
+ kind: 'function',
+ name,
+ qualifiedName: name,
+ filePath: 'a.ts',
+ language: 'typescript',
+ startLine: 1,
+ endLine: 1,
+ startColumn: 0,
+ endColumn: 0,
+ updatedAt: Date.now(),
+ };
+}
+
+describe('getNodesByIds (batch lookup)', () => {
+ let dir: string;
+ let db: DatabaseConnection;
+ let q: QueryBuilder;
+
+ beforeEach(() => {
+ dir = fs.mkdtempSync(path.join(os.tmpdir(), 'db-perf-batch-'));
+ db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
+ q = new QueryBuilder(db.getDb());
+ });
+
+ afterEach(() => {
+ db.close();
+ if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
+ });
+
+ it('returns a Map keyed by id, with one entry per existing node', () => {
+ q.insertNodes([makeNode('n1'), makeNode('n2'), makeNode('n3')]);
+ const out = q.getNodesByIds(['n1', 'n2', 'n3']);
+ expect(out.size).toBe(3);
+ expect(out.get('n1')!.name).toBe('n1');
+ expect(out.get('n3')!.name).toBe('n3');
+ });
+
+ it('omits missing IDs from the result map (no nulls, no exceptions)', () => {
+ q.insertNodes([makeNode('n1'), makeNode('n2')]);
+ const out = q.getNodesByIds(['n1', 'missing', 'n2']);
+ expect(out.size).toBe(2);
+ expect(out.has('missing')).toBe(false);
+ expect(out.has('n1')).toBe(true);
+ expect(out.has('n2')).toBe(true);
+ });
+
+ it('handles an empty input array', () => {
+ expect(q.getNodesByIds([]).size).toBe(0);
+ });
+
+ it('handles batches over the SQLite parameter limit (chunking)', () => {
+ // Insert 1500 nodes; the helper chunks at 500 internally.
+ const nodes = Array.from({ length: 1500 }, (_, i) => makeNode(`n${i}`));
+ q.insertNodes(nodes);
+ const ids = nodes.map((n) => n.id);
+ const out = q.getNodesByIds(ids);
+ expect(out.size).toBe(1500);
+ // Spot-check a few from the first / middle / last chunk.
+ expect(out.has('n0')).toBe(true);
+ expect(out.has('n750')).toBe(true);
+ expect(out.has('n1499')).toBe(true);
+ });
+
+ it('serves cache hits from memory and queries only the misses', () => {
+ q.insertNodes([makeNode('n1'), makeNode('n2'), makeNode('n3')]);
+ // Warm the cache for n1 only.
+ q.getNodeById('n1');
+ // Replace the underlying row to make a miss-vs-cache-hit detectable.
+ db.getDb().prepare('UPDATE nodes SET name = ? WHERE id = ?').run('changed', 'n1');
+ const out = q.getNodesByIds(['n1', 'n2']);
+ // The cached n1 (still 'n1', not 'changed') must be returned.
+ expect(out.get('n1')!.name).toBe('n1');
+ expect(out.get('n2')!.name).toBe('n2');
+ });
+});
+
+describe('insertNode cache invalidation', () => {
+ let dir: string;
+ let db: DatabaseConnection;
+ let q: QueryBuilder;
+
+ beforeEach(() => {
+ dir = fs.mkdtempSync(path.join(os.tmpdir(), 'db-perf-cache-'));
+ db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
+ q = new QueryBuilder(db.getDb());
+ });
+
+ afterEach(() => {
+ db.close();
+ if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
+ });
+
+ it('does not serve a stale cached node after INSERT OR REPLACE', () => {
+ // Regression: insertNode (which uses INSERT OR REPLACE) used to skip
+ // cache invalidation, so the next getNodeById returned the pre-replace
+ // version until LRU eviction.
+ const original = makeNode('n1', 'oldName');
+ q.insertNode(original);
+ const beforeReplace = q.getNodeById('n1');
+ expect(beforeReplace!.name).toBe('oldName');
+
+ // Replace via insertNode (the bug path).
+ q.insertNode({ ...original, name: 'newName', updatedAt: Date.now() });
+ const afterReplace = q.getNodeById('n1');
+ expect(afterReplace!.name).toBe('newName');
+ });
+});
+
+describe('runMaintenance', () => {
+ let dir: string;
+ let db: DatabaseConnection;
+
+ beforeEach(() => {
+ dir = fs.mkdtempSync(path.join(os.tmpdir(), 'db-perf-maint-'));
+ db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
+ });
+
+ afterEach(() => {
+ db.close();
+ if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
+ });
+
+ it('runs without throwing on a fresh database', () => {
+ expect(() => db.runMaintenance()).not.toThrow();
+ });
+
+ it('runs without throwing after writes', () => {
+ const q = new QueryBuilder(db.getDb());
+ q.insertNodes([makeNode('n1'), makeNode('n2')]);
+ expect(() => db.runMaintenance()).not.toThrow();
+ });
+
+ it('swallows failures rather than propagating (best-effort)', () => {
+ // Close the DB so the underlying handle would normally throw on any
+ // exec(). runMaintenance must still not propagate.
+ db.close();
+ expect(() => db.runMaintenance()).not.toThrow();
+ });
+});
diff --git a/__tests__/diversify.test.ts b/__tests__/diversify.test.ts
new file mode 100644
index 00000000..181ee9c5
--- /dev/null
+++ b/__tests__/diversify.test.ts
@@ -0,0 +1,200 @@
+/**
+ * Result Diversification Tests
+ *
+ * Verifies the per-file cap on search results: queries that match many
+ * symbols in one file (the methods of a class) no longer return 10 hits
+ * from one file, but instead surface representative breadth across files.
+ */
+
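+// Assumed algorithm for diversifyByFile(results, limit, perFileCap), pinned
+// down by the unit tests below (a sketch, not the literal implementation):
+//   1. Walk results in score order; take a hit unless its file already has
+//      perFileCap picks, otherwise set it aside.
+//   2. If fewer than `limit` were taken, backfill from the set-aside hits in
+//      score order, so results are reordered but never lost.
+//   perFileCap = 0 disables the cap entirely.
+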
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { DatabaseConnection } from '../src/db';
+import { QueryBuilder } from '../src/db/queries';
+import { diversifyByFile } from '../src/search/query-utils';
+import { Node } from '../src/types';
+
+describe('diversifyByFile (unit)', () => {
+ function r(score: number, name: string, filePath: string) {
+ return { node: { id: name, name, filePath } as Node, score };
+ }
+
+ it('caps consecutive results from the same file at perFileCap', () => {
+ const results = [
+ r(10, 'a1', 'a.ts'),
+ r(9, 'a2', 'a.ts'),
+ r(8, 'a3', 'a.ts'),
+ r(7, 'a4', 'a.ts'),
+ r(6, 'b1', 'b.ts'),
+ ];
+ const out = diversifyByFile(results, 5, 2);
+ expect(out.map((x) => x.node.name)).toEqual(['a1', 'a2', 'b1', 'a3', 'a4']);
+ // First two from a.ts (cap), then b.ts (different file), then backfill.
+ });
+
+ it('preserves overall ranking when no file dominates', () => {
+ const results = [
+ r(10, 'a1', 'a.ts'),
+ r(9, 'b1', 'b.ts'),
+ r(8, 'c1', 'c.ts'),
+ r(7, 'a2', 'a.ts'),
+ ];
+ const out = diversifyByFile(results, 4, 2);
+ expect(out.map((x) => x.node.name)).toEqual(['a1', 'b1', 'c1', 'a2']);
+ });
+
+ it('does not lose results — backfills from skipped when limit not yet filled', () => {
+ // 10 candidates all from one file, limit 5, cap 2: pick 2, backfill 3.
+ const results = Array.from({ length: 10 }, (_, i) =>
+ r(10 - i, `n${i}`, 'a.ts')
+ );
+ const out = diversifyByFile(results, 5, 2);
+ expect(out).toHaveLength(5);
+ expect(out.every((x) => x.node.filePath === 'a.ts')).toBe(true);
+ });
+
+ it('returns the input slice unchanged when perFileCap=0', () => {
+ const results = [
+ r(10, 'a1', 'a.ts'),
+ r(9, 'a2', 'a.ts'),
+ r(8, 'a3', 'a.ts'),
+ ];
+ expect(diversifyByFile(results, 3, 0)).toEqual(results);
+ });
+
+ it('returns input unchanged when results.length <= limit and no reordering needed', () => {
+ const results = [r(10, 'a1', 'a.ts'), r(9, 'a2', 'a.ts')];
+ expect(diversifyByFile(results, 5, 2)).toEqual(results);
+ });
+
+ it('still reorders within limit when results.length === limit but cap rearranges', () => {
+ // Same total count as limit, but the cap reorders to surface peer files
+ // earlier in the list.
+ const results = [
+ r(10, 'a1', 'a.ts'),
+ r(9, 'a2', 'a.ts'),
+ r(8, 'a3', 'a.ts'),
+ r(7, 'a4', 'a.ts'),
+ r(6, 'b1', 'b.ts'),
+ ];
+ const out = diversifyByFile(results, 5, 2);
+ // First 2 from a.ts (cap), then b.ts, then backfill a.ts.
+ expect(out.map((x) => x.node.name)).toEqual(['a1', 'a2', 'b1', 'a3', 'a4']);
+ });
+
+ it('respects the limit even when picked + skipped exceed it', () => {
+ const results = [
+ r(10, 'a1', 'a.ts'),
+ r(9, 'a2', 'a.ts'),
+ r(8, 'a3', 'a.ts'),
+ r(7, 'b1', 'b.ts'),
+ ];
+ const out = diversifyByFile(results, 2, 2);
+ expect(out).toHaveLength(2);
+ expect(out.map((x) => x.node.name)).toEqual(['a1', 'a2']);
+ });
+
+ it('always preserves the top-scoring result at position 0', () => {
+ const results = [
+ r(100, 'top', 'big.ts'),
+ r(50, 'big2', 'big.ts'),
+ r(40, 'big3', 'big.ts'),
+ r(30, 'big4', 'big.ts'),
+ r(20, 'other', 'other.ts'),
+ ];
+ const out = diversifyByFile(results, 3, 2);
+ expect(out[0].node.name).toBe('top');
+ });
+});
+
+describe('searchNodes per-file diversification (integration)', () => {
+ let dir: string;
+ let db: DatabaseConnection;
+ let q: QueryBuilder;
+
+ function makeNode(id: string, name: string, kind: Node['kind'], filePath: string): Node {
+ return {
+ id,
+ kind,
+ name,
+ qualifiedName: `${filePath}::${name}`,
+ filePath,
+ language: 'typescript',
+ startLine: 1,
+ endLine: 1,
+ startColumn: 0,
+ endColumn: 0,
+ updatedAt: Date.now(),
+ };
+ }
+
+ beforeEach(() => {
+ dir = fs.mkdtempSync(path.join(os.tmpdir(), 'diversify-search-'));
+ db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
+ q = new QueryBuilder(db.getDb());
+ // Simulate the "10 methods of one class" scenario: a class plus many
+ // methods all sharing a common token, all in one file. Plus a peer
+ // file with a sibling implementation.
+ const nodes: Node[] = [
+ makeNode('cls', 'DatabaseConnection', 'class', 'src/db.ts'),
+ makeNode('m1', 'connect', 'method', 'src/db.ts'),
+ makeNode('m2', 'disconnect', 'method', 'src/db.ts'),
+ makeNode('m3', 'reconnect', 'method', 'src/db.ts'),
+ makeNode('m4', 'isConnected', 'method', 'src/db.ts'),
+ makeNode('m5', 'connectionString', 'property', 'src/db.ts'),
+ makeNode('peer', 'PoolConnection', 'class', 'src/pool.ts'),
+ makeNode('peer2', 'connectPool', 'function', 'src/pool.ts'),
+ ];
+ q.insertNodes(nodes);
+ });
+
+ afterEach(() => {
+ db.close();
+ if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
+ });
+
+ it('caps results per file at the default (3) so peer files surface', () => {
+ const results = q.searchNodes('connect', { limit: 5 });
+ const fromDbTs = results.filter((r) => r.node.filePath === 'src/db.ts').length;
+ const fromPool = results.filter((r) => r.node.filePath === 'src/pool.ts').length;
+ expect(fromDbTs).toBeLessThanOrEqual(3); // cap
+ expect(fromPool).toBeGreaterThanOrEqual(1); // peer file represented
+ });
+
+ it('honors perFileCap: 0 (disabled) — does not enforce a per-file limit', () => {
+ // Insert a heavy imbalance so dominance is unambiguous: 10 matching
+ // methods in db.ts, only the existing pool.ts entries elsewhere.
+ const heavyDb: Node[] = Array.from({ length: 10 }, (_, i) =>
+ makeNode(`heavy${i}`, `connectVariant${i}`, 'method', 'src/db.ts')
+ );
+ q.insertNodes(heavyDb);
+ const results = q.searchNodes('connect', { limit: 8, perFileCap: 0 });
+ const fromDbTs = results.filter((r) => r.node.filePath === 'src/db.ts').length;
+ expect(fromDbTs).toBeGreaterThan(3);
+ });
+
+ it('honors a higher perFileCap', () => {
+ const results = q.searchNodes('connect', { limit: 6, perFileCap: 5 });
+ const fromDbTs = results.filter((r) => r.node.filePath === 'src/db.ts').length;
+ expect(fromDbTs).toBeLessThanOrEqual(5);
+ });
+
+ it('preserves the top-scoring hit even with diversification', () => {
+ // Class node with the most direct name match is the most relevant —
+ // diversification must never displace it from #1.
+ const results = q.searchNodes('DatabaseConnection', { limit: 3 });
+ expect(results[0].node.name).toBe('DatabaseConnection');
+ });
+
+ it('does not lose results — fills limit by backfilling skipped same-file hits', () => {
+ // If only one file has matches, all results legitimately come from it.
+ // The cap should not cause us to return fewer than `limit` results.
+ const onlyOneFileNodes: Node[] = Array.from({ length: 10 }, (_, i) =>
+ makeNode(`only${i}`, `solo${i}`, 'function', 'src/only.ts')
+ );
+ q.insertNodes(onlyOneFileNodes);
+ const results = q.searchNodes('solo', { limit: 5 });
+ expect(results.length).toBe(5);
+ });
+});
diff --git a/__tests__/edges-unique.test.ts b/__tests__/edges-unique.test.ts
new file mode 100644
index 00000000..49eced53
--- /dev/null
+++ b/__tests__/edges-unique.test.ts
@@ -0,0 +1,166 @@
+/**
+ * Edge Uniqueness Tests
+ *
+ * Regression tests for the bug where `INSERT OR IGNORE INTO edges` was
+ * silently a no-op: the only candidate key was the AUTOINCREMENT id (which
+ * never conflicts), so duplicate edges accumulated on every re-emission /
+ * re-resolution.
+ *
+ * Fix: a UNIQUE index on (source, target, kind, COALESCE(line, -1),
+ * COALESCE(col, -1)) backs a fresh-install schema and is also applied via
+ * migration v4 (with a dedup pass over existing rows).
+ */
+
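+// The presumed index DDL (a sketch reconstructed from the description above;
+// idx_edges_unique is the name the migration test below drops to back-date):
+//   CREATE UNIQUE INDEX IF NOT EXISTS idx_edges_unique
+//     ON edges (source, target, kind, COALESCE(line, -1), COALESCE(col, -1));
+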
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { DatabaseConnection } from '../src/db';
+import { QueryBuilder } from '../src/db/queries';
+import { Edge, Node } from '../src/types';
+import { runMigrations, getCurrentVersion, CURRENT_SCHEMA_VERSION } from '../src/db/migrations';
+
+function tempDb(): { dir: string; db: DatabaseConnection; q: QueryBuilder } {
+ const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-edges-unique-'));
+ const db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
+ const q = new QueryBuilder(db.getDb());
+ return { dir, db, q };
+}
+
+function cleanup(dir: string, db: DatabaseConnection) {
+ db.close();
+ if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
+}
+
+function makeNode(id: string, name: string): Node {
+ return {
+ id,
+ kind: 'function',
+ name,
+ qualifiedName: `f::${name}`,
+ filePath: 'a.ts',
+ language: 'typescript',
+ startLine: 1,
+ endLine: 1,
+ startColumn: 0,
+ endColumn: 0,
+ updatedAt: Date.now(),
+ };
+}
+
+function edgesCount(db: DatabaseConnection): number {
+ const row = db.getDb().prepare('SELECT COUNT(*) as c FROM edges').get() as { c: number };
+ return row.c;
+}
+
+describe('Edge UNIQUE constraint (bug #2)', () => {
+ let dir: string;
+ let db: DatabaseConnection;
+ let q: QueryBuilder;
+
+ beforeEach(() => {
+ ({ dir, db, q } = tempDb());
+ q.insertNodes([makeNode('n1', 'foo'), makeNode('n2', 'bar')]);
+ });
+
+ afterEach(() => cleanup(dir, db));
+
+ it('rejects duplicate (source, target, kind, line, col)', () => {
+ const e: Edge = { source: 'n1', target: 'n2', kind: 'calls', line: 10, column: 5 };
+ q.insertEdge(e);
+ q.insertEdge(e); // INSERT OR IGNORE — should be a no-op now
+ expect(edgesCount(db)).toBe(1);
+ });
+
+ it('treats two NULL line edges as duplicates (COALESCE in unique index)', () => {
+ const e: Edge = { source: 'n1', target: 'n2', kind: 'calls' };
+ q.insertEdge(e);
+ q.insertEdge(e);
+ expect(edgesCount(db)).toBe(1);
+ });
+
+ it('allows same source/target/kind on different lines', () => {
+ q.insertEdge({ source: 'n1', target: 'n2', kind: 'calls', line: 1 });
+ q.insertEdge({ source: 'n1', target: 'n2', kind: 'calls', line: 2 });
+ expect(edgesCount(db)).toBe(2);
+ });
+
+ it('allows same source/target/line on different kinds', () => {
+ q.insertEdge({ source: 'n1', target: 'n2', kind: 'calls', line: 1 });
+ q.insertEdge({ source: 'n1', target: 'n2', kind: 'references', line: 1 });
+ expect(edgesCount(db)).toBe(2);
+ });
+
+ it('insertEdges (batch) dedupes within the same call', () => {
+ const e: Edge = { source: 'n1', target: 'n2', kind: 'calls', line: 1, column: 1 };
+ q.insertEdges([e, e, e]);
+ expect(edgesCount(db)).toBe(1);
+ });
+
+ it('survives the same edge being re-emitted across many cycles', () => {
+ const e: Edge = { source: 'n1', target: 'n2', kind: 'calls', line: 1 };
+ for (let i = 0; i < 100; i++) {
+ q.insertEdge(e);
+ }
+ expect(edgesCount(db)).toBe(1);
+ });
+});
+
+describe('Migration v4: dedup existing edges', () => {
+ let dir: string;
+ let dbPath: string;
+
+ beforeEach(() => {
+ dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-migr-v4-'));
+ dbPath = path.join(dir, 'test.db');
+ });
+
+ afterEach(() => {
+ if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
+ });
+
+ it('collapses pre-existing duplicates and adds the UNIQUE index', () => {
+ // Build a v3-shaped database manually: schema, but simulate a stale
+ // version row + insert duplicates that the missing UNIQUE index let
+ // through. We use the real initialize() path then drop the index +
+ // version row to back-date the DB.
+ const db = DatabaseConnection.initialize(dbPath);
+ db.getDb().exec(`DROP INDEX IF EXISTS idx_edges_unique;`);
+ db.getDb().exec(`DELETE FROM schema_versions;`);
+ db.getDb().prepare(
+ 'INSERT INTO schema_versions (version, applied_at, description) VALUES (3, ?, ?)'
+ ).run(Date.now(), 'simulated v3');
+
+ const q = new QueryBuilder(db.getDb());
+ q.insertNodes([makeNode('n1', 'foo'), makeNode('n2', 'bar')]);
+ // Force-insert duplicates via raw SQL (bypassing the constraint that
+ // is now absent). Three rows that should collapse to one.
+ const stmt = db.getDb().prepare(
+ 'INSERT INTO edges (source, target, kind, line, col) VALUES (?, ?, ?, ?, ?)'
+ );
+ stmt.run('n1', 'n2', 'calls', 10, 5);
+ stmt.run('n1', 'n2', 'calls', 10, 5);
+ stmt.run('n1', 'n2', 'calls', 10, 5);
+ // And one with NULL line/col, also duplicated
+ stmt.run('n1', 'n2', 'references', null, null);
+ stmt.run('n1', 'n2', 'references', null, null);
+
+ expect(edgesCount(db)).toBe(5);
+ expect(getCurrentVersion(db.getDb())).toBe(3);
+
+ // Run migrations forward
+ runMigrations(db.getDb(), 3);
+
+ expect(getCurrentVersion(db.getDb())).toBe(CURRENT_SCHEMA_VERSION);
+ expect(CURRENT_SCHEMA_VERSION).toBeGreaterThanOrEqual(4);
+ // 3 calls dups → 1, 2 references dups → 1
+ expect(edgesCount(db)).toBe(2);
+
+ // Now the constraint is enforced: another duplicate insert is a no-op.
+ const q2 = new QueryBuilder(db.getDb());
+ q2.insertEdge({ source: 'n1', target: 'n2', kind: 'calls', line: 10, column: 5 });
+ expect(edgesCount(db)).toBe(2);
+
+ db.close();
+ });
+});
diff --git a/__tests__/extraction-resolution-accuracy.test.ts b/__tests__/extraction-resolution-accuracy.test.ts
new file mode 100644
index 00000000..f78f3d76
--- /dev/null
+++ b/__tests__/extraction-resolution-accuracy.test.ts
@@ -0,0 +1,266 @@
+/**
+ * Extraction & Resolution Accuracy Tests
+ *
+ * Regression tests for three accuracy bugs fixed in one PR:
+ * 1. Parse-retry comment strip was hardcoded to `//`, no-op on Python/Ruby/etc.
+ * 2. Framework route extractors ran regex over raw file content, matching
+ * examples in docstrings/comments as real routes.
+ * 3. UTF-8 BOM caused spurious "modified" hash mismatches between editors.
+ */
+
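+// Contract the route tests below rely on (a sketch of the assumed behavior):
+// stripCommentsForRegex blanks comment/docstring bodies but preserves every
+// newline, so a regex match offset still maps back to the original line, e.g.
+//   const lineOf = (s: string, i: number) => s.slice(0, i).split('\n').length;
+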
+import { describe, it, expect } from 'vitest';
+import { stripBom, stripCommentLinesForRetry, stripCommentsForRegex } from '../src/utils';
+import { hashContent } from '../src/extraction';
+import { flaskResolver, fastapiResolver, djangoResolver } from '../src/resolution/frameworks/python';
+import { expressResolver } from '../src/resolution/frameworks/express';
+import { aspnetResolver } from '../src/resolution/frameworks/csharp';
+import { rustResolver } from '../src/resolution/frameworks/rust';
+import { laravelResolver } from '../src/resolution/frameworks/laravel';
+
+describe('UTF-8 BOM normalization (bug #5)', () => {
+ it('stripBom removes leading U+FEFF', () => {
+    expect(stripBom('\uFEFFhello')).toBe('hello');
+    expect(stripBom('hello')).toBe('hello');
+    expect(stripBom('\uFEFF')).toBe('');
+ });
+
+ it('stripBom only removes leading BOM, not embedded ones', () => {
+    expect(stripBom('a\uFEFFb')).toBe('a\uFEFFb');
+ });
+
+ it('hashContent treats BOM and no-BOM as identical', () => {
+    const withBom = '\uFEFFexport function hello() { return 42; }';
+ const withoutBom = 'export function hello() { return 42; }';
+ expect(hashContent(withBom)).toBe(hashContent(withoutBom));
+ });
+});
+
+describe('Per-language comment-line stripping (bug #1)', () => {
+ it('strips `#` lines for Python', () => {
+ const input = ['# CHECK: foo', 'def x():', ' pass'].join('\n');
+ const out = stripCommentLinesForRetry(input, 'python');
+ expect(out.split('\n')).toEqual(['', 'def x():', ' pass']);
+ });
+
+ it('strips `#` lines for Ruby', () => {
+ const input = ['# top comment', 'def x; end'].join('\n');
+ const out = stripCommentLinesForRetry(input, 'ruby');
+ expect(out.split('\n')).toEqual(['', 'def x; end']);
+ });
+
+ it('strips `//` lines for TypeScript', () => {
+ const input = ['// header', 'function x() {}'].join('\n');
+ const out = stripCommentLinesForRetry(input, 'typescript');
+ expect(out.split('\n')).toEqual(['', 'function x() {}']);
+ });
+
+  it('strips both `//` and `#` lines for PHP', () => {
+    const input = ['// js-style', '# perl-style', '<?php $x = 1;'].join('\n');
+    const out = stripCommentLinesForRetry(input, 'php');
+    expect(out.split('\n')).toEqual(['', '', '<?php $x = 1;']);
+  });
+
+  it('returns the input unchanged for unknown languages', () => {
+    const input = '// looks like a comment\ncode';
+    expect(stripCommentLinesForRetry(input, 'unknown-lang')).toBe(input);
+  });
+
+ it('preserves line count so node positions stay correct', () => {
+ const input = ['# c1', 'a', '# c2', 'b'].join('\n');
+ const out = stripCommentLinesForRetry(input, 'python');
+ expect(out.split('\n').length).toBe(input.split('\n').length);
+ });
+
+  it('strips indented `#` comment lines in Python but keeps mid-line trailing comments', () => {
+ // The marker matches optional leading whitespace + `#`, so an indented
+ // pure comment line is correctly stripped. Non-comment code on the same
+ // line as `#` (mid-line comment) is intentionally not stripped here.
+ const input = [' # indented comment', ' pass # trailing'].join('\n');
+ const out = stripCommentLinesForRetry(input, 'python');
+ expect(out.split('\n')).toEqual(['', ' pass # trailing']);
+ });
+});
+
+describe('Framework regex no longer matches docstrings/comments (bug #4)', () => {
+ describe('Flask', () => {
+ it('skips routes inside `#` comments', () => {
+ const content = [
+ 'from flask import Flask',
+ 'app = Flask(__name__)',
+ '# Example: @app.route("/fake")',
+ '@app.route("/real")',
+ 'def real(): pass',
+ ].join('\n');
+ const nodes = flaskResolver.extractNodes!('app.py', content);
+ const paths = nodes.map((n) => n.name);
+ expect(paths).toContain('/real');
+ expect(paths).not.toContain('/fake');
+ });
+
+ it('skips routes inside triple-quoted docstrings', () => {
+ const content = [
+ 'def example():',
+ ' """',
+ ' Usage: @app.route("/fake")',
+ ' """',
+ ' pass',
+ '@app.route("/real")',
+ 'def real(): pass',
+ ].join('\n');
+ const nodes = flaskResolver.extractNodes!('app.py', content);
+ const paths = nodes.map((n) => n.name);
+ expect(paths).toContain('/real');
+ expect(paths).not.toContain('/fake');
+ });
+ });
+
+ describe('FastAPI', () => {
+ it('skips routes inside `#` comments and triple-quoted docstrings', () => {
+ const content = [
+ '"""',
+ 'Module docs — example: @app.get("/docfake")',
+ '"""',
+ '# @app.post("/commentfake")',
+ '@app.get("/real")',
+ 'def real(): pass',
+ ].join('\n');
+ const nodes = fastapiResolver.extractNodes!('app.py', content);
+ const names = nodes.map((n) => n.name);
+ expect(names.some((n) => n.includes('/real'))).toBe(true);
+ expect(names.some((n) => n.includes('/docfake'))).toBe(false);
+ expect(names.some((n) => n.includes('/commentfake'))).toBe(false);
+ });
+
+ it('preserves correct line numbers for real routes after stripping', () => {
+ const content = [
+ '"""', // line 1
+ '@app.get("/fake")', // line 2 — inside docstring
+ '"""', // line 3
+ '', // line 4
+ '@app.get("/real")', // line 5 — real
+ ].join('\n');
+ const nodes = fastapiResolver.extractNodes!('app.py', content);
+ const real = nodes.find((n) => n.name.includes('/real'));
+ expect(real).toBeDefined();
+ expect(real!.startLine).toBe(5);
+ });
+ });
+
+ describe('Django URL patterns', () => {
+ it('skips path() inside `#` comments', () => {
+ const content = [
+ 'from django.urls import path',
+ '# example: path("fake/", fake_view)',
+ 'urlpatterns = [path("real/", real_view)]',
+ ].join('\n');
+ const nodes = djangoResolver.extractNodes!('urls.py', content);
+ const names = nodes.map((n) => n.name);
+ expect(names).toContain('real/');
+ expect(names).not.toContain('fake/');
+ });
+ });
+
+ describe('Express', () => {
+ it('skips routes inside `//` comments', () => {
+ const content = [
+ 'const app = express();',
+ '// app.get("/fake", fakeHandler);',
+ 'app.get("/real", realHandler);',
+ ].join('\n');
+ const nodes = expressResolver.extractNodes!('server.js', content);
+ const names = nodes.map((n) => n.name);
+ expect(names.some((n) => n.includes('/real'))).toBe(true);
+ expect(names.some((n) => n.includes('/fake'))).toBe(false);
+ });
+
+ it('skips routes inside `/* ... */` block comments', () => {
+ const content = [
+ '/*',
+ ' * app.post("/blockfake", h);',
+ ' */',
+ 'app.get("/real", h);',
+ ].join('\n');
+ const nodes = expressResolver.extractNodes!('server.js', content);
+ const names = nodes.map((n) => n.name);
+ expect(names.some((n) => n.includes('/real'))).toBe(true);
+ expect(names.some((n) => n.includes('/blockfake'))).toBe(false);
+ });
+ });
+
+ describe('Laravel', () => {
+ it('skips routes inside PHP `//` and `#` comments', () => {
+ const content = [
+        '<?php',
+        '// Route::get("/jsfake", $handler);',
+        '# Route::get("/perlfake", $handler);',
+        'Route::get("/real", $handler);',
+      ].join('\n');
+      const nodes = laravelResolver.extractNodes!('routes/web.php', content);
+      const names = nodes.map((n) => n.name);
+ expect(names.some((n) => n.includes('/real'))).toBe(true);
+ expect(names.some((n) => n.includes('/jsfake'))).toBe(false);
+ expect(names.some((n) => n.includes('/perlfake'))).toBe(false);
+ });
+ });
+
+ describe('Rust', () => {
+ it('skips actix/rocket routes inside `///` doc comments', () => {
+ const content = [
+ '/// Example route: #[get("/docfake")]',
+ '#[get("/real")]',
+ 'fn real() {}',
+ ].join('\n');
+ const nodes = rustResolver.extractNodes!('main.rs', content);
+ const names = nodes.map((n) => n.name);
+ expect(names.some((n) => n.includes('/real'))).toBe(true);
+ expect(names.some((n) => n.includes('/docfake'))).toBe(false);
+ });
+ });
+
+ describe('ASP.NET (C#)', () => {
+ it('skips route attributes inside `///` XML doc comments', () => {
+ const content = [
+        '/// <summary>',
+        '/// Example: [HttpGet("/docfake")]',
+        '/// </summary>',
+ '[HttpGet("/real")]',
+ 'public class C {}',
+ ].join('\n');
+ const nodes = aspnetResolver.extractNodes!('Controller.cs', content);
+ const names = nodes.map((n) => n.name);
+ expect(names.some((n) => n.includes('/real'))).toBe(true);
+ expect(names.some((n) => n.includes('/docfake'))).toBe(false);
+ });
+
+ it('skips minimal-API MapGet/MapPost calls inside comments', () => {
+ // Regression: the minimalApiPattern loop below the routePatterns
+ // loop was initially missed when applying the strip helper, leaving
+ // commented-out `app.MapGet("/x")` calls extracted as real routes.
+ const content = [
+ '// app.MapGet("/linefake", h);',
+ '/*',
+ ' * app.MapPost("/blockfake", h);',
+ ' */',
+ 'app.MapGet("/real", h);',
+ ].join('\n');
+ const nodes = aspnetResolver.extractNodes!('Program.cs', content);
+ const names = nodes.map((n) => n.name);
+ expect(names.some((n) => n.includes('/real'))).toBe(true);
+ expect(names.some((n) => n.includes('/linefake'))).toBe(false);
+ expect(names.some((n) => n.includes('/blockfake'))).toBe(false);
+ });
+ });
+});
+
+describe('stripCommentsForRegex preserves line offsets', () => {
+ it('keeps newlines so match.index → original line number', () => {
+ const input = '"""\n@app.get("/x")\n"""\n@app.get("/y")';
+ const out = stripCommentsForRegex(input, 'python');
+ // Newlines preserved
+ expect(out.split('\n').length).toBe(input.split('\n').length);
+ // The /y route survives
+ expect(out).toContain('/y');
+ // The docstring contents are blanked
+ expect(out).not.toContain('/x');
+ });
+});
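+
+// Why the newline guarantee matters (hedged sketch; resolver internals
+// may differ): route extractors typically recover a match's 1-based
+// line number as
+//
+//   const line = stripped.slice(0, match.index).split('\n').length;
+//
+// which is only correct if stripping blanks out comment bodies without
+// deleting newlines.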
diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts
index 8a70ffed..d4f7344c 100644
--- a/__tests__/extraction.test.ts
+++ b/__tests__/extraction.test.ts
@@ -3079,3 +3079,420 @@ describe('Directory Exclusion', () => {
expect(files.every((f) => !f.includes('vendor'))).toBe(true);
});
});
+
+// =============================================================================
+// R Extraction
+// =============================================================================
+
+describe('R Extraction', () => {
+ describe('Language detection', () => {
+ it('should detect R files', () => {
+ expect(detectLanguage('script.R')).toBe('r');
+ expect(detectLanguage('utils.r')).toBe('r');
+ });
+
+ it('should report R as supported', () => {
+ expect(isLanguageSupported('r')).toBe(true);
+ expect(getSupportedLanguages()).toContain('r');
+ });
+ });
+
+ describe('Function extraction', () => {
+ it('should extract a function defined with <-', () => {
+ const code = `add <- function(a, b) {
+ a + b
+}`;
+ const result = extractFromSource('main.R', code);
+ const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'add');
+ expect(fn).toBeDefined();
+ expect(fn?.signature).toBe('(a, b)');
+ });
+
+ it('should extract a function defined with =', () => {
+ const code = `subtract = function(a, b) a - b`;
+ const result = extractFromSource('main.R', code);
+ const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'subtract');
+ expect(fn).toBeDefined();
+ });
+
+ it('should extract a function defined with <<-', () => {
+ const code = `divide <<- function(a, b) a / b`;
+ const result = extractFromSource('main.R', code);
+ const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'divide');
+ expect(fn).toBeDefined();
+ });
+
+ it('should extract S3 method names verbatim (period in name)', () => {
+ const code = `print.myClass <- function(x, ...) cat(x$value)`;
+ const result = extractFromSource('print.R', code);
+ const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'print.myClass');
+ expect(fn).toBeDefined();
+ });
+
+ it('should NOT emit anonymous function nodes for inline lambdas', () => {
+ const code = `result <- lapply(xs, function(x) x * 2)`;
+ const result = extractFromSource('main.R', code);
+ expect(result.nodes.find((n) => n.kind === 'function')).toBeUndefined();
+ });
+
+ it('should attach a docstring from preceding roxygen comments', () => {
+ const code = `#' Add two numbers
+#' @param a numeric
+#' @param b numeric
+add <- function(a, b) a + b`;
+ const result = extractFromSource('main.R', code);
+ const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'add');
+ expect(fn?.docstring).toContain('Add two numbers');
+ });
+ });
+
+ describe('Call extraction', () => {
+ it('should extract simple function calls inside a function body', () => {
+ const code = `wrap <- function(x) {
+ inner(x)
+ another(x)
+}`;
+ const result = extractFromSource('main.R', code);
+ const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'wrap')!;
+ const calls = result.unresolvedReferences.filter(
+ (r) => r.fromNodeId === fn.id && r.referenceKind === 'calls'
+ );
+ const calleeNames = calls.map((c) => c.referenceName);
+ expect(calleeNames).toContain('inner');
+ expect(calleeNames).toContain('another');
+ });
+
+ it('should preserve namespace operator in callee name (pkg::fn)', () => {
+ const code = `runner <- function() {
+ dplyr::filter(df, x > 0)
+}`;
+ const result = extractFromSource('main.R', code);
+ const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'runner')!;
+ const calleeNames = result.unresolvedReferences
+ .filter((r) => r.fromNodeId === fn.id)
+ .map((r) => r.referenceName);
+ expect(calleeNames).toContain('dplyr::filter');
+ });
+ });
+
+ describe('Imports', () => {
+ it('should extract library() with bare-identifier argument', () => {
+ const code = `library(dplyr)`;
+ const result = extractFromSource('main.R', code);
+ const importNode = result.nodes.find((n) => n.kind === 'import');
+ expect(importNode?.name).toBe('dplyr');
+ });
+
+ it('should extract library() with quoted-string argument', () => {
+ const code = `library("tidyr")`;
+ const result = extractFromSource('main.R', code);
+ const importNode = result.nodes.find((n) => n.kind === 'import' && n.name === 'tidyr');
+ expect(importNode).toBeDefined();
+ });
+
+ it('should extract require() the same way as library()', () => {
+ const code = `require(ggplot2)`;
+ const result = extractFromSource('main.R', code);
+ const importNode = result.nodes.find((n) => n.kind === 'import' && n.name === 'ggplot2');
+ expect(importNode).toBeDefined();
+ });
+
+ it('should extract source() with a string path', () => {
+ const code = `source("helpers.R")`;
+ const result = extractFromSource('main.R', code);
+ const importNode = result.nodes.find((n) => n.kind === 'import' && n.name === 'helpers.R');
+ expect(importNode).toBeDefined();
+ });
+
+ it('should not emit an import node for a dynamic source() argument', () => {
+ const code = `source(paste0(BASE, "/helpers.R"))`;
+ const result = extractFromSource('main.R', code);
+ const imports = result.nodes.filter((n) => n.kind === 'import');
+ expect(imports.length).toBe(0);
+ });
+
+ it('should unquote R 4.0+ raw string literals (round delimiter)', () => {
+ const code = `source(r"(helpers.R)")`;
+ const result = extractFromSource('main.R', code);
+ const importNode = result.nodes.find((n) => n.kind === 'import' && n.name === 'helpers.R');
+ expect(importNode).toBeDefined();
+ });
+
+ it('should unquote R raw strings with bracket and brace delimiters', () => {
+ const r1 = extractFromSource('a.R', `library(R"[mypkg]")`);
+ const r2 = extractFromSource('b.R', `library(r"{mypkg}")`);
+ expect(r1.nodes.find((n) => n.kind === 'import' && n.name === 'mypkg')).toBeDefined();
+ expect(r2.nodes.find((n) => n.kind === 'import' && n.name === 'mypkg')).toBeDefined();
+ });
+
+ it('should unquote dash-delimited raw strings used to embed quotes', () => {
+ const code = `source(r"-(file.R)-")`;
+ const result = extractFromSource('main.R', code);
+ const importNode = result.nodes.find((n) => n.kind === 'import' && n.name === 'file.R');
+ expect(importNode).toBeDefined();
+ });
+ });
+
+ describe('Top-level constants', () => {
+ it('should extract top-level non-function assignments as constants', () => {
+ const code = `PI <- 3.14159
+COLORS <- c("red", "green")`;
+ const result = extractFromSource('main.R', code);
+ const pi = result.nodes.find((n) => n.kind === 'constant' && n.name === 'PI');
+ const colors = result.nodes.find((n) => n.kind === 'constant' && n.name === 'COLORS');
+ expect(pi).toBeDefined();
+ expect(colors).toBeDefined();
+ });
+
+ it('should NOT emit a constant for assignments inside a function body', () => {
+ const code = `outer <- function() {
+ x <- 5
+ x
+}`;
+ const result = extractFromSource('main.R', code);
+ const innerVar = result.nodes.find((n) => n.kind === 'constant' && n.name === 'x');
+ expect(innerVar).toBeUndefined();
+ });
+ });
+});
+
+// =============================================================================
+// HCL / Terraform Extraction
+// =============================================================================
+
+describe('HCL / Terraform Extraction', () => {
+ describe('Language detection', () => {
+ it('should detect HCL/Terraform files', () => {
+ expect(detectLanguage('main.tf')).toBe('hcl');
+ expect(detectLanguage('terraform.tfvars')).toBe('hcl');
+ expect(detectLanguage('config.hcl')).toBe('hcl');
+ });
+
+ it('should report HCL as supported', () => {
+ expect(isLanguageSupported('hcl')).toBe(true);
+ expect(getSupportedLanguages()).toContain('hcl');
+ });
+ });
+
+ describe('Block extraction', () => {
+ it('should extract a resource block as a class node', () => {
+ const code = `resource "aws_s3_bucket" "logs" { bucket = "my-logs" }`;
+ const result = extractFromSource('main.tf', code);
+
+ const node = result.nodes.find((n) => n.qualifiedName === 'aws_s3_bucket.logs');
+ expect(node).toBeDefined();
+ expect(node?.kind).toBe('class');
+ expect(node?.name).toBe('aws_s3_bucket.logs');
+ expect(node?.language).toBe('hcl');
+ expect(node?.signature).toBe('resource "aws_s3_bucket" "logs"');
+ });
+
+ it('should extract a data block with `data.` prefix', () => {
+ const code = `data "aws_caller_identity" "current" {}`;
+ const result = extractFromSource('main.tf', code);
+
+ const node = result.nodes.find((n) => n.qualifiedName === 'data.aws_caller_identity.current');
+ expect(node).toBeDefined();
+ expect(node?.kind).toBe('class');
+ expect(node?.name).toBe('aws_caller_identity.current');
+ });
+
+ it('should extract a variable block', () => {
+ const code = `variable "environment" { type = string }`;
+ const result = extractFromSource('main.tf', code);
+
+ const node = result.nodes.find((n) => n.qualifiedName === 'var.environment');
+ expect(node).toBeDefined();
+ expect(node?.kind).toBe('variable');
+ expect(node?.name).toBe('environment');
+ });
+
+ it('should extract an output block as an export', () => {
+ const code = `output "vpc_id" { value = "abc" }`;
+ const result = extractFromSource('main.tf', code);
+
+ const node = result.nodes.find((n) => n.qualifiedName === 'output.vpc_id');
+ expect(node).toBeDefined();
+ expect(node?.kind).toBe('export');
+ expect(node?.name).toBe('vpc_id');
+ });
+
+ it('should extract a module block', () => {
+ const code = `module "vpc" { source = "terraform-aws-modules/vpc/aws" }`;
+ const result = extractFromSource('main.tf', code);
+
+ const node = result.nodes.find((n) => n.qualifiedName === 'module.vpc');
+ expect(node).toBeDefined();
+ expect(node?.kind).toBe('module');
+ expect(node?.name).toBe('vpc');
+ });
+
+ it('should extract a provider block as namespace', () => {
+ const code = `provider "aws" { region = "us-east-1" }`;
+ const result = extractFromSource('main.tf', code);
+
+ const node = result.nodes.find((n) => n.qualifiedName === 'provider.aws');
+ expect(node).toBeDefined();
+ expect(node?.kind).toBe('namespace');
+ });
+
+ it('should split a locals block into one constant per attribute', () => {
+ const code = `locals {
+ bucket_name = "my-bucket"
+ retention = 30
+}`;
+ const result = extractFromSource('main.tf', code);
+
+ const bucketName = result.nodes.find((n) => n.qualifiedName === 'local.bucket_name');
+ const retention = result.nodes.find((n) => n.qualifiedName === 'local.retention');
+ expect(bucketName?.kind).toBe('constant');
+ expect(retention?.kind).toBe('constant');
+ });
+
+ it('should connect blocks to the file via contains edges', () => {
+ const code = `resource "aws_s3_bucket" "logs" {}`;
+ const result = extractFromSource('main.tf', code);
+
+ const fileNode = result.nodes.find((n) => n.kind === 'file');
+ const resourceNode = result.nodes.find((n) => n.qualifiedName === 'aws_s3_bucket.logs');
+ expect(fileNode).toBeDefined();
+ expect(resourceNode).toBeDefined();
+ const containsEdge = result.edges.find(
+ (e) => e.source === fileNode!.id && e.target === resourceNode!.id && e.kind === 'contains'
+ );
+ expect(containsEdge).toBeDefined();
+ });
+ });
+
+ describe('Reference extraction', () => {
+ it('should extract var.X references', () => {
+ const code = `resource "aws_s3_bucket" "logs" { bucket = var.bucket_name }`;
+ const result = extractFromSource('main.tf', code);
+
+ const ref = result.unresolvedReferences.find((r) => r.referenceName === 'var.bucket_name');
+ expect(ref).toBeDefined();
+ expect(ref?.referenceKind).toBe('references');
+ });
+
+ it('should extract local.X references', () => {
+ const code = `resource "aws_s3_bucket" "logs" { tags = local.common_tags }`;
+ const result = extractFromSource('main.tf', code);
+
+ const ref = result.unresolvedReferences.find((r) => r.referenceName === 'local.common_tags');
+ expect(ref).toBeDefined();
+ });
+
+ it('should extract module.X references and stop at the module name', () => {
+ const code = `output "vpc_id" { value = module.vpc.vpc_id }`;
+ const result = extractFromSource('main.tf', code);
+
+ const ref = result.unresolvedReferences.find((r) => r.referenceName === 'module.vpc');
+ expect(ref).toBeDefined();
+ // Should NOT emit a reference for the trailing attribute
+ expect(result.unresolvedReferences.find((r) => r.referenceName === 'module.vpc.vpc_id')).toBeUndefined();
+ });
+
+ it('should extract data.T.N references with both labels', () => {
+ const code = `output "x" { value = data.aws_caller_identity.current.account_id }`;
+ const result = extractFromSource('main.tf', code);
+
+ const ref = result.unresolvedReferences.find(
+ (r) => r.referenceName === 'data.aws_caller_identity.current'
+ );
+ expect(ref).toBeDefined();
+ });
+
+ it('should extract resource references as TYPE.NAME', () => {
+ const code = `resource "aws_s3_bucket_versioning" "v" { bucket = aws_s3_bucket.logs.id }`;
+ const result = extractFromSource('main.tf', code);
+
+ const ref = result.unresolvedReferences.find((r) => r.referenceName === 'aws_s3_bucket.logs');
+ expect(ref).toBeDefined();
+ });
+
+ it('should extract references inside string interpolations', () => {
+ const code = 'locals { name = "${var.environment}-${random_id.suffix.hex}" }';
+ const result = extractFromSource('main.tf', code);
+
+ const names = result.unresolvedReferences.map((r) => r.referenceName);
+ expect(names).toContain('var.environment');
+ expect(names).toContain('random_id.suffix');
+ });
+
+ it('should ignore references to count, each, self, and path', () => {
+ const code = `resource "aws_instance" "web" {
+ count = 3
+ tags = { Name = "web-\${count.index}", For = each.value, Self = self.id, P = path.module }
+}`;
+ const result = extractFromSource('main.tf', code);
+
+ const names = result.unresolvedReferences.map((r) => r.referenceName);
+ expect(names.find((n) => n.startsWith('count.'))).toBeUndefined();
+ expect(names.find((n) => n.startsWith('each.'))).toBeUndefined();
+ expect(names.find((n) => n.startsWith('self.'))).toBeUndefined();
+ expect(names.find((n) => n.startsWith('path.'))).toBeUndefined();
+ });
+
+ it('should ignore for-loop iteration variables', () => {
+ const code = `output "ids" { value = [for s in var.subnets : s.id] }`;
+ const result = extractFromSource('main.tf', code);
+
+ const names = result.unresolvedReferences.map((r) => r.referenceName);
+ // var.subnets reference comes through, but `s.id` does NOT
+ expect(names).toContain('var.subnets');
+ expect(names.find((n) => n.startsWith('s.'))).toBeUndefined();
+ });
+
+ it('should ignore key/value bindings in for-object expressions', () => {
+ const code = `locals { tags = { for k, v in var.input : k => "\${v}-suffix" } }`;
+ const result = extractFromSource('main.tf', code);
+
+ const names = result.unresolvedReferences.map((r) => r.referenceName);
+ expect(names).toContain('var.input');
+ expect(names.find((n) => n === 'k' || n.startsWith('k.'))).toBeUndefined();
+ expect(names.find((n) => n === 'v' || n.startsWith('v.'))).toBeUndefined();
+ });
+
+ it('should emit an imports edge for module source', () => {
+ const code = `module "vpc" { source = "terraform-aws-modules/vpc/aws" }`;
+ const result = extractFromSource('main.tf', code);
+
+ const importRef = result.unresolvedReferences.find(
+ (r) => r.referenceKind === 'imports' && r.referenceName === 'terraform-aws-modules/vpc/aws'
+ );
+ expect(importRef).toBeDefined();
+ });
+ });
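+
+ // The skip rules above amount to a root-prefix check (hedged sketch;
+ // `BUILTIN_ROOTS` and `forVars` are hypothetical names):
+ //
+ //   const BUILTIN_ROOTS = new Set(['count', 'each', 'self', 'path']);
+ //   if (BUILTIN_ROOTS.has(root) || forVars.has(root)) continue;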
+
+ describe('Robustness', () => {
+ it('should handle empty files', () => {
+ const result = extractFromSource('main.tf', '');
+ const fileNode = result.nodes.find((n) => n.kind === 'file');
+ expect(fileNode).toBeDefined();
+ });
+
+ it('should handle blocks with no body', () => {
+ const code = `data "aws_caller_identity" "current" {}`;
+ const result = extractFromSource('main.tf', code);
+ expect(result.nodes.find((n) => n.qualifiedName === 'data.aws_caller_identity.current')).toBeDefined();
+ });
+
+ it('should walk nested blocks for references without emitting child nodes', () => {
+ const code = `resource "aws_s3_bucket_versioning" "v" {
+ bucket = aws_s3_bucket.logs.id
+ versioning_configuration {
+ status = var.versioning_status
+ }
+}`;
+ const result = extractFromSource('main.tf', code);
+
+ // Only one block-level node, plus the file
+ const blockNodes = result.nodes.filter((n) => n.kind === 'class');
+ expect(blockNodes.length).toBe(1);
+
+ // References from the nested block should still be captured
+ const names = result.unresolvedReferences.map((r) => r.referenceName);
+ expect(names).toContain('aws_s3_bucket.logs');
+ expect(names).toContain('var.versioning_status');
+ });
+ });
+});
diff --git a/__tests__/foundation.test.ts b/__tests__/foundation.test.ts
index 9ee437da..97c04dcb 100644
--- a/__tests__/foundation.test.ts
+++ b/__tests__/foundation.test.ts
@@ -305,7 +305,7 @@ describe('Database Connection', () => {
const version = db.getSchemaVersion();
expect(version).not.toBeNull();
- expect(version?.version).toBe(3);
+ expect(version?.version).toBe(9);
db.close();
});
diff --git a/__tests__/index-hooks.test.ts b/__tests__/index-hooks.test.ts
new file mode 100644
index 00000000..639587f9
--- /dev/null
+++ b/__tests__/index-hooks.test.ts
@@ -0,0 +1,130 @@
+/**
+ * Index-hook framework: register a fake hook at runtime, run an
+ * indexAll/sync against a synthetic project, assert the hook ran
+ * with the expected context shape and that errors are caught.
+ *
+ * The registry's static-import list (`REGISTERED_HOOKS`) is empty
+ * on main today; tests poke at the runner directly through
+ * `runAfterIndexAll`/`runAfterSync` rather than mutating that
+ * list.
+ */
+import { describe, it, expect } from 'vitest';
+import {
+ runAfterIndexAll,
+ runAfterSync,
+ getRegisteredHooks,
+ type IndexHook,
+ type IndexHookContext,
+} from '../src/index-hooks/registry';
+import type { SyncResult } from '../src/extraction';
+
+function makeFakeContext(): IndexHookContext {
+ // Hooks should not mutate the context; for the runner-shape
+ // tests we hand them stubs typed `as any` — the runner doesn't
+ // touch any of these fields itself.
+ return {
+ projectRoot: '/tmp/fake-project',
+ /* eslint-disable @typescript-eslint/no-explicit-any */
+ config: {} as any,
+ queries: {} as any,
+ db: {} as any,
+ /* eslint-enable */
+ };
+}
+
+const fakeSyncResult: SyncResult = {
+ filesChecked: 0,
+ filesAdded: 0,
+ filesModified: 0,
+ filesRemoved: 0,
+ nodesUpdated: 0,
+ durationMs: 0,
+};
+
+describe('index-hooks registry — runner', () => {
+ it('registered hooks expose stable {name, afterIndexAll|afterSync} shape', () => {
+ const hooks = getRegisteredHooks();
+ expect(hooks.length).toBeGreaterThanOrEqual(0);
+ for (const h of hooks) {
+ expect(typeof h.name).toBe('string');
+ expect(h.afterIndexAll === undefined || typeof h.afterIndexAll === 'function').toBe(true);
+ expect(h.afterSync === undefined || typeof h.afterSync === 'function').toBe(true);
+ }
+ });
+
+ it('runAfterIndexAll returns one outcome per registered hook, swallowing per-hook errors', async () => {
+ // Registered hooks will throw on the fake `{} as any` ctx; the
+ // runner contract is to catch + report each error so one bad
+ // hook never fails the whole pass.
+ const outcomes = await runAfterIndexAll(makeFakeContext());
+ const expectedCount = getRegisteredHooks().filter((h) => h.afterIndexAll).length;
+ expect(outcomes.length).toBe(expectedCount);
+ for (const o of outcomes) {
+ expect(typeof o.name).toBe('string');
+ expect(o.phase).toBe('indexAll');
+ expect(typeof o.durationMs).toBe('number');
+ }
+ });
+
+ it('runAfterSync returns one outcome per registered hook, swallowing per-hook errors', async () => {
+ const outcomes = await runAfterSync(makeFakeContext(), fakeSyncResult);
+ const expectedCount = getRegisteredHooks().filter((h) => h.afterSync).length;
+ expect(outcomes.length).toBe(expectedCount);
+ for (const o of outcomes) {
+ expect(typeof o.name).toBe('string');
+ expect(o.phase).toBe('sync');
+ expect(typeof o.durationMs).toBe('number');
+ }
+ });
+});
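+
+// The runner contract the suite above pins down, as a hedged sketch
+// (the shipped implementation may differ in detail):
+//
+//   const outcomes = [];
+//   for (const h of getRegisteredHooks()) {
+//     if (!h.afterIndexAll) continue;
+//     const start = Date.now();
+//     try {
+//       await h.afterIndexAll(ctx);
+//     } catch {
+//       // report the error, never rethrow: one bad hook must not fail the pass
+//     }
+//     outcomes.push({ name: h.name, phase: 'indexAll', durationMs: Date.now() - start });
+//   }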
+
+describe('index-hooks runner — fake-hook injection', () => {
+ // Helper: temporarily inject a fake hook by wrapping the runner
+ // directly. The runner accepts no array argument today; this
+ // suite exercises the public surface (runAfterIndexAll /
+ // runAfterSync) by simulating what a registered hook would do.
+ // When real hooks land, REGISTERED_HOOKS in registry.ts will
+ // contain them and this fixture-style approach disappears.
+
+ it('a hook with afterIndexAll receives the context and is awaited', async () => {
+ // Build a one-off hook and call it directly — the runner's
+ // contract is "for each registered hook, await afterIndexAll
+ // if defined." We exercise that contract by calling the hook
+ // ourselves to confirm the IndexHookContext shape stays usable
+ // by hook implementations.
+ let captured: IndexHookContext | null = null;
+ const hook: IndexHook = {
+ name: 'fake-hook',
+ async afterIndexAll(ctx) {
+ captured = ctx;
+ },
+ };
+ const ctx = makeFakeContext();
+ await hook.afterIndexAll!(ctx);
+ expect(captured).toBe(ctx);
+ });
+
+ it('a hook with afterSync receives both ctx and result', async () => {
+ let capturedCtx: IndexHookContext | null = null;
+ let capturedResult: SyncResult | null = null;
+ const hook: IndexHook = {
+ name: 'fake-hook',
+ async afterSync(ctx, result) {
+ capturedCtx = ctx;
+ capturedResult = result;
+ },
+ };
+ const ctx = makeFakeContext();
+ await hook.afterSync!(ctx, fakeSyncResult);
+ expect(capturedCtx).toBe(ctx);
+ expect(capturedResult).toBe(fakeSyncResult);
+ });
+
+ it('a hook missing afterIndexAll is silently skipped', () => {
+ // Just a typing assertion: an IndexHook without afterIndexAll
+ // is allowed (both methods are optional).
+ const hook: IndexHook = { name: 'sync-only' };
+ expect(hook.afterIndexAll).toBeUndefined();
+ expect(hook.afterSync).toBeUndefined();
+ });
+});
diff --git a/__tests__/issue-history.test.ts b/__tests__/issue-history.test.ts
new file mode 100644
index 00000000..7c281771
--- /dev/null
+++ b/__tests__/issue-history.test.ts
@@ -0,0 +1,390 @@
+/**
+ * Issue → symbol attribution: parser unit tests + end-to-end mining
+ * against synthetic git repos.
+ */
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import { execFileSync } from 'child_process';
+import {
+ extractSymbolFromContext,
+ extractDeclaration,
+} from '../src/issue-history/parse-diff';
+import {
+ mineIssueCommits,
+ mineIssueHistory,
+ ISSUE_REGEX,
+ LAST_MINED_ISSUES_HEAD_KEY,
+} from '../src/issue-history';
+import CodeGraph from '../src/index';
+
+let HAS_GIT = true;
+try {
+ execFileSync('git', ['--version'], { stdio: 'ignore' });
+} catch {
+ HAS_GIT = false;
+}
+
+let testDir: string;
+let cg: CodeGraph | null = null;
+
+function git(...args: string[]): string {
+ return execFileSync('git', args, {
+ cwd: testDir,
+ encoding: 'utf-8',
+ env: {
+ ...process.env,
+ GIT_AUTHOR_NAME: 'Test',
+ GIT_AUTHOR_EMAIL: 'test@example.com',
+ GIT_COMMITTER_NAME: 'Test',
+ GIT_COMMITTER_EMAIL: 'test@example.com',
+ GIT_AUTHOR_DATE: process.env.GIT_AUTHOR_DATE,
+ GIT_COMMITTER_DATE: process.env.GIT_COMMITTER_DATE,
+ },
+ stdio: ['pipe', 'pipe', 'pipe'],
+ }).trim();
+}
+
+function commitAt(date: string, files: Record<string, string>, message: string) {
+ for (const [rel, content] of Object.entries(files)) {
+ const abs = path.join(testDir, rel);
+ fs.mkdirSync(path.dirname(abs), { recursive: true });
+ fs.writeFileSync(abs, content);
+ }
+ git('add', '-A');
+ process.env.GIT_AUTHOR_DATE = date;
+ process.env.GIT_COMMITTER_DATE = date;
+ git('commit', '-m', message);
+ delete process.env.GIT_AUTHOR_DATE;
+ delete process.env.GIT_COMMITTER_DATE;
+}
+
+beforeEach(() => {
+ testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-issues-'));
+});
+
+afterEach(() => {
+ delete process.env.GIT_AUTHOR_DATE;
+ delete process.env.GIT_COMMITTER_DATE;
+ if (cg) {
+ cg.destroy();
+ cg = null;
+ }
+ if (fs.existsSync(testDir)) fs.rmSync(testDir, { recursive: true, force: true });
+});
+
+// ============================================================================
+// Pure parser unit tests
+// ============================================================================
+
+describe('ISSUE_REGEX', () => {
+ it('matches all canonical Fixes/Closes/Resolves verbs', () => {
+ const cases = [
+ 'Fix #1', 'Fixes #2', 'Fixed #3',
+ 'Close #4', 'Closes #5', 'Closed #6',
+ 'Resolve #7', 'Resolves #8', 'Resolved #9',
+ ];
+ for (const s of cases) {
+ ISSUE_REGEX.lastIndex = 0;
+ expect(ISSUE_REGEX.test(s)).toBe(true);
+ }
+ });
+
+ it('matches multiple issues in a single body', () => {
+ ISSUE_REGEX.lastIndex = 0;
+ const matches = [...'Fixes #1, closes #2 and resolves #3'.matchAll(ISSUE_REGEX)];
+ expect(matches.map((m) => m[1])).toEqual(['1', '2', '3']);
+ });
+
+ it('is case-insensitive', () => {
+ ISSUE_REGEX.lastIndex = 0;
+ expect(ISSUE_REGEX.test('FIXES #42')).toBe(true);
+ });
+
+ it('does NOT match `#N` without a verb', () => {
+ ISSUE_REGEX.lastIndex = 0;
+ // A body that merely mentions #99 with no verb prefix must not match.
+ expect(ISSUE_REGEX.test('See #99 for context')).toBe(false);
+ });
+
+ it('v1 limitation: `Fixes #1, #2` only captures #1', () => {
+ // Documented behavior — the second issue lacks a verb prefix and
+ // is silently dropped. Authors who care can write `Fixes #1, fixes #2`.
+ ISSUE_REGEX.lastIndex = 0;
+ const matches = [...'Fixes #1, #2'.matchAll(ISSUE_REGEX)];
+ expect(matches.map((m) => m[1])).toEqual(['1']);
+ });
+});
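+
+// One regex satisfying every case above (hedged sketch; the shipped
+// ISSUE_REGEX may differ):
+//
+//   /\b(?:fix(?:es|ed)?|close[sd]?|resolve[sd]?)\s+#(\d+)/gi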
+
+describe('extractSymbolFromContext', () => {
+ it('pulls function name from a TS function context', () => {
+ expect(extractSymbolFromContext('function processOrder(order: Order) {')).toBe('processOrder');
+ });
+ it('pulls class name', () => {
+ expect(extractSymbolFromContext('class UserService {')).toBe('UserService');
+ });
+ it('pulls Python def', () => {
+ expect(extractSymbolFromContext('def compute_score(items):')).toBe('compute_score');
+ });
+ it('pulls Go func', () => {
+ expect(extractSymbolFromContext('func ProcessOrder(o *Order) error {')).toBe('ProcessOrder');
+ });
+ it('pulls method-style ` async foo(`', () => {
+ expect(extractSymbolFromContext(' async foo(args: string) {')).toBe('foo');
+ });
+ it('rejects keyword-only contexts', () => {
+ expect(extractSymbolFromContext(' if (x) {')).toBeNull();
+ });
+ it('returns null on empty input', () => {
+ expect(extractSymbolFromContext('')).toBeNull();
+ });
+});
+
+describe('extractDeclaration', () => {
+ it('captures + function decl', () => {
+ expect(extractDeclaration('+function helper() {')).toEqual({ name: 'helper', sign: '+' });
+ });
+ it('captures - class decl', () => {
+ expect(extractDeclaration('-export class Old {')).toEqual({ name: 'Old', sign: '-' });
+ });
+ it('captures Python def', () => {
+ expect(extractDeclaration('+def my_helper(x):')).toEqual({ name: 'my_helper', sign: '+' });
+ });
+ it('captures Go func with receiver', () => {
+ expect(extractDeclaration('+func (s *Service) DoThing() error {')).toEqual({
+ name: 'DoThing',
+ sign: '+',
+ });
+ });
+ it('skips file-marker `+++` and `---` lines', () => {
+ expect(extractDeclaration('+++ b/src/foo.ts')).toBeNull();
+ expect(extractDeclaration('--- a/src/foo.ts')).toBeNull();
+ });
+ it('skips keywords like `+if`', () => {
+ expect(extractDeclaration('+ if (x) return;')).toBeNull();
+ });
+ it('returns null on context lines (no +/-)', () => {
+ expect(extractDeclaration(' some body line')).toBeNull();
+ });
+});
+
+// ============================================================================
+// Git mining: synthetic repo
+// ============================================================================
+
+describe.skipIf(!HAS_GIT)('mineIssueCommits', () => {
+ beforeEach(() => {
+ git('init', '-q', '-b', 'main');
+ git('config', 'commit.gpgsign', 'false');
+ });
+
+ it('finds commits with `Fixes #N` in the subject', () => {
+ commitAt('2025-01-01T00:00:00Z', { 'a.ts': 'a' }, 'feat: add a (no issue)');
+ commitAt('2025-01-02T00:00:00Z', { 'a.ts': 'a2' }, 'fix: bug. Fixes #42');
+ const commits = mineIssueCommits(testDir, null);
+ expect(commits.length).toBe(1);
+ expect(commits[0]!.issues).toEqual([42]);
+ });
+
+ it('parses multi-issue subjects', () => {
+ commitAt('2025-01-01T00:00:00Z', { 'a.ts': 'a' }, 'fix: triple. Fixes #1, closes #2, resolves #3');
+ const [c] = mineIssueCommits(testDir, null);
+ expect(c?.issues).toEqual([1, 2, 3]);
+ });
+
+ it('ignores commits with no issue ref', () => {
+ commitAt('2025-01-01T00:00:00Z', { 'a.ts': 'a' }, 'plain message');
+ expect(mineIssueCommits(testDir, null).length).toBe(0);
+ });
+
+ it('returns [] when not in a git repo', () => {
+ const nonGit = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-nogit-'));
+ try {
+ expect(mineIssueCommits(nonGit, null)).toEqual([]);
+ } finally {
+ fs.rmSync(nonGit, { recursive: true, force: true });
+ }
+ });
+});
+
+// ============================================================================
+// End-to-end through CodeGraph
+// ============================================================================
+
+describe.skipIf(!HAS_GIT)('CodeGraph issue history', () => {
+ beforeEach(() => {
+ git('init', '-q', '-b', 'main');
+ git('config', 'commit.gpgsign', 'false');
+ });
+
+ it('attributes a Fixes #N commit to the modified function', async () => {
+ commitAt('2025-01-01T00:00:00Z', {
+ 'src/a.ts': `export function foo() { return 1; }\n`,
+ }, 'feat: add foo');
+
+ commitAt('2025-02-01T00:00:00Z', {
+ 'src/a.ts': `export function foo() {\n // changed\n return 2;\n}\n`,
+ }, 'fix: bug. Fixes #42');
+
+ cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+
+ const node = cg.getNodesInFile('src/a.ts').find((n) => n.name === 'foo')!;
+ expect(node).toBeDefined();
+ const issues = cg.getIssuesForNode(node.id);
+ expect(issues.length).toBeGreaterThan(0);
+ expect(issues.some((i) => i.issueNumber === 42)).toBe(true);
+ });
+
+ it('tracks the agent-usable multi-issue signal', async () => {
+ // Simulate the codegraph history pattern: `loadGrammarsForLanguages`
+ // touched by every language-add issue (#54, #82, #83, #85).
+ commitAt('2025-01-01T00:00:00Z', {
+ 'src/grammar.ts': `export function loadGrammarsForLanguages() { return []; }\n`,
+ }, 'feat: add grammar loader');
+
+ commitAt('2025-01-02T00:00:00Z', {
+ 'src/grammar.ts': `export function loadGrammarsForLanguages() {\n // R support\n return [];\n}\n`,
+ }, 'feat: add R support. Fixes #82');
+
+ commitAt('2025-01-03T00:00:00Z', {
+ 'src/grammar.ts': `export function loadGrammarsForLanguages() {\n // R + HCL support\n return [];\n}\n`,
+ }, 'feat: add HCL. Fixes #83');
+
+ commitAt('2025-01-04T00:00:00Z', {
+ 'src/grammar.ts': `export function loadGrammarsForLanguages() {\n // R + HCL + SQL\n return [];\n}\n`,
+ }, 'feat: add SQL. Fixes #85');
+
+ cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+
+ const node = cg.getNodesByKind("function").find((n) => n.name === 'loadGrammarsForLanguages')!;
+ expect(node).toBeDefined();
+ const issues = cg.getIssuesForNode(node.id);
+ const issueNumbers = [...new Set(issues.map((i) => i.issueNumber))].sort((a, b) => a - b);
+ expect(issueNumbers).toEqual([82, 83, 85]);
+ });
+
+ it('records `added` kind for symbols introduced in a Fixes commit', async () => {
+ commitAt('2025-01-01T00:00:00Z', {
+ 'src/a.ts': `export function existing() { return 1; }\n`,
+ }, 'init');
+
+ commitAt('2025-02-01T00:00:00Z', {
+ 'src/a.ts': `export function existing() { return 1; }\nexport function brandNew() { return 2; }\n`,
+ }, 'feat: add brandNew. Fixes #100');
+
+ cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+
+ const node = cg.getNodesByKind("function").find((n) => n.name === 'brandNew')!;
+ const issues = cg.getIssuesForNode(node.id);
+ expect(issues.some((i) => i.issueNumber === 100 && i.kind === 'added')).toBe(true);
+ });
+
+ it('drops attributions for symbols that no longer exist', async () => {
+ // Symbol added then removed in two separate `Fixes` commits. The
+ // current index has no node for it, so attributions for the removed
+ // symbol must not appear (FK + drop-on-resolve).
+ commitAt('2025-01-01T00:00:00Z', {
+ 'src/a.ts': `export function staysHere() { return 1; }\nexport function temporary() { return 99; }\n`,
+ }, 'feat: add. Fixes #1');
+
+ commitAt('2025-02-01T00:00:00Z', {
+ 'src/a.ts': `export function staysHere() { return 1; }\n`,
+ }, 'fix: drop temporary. Fixes #2');
+
+ cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+
+ // staysHere should have at least the #1 attribution (added).
+ const node = cg.getNodesByKind("function").find((n) => n.name === 'staysHere')!;
+ const issues = cg.getIssuesForNode(node.id);
+ expect(issues.some((i) => i.issueNumber === 1)).toBe(true);
+
+ // No node should exist named `temporary`, and no attribution to
+ // issue #2 should reference a node that doesn't exist.
+ expect(cg.getNodesByKind("function").find((n) => n.name === 'temporary')).toBeUndefined();
+ });
+
+ it('survives indexAll outside a git repo (table empty, no errors)', async () => {
+ fs.rmSync(path.join(testDir, '.git'), { recursive: true, force: true });
+ fs.writeFileSync(path.join(testDir, 'a.ts'), `export function x() { return 1; }\n`);
+ cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+ const nodes = cg.getNodesInFile('a.ts');
+ expect(nodes.length).toBeGreaterThan(0);
+ for (const n of nodes) expect(cg.getIssuesForNode(n.id)).toEqual([]);
+ });
+
+ it('respects enableIssueHistory=false', async () => {
+ commitAt('2025-01-01T00:00:00Z', {
+ 'src/a.ts': `export function foo() { return 1; }\n`,
+ }, 'init');
+ commitAt('2025-01-02T00:00:00Z', {
+ 'src/a.ts': `export function foo() { return 2; }\n`,
+ }, 'fix: foo. Fixes #1');
+
+ cg = CodeGraph.initSync(testDir, {
+ config: { include: ['**/*.ts'], exclude: [], enableIssueHistory: false },
+ });
+ await cg.indexAll();
+ const node = cg.getNodesInFile('src/a.ts').find((n) => n.name === 'foo')!;
+ expect(cg.getIssuesForNode(node.id)).toEqual([]);
+ });
+
+ it('incrementally picks up new Fixes commits on sync', async () => {
+ commitAt('2025-01-01T00:00:00Z', {
+ 'src/a.ts': `export function foo() { return 1; }\n`,
+ }, 'init');
+
+ cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+ const node = cg.getNodesInFile('src/a.ts').find((n) => n.name === 'foo')!;
+ expect(cg.getIssuesForNode(node.id).length).toBe(0);
+
+ commitAt('2025-02-01T00:00:00Z', {
+ 'src/a.ts': `export function foo() { return 2; }\n`,
+ }, 'fix: foo. Fixes #50');
+ await cg.sync();
+
+ const issues = cg.getIssuesForNode(node.id);
+ expect(issues.some((i) => i.issueNumber === 50)).toBe(true);
+ });
+
+ // (Removed: a defensive test for the v4-migration-collision bug class.
+ // With file-based migrations (NNN-name.ts), two migrations claiming
+ // the same version produces a filesystem-level conflict — the silent
+ // skip the defensive guard protected against can no longer happen.)
+
+ it('recovers from an unreachable last_mined_issues_head', async () => {
+ commitAt('2025-01-01T00:00:00Z', {
+ 'src/a.ts': `export function foo() { return 1; }\n`,
+ }, 'init');
+ commitAt('2025-02-01T00:00:00Z', {
+ 'src/a.ts': `export function foo() { return 2; }\n`,
+ }, 'fix: foo. Fixes #1');
+
+ cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+ const node = cg.getNodesInFile('src/a.ts').find((n) => n.name === 'foo')!;
+ expect(
+ [...new Set(cg.getIssuesForNode(node.id).map((i) => i.issueNumber))]
+ ).toEqual([1]);
+
+ // Simulate force-push / gc by storing an unreachable SHA.
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ (cg as any).queries.setMetadata(LAST_MINED_ISSUES_HEAD_KEY, '0'.repeat(40));
+
+ commitAt('2025-03-01T00:00:00Z', {
+ 'src/a.ts': `export function foo() { return 3; }\n`,
+ }, 'fix: foo again. Fixes #2');
+ await cg.sync();
+
+ const issueNums = [
+ ...new Set(cg.getIssuesForNode(node.id).map((i) => i.issueNumber)),
+ ].sort((a, b) => a - b);
+ expect(issueNums).toEqual([1, 2]);
+ });
+});
diff --git a/__tests__/language-registry.test.ts b/__tests__/language-registry.test.ts
new file mode 100644
index 00000000..9afdd59a
--- /dev/null
+++ b/__tests__/language-registry.test.ts
@@ -0,0 +1,157 @@
+/**
+ * Language registry: structural invariants.
+ *
+ * These tests guard against the "parallel list" failure mode that
+ * the registry refactor exists to prevent. If a future PR adds a
+ * grammar-backed language but forgets to wire it through one of
+ * the derived consumers, one of these tests should catch it.
+ */
+import { describe, it, expect } from 'vitest';
+import {
+ getLanguageDefs,
+ getLanguageDefByExtension,
+ getLanguageDefByName,
+} from '../src/extraction/languages/registry';
+import { EXTRACTORS } from '../src/extraction/languages';
+import {
+ detectLanguage,
+ isLanguageSupported,
+ getSupportedLanguages,
+ getLanguageDisplayName,
+ EXTENSION_MAP,
+} from '../src/extraction/grammars';
+
+describe('language registry — single source of truth', () => {
+ it('has at least the original 19 languages', () => {
+ const defs = getLanguageDefs();
+ expect(defs.length).toBeGreaterThanOrEqual(19);
+ });
+
+ it('every def has unique non-empty name', () => {
+ const names = new Set<string>();
+ for (const def of getLanguageDefs()) {
+ expect(def.name).toBeTruthy();
+ expect(names.has(def.name)).toBe(false);
+ names.add(def.name);
+ }
+ });
+
+ it('extensions are unique across registry (one ext maps to one language)', () => {
+ const seen = new Map<string, string>();
+ for (const def of getLanguageDefs()) {
+ for (const ext of def.extensions) {
+ const lower = ext.toLowerCase();
+ if (seen.has(lower)) {
+ // The .h ambiguity (C vs C++) is intentionally pinned to C
+ // by the registry; tree-sitter.ts has a content-sniff
+ // override. Anything else duplicating extensions is a bug.
+ throw new Error(
+ `Extension ${lower} mapped twice: ${seen.get(lower)} and ${def.name}`
+ );
+ }
+ seen.set(lower, def.name);
+ }
+ }
+ });
+
+ it('grammar-backed defs have wasmFile + extractor', () => {
+ for (const def of getLanguageDefs()) {
+ if (!def.grammar) continue;
+ expect(def.grammar.wasmFile).toMatch(/^tree-sitter-.+\.wasm$/);
+ expect(def.grammar.extractor).toBeDefined();
+ }
+ });
+
+ it('custom-extractor defs have a customExtractor function', () => {
+ for (const def of getLanguageDefs()) {
+ if (def.grammar) continue; // grammar-backed
+ expect(def.customExtractor).toBeInstanceOf(Function);
+ }
+ });
+});
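+
+// The def shape these invariants exercise, as a hedged sketch covering
+// only the fields the assertions touch (the real interface may carry
+// more; the wasm filename here is illustrative):
+//
+//   {
+//     name: 'typescript',
+//     displayName: 'TypeScript',
+//     extensions: ['.ts', ...],
+//     grammar?: { wasmFile: 'tree-sitter-typescript.wasm', extractor },
+//     customExtractor?,    // required when `grammar` is absent
+//     extensionOverrides?, // e.g. Pascal's .dfm/.fmx
+//   }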
+
+describe('derived consumers stay in sync with the registry', () => {
+ // Catch the "parallel list drift" bug that motivated this refactor.
+ // If a new language gets added to registry but a derived consumer
+ // still hard-codes the old set, one of these will fail.
+
+ it('EXTRACTORS contains exactly the grammar-backed languages', () => {
+ const grammarBacked = getLanguageDefs()
+ .filter((d) => d.grammar)
+ .map((d) => d.name)
+ .sort();
+ const extractorKeys = Object.keys(EXTRACTORS).sort();
+ expect(extractorKeys).toEqual(grammarBacked);
+ });
+
+ it('every grammar-backed extractor matches def.grammar.extractor exactly', () => {
+ for (const def of getLanguageDefs()) {
+ if (!def.grammar) continue;
+ expect(EXTRACTORS[def.name as keyof typeof EXTRACTORS]).toBe(def.grammar.extractor);
+ }
+ });
+
+ it('EXTENSION_MAP entries exactly mirror registry extensions', () => {
+ const expected = new Map<string, string>();
+ for (const def of getLanguageDefs()) {
+ for (const ext of def.extensions) {
+ expected.set(ext.toLowerCase(), def.name);
+ }
+ }
+ for (const [ext, lang] of expected) {
+ expect(EXTENSION_MAP[ext]).toBe(lang);
+ }
+ // Reverse: no extra keys in EXTENSION_MAP.
+ expect(Object.keys(EXTENSION_MAP).sort()).toEqual([...expected.keys()].sort());
+ });
+
+ it('detectLanguage returns the expected name for every registered extension', () => {
+ for (const def of getLanguageDefs()) {
+ for (const ext of def.extensions) {
+ // .h is pinned to C by the registry; the C++ heuristic only
+ // applies when source is provided AND looks like C++.
+ expect(detectLanguage(`x${ext}`)).toBe(def.name);
+ }
+ }
+ });
+
+ it('isLanguageSupported returns true for every registered language and false for unknown', () => {
+ for (const def of getLanguageDefs()) {
+ expect(isLanguageSupported(def.name as never)).toBe(true);
+ }
+ expect(isLanguageSupported('unknown' as never)).toBe(false);
+ });
+
+ it('getSupportedLanguages returns exactly the registry names', () => {
+ const fromRegistry = getLanguageDefs().map((d) => d.name).sort();
+ const supported = (getSupportedLanguages() as string[]).sort();
+ expect(supported).toEqual(fromRegistry);
+ });
+
+ it('getLanguageDisplayName uses each def\'s displayName', () => {
+ for (const def of getLanguageDefs()) {
+ expect(getLanguageDisplayName(def.name as never)).toBe(def.displayName);
+ }
+ });
+});
+
+describe('lookup helpers', () => {
+ it('getLanguageDefByName returns the def for a registered name', () => {
+ expect(getLanguageDefByName('typescript')?.displayName).toBe('TypeScript');
+ });
+
+ it('getLanguageDefByName returns undefined for unknown names', () => {
+ expect(getLanguageDefByName('nonexistent-language-name')).toBeUndefined();
+ });
+
+ it('getLanguageDefByExtension is case-insensitive', () => {
+ expect(getLanguageDefByExtension('.TS')?.name).toBe('typescript');
+ expect(getLanguageDefByExtension('.ts')?.name).toBe('typescript');
+ });
+
+ it('Pascal extensionOverrides routes .dfm and .fmx to a customExtractor', () => {
+ const def = getLanguageDefByName('pascal');
+ expect(def?.extensionOverrides?.['.dfm']?.customExtractor).toBeInstanceOf(Function);
+ expect(def?.extensionOverrides?.['.fmx']?.customExtractor).toBeInstanceOf(Function);
+ });
+});
diff --git a/__tests__/mcp-tool-registry.test.ts b/__tests__/mcp-tool-registry.test.ts
new file mode 100644
index 00000000..2da0efc5
--- /dev/null
+++ b/__tests__/mcp-tool-registry.test.ts
@@ -0,0 +1,82 @@
+/**
+ * MCP tool registry: structural invariants.
+ *
+ * Guards against the failure mode where a future PR adds a
+ * ToolModule but forgets to implement the matching `handle`
+ * method on ToolHandler (or vice versa).
+ */
+import { describe, it, expect } from 'vitest';
+import { getToolModules, tools as registryTools } from '../src/mcp/tools/registry';
+import { ToolHandler, tools } from '../src/mcp/tools';
+
+describe('MCP tool registry — single source of truth', () => {
+ it('every tool module has a non-empty name and description', () => {
+ for (const m of getToolModules()) {
+ expect(m.definition.name).toMatch(/^codegraph_[a-z_]+$/);
+ expect(m.definition.description.length).toBeGreaterThan(20);
+ }
+ });
+
+ it('handlerKey is a string starting with "handle"', () => {
+ for (const m of getToolModules()) {
+ expect(m.handlerKey).toMatch(/^handle[A-Z][A-Za-z]+$/);
+ }
+ });
+
+ it('every registered tool has a corresponding ToolHandler method', () => {
+ const handler = new ToolHandler(null);
+ for (const m of getToolModules()) {
+ const fn = (handler as unknown as Record<string, unknown>)[m.handlerKey];
+ expect(typeof fn).toBe('function');
+ }
+ });
+
+ it('exported `tools` array exactly mirrors the registry', () => {
+ const fromRegistry = registryTools.map((t) => t.name).sort();
+ const fromExport = tools.map((t) => t.name).sort();
+ expect(fromExport).toEqual(fromRegistry);
+ });
+
+ it('all main-line tools are registered (regression guard)', () => {
+ const expected = [
+ 'codegraph_callees',
+ 'codegraph_callers',
+ 'codegraph_config',
+ 'codegraph_context',
+ 'codegraph_explore',
+ 'codegraph_files',
+ 'codegraph_hotspots',
+ 'codegraph_impact',
+ 'codegraph_node',
+ 'codegraph_search',
+ 'codegraph_sql',
+ 'codegraph_status',
+ ];
+ const actual = getToolModules()
+ .map((m) => m.definition.name)
+ .sort();
+ expect(actual).toEqual(expected);
+ });
+
+ it('execute() reports unknown-tool errors', async () => {
+ const handler = new ToolHandler(null);
+ const result = await handler.execute('codegraph_does_not_exist', {});
+ expect(result.isError).toBe(true);
+ expect(result.content[0]?.text).toMatch(/Unknown tool/);
+ });
+
+ it('execute() actually dispatches to the registered handler (no broken `this` binding)', async () => {
+ // No CodeGraph instance is bound, so handlers that call
+ // `getCodeGraph()` will throw — the dispatch should catch it
+ // and return an error result. The point of this test is to
+ // confirm the registry lookup + `this[handlerKey](args)` chain
+ // reaches an actual method body, not that the body succeeds.
+ const handler = new ToolHandler(null);
+ const result = await handler.execute('codegraph_status', {});
+ expect(result.isError).toBe(true);
+ // Generic tool-execution-failed envelope from execute()'s catch block.
+ expect(result.content[0]?.text).toMatch(/Tool execution failed/);
+ // Specifically because no CodeGraph was bound:
+ expect(result.content[0]?.text).toMatch(/CodeGraph not initialized/);
+ });
+});
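+
+// Each registered module couples a tool definition to the ToolHandler
+// method that serves it (hedged sketch; `handleStatus` is a
+// hypothetical pairing consistent with the handlerKey pattern):
+//
+//   {
+//     definition: { name: 'codegraph_status', description: /* >20 chars */ },
+//     handlerKey: 'handleStatus',
+//   }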
diff --git a/__tests__/migrations-registry.test.ts b/__tests__/migrations-registry.test.ts
new file mode 100644
index 00000000..9fa15eed
--- /dev/null
+++ b/__tests__/migrations-registry.test.ts
@@ -0,0 +1,95 @@
+/**
+ * Migration registry: structural invariants.
+ *
+ * Guards against the silent-no-op bug class that motivated this
+ * refactor. If a future PR introduces a duplicate version,
+ * out-of-order versions, or fails to register a new migration
+ * file, one of these tests fails loudly.
+ */
+import { describe, it, expect } from 'vitest';
+import * as fs from 'fs';
+import * as path from 'path';
+import {
+ ALL_MIGRATIONS,
+ CURRENT_SCHEMA_VERSION,
+} from '../src/db/migrations';
+
+describe('migration registry — structural invariants', () => {
+ it('registry is non-empty', () => {
+ expect(ALL_MIGRATIONS.length).toBeGreaterThan(0);
+ });
+
+ it('versions are unique', () => {
+ const seen = new Set<number>();
+ for (const m of ALL_MIGRATIONS) {
+ expect(seen.has(m.version)).toBe(false);
+ seen.add(m.version);
+ }
+ });
+
+ it('versions are strictly ascending', () => {
+ for (let i = 1; i < ALL_MIGRATIONS.length; i++) {
+ expect(ALL_MIGRATIONS[i]!.version).toBeGreaterThan(
+ ALL_MIGRATIONS[i - 1]!.version
+ );
+ }
+ });
+
+ it('each migration has a non-empty description and a function up()', () => {
+ for (const m of ALL_MIGRATIONS) {
+ expect(m.description.length).toBeGreaterThan(0);
+ expect(typeof m.up).toBe('function');
+ }
+ });
+
+ it('CURRENT_SCHEMA_VERSION matches the highest registered version', () => {
+ const max = ALL_MIGRATIONS[ALL_MIGRATIONS.length - 1]!.version;
+ expect(CURRENT_SCHEMA_VERSION).toBe(max);
+ });
+});
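+
+// Each registry entry couples a version to its DDL (hedged sketch of
+// the fields the assertions above touch):
+//
+//   { version: 9, description: 'non-empty text', up(db) { /* DDL */ } }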
+
+describe('migration files — filename ↔ version coupling', () => {
+ // Read the actual filenames on disk and assert each matches an
+ // entry in the registry. Catches the case where someone drops a
+ // new file in src/db/migrations/ but forgets to register it.
+ const migrationsDir = path.resolve(__dirname, '../src/db/migrations');
+ const SUPPORT_FILES = new Set(['index.ts', 'types.ts']);
+ const STRICT_NNN_PATTERN = /^\d{3}-[a-z0-9]+(?:-[a-z0-9]+)*\.ts$/;
+
+ function listMigrationFiles(): string[] {
+ return fs.readdirSync(migrationsDir).filter((f) => f.endsWith('.ts') && !SUPPORT_FILES.has(f));
+ }
+
+ it('every migration file matches the strict `NNN-kebab-name.ts` pattern', () => {
+ const offenders: string[] = [];
+ for (const f of listMigrationFiles()) {
+ if (!STRICT_NNN_PATTERN.test(f)) {
+ offenders.push(f);
+ }
+ }
+ expect(offenders).toEqual([]);
+ });
+
+ it('every src/db/migrations/NNN-*.ts file is registered (no orphan files)', () => {
+ const files = listMigrationFiles().filter((f) => STRICT_NNN_PATTERN.test(f));
+ expect(files.length).toBeGreaterThan(0);
+ const registeredVersions = new Set(ALL_MIGRATIONS.map((m) => m.version));
+ for (const f of files) {
+ const version = parseInt(f.slice(0, 3), 10);
+ if (!registeredVersions.has(version)) {
+ throw new Error(
+ `Migration file ${f} exists on disk but is not registered in src/db/migrations/index.ts. ` +
+ `Add an import + array entry for it.`
+ );
+ }
+ }
+ });
+
+ it('every registered version has a matching NNN-*.ts file (no phantom registrations)', () => {
+ const files = listMigrationFiles().filter((f) => STRICT_NNN_PATTERN.test(f));
+ const filenameVersions = new Set(files.map((f) => parseInt(f.slice(0, 3), 10)));
+ for (const m of ALL_MIGRATIONS) {
+ expect(filenameVersions.has(m.version)).toBe(true);
+ }
+ });
+});
diff --git a/__tests__/pr19-improvements.test.ts b/__tests__/pr19-improvements.test.ts
index 5fbe17d7..b69d9068 100644
--- a/__tests__/pr19-improvements.test.ts
+++ b/__tests__/pr19-improvements.test.ts
@@ -299,7 +299,7 @@ describe('Best-Candidate Resolution', () => {
describe('Schema v2 Migration', () => {
it.skipIf(!HAS_SQLITE)('should have correct current schema version', async () => {
const { CURRENT_SCHEMA_VERSION } = await import('../src/db/migrations');
- expect(CURRENT_SCHEMA_VERSION).toBe(3);
+ expect(CURRENT_SCHEMA_VERSION).toBe(9);
});
it.skipIf(!HAS_SQLITE)('should have migration for version 2', async () => {
diff --git a/__tests__/search-quality.test.ts b/__tests__/search-quality.test.ts
new file mode 100644
index 00000000..8e069776
--- /dev/null
+++ b/__tests__/search-quality.test.ts
@@ -0,0 +1,302 @@
+/**
+ * Search Quality Tests
+ *
+ * Regression tests for the FTS improvements that bring natural-language
+ * and partial-identifier queries into the top of the result set:
+ * - Subword tokens (camel/snake split) so `parser` finds `getParser`.
+ * - Porter stemmer so `parsing` matches `parser`/`parses`.
+ * - Stopword stripping so `"how"` / `"the"` don't crowd out the
+ * real terms via docstring matches.
+ *
+ * All measurements were captured against codegraph's own src/ during
+ * development. Targets that previously ranked #18, #19, or missed the
+ * top 20 now land in the top 5.
+ */
+
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { DatabaseConnection } from '../src/db';
+import { QueryBuilder } from '../src/db/queries';
+import { Node } from '../src/types';
+import { splitIdentifierTokens, buildNameSubwords } from '../src/utils';
+import { filterStopwords, STOP_WORDS } from '../src/search/query-utils';
+import { runMigrations, getCurrentVersion } from '../src/db/migrations';
+
+describe('splitIdentifierTokens', () => {
+ it('splits camelCase', () => {
+ expect(splitIdentifierTokens('getParser')).toEqual(['get', 'parser']);
+ });
+
+ it('splits PascalCase', () => {
+ expect(splitIdentifierTokens('DatabaseConnection')).toEqual(['database', 'connection']);
+ });
+
+ it('splits XMLHttpRequest-style runs of capitals', () => {
+ expect(splitIdentifierTokens('XMLHttpRequest')).toEqual(['xml', 'http', 'request']);
+ });
+
+ it('splits snake_case', () => {
+ expect(splitIdentifierTokens('database_connection')).toEqual(['database', 'connection']);
+ });
+
+ it('splits kebab-case and dots and slashes', () => {
+ expect(splitIdentifierTokens('foo-bar.baz/qux')).toEqual(['foo', 'bar', 'baz', 'qux']);
+ });
+
+ it('keeps single-word identifiers as-is', () => {
+ expect(splitIdentifierTokens('parse')).toEqual(['parse']);
+ });
+
+ it('handles trailing/leading underscores', () => {
+ expect(splitIdentifierTokens('__init__')).toEqual(['init']);
+ });
+
+ it('preserves numbers as part of the surrounding token', () => {
+ expect(splitIdentifierTokens('parseV2')).toEqual(['parse', 'v2']);
+ });
+});
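+
+// A splitting rule consistent with all of the above, as a hedged sketch
+// (`splitSketch` is hypothetical; the shipped tokenizer may differ):
+//
+//   function splitSketch(id: string): string[] {
+//     return id
+//       .replace(/([a-z0-9])([A-Z])/g, '$1 $2')    // camelCase boundary
+//       .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2') // XMLHttp → XML Http
+//       .split(/[^A-Za-z0-9]+/)                    // _, -, ., /
+//       .filter(Boolean)
+//       .map((t) => t.toLowerCase());
+//   }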
+
+describe('buildNameSubwords', () => {
+ it('preserves the original identifier so direct queries still hit', () => {
+ const out = buildNameSubwords('getParser');
+ expect(out.split(' ')).toContain('getParser');
+ });
+
+ it('appends split tokens', () => {
+ const out = buildNameSubwords('getParser').split(' ');
+ expect(out).toContain('get');
+ expect(out).toContain('parser');
+ });
+
+ it('dedupes single-word identifiers (no "parse parse")', () => {
+ expect(buildNameSubwords('parse')).toBe('parse');
+ });
+
+ it('dedupes when split produces a single token equal to the original', () => {
+ // 'foo' has no boundary, so splitIdentifierTokens returns ['foo'];
+ // without dedup we would store 'foo foo'.
+ const out = buildNameSubwords('foo').split(' ');
+ expect(out).toEqual(['foo']);
+ });
+
+ it('handles empty string without crashing', () => {
+ expect(buildNameSubwords('')).toBe('');
+ });
+});
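+
+// buildNameSubwords is, in effect (hedged sketch):
+//
+//   dedupe([name, ...splitIdentifierTokens(name)]).join(' ')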
+
+describe('filterStopwords (shared with query-utils.ts)', () => {
+ it('drops common English stopwords', () => {
+ expect(filterStopwords(['how', 'does', 'parsing', 'work']))
+ // 'work' is also in STOP_WORDS, so the result is just 'parsing'
+ .toEqual(['parsing']);
+ });
+
+ it('returns the original list when every term is a stopword', () => {
+ // Otherwise we would produce an empty FTS query.
+ const allStopwords = ['the', 'a', 'an'];
+ expect(filterStopwords(allStopwords)).toEqual(allStopwords);
+ });
+
+ it('does not strip common identifier-like words', () => {
+ // `get` / `set` / `find` could be method names; never treated as stopwords.
+ expect(filterStopwords(['get', 'set', 'find', 'name']))
+ .toEqual(['get', 'set', 'find', 'name']);
+ expect(STOP_WORDS.has('get')).toBe(false);
+ });
+});
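+
+// filterStopwords is, in effect (hedged sketch):
+//
+//   const kept = terms.filter((t) => !STOP_WORDS.has(t));
+//   return kept.length > 0 ? kept : terms; // never emit an empty FTS query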
+
+describe('FTS5 search quality (integration)', () => {
+ let dir: string;
+ let db: DatabaseConnection;
+ let q: QueryBuilder;
+
+ function makeNode(id: string, name: string, kind: Node['kind'], docstring?: string): Node {
+ return {
+ id,
+ kind,
+ name,
+ qualifiedName: name,
+ filePath: `src/${name}.ts`,
+ language: 'typescript',
+ startLine: 1,
+ endLine: 1,
+ startColumn: 0,
+ endColumn: 0,
+ docstring,
+ updatedAt: Date.now(),
+ };
+ }
+
+ beforeEach(() => {
+ dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-search-quality-'));
+ db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
+ q = new QueryBuilder(db.getDb());
+ });
+
+ afterEach(() => {
+ db.close();
+ if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
+ });
+
+ it('finds getParser for a `parser` query (subword tokens)', () => {
+ q.insertNodes([
+ makeNode('n1', 'getParser', 'function'),
+ makeNode('n2', 'unrelated', 'function'),
+ ]);
+ const results = q.searchNodes('parser', { limit: 10 });
+ expect(results.find((r) => r.node.name === 'getParser')).toBeDefined();
+ });
+
+ it('finds DatabaseConnection for a `connection` query (subword tokens)', () => {
+ q.insertNodes([
+ makeNode('n1', 'DatabaseConnection', 'class'),
+ makeNode('n2', 'unrelated', 'function'),
+ ]);
+ const results = q.searchNodes('connection', { limit: 10 });
+ expect(results.find((r) => r.node.name === 'DatabaseConnection')).toBeDefined();
+ });
+
+ it('matches `parsing` against `getParser` via Porter stemmer', () => {
+ q.insertNodes([
+ makeNode('n1', 'getParser', 'function'),
+ makeNode('n2', 'unrelated', 'function'),
+ ]);
+ const results = q.searchNodes('parsing', { limit: 10 });
+ expect(results.find((r) => r.node.name === 'getParser')).toBeDefined();
+ });
+
+ it('matches `resolves references` against resolveOne', () => {
+ q.insertNodes([
+ makeNode('n1', 'resolveOne', 'method'),
+ makeNode('n2', 'unrelated', 'function'),
+ ]);
+ const results = q.searchNodes('resolves references', { limit: 10 });
+ expect(results.find((r) => r.node.name === 'resolveOne')).toBeDefined();
+ });
+
+ it('strips stopwords so `how does parser work` finds getParser', () => {
+ // Without stopword stripping the docstring of `unrelated` (containing
+ // "how" and "does") would BM25-flood the result list.
+ q.insertNodes([
+ makeNode('n1', 'getParser', 'function'),
+ makeNode(
+ 'n2',
+ 'unrelated',
+ 'function',
+ 'How does this work? It does many things — does, does, does.'
+ ),
+ ]);
+ const results = q.searchNodes('how does parser work', { limit: 10 });
+ const ranks = new Map(results.map((r, i) => [r.node.name, i + 1]));
+ const parserRank = ranks.get('getParser');
+ const unrelatedRank = ranks.get('unrelated');
+ expect(parserRank).toBeDefined();
+ if (unrelatedRank !== undefined) {
+ expect(parserRank).toBeLessThan(unrelatedRank);
+ }
+ });
+
+ it('exact identifier search still works (no regression on direct queries)', () => {
+ q.insertNodes([
+ makeNode('n1', 'ExtractionOrchestrator', 'class'),
+ makeNode('n2', 'extraction', 'variable'),
+ makeNode('n3', 'orchestrator', 'variable'),
+ ]);
+ const results = q.searchNodes('ExtractionOrchestrator', { limit: 10 });
+ expect(results[0].node.name).toBe('ExtractionOrchestrator');
+ });
+});
+
+describe('Migration v4: backfill name_subwords + rebuild FTS', () => {
+ let dir: string;
+
+ beforeEach(() => {
+ dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-migr-v4-fts-'));
+ });
+
+ afterEach(() => {
+ if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
+ });
+
+ it('rebuilds FTS so subword search works on previously-indexed nodes', () => {
+ // Build a v3-shape database from explicit SQL — the pre-PR schema —
+ // then run forward migrations and verify search works end-to-end.
+ // This is a faithful simulation of an upgrade from a real v3 install.
+ const Database = require('better-sqlite3');
+ const dbHandle = new Database(path.join(dir, 'test.db'));
+ dbHandle.pragma('foreign_keys = ON');
+ dbHandle.exec(`
+ CREATE TABLE schema_versions (version INTEGER PRIMARY KEY, applied_at INTEGER NOT NULL, description TEXT);
+ INSERT INTO schema_versions (version, applied_at, description) VALUES (3, 0, 'v3');
+ CREATE TABLE nodes (
+ id TEXT PRIMARY KEY, kind TEXT NOT NULL, name TEXT NOT NULL,
+ qualified_name TEXT NOT NULL, file_path TEXT NOT NULL, language TEXT NOT NULL,
+ start_line INTEGER NOT NULL, end_line INTEGER NOT NULL,
+ start_column INTEGER NOT NULL, end_column INTEGER NOT NULL,
+ docstring TEXT, signature TEXT, visibility TEXT,
+ is_exported INTEGER DEFAULT 0, is_async INTEGER DEFAULT 0,
+ is_static INTEGER DEFAULT 0, is_abstract INTEGER DEFAULT 0,
+ decorators TEXT, type_parameters TEXT, updated_at INTEGER NOT NULL
+ );
+ CREATE VIRTUAL TABLE nodes_fts USING fts5(
+ id, name, qualified_name, docstring, signature,
+ content='nodes', content_rowid='rowid'
+ );
+ CREATE TRIGGER nodes_ai AFTER INSERT ON nodes BEGIN
+ INSERT INTO nodes_fts(rowid, id, name, qualified_name, docstring, signature)
+ VALUES (NEW.rowid, NEW.id, NEW.name, NEW.qualified_name, NEW.docstring, NEW.signature);
+ END;
+ INSERT INTO nodes (id, kind, name, qualified_name, file_path, language,
+ start_line, end_line, start_column, end_column, updated_at)
+ VALUES ('n1', 'function', 'getParser', 'getParser', 'a.ts', 'typescript', 1, 1, 0, 0, 0);
+ `);
+
+ expect(getCurrentVersion(dbHandle)).toBe(3);
+
+ // Apply forward migrations (4..N including the FTS-subwords pass).
+ runMigrations(dbHandle, 3);
+ expect(getCurrentVersion(dbHandle)).toBeGreaterThanOrEqual(9);
+
+ // The new column was backfilled with the split subwords.
+ const row = dbHandle.prepare('SELECT name_subwords FROM nodes WHERE id = ?').get('n1') as {
+ name_subwords: string;
+ };
+ expect(row.name_subwords).toContain('parser');
+
+ // Search end-to-end via QueryBuilder works against the migrated DB.
+ const q2 = new QueryBuilder(dbHandle);
+ const results = q2.searchNodes('parser', { limit: 10 });
+ expect(results.find((r) => r.node.name === 'getParser')).toBeDefined();
+
+ dbHandle.close();
+ });
+
+ it('migration is idempotent if name_subwords column already exists', () => {
+ // Simulate a partial-failure scenario: an earlier migration run was
+ // interrupted after the ALTER TABLE landed but before the rest, so
+ // the column is present but the FTS hasn't been recreated and the
+ // schema_versions row hasn't been bumped.
+ const Database = require('better-sqlite3');
+ const dbHandle = new Database(path.join(dir, 'test.db'));
+ dbHandle.exec(`
+ CREATE TABLE schema_versions (version INTEGER PRIMARY KEY, applied_at INTEGER NOT NULL, description TEXT);
+ INSERT INTO schema_versions (version, applied_at, description) VALUES (3, 0, 'v3');
+ CREATE TABLE nodes (
+ id TEXT PRIMARY KEY, kind TEXT NOT NULL, name TEXT NOT NULL,
+ qualified_name TEXT NOT NULL, file_path TEXT NOT NULL, language TEXT NOT NULL,
+ start_line INTEGER NOT NULL, end_line INTEGER NOT NULL,
+ start_column INTEGER NOT NULL, end_column INTEGER NOT NULL,
+ docstring TEXT, signature TEXT, visibility TEXT,
+ is_exported INTEGER DEFAULT 0, is_async INTEGER DEFAULT 0,
+ is_static INTEGER DEFAULT 0, is_abstract INTEGER DEFAULT 0,
+ decorators TEXT, type_parameters TEXT, updated_at INTEGER NOT NULL,
+ name_subwords TEXT -- partial pre-existing state
+ );
+ `);
+ expect(() => runMigrations(dbHandle, 3)).not.toThrow();
+ expect(getCurrentVersion(dbHandle)).toBeGreaterThanOrEqual(9);
+ dbHandle.close();
+ });
+});
diff --git a/__tests__/security.test.ts b/__tests__/security.test.ts
index 53441d58..1c62e648 100644
--- a/__tests__/security.test.ts
+++ b/__tests__/security.test.ts
@@ -533,3 +533,36 @@ describe('Symlink Cycle Detection', () => {
expect(files).toContain('src/valid.ts');
});
});
+
+describe('ReDoS-safe glob matching', () => {
+ it('coalesces runs of `*` so hostile inputs do not produce nested quantifiers', async () => {
+ const { globToSafeRegex } = await import('../src/utils');
+ // Two or more stars collapse to a single recursive wildcard. This is the
+ // ReDoS protection: `*****` doesn't expand to `[^/]*[^/]*[^/]*[^/]*[^/]*`,
+ // which on a long input could catastrophically backtrack.
+ expect(globToSafeRegex('*****')).toBe('.*');
+ expect(globToSafeRegex('**')).toBe('.*');
+
+ // Even a constructed-from-hostile-input regex matches in linear time.
+ const regex = new RegExp(`^${globToSafeRegex('*****')}foo$`);
+ const start = Date.now();
+ // 100k 'a's followed by something that doesn't end in 'foo'.
+ expect(regex.test('a'.repeat(100000) + 'bar')).toBe(false);
+ expect(Date.now() - start).toBeLessThan(500);
+ });
+
+ it('rejects pathologically long glob inputs', async () => {
+ const { globToSafeRegex } = await import('../src/utils');
+ expect(globToSafeRegex('*'.repeat(2000))).toBeNull();
+ });
+
+ it('preserves the standard glob semantics for common patterns', async () => {
+ const { globToSafeRegex } = await import('../src/utils');
+ const body = globToSafeRegex('src/**/*.test.ts');
+ expect(body).toBeDefined();
+ const regex = new RegExp(`^${body}$`);
+ expect(regex.test('src/lib/foo.test.ts')).toBe(true);
+ expect(regex.test('src/lib/foo.ts')).toBe(false);
+ expect(regex.test('other/src/foo.test.ts')).toBe(false);
+ });
+});
diff --git a/__tests__/sql-refs.test.ts b/__tests__/sql-refs.test.ts
new file mode 100644
index 00000000..7fb201c7
--- /dev/null
+++ b/__tests__/sql-refs.test.ts
@@ -0,0 +1,339 @@
+/**
+ * SQL call-site tests: parser unit tests + end-to-end through CodeGraph.
+ */
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import { extractSqlRefs } from '../src/sql-refs';
+import CodeGraph from '../src/index';
+
+let testDir: string;
+let cg: CodeGraph | null = null;
+
+function write(rel: string, content: string) {
+ const abs = path.join(testDir, rel);
+ fs.mkdirSync(path.dirname(abs), { recursive: true });
+ fs.writeFileSync(abs, content);
+}
+
+beforeEach(() => {
+ testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-sql-'));
+});
+
+afterEach(() => {
+ if (cg) {
+ cg.destroy();
+ cg = null;
+ }
+ if (fs.existsSync(testDir)) fs.rmSync(testDir, { recursive: true, force: true });
+});
+
+// ============================================================================
+// Pure parser tests
+// ============================================================================
+
+describe('extractSqlRefs', () => {
+ it('captures FROM as a read', () => {
+ write('a.ts', `db.prepare('SELECT id FROM users WHERE id = ?');\n`);
+ const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ expect(refs).toHaveLength(1);
+ expect(refs[0]!).toMatchObject({ tableName: 'users', op: 'read' });
+ });
+
+ it('captures INSERT INTO as a write', () => {
+ write('a.ts', `db.prepare('INSERT INTO logs (msg) VALUES (?)');\n`);
+ const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ expect(refs).toHaveLength(1);
+ expect(refs[0]!).toMatchObject({ tableName: 'logs', op: 'write' });
+ });
+
+ it('captures UPDATE ... SET as a write', () => {
+ write('a.ts', `db.run('UPDATE users SET name = ? WHERE id = ?', ['x', 1]);\n`);
+ const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ expect(refs).toHaveLength(1);
+ expect(refs[0]!).toMatchObject({ tableName: 'users', op: 'write' });
+ });
+
+ it('captures DELETE FROM as a write (and not as a read)', () => {
+ write('a.ts', `db.run('DELETE FROM sessions WHERE expired_at < ?');\n`);
+ const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ // Both regexes (DELETE FROM as write, FROM as read) hit, so we expect
+ // two refs for the same table but different ops.
+ expect(refs.map((r) => r.op).sort()).toEqual(['read', 'write']);
+ expect(new Set(refs.map((r) => r.tableName))).toEqual(new Set(['sessions']));
+ });
+
+ it('captures CREATE TABLE / ALTER / DROP as ddl', () => {
+ write(
+ 'a.ts',
+ [
+ `db.exec('CREATE TABLE IF NOT EXISTS audit (id INTEGER)');`,
+ `db.exec('ALTER TABLE audit ADD COLUMN ts INTEGER');`,
+ `db.exec('DROP TABLE IF EXISTS audit_old');`,
+ ].join('\n')
+ );
+ const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ const ddls = refs.filter((r) => r.op === 'ddl');
+ expect(new Set(ddls.map((r) => r.tableName))).toEqual(new Set(['audit', 'audit_old']));
+ });
+
+ it('captures JOIN as a read', () => {
+ write(
+ 'a.ts',
+ `db.prepare('SELECT u.name, p.title FROM users u JOIN posts p ON p.user_id = u.id');\n`
+ );
+ const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ const tables = new Set(refs.map((r) => r.tableName));
+ expect(tables).toEqual(new Set(['users', 'posts']));
+ });
+
+ it('handles backtick (MySQL) and double-quoted (Postgres) identifiers', () => {
+ write(
+ 'a.ts',
+ [
+ "db.prepare('SELECT id FROM `mysql_table`');",
+ `db.prepare('SELECT id FROM "pg_table"');`,
+ ].join('\n')
+ );
+ const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ expect(new Set(refs.map((r) => r.tableName))).toEqual(
+ new Set(['mysql_table', 'pg_table'])
+ );
+ });
+
+ it('handles schema-qualified identifiers (drops the schema, keeps the table)', () => {
+ write('a.ts', `db.prepare('SELECT * FROM public.users');\n`);
+ const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ expect(refs[0]!.tableName).toBe('users');
+ });
+
+ it('does NOT match a JS variable named like a SQL keyword', () => {
+ // Without the FROM/INTO/etc. prefix, a bare identifier `users` is
+ // not caught — that's the whole point vs. plain grep.
+ write('a.ts', `const users = await loadUsers();\nfor (const user of users) {}\n`);
+ const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ expect(refs).toEqual([]);
+ });
+
+ it('skips unsupported languages (e.g. swift) without error', () => {
+ write('a.swift', `let q = "SELECT id FROM users"\n`);
+ const refs = extractSqlRefs(testDir, [{ path: 'a.swift', language: 'swift' }], () => null);
+ expect(refs).toEqual([]);
+ });
+
+ it('captures the correct 1-indexed line number', () => {
+ write(
+ 'a.ts',
+ [`// blah`, `// blah`, `db.prepare('SELECT * FROM line_three');`, `// blah`].join('\n')
+ );
+ const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ expect(refs[0]).toEqual(expect.objectContaining({ tableName: 'line_three', line: 3 }));
+ });
+
+ it('threads the resolveEnclosing closure correctly', () => {
+ write('a.ts', `db.prepare('SELECT * FROM t');\n`);
+ const calls: Array<[string, number]> = [];
+ extractSqlRefs(
+ testDir,
+ [{ path: 'a.ts', language: 'typescript' }],
+ (filePath, line) => {
+ calls.push([filePath, line]);
+ return 'fake-id';
+ }
+ );
+ expect(calls).toEqual([['a.ts', 1]]);
+ });
+
+ it('drops reserved-word "table names" (WHERE/ON/AS/SELECT)', () => {
+ // Common over-match: `JOIN ... ON x = y` would otherwise pick up
+ // `ON` as the table name. The reserved set blocks that.
+ write('a.ts', `db.prepare('SELECT * FROM users JOIN posts ON posts.uid = users.id');\n`);
+ const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ const names = new Set(refs.map((r) => r.tableName));
+ expect(names).toEqual(new Set(['users', 'posts']));
+ });
+
+ it('handles multiple SQL operations on a single line', () => {
+ write(
+ 'a.ts',
+ `db.exec('CREATE TABLE foo (id INTEGER); INSERT INTO foo VALUES (1)');\n`
+ );
+ const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ const ops = new Set(refs.map((r) => `${r.tableName}|${r.op}`));
+ expect(ops).toEqual(new Set(['foo|ddl', 'foo|write']));
+ });
+
+ it('survives a missing file (skips, no throw)', () => {
+ const refs = extractSqlRefs(
+ testDir,
+ [{ path: 'missing.ts', language: 'typescript' }],
+ () => null
+ );
+ expect(refs).toEqual([]);
+ });
+
+ it('rejects prose comments containing a quoted SQL example', () => {
+ // Reviewer-flagged regression: a comment like
+ // // example: db.prepare('SELECT name FROM the docs')
+ // used to falsely match `the` as a table because the quote inside
+ // the comment passed isInsideString(). The comment-stripper now
+ // removes everything after `//` before the regex sees the line.
+ write(
+ 'a.ts',
+ [
+ `// example: db.prepare('SELECT name FROM the docs')`,
+ `// "SELECT id FROM the comment"`,
+ `function ok() {`,
+ ` // sample SELECT FROM users in a comment — should be ignored`,
+ ` return 1;`,
+ `}`,
+ ].join('\n')
+ );
+ const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ expect(refs).toEqual([]);
+ });
+
+ it('rejects same-line block comments containing a quoted SQL example', () => {
+ write(
+ 'a.ts',
+ `/* "SELECT * FROM ghost" */ const x = 1;\n`
+ );
+ const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ expect(refs).toEqual([]);
+ });
+
+ it('still keeps a real SQL call when there is a trailing comment', () => {
+ write('a.ts', `db.prepare('SELECT * FROM users'); // good doc\n`);
+ const refs = extractSqlRefs(testDir, [{ path: 'a.ts', language: 'typescript' }], () => null);
+ expect(refs.length).toBe(1);
+ expect(refs[0]!.tableName).toBe('users');
+ });
+
+ it('strips Python `#` comments', () => {
+ write(
+ 'a.py',
+ `# example: db.execute('SELECT * FROM the_docs')\nrows = db.execute('SELECT * FROM real_table')\n`
+ );
+ const refs = extractSqlRefs(testDir, [{ path: 'a.py', language: 'python' }], () => null);
+ expect(refs.map((r) => r.tableName)).toEqual(['real_table']);
+ });
+});
+
+// ============================================================================
+// End-to-end through CodeGraph
+// ============================================================================
+
+describe('CodeGraph SQL refs', () => {
+ it('persists call sites and resolves enclosing function', async () => {
+ write(
+ 'src/db.ts',
+ [
+ `export function getUser(id: number) {`,
+ ` return db.prepare('SELECT * FROM users WHERE id = ?').get(id);`,
+ `}`,
+ ``,
+ `export function logEvent(msg: string) {`,
+ ` db.prepare('INSERT INTO events (msg) VALUES (?)').run(msg);`,
+ `}`,
+ ].join('\n')
+ );
+ cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+
+ const tables = cg.getSqlTables();
+ expect(new Set(tables.map((t) => t.tableName))).toEqual(new Set(['users', 'events']));
+
+ const userSites = cg.getSqlRefsByTable('users');
+ expect(userSites[0]!.sourceName).toBe('getUser');
+
+ const eventSites = cg.getSqlRefsByTable('events');
+ expect(eventSites[0]!.sourceName).toBe('logEvent');
+ expect(eventSites[0]!.op).toBe('write');
+ });
+
+ it('reverse view: getSqlTablesForNode returns tables touched by a function', async () => {
+ write(
+ 'src/a.ts',
+ [
+ `export function multiTouch() {`,
+ ` db.prepare('SELECT * FROM a').all();`,
+ ` db.prepare('INSERT INTO b VALUES (?)').run(1);`,
+ `}`,
+ ].join('\n')
+ );
+ cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+
+ const node = cg.getNodesInFile('src/a.ts').find((n) => n.name === 'multiTouch')!;
+ const touched = cg.getSqlTablesForNode(node.id);
+ const summary = touched.map((r) => `${r.tableName}|${r.op}`).sort();
+ expect(summary).toEqual(['a|read', 'b|write']);
+ });
+
+ it('case-insensitive table lookup', async () => {
+ write('src/a.ts', `db.prepare('SELECT * FROM Users');\n`);
+ cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+ expect(cg.getSqlRefsByTable('users').length).toBe(1);
+ expect(cg.getSqlRefsByTable('USERS').length).toBe(1);
+ });
+
+ it('respects enableSqlRefs=false', async () => {
+ write('src/a.ts', `db.prepare('SELECT * FROM users');\n`);
+ cg = CodeGraph.initSync(testDir, {
+ config: { include: ['**/*.ts'], exclude: [], enableSqlRefs: false },
+ });
+ await cg.indexAll();
+ expect(cg.getSqlTables()).toEqual([]);
+ });
+
+ it('incremental sync replaces refs for changed files only', async () => {
+ write('src/a.ts', `db.prepare('SELECT * FROM old_table');\n`);
+ write('src/b.ts', `db.prepare('SELECT * FROM stable_table');\n`);
+ cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+ expect(new Set(cg.getSqlTables().map((t) => t.tableName))).toEqual(
+ new Set(['old_table', 'stable_table'])
+ );
+
+ write('src/a.ts', `db.prepare('SELECT * FROM new_table');\n`);
+ await cg.sync();
+
+ const tables = new Set(cg.getSqlTables().map((t) => t.tableName));
+ expect(tables).toContain('new_table');
+ expect(tables).toContain('stable_table');
+ expect(tables).not.toContain('old_table');
+ });
+
+ it('drops refs when a file is edited to remove its last SQL ref', async () => {
+ // Same regression as PR C — applySqlRefs([]) shouldn't leave
+ // stale rows. Pre-deleting the changed paths in runSqlRefsPass
+ // is the fix.
+ write('src/a.ts', `db.prepare('SELECT * FROM going_away');\n`);
+ cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+ expect(cg.getSqlTables().some((t) => t.tableName === 'going_away')).toBe(true);
+
+ write('src/a.ts', `// no sql here anymore\nexport const x = 1;\n`);
+ await cg.sync();
+
+ expect(cg.getSqlTables().some((t) => t.tableName === 'going_away')).toBe(false);
+ });
+
+ it('drops refs for files removed between syncs', async () => {
+ write('src/a.ts', `db.prepare('SELECT * FROM gone_table');\n`);
+ cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } });
+ await cg.indexAll();
+ expect(cg.getSqlTables().some((t) => t.tableName === 'gone_table')).toBe(true);
+
+ fs.unlinkSync(path.join(testDir, 'src/a.ts'));
+ await cg.sync();
+ expect(cg.getSqlTables().some((t) => t.tableName === 'gone_table')).toBe(false);
+ });
+
+ // (Removed: a defensive test for the v4-migration-collision bug class.
+ // With file-based migrations (NNN-name.ts), two PRs claiming the same
+ // version produce a filesystem-level conflict, so the silent skip the
+ // defensive guard protected against can no longer happen.)
+});
diff --git a/__tests__/sync.test.ts b/__tests__/sync.test.ts
index 8365f630..cb657274 100644
--- a/__tests__/sync.test.ts
+++ b/__tests__/sync.test.ts
@@ -259,4 +259,140 @@ describe('Sync Module', () => {
expect(result.changedFilePaths).toBeUndefined();
});
});
+
+ // Regression tests for the "stale index after HEAD-moving git operation"
+ // bug. `git status` only reports working-tree dirtiness vs HEAD, so a
+ // merge / pull / checkout / rebase / reset (and even post-commit) leaves
+ // a clean tree and used to trick sync into reporting "up to date" while
+ // the DB still held pre-operation content hashes. The fix detects HEAD
+ // movement by comparing current HEAD against a stored last-synced HEAD
+ // and unioning `git diff` output into the changed-file set.
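+ // Sketch of that detection (illustrative pseudocode, not the real API):
+ //   changed = statusFiles ∪ (headMoved ? diffNameOnly(lastSyncedHead, head) : ∅)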
+ describe('HEAD-moving git operations', () => {
+ let testDir: string;
+ let cg: CodeGraph;
+
+ function git(...args: string[]) {
+ execFileSync('git', args, { cwd: testDir, stdio: 'pipe' });
+ }
+
+ beforeEach(async () => {
+ testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-head-move-'));
+
+ git('init');
+ git('config', 'user.email', 'test@test.com');
+ git('config', 'user.name', 'Test');
+ // Pin initial branch name so subsequent checkouts are deterministic
+ // across git versions that default to master vs main.
+ git('symbolic-ref', 'HEAD', 'refs/heads/main');
+
+ const srcDir = path.join(testDir, 'src');
+ fs.mkdirSync(srcDir);
+ fs.writeFileSync(
+ path.join(srcDir, 'index.ts'),
+ `export function hello() { return 'world'; }`
+ );
+
+ git('add', '-A');
+ git('commit', '-m', 'initial');
+
+ cg = CodeGraph.initSync(testDir, {
+ config: { include: ['**/*.ts'], exclude: [] },
+ });
+ await cg.indexAll();
+ });
+
+ afterEach(() => {
+ if (cg) cg.destroy();
+ if (fs.existsSync(testDir)) {
+ fs.rmSync(testDir, { recursive: true, force: true });
+ }
+ });
+
+ it('should detect changes brought in by `git merge`', async () => {
+ // Branch off, modify on the branch, commit, switch back, merge.
+ git('checkout', '-b', 'feature');
+ fs.writeFileSync(
+ path.join(testDir, 'src', 'index.ts'),
+ `export function merged() { return 'from-branch'; }`
+ );
+ fs.writeFileSync(
+ path.join(testDir, 'src', 'added.ts'),
+ `export function fromBranch() { return 1; }`
+ );
+ git('add', '-A');
+ git('commit', '-m', 'feature work');
+ git('checkout', 'main');
+ git('merge', '--no-ff', 'feature', '-m', 'merge feature');
+
+ // Working tree is clean post-merge — `git status` shows nothing.
+ const result = await cg.sync();
+
+ expect(result.filesModified + result.filesAdded).toBeGreaterThanOrEqual(2);
+ expect(cg.searchNodes('merged').length).toBeGreaterThan(0);
+ expect(cg.searchNodes('fromBranch').length).toBeGreaterThan(0);
+ expect(cg.searchNodes('hello').length).toBe(0);
+ });
+
+ it('should detect changes after `git checkout` to a different branch', async () => {
+ git('checkout', '-b', 'other');
+ fs.writeFileSync(
+ path.join(testDir, 'src', 'index.ts'),
+ `export function onOther() { return 'other'; }`
+ );
+ git('add', '-A');
+ git('commit', '-m', 'other work');
+ git('checkout', 'main');
+ // We're back on main, where `hello` exists; now switch to `other`.
+ // Before the fix, sync after that checkout would no-op because the
+ // working tree matches HEAD (= other). But the index was last synced
+ // against main, so we expect the diff main..other to flow through
+ // and bring the index in line with the current branch.
+ git('checkout', 'other');
+
+ const result = await cg.sync();
+
+ expect(result.filesModified).toBeGreaterThanOrEqual(1);
+ expect(cg.searchNodes('onOther').length).toBeGreaterThan(0);
+ expect(cg.searchNodes('hello').length).toBe(0);
+ });
+
+ it('should detect file deletion brought in by a committed change', async () => {
+ git('rm', path.join('src', 'index.ts'));
+ git('commit', '-m', 'remove index');
+
+ const result = await cg.sync();
+
+ expect(result.filesRemoved).toBe(1);
+ expect(cg.searchNodes('hello').length).toBe(0);
+ });
+
+ it('should fall back to full scan when last-synced HEAD is unreachable', async () => {
+ // Modify and commit, then rewrite history so the previously-synced
+ // HEAD (recorded by indexAll in beforeEach) is no longer reachable.
+ fs.writeFileSync(
+ path.join(testDir, 'src', 'index.ts'),
+ `export function rewritten() { return 'rewritten'; }`
+ );
+ git('add', '-A');
+ git('commit', '--amend', '-m', 'rewritten');
+ // `git gc --prune=now` would actually delete the orphaned commit, but
+ // amending already moves HEAD to a new SHA the index has never seen,
+ // and the old SHA may or may not still be reachable. We verify behavior
+ // is correct either way: sync brings the index in line with current state.
+ const result = await cg.sync();
+
+ expect(result.filesModified + result.filesAdded).toBeGreaterThanOrEqual(1);
+ expect(cg.searchNodes('rewritten').length).toBeGreaterThan(0);
+ expect(cg.searchNodes('hello').length).toBe(0);
+ });
+
+ it('should still no-op when HEAD has not moved and tree is clean', async () => {
+ // Sanity: the new HEAD-tracking code must not introduce spurious work.
+ const result = await cg.sync();
+
+ expect(result.filesAdded).toBe(0);
+ expect(result.filesModified).toBe(0);
+ expect(result.filesRemoved).toBe(0);
+ });
+ });
});
diff --git a/__tests__/watcher.test.ts b/__tests__/watcher.test.ts
index f3638e6d..a546494d 100644
--- a/__tests__/watcher.test.ts
+++ b/__tests__/watcher.test.ts
@@ -31,6 +31,19 @@ function waitFor(
});
}
+/**
+ * fs.watch on macOS (FSEvents) and Linux (inotify) has a small but real
+ * latency between `fs.watch()` returning and the kernel actually
+ * delivering events. Writing a file in that window — particularly under
+ * parallel test load when the host CPU is busy — drops the event and
+ * causes a 5s timeout for "should trigger sync after file change" style
+ * tests. This helper standardizes the settle delay to match the pattern
+ * already used by the filtering tests in this file.
+ */
+async function letWatcherSettle(): Promise<void> {
+ await new Promise((r) => setTimeout(r, 400));
+}
+
describe('FileWatcher', () => {
let testDir: string;
@@ -101,6 +114,7 @@ describe('FileWatcher', () => {
const watcher = new FileWatcher(testDir, baseConfig, syncFn, { debounceMs: 200 });
watcher.start();
+ await letWatcherSettle();
// Create a new file
fs.writeFileSync(path.join(testDir, 'src', 'new.ts'), 'export const y = 2;');
@@ -117,6 +131,7 @@ describe('FileWatcher', () => {
const watcher = new FileWatcher(testDir, baseConfig, syncFn, { debounceMs: 500 });
watcher.start();
+ await letWatcherSettle();
// Rapid-fire changes
for (let i = 0; i < 5; i++) {
@@ -145,7 +160,7 @@ describe('FileWatcher', () => {
watcher.start();
// Let watcher settle — fs.watch may fire residual events from beforeEach
- await new Promise((r) => setTimeout(r, 400));
+ await letWatcherSettle();
syncFn.mockClear();
// Create a file that doesn't match include patterns
@@ -165,7 +180,7 @@ describe('FileWatcher', () => {
watcher.start();
// Let watcher settle — fs.watch may fire residual events from beforeEach
- await new Promise((r) => setTimeout(r, 400));
+ await letWatcherSettle();
syncFn.mockClear();
// Simulate a .codegraph directory change
@@ -191,6 +206,7 @@ describe('FileWatcher', () => {
});
watcher.start();
+ await letWatcherSettle();
fs.writeFileSync(path.join(testDir, 'src', 'test.ts'), 'export const z = 3;');
@@ -209,6 +225,7 @@ describe('FileWatcher', () => {
});
watcher.start();
+ await letWatcherSettle();
fs.writeFileSync(path.join(testDir, 'src', 'test.ts'), 'export const z = 3;');
@@ -218,6 +235,36 @@ describe('FileWatcher', () => {
watcher.stop();
});
+
+ it('should retry pending changes after a sync failure (no events lost)', async () => {
+ // First call rejects, subsequent calls resolve. After the initial
+ // failure, the watcher should retry the same batch on its own — without
+ // this, transient sync failures (DB locked etc.) would silently drop the
+ // changes until a new file event happened.
+ let calls = 0;
+ const syncFn = vi.fn().mockImplementation(() => {
+ calls++;
+ if (calls === 1) return Promise.reject(new Error('transient'));
+ return Promise.resolve({ filesChanged: 1, durationMs: 5 });
+ });
+ const onSyncError = vi.fn();
+ const onSyncComplete = vi.fn();
+ const watcher = new FileWatcher(testDir, baseConfig, syncFn, {
+ debounceMs: 100,
+ onSyncError,
+ onSyncComplete,
+ });
+
+ watcher.start();
+ fs.writeFileSync(path.join(testDir, 'src', 'test.ts'), 'export const z = 3;');
+
+ await waitFor(() => onSyncComplete.mock.calls.length > 0, 5000);
+ expect(onSyncError).toHaveBeenCalledTimes(1);
+ expect(syncFn).toHaveBeenCalledTimes(2);
+ expect(onSyncComplete).toHaveBeenCalledWith({ filesChanged: 1, durationMs: 5 });
+
+ watcher.stop();
+ });
});
describe('CodeGraph integration', () => {
@@ -268,6 +315,7 @@ describe('FileWatcher', () => {
const initialNodes = initialStats.nodeCount;
cg.watch({ debounceMs: 300 });
+ await letWatcherSettle();
// Add a new file with a function
fs.writeFileSync(
diff --git a/scripts/battle-test.mjs b/scripts/battle-test.mjs
new file mode 100644
index 00000000..071ec3a4
--- /dev/null
+++ b/scripts/battle-test.mjs
@@ -0,0 +1,150 @@
+#!/usr/bin/env node
+/**
+ * Battle test: drive every feature shipped on `battle-test/all-shipped`
+ * against a real repo and print a comprehensive report.
+ *
+ * Validates:
+ * - migrations: schema is at CURRENT_SCHEMA_VERSION with every registered migration applied
+ * - extraction: nodes/edges/files indexed
+ * - centrality: PageRank scores populated, top-N nonempty
+ * - churn: per-file commit counts, LOC, last-touched timestamps
+ * - hotspots: risk scoring (centrality × churn) returns ranked rows
+ * - issue-history: Fixes/Closes/Resolves attribution
+ * - config-refs: env var read sites
+ * - sql-refs: table read/write/DDL call sites
+ * - MCP tool registry: 11 tools registered
+ * - Index-hook registry: 5 hooks registered
+ *
+ * Usage: node scripts/battle-test.mjs [targetPath]  (defaults to cwd)
+ */
+
+import path from 'node:path';
+import fs from 'node:fs';
+import process from 'node:process';
+
+const targetPath = path.resolve(process.argv[2] ?? process.cwd());
+if (!fs.existsSync(targetPath)) {
+ console.error(`battle-test: target path does not exist: ${targetPath}`);
+ process.exit(1);
+}
+
+console.log(`\n=== Battle test: ${targetPath} ===\n`);
+
+const { CodeGraph } = await import('../dist/index.js');
+
+// Reset .codegraph if present so we exercise the fresh-init path
+const cgDir = path.join(targetPath, '.codegraph');
+if (fs.existsSync(cgDir)) {
+ fs.rmSync(cgDir, { recursive: true, force: true });
+}
+
+const cg = await CodeGraph.init(targetPath);
+
+const t0 = Date.now();
+const result = await cg.indexAll();
+const indexMs = Date.now() - t0;
+console.log(`✓ indexAll completed in ${indexMs}ms — files=${result.filesIndexed} nodes=${result.nodesCreated} edges=${result.edgesCreated}`);
+
+const stats = cg.getStats();
+console.log(` stats: ${stats.fileCount} files, ${stats.nodeCount} nodes, ${stats.edgeCount} edges`);
+
+// ----- migrations -----
+const { CURRENT_SCHEMA_VERSION, ALL_MIGRATIONS } = await import('../dist/db/migrations.js');
+const versions = ALL_MIGRATIONS.map((m) => m.version).join(',');
+console.log(`✓ schema v${CURRENT_SCHEMA_VERSION}, registered migrations: ${versions}`);
+
+// ----- index-hook registry -----
+const { getRegisteredHooks } = await import('../dist/index-hooks/registry.js');
+const hooks = getRegisteredHooks();
+console.log(`✓ ${hooks.length} index-hooks registered: ${hooks.map((h) => h.name).join(', ')}`);
+
+// ----- mcp tool registry -----
+const { getToolModules } = await import('../dist/mcp/tools/registry.js');
+const tools = getToolModules();
+console.log(`✓ ${tools.length} MCP tools registered: ${tools.map((t) => t.definition.name).join(', ')}`);
+
+// ----- centrality -----
+const top = cg.getTopCentralNodes({ limit: 5 });
+console.log(`\n--- centrality ---`);
+if (top.length === 0) {
+ console.log(` ✗ no centrality scores computed`);
+} else {
+ console.log(` ✓ top 5 by centrality:`);
+ for (const n of top) {
+ console.log(` ${n.centrality?.toFixed(5)} ${n.kind} ${n.name} (${n.filePath}:${n.startLine})`);
+ }
+}
+
+// ----- churn -----
+console.log(`\n--- churn ---`);
+const sample = cg.getStats().fileCount > 0
+ ? cg.getHotspots({ limit: 1, minCommits: 0 })[0]
+ : null;
+if (sample) {
+ const churn = cg.getFileChurn(sample.filePath);
+ console.log(` ✓ sample file ${sample.filePath}: commits=${churn?.commitCount} loc=${churn?.loc} lastTouched=${churn?.lastTouchedTs}`);
+} else {
+ console.log(` (no churn data — likely not in a git repo)`);
+}
+
+// ----- hotspots -----
+console.log(`\n--- hotspots ---`);
+const hot = cg.getHotspots({ limit: 5, minCommits: 0 });
+if (hot.length === 0) {
+ console.log(` (no hotspots)`);
+} else {
+ console.log(` ✓ top 5 by risk:`);
+ for (const r of hot) {
+ console.log(` risk=${r.riskScore.toFixed(4)} commits=${r.commitCount} loc=${r.loc} ${r.filePath}`);
+ }
+}
+
+// ----- issue history -----
+console.log(`\n--- issue history ---`);
+let issueCount = 0;
+let nodesWithIssues = 0;
+const allNodes = cg.getStats().nodeCount;
+// Sample up to 200 top-central nodes; count how many have any issue history
+const sampleNodes = cg.getTopCentralNodes({ limit: 200 });
+for (const n of sampleNodes) {
+ const issues = cg.getIssuesForNode(n.id);
+ if (issues.length > 0) {
+ nodesWithIssues++;
+ issueCount += issues.length;
+ }
+}
+console.log(` sampled ${sampleNodes.length} of ${allNodes} nodes: ${nodesWithIssues} have issue refs (${issueCount} attributions)`);
+
+// ----- config refs -----
+console.log(`\n--- config refs ---`);
+const envKeys = cg.getConfigKeys({ configKind: 'env', limit: 10 });
+if (envKeys.length === 0) {
+ console.log(` (no env-var read sites)`);
+} else {
+ console.log(` ✓ top 10 env vars (${envKeys.length}/${cg.getConfigKeys({ configKind: 'env', limit: 9999 }).length}):`);
+ for (const k of envKeys) {
+ console.log(` ${k.reads.toString().padStart(4)} reads ${k.distinctFiles} files ${k.configKey}`);
+ }
+}
+
+// ----- sql refs -----
+console.log(`\n--- sql refs ---`);
+const tables = cg.getSqlTables({ limit: 10 });
+if (tables.length === 0) {
+ console.log(` (no SQL string-literal call sites)`);
+} else {
+ console.log(` ✓ top 10 tables:`);
+ for (const t of tables) {
+ console.log(` r=${t.reads} w=${t.writes} d=${t.ddl} ${t.tableName}`);
+ }
+}
+
+// ----- sync regression -----
+console.log(`\n--- sync round-trip ---`);
+const t1 = Date.now();
+const syncResult = await cg.sync();
+const syncMs = Date.now() - t1;
+console.log(` ✓ sync no-op in ${syncMs}ms — added=${syncResult.filesAdded} modified=${syncResult.filesModified} removed=${syncResult.filesRemoved}`);
+
+cg.close();
+console.log(`\n=== battle test PASS ===\n`);
diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts
index d118a1fd..44ccc873 100644
--- a/src/bin/codegraph.ts
+++ b/src/bin/codegraph.ts
@@ -23,6 +23,7 @@ import * as path from 'path';
import * as fs from 'fs';
import { getCodeGraphDir, isInitialized } from '../directory';
import { createShimmerProgress } from '../ui/shimmer-progress';
+import { globToSafeRegex } from '../utils';
// Lazy-load heavy modules (CodeGraph, runInstaller) to keep CLI startup fast.
async function loadCodeGraph(): Promise {
@@ -1158,16 +1159,15 @@ program
/\/spec\//,
];
- // Custom filter pattern
+ // Custom filter pattern (ReDoS-safe — globToSafeRegex coalesces
+ // consecutive wildcards so hostile inputs can't produce nested
+ // quantifiers like `.+.+.+`).
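+ // e.g. `*****` collapses to a single `.*`, and pathologically long
+ // patterns come back as null, leaving customFilter unset; see
+ // __tests__/security.test.ts for the behavioral contract.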
let customFilter: RegExp | null = null;
if (options.filter) {
- // Convert glob to regex: ** → .+, * → [^/]*, . → \.
- const regex = options.filter
- .replace(/[+[\]{}()^$|\\]/g, '\\$&')
- .replace(/\./g, '\\.')
- .replace(/\*\*/g, '.+')
- .replace(/\*/g, '[^/]*');
- customFilter = new RegExp(regex);
+ const regexBody = globToSafeRegex(options.filter);
+ if (regexBody !== null) {
+ customFilter = new RegExp(regexBody);
+ }
}
function isTestFile(filePath: string): boolean {
diff --git a/src/centrality/index.ts b/src/centrality/index.ts
new file mode 100644
index 00000000..d03f2206
--- /dev/null
+++ b/src/centrality/index.ts
@@ -0,0 +1,126 @@
+/**
+ * Centrality computation
+ *
+ * Computes PageRank over the `calls` + `references` subgraph; the
+ * caller persists each node's score on the `nodes.centrality` column.
+ * This module is pure compute (no I/O), so the caller owns reading
+ * edges, writing scores, and deciding when to re-run.
+ *
+ * PageRank is the right shape for "what is structurally important?"
+ * because it rewards being reached (weighted by the importance of who
+ * reaches you), not just raw in-degree. A method called once from a
+ * central interface ranks above a method called many times from a
+ * leaf script.
+ *
+ * Edges of kind `contains` are deliberately excluded — they encode
+ * lexical containment (file → class → method), which would dominate
+ * the rank and hide actual reference flow.
+ *
+ * Side benefit observed in spike data: PageRank accidentally surfaces
+ * resolver false-positives. Generic short names (`trim`, `run`) that
+ * the resolver over-merges across files accumulate edges from many
+ * sources and float to the top alongside genuine hubs. Useful as a
+ * diagnostic; not a goal of this module.
+ */
+
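+// The per-iteration update implemented below is the standard damped
+// form (d = PR_DAMPING, N = node count, danglingSum = total rank held
+// by nodes with no outgoing edges):
+//
+//   pr'[t] = (1 - d)/N + d * (danglingSum/N + Σ_{s→t} pr[s]/outDeg[s])
+//
+// Worked check on a two-node graph a→b from a uniform start [0.5, 0.5]:
+// pr'[b] = 0.075 + 0.85 * (0.5/2 + 0.5/1) = 0.7125 and pr'[a] = 0.2875,
+// so total mass stays exactly 1.
+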
+/** Damping factor — fraction of rank propagated through edges each step. */
+export const PR_DAMPING = 0.85;
+
+/**
+ * Iteration count. PageRank converges geometrically; 40 iterations puts
+ * us well below 1e-6 residual on graphs we've seen, with no per-graph
+ * tuning needed.
+ */
+export const PR_ITERATIONS = 40;
+
+/** Edge kinds that contribute to centrality. */
+export const PR_EDGE_KINDS = ['calls', 'references'] as const;
+
+export type PrEdgeKind = (typeof PR_EDGE_KINDS)[number];
+
+export interface CentralityResult {
+ /** nodeId → PageRank score in (0, 1). Sums to ~1.0 across all nodes. */
+ scores: Map;
+ /** Iterations actually run (currently always PR_ITERATIONS — kept for forward compat). */
+ iterations: number;
+ /** Wall-clock duration in milliseconds. */
+ durationMs: number;
+}
+
+interface NodeRef {
+ id: string;
+}
+
+interface EdgeRef {
+ source: string;
+ target: string;
+}
+
+/**
+ * Compute PageRank scores for the supplied nodes/edges.
+ *
+ * @param nodes All graph nodes (only `id` is read).
+ * @param edges Edges that contribute to centrality. Caller is
+ * responsible for filtering to `PR_EDGE_KINDS`.
+ *
+ * Edges referencing unknown node ids are silently dropped — the
+ * underlying graph has FK cascades, so dangling references can only
+ * occur mid-write and are not our problem to fix here.
+ */
+export function computePageRank(nodes: NodeRef[], edges: EdgeRef[]): CentralityResult {
+ const start = Date.now();
+ const N = nodes.length;
+ const scores = new Map<string, number>();
+ if (N === 0) {
+ return { scores, iterations: 0, durationMs: Date.now() - start };
+ }
+
+ // Index nodes for tight numeric loops. Float64Array gives ~3× speedup
+ // over Array(N).fill on million-edge graphs and costs nothing on
+ // smaller ones.
+ const idx = new Map<string, number>();
+ for (let i = 0; i < N; i++) {
+ const n = nodes[i]!;
+ idx.set(n.id, i);
+ }
+
+ const inEdges: number[][] = Array.from({ length: N }, () => []);
+ const outDeg = new Int32Array(N);
+ for (const e of edges) {
+ const s = idx.get(e.source);
+ const t = idx.get(e.target);
+ if (s === undefined || t === undefined) continue;
+ inEdges[t]!.push(s);
+ outDeg[s]! += 1;
+ }
+
+ let pr = new Float64Array(N).fill(1 / N);
+ const baseline = (1 - PR_DAMPING) / N;
+
+ for (let it = 0; it < PR_ITERATIONS; it++) {
+ const next = new Float64Array(N).fill(baseline);
+
+ // Distribute the rank of dangling nodes (no outgoing edges) uniformly.
+ // Without this the total rank decays each iteration.
+ let danglingSum = 0;
+ for (let i = 0; i < N; i++) {
+ if (outDeg[i] === 0) danglingSum += pr[i]!;
+ }
+ const danglingShare = (PR_DAMPING * danglingSum) / N;
+ for (let i = 0; i < N; i++) next[i]! += danglingShare;
+
+ for (let t = 0; t < N; t++) {
+ const sources = inEdges[t]!;
+ let s = 0;
+ for (let k = 0; k < sources.length; k++) {
+ const src = sources[k]!;
+ s += pr[src]! / outDeg[src]!;
+ }
+ next[t]! += PR_DAMPING * s;
+ }
+ pr = next;
+ }
+
+ for (let i = 0; i < N; i++) scores.set(nodes[i]!.id, pr[i]!);
+ return { scores, iterations: PR_ITERATIONS, durationMs: Date.now() - start };
+}
diff --git a/src/churn/index.ts b/src/churn/index.ts
new file mode 100644
index 00000000..1c332886
--- /dev/null
+++ b/src/churn/index.ts
@@ -0,0 +1,259 @@
+/**
+ * Per-file churn mining
+ *
+ * Reads `git log` to compute four signals per indexed file:
+ * - commit_count (how often the file gets touched)
+ * - first_seen_ts (when it entered the codebase)
+ * - last_touched_ts (how recently it was modified)
+ * - loc (line count of the current on-disk content)
+ *
+ * Combined with PageRank centrality (see ../centrality), these answer
+ * "where do bugs hide?" — central files that change often are the
+ * highest-expected-value review targets, validated empirically against
+ * codegraph's own history (e.g. `src/extraction/tree-sitter.ts`).
+ *
+ * Storage strategy: scalar columns on `files` (one row already exists
+ * per indexed path; adding columns avoids a JOIN on every read).
+ *
+ * Incremental update: persist `last_mined_churn_head` in
+ * project_metadata; on subsequent mines, only enumerate commits in
+ * `..HEAD`. This keeps `sync` fast on long histories. If the
+ * stored sha is unreachable (force-push, gc), the caller gets
+ * `needsFullRescan: true` and re-mines from scratch after `clearChurn`.
+ *
+ * Rename note: `git log --name-only` (without `--follow`) reports
+ * post-rename paths only. The pre-rename history is therefore not
+ * counted toward the new path's `commit_count`. `--follow` would fix
+ * this but is documented as O(N) per file and shells out individually,
+ * so v1 accepts the under-count and surfaces it in the doc-comment on
+ * `commitCount` in types.ts.
+ */
+
+import { execFileSync } from 'child_process';
+import * as fs from 'fs';
+import * as path from 'path';
+import { logDebug } from '../errors';
+
+/**
+ * Skip commits that touch more than this many indexed files. Merge
+ * commits and mass refactors otherwise inflate every file's
+ * commit_count without any real coupling signal.
+ */
+export const MAX_FILES_PER_COMMIT = 50;
+
+/** Sentinel for `git log --pretty=tformat:`; no real path parses as a commit header. */
+const COMMIT_HEADER_PREFIX = 'CGCMT-';
+
+/** Project-metadata key holding the HEAD SHA of the last mined commit. */
+export const LAST_MINED_CHURN_HEAD_KEY = 'last_mined_churn_head';
+
+/** Hard cap on git output we'll buffer (bytes). Matches cochange. */
+const MAX_GIT_BUFFER = 200 * 1024 * 1024;
+
+/** Wall-clock cap on a single git invocation (ms). */
+const GIT_TIMEOUT_MS = 60_000;
+
+export interface FileChurnDelta {
+ path: string;
+ /** Commits to add to the existing commit_count. */
+ commitCountDelta: number;
+ /**
+ * Most recent commit timestamp (unix seconds) seen in this delta.
+ * Caller takes max() with the existing value.
+ */
+ lastTouchedTs: number;
+ /**
+ * Earliest commit timestamp (unix seconds) in this delta. Caller
+ * applies `COALESCE(existing, this)` so the first-seen column only
+ * gets written once.
+ */
+ firstSeenTs: number;
+}
+
+export interface ChurnMineResult {
+ deltas: Map<string, FileChurnDelta>;
+ /** HEAD SHA reached by this run; null when not in a git repo. */
+ currentHead: string | null;
+ /**
+ * True when the caller's `sinceSha` was unreachable (force-push, gc).
+ * Caller should `clearChurn()` and re-mine with `sinceSha=null`.
+ */
+ needsFullRescan: boolean;
+}
+
+/**
+ * Get the current HEAD commit SHA, or null when not in a git repo or
+ * the repo has no commits yet.
+ */
+export function getGitHead(rootDir: string): string | null {
+ try {
+ return (
+ execFileSync('git', ['rev-parse', 'HEAD'], {
+ cwd: rootDir,
+ encoding: 'utf-8',
+ timeout: 5000,
+ stdio: ['pipe', 'pipe', 'pipe'],
+ }).trim() || null
+ );
+ } catch {
+ return null;
+ }
+}
+
+/**
+ * Verify that a stored SHA is still reachable from HEAD. After
+ * force-push or `git gc` it can disappear, in which case incremental
+ * mining would silently miss commits.
+ */
+function isShaReachable(rootDir: string, sha: string): boolean {
+ try {
+ execFileSync('git', ['cat-file', '-e', `${sha}^{commit}`], {
+ cwd: rootDir,
+ timeout: 5000,
+ stdio: ['pipe', 'pipe', 'pipe'],
+ });
+ return true;
+ } catch {
+ return false;
+ }
+}
+
+/**
+ * Read the LOC of a file as currently on disk. Cheap; always fresh.
+ *
+ * Counts newline-delimited lines: a file with content `"a\nb\n"`
+ * reports 2; an empty file reports 0; a file ending without a newline
+ * still reports the visible-line count.
+ */
+export function readFileLoc(rootDir: string, relPath: string): number {
+ try {
+ const abs = path.join(rootDir, relPath);
+ const content = fs.readFileSync(abs, 'utf8');
+ if (content.length === 0) return 0;
+ let lines = 0;
+ for (let i = 0; i < content.length; i++) if (content.charCodeAt(i) === 10) lines++;
+ // Trailing chunk without final newline still counts as a line.
+ if (content.charCodeAt(content.length - 1) !== 10) lines++;
+ return lines;
+ } catch {
+ return 0;
+ }
+}
+
+/**
+ * Mine git log for per-file commit metrics.
+ *
+ * @param rootDir Project root.
+ * @param indexedFiles Paths we care about (deltas only emitted for
+ * these). Files outside this set are ignored
+ * per-commit so churn doesn't accumulate for
+ * paths the index has no other knowledge of.
+ * @param sinceSha `null` for full scan; otherwise mine only
+ * `<sinceSha>..HEAD`. Unreachable shas trigger
+ * `needsFullRescan: true`.
+ */
+export function mineChurn(
+ rootDir: string,
+ indexedFiles: Set<string>,
+ sinceSha: string | null
+): ChurnMineResult {
+ const empty: ChurnMineResult = {
+ deltas: new Map(),
+ currentHead: null,
+ needsFullRescan: false,
+ };
+
+ const head = getGitHead(rootDir);
+ if (!head) return empty;
+
+ if (sinceSha && !isShaReachable(rootDir, sinceSha)) {
+ return { deltas: new Map(), currentHead: head, needsFullRescan: true };
+ }
+
+ // No-op: nothing has happened since last mine.
+ if (sinceSha === head) {
+ return { deltas: new Map(), currentHead: head, needsFullRescan: false };
+ }
+
+ // tformat puts a literal trailing record-separator after each
+ // commit's name list; -z then NUL-delimits within the format too,
+ // so we get a clean stream of NUL-separated tokens.
+ const args = [
+ 'log',
+ '--no-merges',
+ '--name-only',
+ `--pretty=tformat:${COMMIT_HEADER_PREFIX}%H|%ct`,
+ '-z',
+ ];
+ if (sinceSha) args.push(`${sinceSha}..HEAD`);
+
+ let raw: string;
+ try {
+ raw = execFileSync('git', args, {
+ cwd: rootDir,
+ encoding: 'utf-8',
+ timeout: GIT_TIMEOUT_MS,
+ maxBuffer: MAX_GIT_BUFFER,
+ stdio: ['pipe', 'pipe', 'pipe'],
+ });
+ } catch (err) {
+ logDebug(`mineChurn: git log failed: ${err instanceof Error ? err.message : String(err)}`);
+ return { deltas: new Map(), currentHead: head, needsFullRescan: false };
+ }
+
+ // Parse: tformat emits `CGCMT-<sha>|<ts>\0\n<path>\0<path>\0...` for
+ // each commit. Each token between NULs is either a commit header or a
+ // path; paths arrive with a leading '\n' on the first one of each
+ // commit (the tformat record-separator). We walk tokens linearly,
+ // switching commit context on each header.
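+ // Example stream (two commits, newest first, both touching a.ts):
+ //   ["CGCMT-<sha1>|1700000000", "\na.ts", "b.ts", "CGCMT-<sha2>|1690000000", "\na.ts"]
+ // yields a.ts → { commitCountDelta: 2, lastTouchedTs: 1700000000, firstSeenTs: 1690000000 }
+ // and b.ts → { commitCountDelta: 1, ... }, assuming both paths are indexed.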
+ const tokens = raw.split('\0');
+ const headerRe = /^CGCMT-([0-9a-f]{40})\|(\d+)$/;
+ const deltas = new Map<string, FileChurnDelta>();
+
+ let curTs = 0;
+ let curPaths: string[] = [];
+ let curActive = false;
+
+ function flush() {
+ if (!curActive) return;
+ if (curPaths.length > 0 && curPaths.length <= MAX_FILES_PER_COMMIT) {
+ for (const p of curPaths) {
+ if (!indexedFiles.has(p)) continue;
+ const cur = deltas.get(p);
+ if (cur) {
+ cur.commitCountDelta += 1;
+ if (curTs > cur.lastTouchedTs) cur.lastTouchedTs = curTs;
+ if (curTs < cur.firstSeenTs) cur.firstSeenTs = curTs;
+ } else {
+ deltas.set(p, {
+ path: p,
+ commitCountDelta: 1,
+ lastTouchedTs: curTs,
+ firstSeenTs: curTs,
+ });
+ }
+ }
+ }
+ curPaths = [];
+ curActive = false;
+ }
+
+ for (const rawTok of tokens) {
+ if (rawTok === '') continue;
+ // Strip a single leading \n introduced by tformat's record separator.
+ const tok = rawTok.startsWith('\n') ? rawTok.slice(1) : rawTok;
+ if (tok === '') continue;
+ const m = headerRe.exec(tok);
+ if (m) {
+ flush();
+ curTs = parseInt(m[2]!, 10);
+ curActive = true;
+ } else if (curActive) {
+ curPaths.push(tok);
+ }
+ // Tokens before the first header (shouldn't happen) are ignored.
+ }
+ flush();
+
+ return { deltas, currentHead: head, needsFullRescan: false };
+}
diff --git a/src/config-refs/index.ts b/src/config-refs/index.ts
new file mode 100644
index 00000000..1ef47ae9
--- /dev/null
+++ b/src/config-refs/index.ts
@@ -0,0 +1,188 @@
+/**
+ * Config-reference extraction
+ *
+ * Scans indexed source files for known config-read patterns
+ * (`process.env.X`, `os.getenv("X")`, etc.) and records each read
+ * site as a row in `config_refs`. Each row links to its enclosing
+ * function via a line-range lookup against the existing nodes table,
+ * so an agent asking "what reads OBSIDIAN_PORT?" gets a list of real
+ * functions, not a grep wall.
+ *
+ * Why a separate table, not graph nodes/edges: env vars don't have a
+ * single source-of-truth file (they're a global namespace), so giving
+ * them a synthetic file_path would pollute the main graph. The table
+ * is queried via a dedicated MCP tool (`codegraph_config`) and via
+ * augmented `codegraph_node` output (per-function "reads:" line).
+ *
+ * Spike validation (mcp-obsidian-extended): 71 reads, 19 distinct
+ * keys; 8× OBSIDIAN_PORT, 8× TOOL_PRESET surface as central
+ * config knobs. Codegraph-itself is sparse (4 reads) — this feature
+ * shines on service-style codebases.
+ *
+ * V1 scope: env-only, regex-based per-language. YAML key reads,
+ * LaunchDarkly flags, etc. are deliberately out of scope; the schema
+ * already supports them via `config_kind` so adding them later is a
+ * pattern addition, not a redesign.
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+import { logDebug } from '../errors';
+
+export type ConfigKind = 'env';
+
+export interface ConfigRef {
+ configKind: ConfigKind;
+ configKey: string;
+ /** Indexed-symbol id for the enclosing function/method. NULL = top-level. */
+ sourceNodeId: string | null;
+ filePath: string;
+ line: number;
+}
+
+interface PatternDef {
+ /** Languages this pattern applies to (matches `Language` in types.ts). */
+ languages: string[];
+ /** Regex with capture group 1 = config key. */
+ re: RegExp;
+}
+
+/**
+ * Per-language read-pattern catalogue.
+ *
+ * Patterns intentionally match only UPPER_CASE keys, the
+ * near-universal convention for env vars. This avoids false positives
+ * like `process.env.foo` (a Node variable) or `os.getenv(some_var)`
+ * (dynamic).
+ */
+const PATTERNS: PatternDef[] = [
+ // process.env.FOO / process.env["FOO"] (TS, JS, TSX, JSX)
+ {
+ languages: ['typescript', 'javascript', 'tsx', 'jsx'],
+ re: /process\.env\.([A-Z_][A-Z0-9_]*)/g,
+ },
+ {
+ languages: ['typescript', 'javascript', 'tsx', 'jsx'],
+ re: /process\.env\[\s*['"]([A-Z_][A-Z0-9_]*)['"]\s*\]/g,
+ },
+ // os.getenv("FOO") / os.environ.get("FOO") / os.environ["FOO"]
+ {
+ languages: ['python'],
+ re: /\bos\.getenv\(\s*['"]([A-Z_][A-Z0-9_]*)['"]/g,
+ },
+ {
+ languages: ['python'],
+ re: /\bos\.environ\.get\(\s*['"]([A-Z_][A-Z0-9_]*)['"]/g,
+ },
+ {
+ languages: ['python'],
+ re: /\bos\.environ\[\s*['"]([A-Z_][A-Z0-9_]*)['"]\s*\]/g,
+ },
+ // Bare getenv("FOO") (Python convention with `from os import getenv`)
+ {
+ languages: ['python'],
+ re: /\bgetenv\(\s*['"]([A-Z_][A-Z0-9_]*)['"]/g,
+ },
+ // os.Getenv("FOO") / os.LookupEnv("FOO") (Go)
+ {
+ languages: ['go'],
+ re: /\bos\.(?:Getenv|LookupEnv)\(\s*"([A-Z_][A-Z0-9_]*)"/g,
+ },
+ // System.getenv("FOO") (Java/Kotlin)
+ {
+ languages: ['java', 'kotlin'],
+ re: /\bSystem\.getenv\(\s*"([A-Z_][A-Z0-9_]*)"/g,
+ },
+ // ENV["FOO"] / ENV.fetch("FOO") (Ruby)
+ {
+ languages: ['ruby'],
+ re: /\bENV\[\s*['"]([A-Z_][A-Z0-9_]*)['"]\s*\]/g,
+ },
+ {
+ languages: ['ruby'],
+ re: /\bENV\.fetch\(\s*['"]([A-Z_][A-Z0-9_]*)['"]/g,
+ },
+ // Rust: env!("FOO") / std::env::var("FOO")
+ {
+ languages: ['rust'],
+ re: /\benv!\(\s*"([A-Z_][A-Z0-9_]*)"/g,
+ },
+ {
+ languages: ['rust'],
+ re: /\bstd::env::var\(\s*"([A-Z_][A-Z0-9_]*)"/g,
+ },
+];
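+
+// Example: a read like `const port = process.env.OBSIDIAN_PORT;` at
+// line 12 of a hypothetical src/server.ts produces
+//   { configKind: 'env', configKey: 'OBSIDIAN_PORT',
+//     sourceNodeId: <enclosing node id, or null at top level>,
+//     filePath: 'src/server.ts', line: 12 }.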
+
+/** Languages covered by at least one pattern; files in any other language are skipped. */
+const SUPPORTED_LANGUAGES = new Set(
+ PATTERNS.flatMap((p) => p.languages)
+);
+
+/**
+ * Resolver supplied by caller: (filePath, line) → enclosing nodeId
+ * (function/method/class). Returns null when the read is at the file's
+ * top level — the row still gets persisted with NULL source_node_id.
+ */
+export type EnclosingNodeResolver = (filePath: string, line: number) => string | null;
+
+export interface FileTarget {
+ path: string;
+ language: string;
+}
+
+/**
+ * Scan a list of (path, language) targets and return all read sites.
+ * File reads + regex only, no DB access; the caller owns DB writes via `applyConfigRefs`.
+ *
+ * Files we can't read (deleted, permission, binary) are silently
+ * skipped — extraction has already validated readability for the rest.
+ */
+export function extractConfigRefs(
+ rootDir: string,
+ targets: Iterable<FileTarget>,
+ resolveEnclosing: EnclosingNodeResolver
+): ConfigRef[] {
+ const refs: ConfigRef[] = [];
+ for (const t of targets) {
+ if (!SUPPORTED_LANGUAGES.has(t.language)) continue;
+ let src: string;
+ try {
+ src = fs.readFileSync(path.join(rootDir, t.path), 'utf8');
+ } catch (err) {
+ logDebug(`extractConfigRefs: read failed for ${t.path}: ${err instanceof Error ? err.message : String(err)}`);
+ continue;
+ }
+ // Iterate lines so we can attribute each match to a 1-indexed line.
+ const lines = src.split('\n');
+ for (let i = 0; i < lines.length; i++) {
+ const line = lines[i]!;
+ // Cheap pre-filter to skip the 99% of lines that obviously
+ // contain no env reference. Cuts per-file cost dramatically on
+ // big repos.
+ if (
+ !line.includes('env') &&
+ !line.includes('Env') &&
+ !line.includes('ENV')
+ ) {
+ continue;
+ }
+ for (const pat of PATTERNS) {
+ if (!pat.languages.includes(t.language)) continue;
+ pat.re.lastIndex = 0;
+ let m: RegExpExecArray | null;
+ while ((m = pat.re.exec(line)) !== null) {
+ const key = m[1]!;
+ const lineNo = i + 1;
+ refs.push({
+ configKind: 'env',
+ configKey: key,
+ sourceNodeId: resolveEnclosing(t.path, lineNo),
+ filePath: t.path,
+ line: lineNo,
+ });
+ }
+ }
+ }
+ }
+ return refs;
+}
diff --git a/src/config.ts b/src/config.ts
index 9ab1032a..f1d70250 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -128,6 +128,11 @@ function mergeConfig(
extractDocstrings: overrides.extractDocstrings ?? defaults.extractDocstrings,
trackCallSites: overrides.trackCallSites ?? defaults.trackCallSites,
customPatterns: overrides.customPatterns ?? defaults.customPatterns,
+ enableCentrality: overrides.enableCentrality ?? defaults.enableCentrality,
+ enableChurn: overrides.enableChurn ?? defaults.enableChurn,
+ enableIssueHistory: overrides.enableIssueHistory ?? defaults.enableIssueHistory,
+ enableConfigRefs: overrides.enableConfigRefs ?? defaults.enableConfigRefs,
+ enableSqlRefs: overrides.enableSqlRefs ?? defaults.enableSqlRefs,
};
}
diff --git a/src/context/index.ts b/src/context/index.ts
index 94192377..08f25657 100644
--- a/src/context/index.ts
+++ b/src/context/index.ts
@@ -286,6 +286,14 @@ export class ContextBuilder {
options: FindRelevantContextOptions = {}
): Promise {
const opts = { ...DEFAULT_FIND_OPTIONS, ...options };
+ // Bound user-supplied limits — `searchLimit` is multiplied by 5 in
+ // findNodesByExactName (line 312) and feeds several other unbounded
+ // operations below, so a request with `searchLimit: 1_000_000` would
+ // pull millions of rows before any filtering. 100 is well above the
+ // largest legitimate use we've seen.
+ opts.searchLimit = Math.min(Math.max(1, opts.searchLimit), 100);
+ opts.maxNodes = Math.min(Math.max(1, opts.maxNodes), 1000);
+ opts.traversalDepth = Math.min(Math.max(0, opts.traversalDepth), 10);
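+ // e.g. { searchLimit: 1_000_000 } is clamped to 100, { traversalDepth: 99 } to 10.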
// Start with empty subgraph
const nodes = new Map();
diff --git a/src/db/index.ts b/src/db/index.ts
index 34e99338..da85caea 100644
--- a/src/db/index.ts
+++ b/src/db/index.ts
@@ -152,6 +152,36 @@ export class DatabaseConnection {
this.db.exec('ANALYZE');
}
+ /**
+ * Lightweight, non-blocking maintenance to run after bulk writes
+ * (indexAll, sync). Two operations:
+ *
+ * - `PRAGMA optimize` — incremental ANALYZE; SQLite only re-analyzes
+ * tables whose row counts changed materially since the last
+ * ANALYZE. Without it, the query planner has no statistics on the
+ * freshly-bulk-loaded tables and can pick suboptimal indexes.
+ *
+ * - `PRAGMA wal_checkpoint(PASSIVE)` — fold pending WAL pages back
+ * into the main database file so the WAL file doesn't grow
+ * unboundedly between automatic checkpoints (auto-fires at 1000
+ * pages by default; large indexAll runs blow past that).
+ *
+ * Failures in either operation are silently swallowed; both are
+ * best-effort optimizations, never load-bearing for correctness.
+ */
+ runMaintenance(): void {
+ try {
+ this.db.exec('PRAGMA optimize');
+ } catch {
+ // ignore
+ }
+ try {
+ this.db.exec('PRAGMA wal_checkpoint(PASSIVE)');
+ } catch {
+ // ignore (e.g., not in WAL mode)
+ }
+ }
+
/**
* Close the database connection
*/
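
The intended call site is after the bulk writes commit, never inside them; a sketch under that assumption (`indexAll` and `bulkIndex` are hypothetical names, not APIs from this PR):

```ts
// Hypothetical wiring: run maintenance once the bulk transaction has
// committed, so the passive checkpoint actually has WAL pages to fold.
async function indexAll(conn: DatabaseConnection, files: string[]): Promise<void> {
  await bulkIndex(conn, files); // heavy writes; WAL grows past the 1000-page auto-checkpoint
  conn.runMaintenance();        // best-effort: PRAGMA optimize + wal_checkpoint(PASSIVE)
}
```
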
diff --git a/src/db/migrations.ts b/src/db/migrations.ts
index 0a256dbc..98325247 100644
--- a/src/db/migrations.ts
+++ b/src/db/migrations.ts
@@ -1,60 +1,26 @@
/**
- * Database Migrations
+ * Database Migrations — runner + backward-compat surface.
*
- * Schema versioning and migration support.
+ * The migration definitions themselves live in
+ * `./migrations/NNN-description.ts`, one file per migration, with
+ * version derived from the filename prefix. This file is the
+ * runner (read schema_versions, apply pending in order) and the
+ * stable API surface that the rest of the codebase imports.
+ *
+ * Adding a migration: see `./migrations/index.ts`.
*/
import { SqliteDatabase } from './sqlite-adapter';
+import { ALL_MIGRATIONS, CURRENT_SCHEMA_VERSION as REGISTRY_CURRENT } from './migrations/index';
+import type { Migration } from './migrations/types';
/**
- * Current schema version
+ * Highest registered migration version. Derived from the
+ * registry; re-exported here unchanged so existing consumers
+ * (`import { CURRENT_SCHEMA_VERSION } from './migrations'`) keep
+ * working.
*/
-export const CURRENT_SCHEMA_VERSION = 3;
-
-/**
- * Migration definition
- */
-interface Migration {
- version: number;
- description: string;
- up: (db: SqliteDatabase) => void;
-}
-
-/**
- * All migrations in order
- *
- * Note: Version 1 is the initial schema, handled by schema.sql
- * Future migrations go here.
- */
-const migrations: Migration[] = [
- {
- version: 2,
- description: 'Add project metadata, provenance tracking, and unresolved ref context',
- up: (db) => {
- db.exec(`
- CREATE TABLE IF NOT EXISTS project_metadata (
- key TEXT PRIMARY KEY,
- value TEXT NOT NULL,
- updated_at INTEGER NOT NULL
- );
- ALTER TABLE unresolved_refs ADD COLUMN file_path TEXT NOT NULL DEFAULT '';
- ALTER TABLE unresolved_refs ADD COLUMN language TEXT NOT NULL DEFAULT 'unknown';
- ALTER TABLE edges ADD COLUMN provenance TEXT DEFAULT NULL;
- CREATE INDEX IF NOT EXISTS idx_unresolved_file_path ON unresolved_refs(file_path);
- CREATE INDEX IF NOT EXISTS idx_edges_provenance ON edges(provenance);
- `);
- },
- },
- {
- version: 3,
- description: 'Add lower(name) expression index for memory-efficient case-insensitive lookups',
- up: (db) => {
- db.exec(`
- CREATE INDEX IF NOT EXISTS idx_nodes_lower_name ON nodes(lower(name));
- `);
- },
- },
-];
+export const CURRENT_SCHEMA_VERSION: number = REGISTRY_CURRENT;
/**
* Get the current schema version from the database
@@ -84,17 +50,14 @@ function recordMigration(db: SqliteDatabase, version: number, description: strin
* Run all pending migrations
*/
export function runMigrations(db: SqliteDatabase, fromVersion: number): void {
- const pending = migrations.filter((m) => m.version > fromVersion);
-
- if (pending.length === 0) {
- return;
- }
+ const pending = ALL_MIGRATIONS.filter((m) => m.version > fromVersion);
+ if (pending.length === 0) return;
- // Sort by version
- pending.sort((a, b) => a.version - b.version);
+ // ALL_MIGRATIONS is already sorted by version; re-sorting after the
+ // filter is cheap and re-confirms the invariant.
+ const ordered = [...pending].sort((a, b) => a.version - b.version);
- // Run each migration in a transaction
- for (const migration of pending) {
+ for (const migration of ordered) {
db.transaction(() => {
migration.up(db);
recordMigration(db, migration.version, migration.description);
@@ -111,13 +74,15 @@ export function needsMigration(db: SqliteDatabase): boolean {
}
/**
- * Get list of pending migrations
+ * Get list of pending migrations.
+ *
+ * Returned as a fresh mutable array (not the underlying readonly
+ * registry) so callers that previously assigned the result to a
+ * `Migration[]`-typed variable keep working unchanged.
*/
export function getPendingMigrations(db: SqliteDatabase): Migration[] {
const current = getCurrentVersion(db);
- return migrations
- .filter((m) => m.version > current)
- .sort((a, b) => a.version - b.version);
+ return ALL_MIGRATIONS.filter((m) => m.version > current).slice();
}
/**
@@ -136,3 +101,7 @@ export function getMigrationHistory(
description: row.description,
}));
}
+
+// Re-export the registry surface for callers that want it.
+export { ALL_MIGRATIONS } from './migrations/index';
+export type { Migration, MigrationModule } from './migrations/types';
diff --git a/src/db/migrations/002-project-metadata.ts b/src/db/migrations/002-project-metadata.ts
new file mode 100644
index 00000000..9fe7945b
--- /dev/null
+++ b/src/db/migrations/002-project-metadata.ts
@@ -0,0 +1,19 @@
+import type { MigrationModule } from './types';
+
+export const MIGRATION: MigrationModule = {
+ description: 'Add project metadata, provenance tracking, and unresolved ref context',
+ up: (db) => {
+ db.exec(`
+ CREATE TABLE IF NOT EXISTS project_metadata (
+ key TEXT PRIMARY KEY,
+ value TEXT NOT NULL,
+ updated_at INTEGER NOT NULL
+ );
+ ALTER TABLE unresolved_refs ADD COLUMN file_path TEXT NOT NULL DEFAULT '';
+ ALTER TABLE unresolved_refs ADD COLUMN language TEXT NOT NULL DEFAULT 'unknown';
+ ALTER TABLE edges ADD COLUMN provenance TEXT DEFAULT NULL;
+ CREATE INDEX IF NOT EXISTS idx_unresolved_file_path ON unresolved_refs(file_path);
+ CREATE INDEX IF NOT EXISTS idx_edges_provenance ON edges(provenance);
+ `);
+ },
+};
diff --git a/src/db/migrations/003-lower-name-index.ts b/src/db/migrations/003-lower-name-index.ts
new file mode 100644
index 00000000..ff5416eb
--- /dev/null
+++ b/src/db/migrations/003-lower-name-index.ts
@@ -0,0 +1,10 @@
+import type { MigrationModule } from './types';
+
+export const MIGRATION: MigrationModule = {
+ description: 'Add lower(name) expression index for memory-efficient case-insensitive lookups',
+ up: (db) => {
+ db.exec(`
+ CREATE INDEX IF NOT EXISTS idx_nodes_lower_name ON nodes(lower(name));
+ `);
+ },
+};
diff --git a/src/db/migrations/004-centrality-churn.ts b/src/db/migrations/004-centrality-churn.ts
new file mode 100644
index 00000000..82d30ffe
--- /dev/null
+++ b/src/db/migrations/004-centrality-churn.ts
@@ -0,0 +1,42 @@
+import type { MigrationModule } from './types';
+
+export const MIGRATION: MigrationModule = {
+ description: 'Add centrality on nodes; per-file churn metrics on files',
+ up: (db) => {
+ // ALTER TABLE ADD COLUMN is not idempotent on SQLite — guard with
+ // PRAGMA table_info so re-running after a partial DDL failure (or
+ // landing alongside another migration that touches the same files
+ // columns) does not throw "duplicate column name".
+ const tableExists = (name: string): boolean =>
+ (db.prepare(`SELECT COUNT(*) AS c FROM sqlite_master WHERE type='table' AND name=?`)
+ .get(name) as { c: number }).c > 0;
+
+ if (tableExists('nodes')) {
+ const nodeCols = db.prepare(`PRAGMA table_info(nodes);`).all() as Array<{ name: string }>;
+ if (!nodeCols.some((c) => c.name === 'centrality')) {
+ db.exec(`ALTER TABLE nodes ADD COLUMN centrality REAL DEFAULT NULL;`);
+ }
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_nodes_centrality ON nodes(centrality DESC);`);
+ }
+
+ if (tableExists('files')) {
+ const fileCols = db.prepare(`PRAGMA table_info(files);`).all() as Array<{ name: string }>;
+ if (!fileCols.some((c) => c.name === 'commit_count')) {
+ db.exec(`ALTER TABLE files ADD COLUMN commit_count INTEGER NOT NULL DEFAULT 0;`);
+ }
+ if (!fileCols.some((c) => c.name === 'loc')) {
+ db.exec(`ALTER TABLE files ADD COLUMN loc INTEGER NOT NULL DEFAULT 0;`);
+ }
+ if (!fileCols.some((c) => c.name === 'first_seen_ts')) {
+ db.exec(`ALTER TABLE files ADD COLUMN first_seen_ts INTEGER DEFAULT NULL;`);
+ }
+ if (!fileCols.some((c) => c.name === 'last_touched_ts')) {
+ db.exec(`ALTER TABLE files ADD COLUMN last_touched_ts INTEGER DEFAULT NULL;`);
+ }
+ db.exec(`
+ CREATE INDEX IF NOT EXISTS idx_files_commit_count ON files(commit_count DESC);
+ CREATE INDEX IF NOT EXISTS idx_files_last_touched ON files(last_touched_ts DESC);
+ `);
+ }
+ },
+};
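
The four per-column guards follow one pattern; a sketch of the shared helper they could be factored into (not part of this PR; assumes the same `prepare`/`exec` adapter surface the migration uses):

```ts
import type { SqliteDatabase } from '../sqlite-adapter';

// SQLite has no ADD COLUMN IF NOT EXISTS, so existence is checked via
// PRAGMA table_info before issuing the ALTER.
function addColumnIfMissing(db: SqliteDatabase, table: string, column: string, ddl: string): void {
  const cols = db.prepare(`PRAGMA table_info(${table});`).all() as Array<{ name: string }>;
  if (!cols.some((c) => c.name === column)) {
    db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${ddl};`);
  }
}

// Equivalent to the guard above:
//   addColumnIfMissing(db, 'nodes', 'centrality', 'REAL DEFAULT NULL');
```
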
diff --git a/src/db/migrations/005-symbol-issues.ts b/src/db/migrations/005-symbol-issues.ts
new file mode 100644
index 00000000..7af13795
--- /dev/null
+++ b/src/db/migrations/005-symbol-issues.ts
@@ -0,0 +1,19 @@
+import type { MigrationModule } from './types';
+
+export const MIGRATION: MigrationModule = {
+ description: 'Add symbol_issues table for issue→symbol attribution from git history',
+ up: (db) => {
+ db.exec(`
+ CREATE TABLE IF NOT EXISTS symbol_issues (
+ node_id TEXT NOT NULL,
+ issue_number INTEGER NOT NULL,
+ commit_sha TEXT NOT NULL,
+ kind TEXT NOT NULL CHECK (kind IN ('modified','added','removed')),
+ PRIMARY KEY (node_id, issue_number, commit_sha, kind),
+ FOREIGN KEY (node_id) REFERENCES nodes(id) ON DELETE CASCADE
+ );
+ CREATE INDEX IF NOT EXISTS idx_symbol_issues_node ON symbol_issues(node_id);
+ CREATE INDEX IF NOT EXISTS idx_symbol_issues_issue ON symbol_issues(issue_number);
+ `);
+ },
+};
diff --git a/src/db/migrations/006-config-refs.ts b/src/db/migrations/006-config-refs.ts
new file mode 100644
index 00000000..8fed1a91
--- /dev/null
+++ b/src/db/migrations/006-config-refs.ts
@@ -0,0 +1,24 @@
+import type { MigrationModule } from './types';
+
+export const MIGRATION: MigrationModule = {
+ description: 'Add config_refs table for env var / feature flag read sites',
+ up: (db) => {
+ db.exec(`
+ CREATE TABLE IF NOT EXISTS config_refs (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ config_kind TEXT NOT NULL,
+ config_key TEXT NOT NULL,
+ source_node_id TEXT,
+ file_path TEXT NOT NULL,
+ line INTEGER NOT NULL,
+ FOREIGN KEY (source_node_id) REFERENCES nodes(id) ON DELETE CASCADE
+ );
+ CREATE INDEX IF NOT EXISTS idx_config_refs_key
+ ON config_refs(config_kind, config_key);
+ CREATE INDEX IF NOT EXISTS idx_config_refs_node
+ ON config_refs(source_node_id);
+ CREATE INDEX IF NOT EXISTS idx_config_refs_file
+ ON config_refs(file_path);
+ `);
+ },
+};
diff --git a/src/db/migrations/007-sql-refs.ts b/src/db/migrations/007-sql-refs.ts
new file mode 100644
index 00000000..629d070f
--- /dev/null
+++ b/src/db/migrations/007-sql-refs.ts
@@ -0,0 +1,24 @@
+import type { MigrationModule } from './types';
+
+export const MIGRATION: MigrationModule = {
+ description: 'Add sql_refs table for SQL string-literal references to tables',
+ up: (db) => {
+ db.exec(`
+ CREATE TABLE IF NOT EXISTS sql_refs (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ table_name TEXT NOT NULL,
+ op TEXT NOT NULL CHECK (op IN ('read','write','ddl')),
+ source_node_id TEXT,
+ file_path TEXT NOT NULL,
+ line INTEGER NOT NULL,
+ FOREIGN KEY (source_node_id) REFERENCES nodes(id) ON DELETE CASCADE
+ );
+ CREATE INDEX IF NOT EXISTS idx_sql_refs_table
+ ON sql_refs(lower(table_name));
+ CREATE INDEX IF NOT EXISTS idx_sql_refs_node
+ ON sql_refs(source_node_id);
+ CREATE INDEX IF NOT EXISTS idx_sql_refs_file
+ ON sql_refs(file_path);
+ `);
+ },
+};
diff --git a/src/db/migrations/008-edges-unique.ts b/src/db/migrations/008-edges-unique.ts
new file mode 100644
index 00000000..ed7e5372
--- /dev/null
+++ b/src/db/migrations/008-edges-unique.ts
@@ -0,0 +1,29 @@
+import type { MigrationModule } from './types';
+
+export const MIGRATION: MigrationModule = {
+ description:
+ 'Dedup edges and enforce UNIQUE(source, target, kind, line, col) so INSERT OR IGNORE actually dedupes',
+ up: (db) => {
+ // Tolerate edges-table-missing (synthetic test DBs that only need
+ // the FTS / nodes side of the schema): if there's no edges table,
+ // there are no duplicates to dedup or unique constraint to add.
+ const hasEdges = (db
+ .prepare(`SELECT COUNT(*) AS c FROM sqlite_master WHERE type='table' AND name='edges'`)
+ .get() as { c: number }).c > 0;
+ if (!hasEdges) return;
+
+ // Without a UNIQUE constraint the existing `INSERT OR IGNORE INTO
+ // edges` was a no-op for dedup purposes. Collapse accumulated
+ // duplicates first, then add the UNIQUE index. COALESCE keeps
+ // NULL line/col values comparable.
+ db.exec(`
+ DELETE FROM edges
+ WHERE id NOT IN (
+ SELECT MIN(id) FROM edges
+ GROUP BY source, target, kind, COALESCE(line, -1), COALESCE(col, -1)
+ );
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_edges_unique
+ ON edges(source, target, kind, COALESCE(line, -1), COALESCE(col, -1));
+ `);
+ },
+};
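
For reviewers unfamiliar with the SQLite quirk the COALESCE addresses: a plain UNIQUE index treats NULLs as distinct, so `INSERT OR IGNORE` never conflicts on NULL line/col. A standalone sketch (better-sqlite3 used purely for the demo):

```ts
import Database from 'better-sqlite3';

const db = new Database(':memory:');
db.exec(`CREATE TABLE t (source TEXT, target TEXT, kind TEXT, line INT, col INT,
         UNIQUE(source, target, kind, line, col));`);
db.prepare(`INSERT OR IGNORE INTO t VALUES ('a','b','calls',NULL,NULL)`).run();
db.prepare(`INSERT OR IGNORE INTO t VALUES ('a','b','calls',NULL,NULL)`).run();
// Both rows land: NULL never equals NULL for uniqueness purposes.
// Indexing on COALESCE(line,-1), COALESCE(col,-1) instead makes the
// second insert a genuine no-op, which is what this migration sets up.
```
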
diff --git a/src/db/migrations/009-fts-subwords-porter.ts b/src/db/migrations/009-fts-subwords-porter.ts
new file mode 100644
index 00000000..032058cc
--- /dev/null
+++ b/src/db/migrations/009-fts-subwords-porter.ts
@@ -0,0 +1,68 @@
+import type { MigrationModule } from './types';
+import { buildNameSubwords } from '../../utils';
+
+export const MIGRATION: MigrationModule = {
+ description:
+ 'Add name_subwords + Porter stemmer to FTS so natural-language and partial-identifier queries work',
+ up: (db) => {
+ // 1. Add the synthetic subwords column to nodes — idempotent so a
+ // re-run after a partial DDL failure (SQLite auto-commits DDL,
+ // so only some of these statements may have landed) doesn't fail
+ // with "duplicate column name".
+ const cols = db.prepare(`PRAGMA table_info(nodes);`).all() as Array<{ name: string }>;
+ if (!cols.some((c) => c.name === 'name_subwords')) {
+ db.exec(`ALTER TABLE nodes ADD COLUMN name_subwords TEXT;`);
+ }
+
+ // 2. Drop the existing FTS table + triggers. We can't ALTER the
+ // FTS5 tokenizer in place; recreating is the supported path.
+ db.exec(`
+ DROP TRIGGER IF EXISTS nodes_ai;
+ DROP TRIGGER IF EXISTS nodes_ad;
+ DROP TRIGGER IF EXISTS nodes_au;
+ DROP TABLE IF EXISTS nodes_fts;
+ `);
+
+ // 3. Recreate the FTS table — but DO NOT recreate the triggers yet.
+ db.exec(`
+ CREATE VIRTUAL TABLE nodes_fts USING fts5(
+ id, name, qualified_name, docstring, signature, name_subwords,
+ content='nodes',
+ content_rowid='rowid',
+ tokenize="porter unicode61"
+ );
+ `);
+
+ // 4. Backfill name_subwords.
+ const rows = db
+ .prepare('SELECT id, name FROM nodes')
+ .all() as Array<{ id: string; name: string }>;
+ const update = db.prepare('UPDATE nodes SET name_subwords = ? WHERE id = ?');
+ for (const row of rows) {
+ update.run(buildNameSubwords(row.name), row.id);
+ }
+
+ // 5. Rebuild the FTS index from the content table.
+ db.exec(`INSERT INTO nodes_fts(nodes_fts) VALUES('rebuild');`);
+
+ // 6. Re-attach the triggers — fire on subsequent application writes.
+ db.exec(`
+ CREATE TRIGGER nodes_ai AFTER INSERT ON nodes BEGIN
+ INSERT INTO nodes_fts(rowid, id, name, qualified_name, docstring, signature, name_subwords)
+ VALUES (NEW.rowid, NEW.id, NEW.name, NEW.qualified_name, NEW.docstring, NEW.signature, NEW.name_subwords);
+ END;
+
+ CREATE TRIGGER nodes_ad AFTER DELETE ON nodes BEGIN
+ INSERT INTO nodes_fts(nodes_fts, rowid, id, name, qualified_name, docstring, signature, name_subwords)
+ VALUES ('delete', OLD.rowid, OLD.id, OLD.name, OLD.qualified_name, OLD.docstring, OLD.signature, OLD.name_subwords);
+ END;
+
+ CREATE TRIGGER nodes_au AFTER UPDATE ON nodes BEGIN
+ INSERT INTO nodes_fts(nodes_fts, rowid, id, name, qualified_name, docstring, signature, name_subwords)
+ VALUES ('delete', OLD.rowid, OLD.id, OLD.name, OLD.qualified_name, OLD.docstring, OLD.signature, OLD.name_subwords);
+ INSERT INTO nodes_fts(rowid, id, name, qualified_name, docstring, signature, name_subwords)
+ VALUES (NEW.rowid, NEW.id, NEW.name, NEW.qualified_name, NEW.docstring, NEW.signature, NEW.name_subwords);
+ END;
+ `);
+ },
+};
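
`buildNameSubwords` lives in `src/utils.ts` and is not shown in this diff; the backfill above assumes roughly the following contract (sketch only, the real implementation may differ in detail):

```ts
// Assumed behavior: split camelCase and snake_case into space-joined,
// lowercased subword tokens that FTS5 then indexes as separate terms.
function buildNameSubwordsSketch(name: string): string {
  return name
    .replace(/([a-z0-9])([A-Z])/g, '$1 $2') // camelCase boundary
    .replace(/[_\-.]+/g, ' ')               // snake/kebab separators
    .toLowerCase()
    .trim();
}
// buildNameSubwordsSketch('getParserState')   -> 'get parser state'
// buildNameSubwordsSketch('HTTP_MAX_RETRIES') -> 'http max retries'
```
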
diff --git a/src/db/migrations/index.ts b/src/db/migrations/index.ts
new file mode 100644
index 00000000..b1d7b9a6
--- /dev/null
+++ b/src/db/migrations/index.ts
@@ -0,0 +1,118 @@
+/**
+ * Migration registry.
+ *
+ * Adding a new schema migration is:
+ *
+ * 1. Pick the next free 3-digit prefix (`NNN`) — `git ls-files
+ * 'src/db/migrations/[0-9]*.ts'` shows what's taken.
+ * 2. Create `src/db/migrations/NNN-kebab-name.ts`
+ * exporting a `MIGRATION: MigrationModule` (just `description`
+ * and `up(db)`).
+ * 3. Add **one** import line and **one** array entry to this file.
+ *
+ * **Why filename-derived versions instead of a field?** Two PRs
+ * adding migrations independently used to collide on the
+ * `migrations[]` array AND the `CURRENT_SCHEMA_VERSION` const.
+ * With monolithic migrations.ts, "I claimed v4 / you claimed v4"
+ * resolved as "second PR's v4 silently no-ops" — a real bug class
+ * (PR #113's reviewer caught one). With filename-derived versions,
+ * two PRs both creating `004-foo.ts` produce a filesystem-level
+ * conflict the maintainer sees instantly.
+ *
+ * `CURRENT_SCHEMA_VERSION` is the max of all registered versions.
+ */
+
+import type { Migration, MigrationModule } from './types';
+
+import { MIGRATION as MIG_002 } from './002-project-metadata';
+import { MIGRATION as MIG_003 } from './003-lower-name-index';
+import { MIGRATION as MIG_004 } from './004-centrality-churn';
+import { MIGRATION as MIG_005 } from './005-symbol-issues';
+import { MIGRATION as MIG_006 } from './006-config-refs';
+import { MIGRATION as MIG_007 } from './007-sql-refs';
+import { MIGRATION as MIG_008 } from './008-edges-unique';
+import { MIGRATION as MIG_009 } from './009-fts-subwords-porter';
+
+interface ModuleRef {
+ /**
+ * Source filename. The 3-digit prefix is the source of truth for
+ * the version number — `validateRegistered` parses it. Keep this
+ * field in sync with the actual file on disk; the
+ * filesystem-cross-check test catches drift.
+ */
+ filename: string;
+ module: MigrationModule;
+}
+
+/**
+ * Static-import list of every migration. Two PRs adding
+ * migrations both add a single entry here; alphabetical ordering
+ * puts adjacent additions on different lines unless the version
+ * numbers themselves collide, in which case the filesystem
+ * collision on `NNN-*.ts` surfaces the conflict instantly.
+ */
+const REGISTERED_MODULES: readonly ModuleRef[] = [
+ { filename: '002-project-metadata.ts', module: MIG_002 },
+ { filename: '003-lower-name-index.ts', module: MIG_003 },
+ { filename: '004-centrality-churn.ts', module: MIG_004 },
+ { filename: '005-symbol-issues.ts', module: MIG_005 },
+ { filename: '006-config-refs.ts', module: MIG_006 },
+ { filename: '007-sql-refs.ts', module: MIG_007 },
+ { filename: '008-edges-unique.ts', module: MIG_008 },
+ { filename: '009-fts-subwords-porter.ts', module: MIG_009 },
+];
+
+/** Strict 3-digit prefix on each migration filename. */
+const FILENAME_PATTERN = /^(\d{3})-[a-z0-9]+(?:-[a-z0-9]+)*\.ts$/;
+
+/**
+ * Validate the registered set: filenames match the strict
+ * `NNN-name.ts` shape, version is parsed from the prefix (no
+ * hand-typed version field that can drift), versions are unique,
+ * and the result is sorted ascending. Throws loudly at module
+ * load if any invariant is violated rather than silently dropping
+ * a migration during `runMigrations()`.
+ */
+function validateRegistered(refs: readonly ModuleRef[]): readonly Migration[] {
+ if (refs.length === 0) {
+ throw new Error('[CodeGraph] migrations registry is empty');
+ }
+ const parsed = refs.map((r) => {
+ const m = FILENAME_PATTERN.exec(r.filename);
+ if (!m) {
+ throw new Error(
+ `[CodeGraph] migration filename "${r.filename}" does not match ` +
+ `expected pattern NNN-kebab-name.ts (3-digit prefix, lowercase kebab-case body)`
+ );
+ }
+ const version = parseInt(m[1]!, 10);
+ return {
+ version,
+ filename: r.filename,
+ description: r.module.description,
+ up: r.module.up,
+ };
+ });
+ const sorted = [...parsed].sort((a, b) => a.version - b.version);
+ for (let i = 1; i < sorted.length; i++) {
+ if (sorted[i]!.version === sorted[i - 1]!.version) {
+ throw new Error(
+ `[CodeGraph] duplicate migration version ${sorted[i]!.version}: ` +
+ `${sorted[i - 1]!.filename} vs ${sorted[i]!.filename}`
+ );
+ }
+ }
+ return sorted.map((r) => ({
+ version: r.version,
+ description: r.description,
+ up: r.up,
+ }));
+}
+
+export const ALL_MIGRATIONS: readonly Migration[] = validateRegistered(REGISTERED_MODULES);
+
+/**
+ * Highest registered migration version. Derived from the registry
+ * (no hand-maintained constant to keep in sync).
+ */
+export const CURRENT_SCHEMA_VERSION: number = ALL_MIGRATIONS[ALL_MIGRATIONS.length - 1]!.version;
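
Concretely, the three-step recipe from the header comment looks like this for a hypothetical next migration (file name and contents invented for illustration):

```ts
// src/db/migrations/010-nodes-kind-index.ts
import type { MigrationModule } from './types';

export const MIGRATION: MigrationModule = {
  description: 'Example: add an index on nodes(kind)',
  up: (db) => {
    db.exec(`CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind);`);
  },
};

// ...and in this registry file, one import plus one entry:
//   import { MIGRATION as MIG_010 } from './010-nodes-kind-index';
//   { filename: '010-nodes-kind-index.ts', module: MIG_010 },
```
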
diff --git a/src/db/migrations/types.ts b/src/db/migrations/types.ts
new file mode 100644
index 00000000..479af672
--- /dev/null
+++ b/src/db/migrations/types.ts
@@ -0,0 +1,25 @@
+/**
+ * Migration registry types.
+ *
+ * Each migration ships its own self-contained file
+ * (`./NNN-description.ts`) exporting a `MIGRATION:
+ * MigrationModule`. The version number is derived from the
+ * leading 3-digit prefix on the filename, NOT from a field in the
+ * module — this guarantees no two PRs can claim the same version
+ * silently (filenames collide on the filesystem; SQL migrations
+ * never silently no-op).
+ */
+
+import type { SqliteDatabase } from '../sqlite-adapter';
+
+export interface MigrationModule {
+ /** One-line description for `schema_versions` table + diagnostics. */
+ readonly description: string;
+ /** The actual schema-mutation function. Wrapped in a transaction. */
+ readonly up: (db: SqliteDatabase) => void;
+}
+
+export interface Migration extends MigrationModule {
+ /** Version derived from filename's leading NNN prefix. */
+ readonly version: number;
+}
diff --git a/src/db/queries.ts b/src/db/queries.ts
index 51f1a1ad..4a3edb90 100644
--- a/src/db/queries.ts
+++ b/src/db/queries.ts
@@ -17,8 +17,8 @@ import {
SearchOptions,
SearchResult,
} from '../types';
-import { safeJsonParse } from '../utils';
-import { kindBonus, nameMatchBonus, scorePathRelevance } from '../search/query-utils';
+import { safeJsonParse, buildNameSubwords } from '../utils';
+import { kindBonus, nameMatchBonus, scorePathRelevance, filterStopwords, diversifyByFile } from '../search/query-utils';
/**
* Database row types (snake_case from SQLite)
@@ -44,6 +44,7 @@ interface NodeRow {
decorators: string | null;
type_parameters: string | null;
updated_at: number;
+ centrality: number | null;
}
interface EdgeRow {
@@ -66,6 +67,10 @@ interface FileRow {
indexed_at: number;
node_count: number;
errors: string | null;
+ commit_count: number | null;
+ loc: number | null;
+ first_seen_ts: number | null;
+ last_touched_ts: number | null;
}
interface UnresolvedRefRow {
@@ -105,6 +110,7 @@ function rowToNode(row: NodeRow): Node {
decorators: row.decorators ? safeJsonParse(row.decorators, undefined) : undefined,
typeParameters: row.type_parameters ? safeJsonParse(row.type_parameters, undefined) : undefined,
updatedAt: row.updated_at,
+ centrality: row.centrality ?? undefined,
};
}
@@ -136,6 +142,10 @@ function rowToFileRecord(row: FileRow): FileRecord {
indexedAt: row.indexed_at,
nodeCount: row.node_count,
errors: row.errors ? safeJsonParse(row.errors, undefined) : undefined,
+ commitCount: row.commit_count ?? 0,
+ loc: row.loc ?? 0,
+ firstSeenTs: row.first_seen_ts ?? null,
+ lastTouchedTs: row.last_touched_ts ?? null,
};
}
@@ -170,7 +180,6 @@ export class QueryBuilder {
getFileByPath?: SqliteStatement;
getAllFiles?: SqliteStatement;
insertUnresolved?: SqliteStatement;
- deleteUnresolvedByNode?: SqliteStatement;
getUnresolvedByName?: SqliteStatement;
getNodesByName?: SqliteStatement;
getNodesByQualifiedNameExact?: SqliteStatement;
@@ -185,6 +194,14 @@ export class QueryBuilder {
this.db = db;
}
+ /**
+ * Execute a callback inside a single SQLite transaction. Useful when a
+ * caller needs several `QueryBuilder` operations to commit atomically.
+ */
+ transaction<T>(fn: () => T): T {
+ return this.db.transaction(fn)();
+ }
+
// ===========================================================================
// Node Operations
// ===========================================================================
@@ -200,13 +217,13 @@ export class QueryBuilder {
start_line, end_line, start_column, end_column,
docstring, signature, visibility,
is_exported, is_async, is_static, is_abstract,
- decorators, type_parameters, updated_at
+ decorators, type_parameters, updated_at, name_subwords
) VALUES (
@id, @kind, @name, @qualifiedName, @filePath, @language,
@startLine, @endLine, @startColumn, @endColumn,
@docstring, @signature, @visibility,
@isExported, @isAsync, @isStatic, @isAbstract,
- @decorators, @typeParameters, @updatedAt
+ @decorators, @typeParameters, @updatedAt, @nameSubwords
)
`);
}
@@ -223,6 +240,12 @@ export class QueryBuilder {
return;
}
+ // INSERT OR REPLACE may overwrite a node we have cached. Drop the
+ // stale entry so the next getNodeById sees the new row, not the old
+ // one (matches the cache-invalidation pattern used by updateNode and
+ // deleteNode below).
+ this.nodeCache.delete(node.id);
+
try {
this.stmts.insertNode.run({
id: node.id,
@@ -245,6 +268,7 @@ export class QueryBuilder {
decorators: node.decorators ? JSON.stringify(node.decorators) : null,
typeParameters: node.typeParameters ? JSON.stringify(node.typeParameters) : null,
updatedAt: node.updatedAt ?? Date.now(),
+ nameSubwords: buildNameSubwords(node.name),
});
} catch (error) {
throw error;
@@ -287,7 +311,8 @@ export class QueryBuilder {
is_abstract = @isAbstract,
decorators = @decorators,
type_parameters = @typeParameters,
- updated_at = @updatedAt
+ updated_at = @updatedAt,
+ name_subwords = @nameSubwords
WHERE id = @id
`);
}
@@ -322,6 +347,7 @@ export class QueryBuilder {
decorators: node.decorators ? JSON.stringify(node.decorators) : null,
typeParameters: node.typeParameters ? JSON.stringify(node.typeParameters) : null,
updatedAt: node.updatedAt ?? Date.now(),
+ nameSubwords: buildNameSubwords(node.name),
});
}
@@ -379,6 +405,59 @@ export class QueryBuilder {
return node;
}
+ /**
+ * Batch lookup: fetch many nodes by ID in a single SQL round-trip.
+ *
+ * Replaces the N+1 pattern in graph traversal where every edge would
+ * trigger its own `getNodeById` call. For a function with 50 callers
+ * this collapses 50 point reads into one IN-list query (~10-50x
+ * faster end-to-end).
+ *
+ * Returns a Map keyed by id so callers can preserve their own ordering
+ * (typically the order edges were returned from the graph). Missing IDs
+ * are simply absent from the map.
+ *
+ * Cache-aware: ids already in the LRU cache are served from memory and
+ * the SQL query only touches the misses.
+ */
+ getNodesByIds(ids: readonly string[]): Map<string, Node> {
+ const out = new Map<string, Node>();
+ if (ids.length === 0) return out;
+
+ // Serve cache hits first; build the miss list for SQL.
+ const misses: string[] = [];
+ for (const id of ids) {
+ const cached = this.nodeCache.get(id);
+ if (cached !== undefined) {
+ // LRU touch
+ this.nodeCache.delete(id);
+ this.nodeCache.set(id, cached);
+ out.set(id, cached);
+ } else {
+ misses.push(id);
+ }
+ }
+ if (misses.length === 0) return out;
+
+ // Chunk under SQLite's parameter limit (default 999, raised to 32766
+ // in better-sqlite3 builds — chunk at 500 for safety across both
+ // backends and to keep the query plan simple).
+ const CHUNK = 500;
+ for (let i = 0; i < misses.length; i += CHUNK) {
+ const chunk = misses.slice(i, i + CHUNK);
+ const placeholders = chunk.map(() => '?').join(',');
+ const rows = this.db
+ .prepare(`SELECT * FROM nodes WHERE id IN (${placeholders})`)
+ .all(...chunk) as NodeRow[];
+ for (const row of rows) {
+ const node = rowToNode(row);
+ out.set(node.id, node);
+ this.cacheNode(node);
+ }
+ }
+ return out;
+ }
+
/**
* Add a node to the cache, evicting oldest if needed
*/
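
A before/after sketch of the traversal call site this replaces (edge and query-builder names are assumed; the real caller lives in the graph layer):

```ts
// Before: one point read per edge, i.e. the N+1 pattern.
//   const callers = edges.map((e) => qb.getNodeById(e.source));

// After: one batched IN-list query; the caller keeps its own ordering.
const byId = qb.getNodesByIds(edges.map((e) => e.source));
const callers = edges
  .map((e) => byId.get(e.source))
  .filter((n): n is Node => n !== undefined); // missing IDs are simply absent
```
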
@@ -478,7 +557,13 @@ export class QueryBuilder {
* 3. Score results based on match quality
*/
searchNodes(query: string, options: SearchOptions = {}): SearchResult[] {
- const { kinds, languages, limit = 100, offset = 0 } = options;
+ const { kinds, languages, limit = 100, offset = 0, perFileCap = 3 } = options;
+
+ // Note on over-fetching: searchNodesFTS already over-fetches by 5x
+ // internally (Math.max(limit*5, 100)) so its own rescoring pass has
+ // headroom. That same headroom feeds the per-file diversification
+ // below — no additional outer multiplier needed. Keeping this comment
+ // here so future readers don't reintroduce a multiplier-on-multiplier.
// First try FTS5 with prefix matching
let results = this.searchNodesFTS(query, { kinds, languages, limit, offset });
@@ -530,10 +615,23 @@ export class QueryBuilder {
+ nameMatchBonus(r.node.name, query),
}));
results.sort((a, b) => b.score - a.score);
- // Trim to requested limit after rescoring
- if (results.length > limit) {
- results = results.slice(0, limit);
- }
+ }
+
+ // Diversification: cap per-file results so the top-K isn't dominated
+ // by the methods of a single class. Top-scoring hit per file is always
+ // included; the cap only kicks in for the second-and-onward members
+ // of the same file. perFileCap=0 disables.
+ //
+ // Guard `results.length > limit`: when results <= limit there's
+ // nothing to drop, so the existing score order is already what the
+ // caller will see. (`diversifyByFile` is also safe to call here and
+ // would reorder within the same set, but the existing rescore order
+ // is already meaningful and we don't want to perturb it without
+ // benefit.)
+ if (perFileCap > 0 && results.length > limit) {
+ results = diversifyByFile(results, limit, perFileCap);
+ } else if (results.length > limit) {
+ results = results.slice(0, limit);
}
return results;
@@ -545,30 +643,38 @@ export class QueryBuilder {
private searchNodesFTS(query: string, options: SearchOptions): SearchResult[] {
const { kinds, languages, limit = 100, offset = 0 } = options;
- // Add prefix wildcard for better matching (e.g., "auth" matches "AuthService", "authenticate")
- // Escape special FTS5 characters and add prefix wildcard
- const ftsQuery = query
- .replace(/['"*():^]/g, '') // Remove FTS5 special chars
+ // Build the FTS query in three steps:
+ // 1. Strip characters with special meaning to FTS5 and split on whitespace.
+ // 2. Drop FTS5 boolean operators (AND/OR/NOT/NEAR) — prevents user input
+ // from injecting boolean structure into the OR-join below.
+ // 3. Drop English stopwords for natural-language queries — words like
+ // "how" / "the" otherwise become OR'd hits against any prose-bearing
+ // docstring and crowd out the actually-relevant identifier tokens.
+ const rawTerms = query
+ .replace(/['"*():^]/g, '')
.split(/\s+/)
- .filter(term => term.length > 0)
- // Strip FTS5 boolean operators to prevent query manipulation
- .filter(term => !/^(AND|OR|NOT|NEAR)$/i.test(term))
- .map(term => `"${term}"*`) // Prefix match each term
+ .filter((term) => term.length > 0)
+ .filter((term) => !/^(AND|OR|NOT|NEAR)$/i.test(term));
+
+ const filteredTerms = filterStopwords(rawTerms);
+
+ const ftsQuery = filteredTerms
+ .map((term) => `"${term}"*`) // Prefix match each term
.join(' OR ');
if (!ftsQuery) {
return [];
}
- // BM25 column weights: id=0, name=20, qualified_name=5, docstring=1, signature=2
- // Heavy name weight ensures exact/prefix name matches rank above incidental
- // mentions in long docstrings or qualified names of nested symbols.
- // Fetch 5x requested limit so post-hoc rescoring (kindBonus, pathRelevance,
- // nameMatchBonus) can promote results that BM25 alone undervalues.
+ // BM25 column weights: id=0, name=20, qualified_name=5, docstring=1,
+ // signature=2, name_subwords=10. Heavy name weight keeps exact and prefix
+ // name matches above incidental mentions in long docstrings; the new
+ // name_subwords column at 10× lets queries hit subword tokens like
+ // `parser` against `getParser` without burying full-name matches.
const ftsLimit = Math.max(limit * 5, 100);
let sql = `
- SELECT nodes.*, bm25(nodes_fts, 0, 20, 5, 1, 2) as score
+ SELECT nodes.*, bm25(nodes_fts, 0, 20, 5, 1, 2, 10) as score
FROM nodes_fts
JOIN nodes ON nodes_fts.id = nodes.id
WHERE nodes_fts MATCH ?
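
Worked through on a sample natural-language query (the exact stopword list inside `filterStopwords` is an assumption here):

```ts
const q = 'how does the auth token refresh';
const rawTerms = q
  .replace(/['"*():^]/g, '')
  .split(/\s+/)
  .filter((t) => t.length > 0)
  .filter((t) => !/^(AND|OR|NOT|NEAR)$/i.test(t));
// rawTerms -> ['how', 'does', 'the', 'auth', 'token', 'refresh']
const filtered = filterStopwords(rawTerms);
// filtered -> ['auth', 'token', 'refresh']   (assuming how/does/the are stopwords)
const ftsQuery = filtered.map((t) => `"${t}"*`).join(' OR ');
// ftsQuery -> '"auth"* OR "token"* OR "refresh"*'
```
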
@@ -916,7 +1022,12 @@ export class QueryBuilder {
// ===========================================================================
/**
- * Insert or update a file record
+ * Insert or update a file record.
+ *
+ * Churn columns (commit_count, loc, first_seen_ts, last_touched_ts)
+ * are deliberately omitted from the ON CONFLICT update list — they
+ * are managed exclusively by `applyChurnDeltas` / `applyLocUpdates`.
+ * Adding them here would clobber mined git history on every re-index.
*/
upsertFile(file: FileRecord): void {
if (!this.stmts.upsertFile) {
@@ -1032,17 +1143,8 @@ export class QueryBuilder {
insert();
}
- /**
- * Delete unresolved references from a node
- */
- deleteUnresolvedByNode(nodeId: string): void {
- if (!this.stmts.deleteUnresolvedByNode) {
- this.stmts.deleteUnresolvedByNode = this.db.prepare(
- 'DELETE FROM unresolved_refs WHERE from_node_id = ?'
- );
- }
- this.stmts.deleteUnresolvedByNode.run(nodeId);
- }
+ // (deleteUnresolvedByNode removed — never called; FK cascade on
+ // nodes(id) → unresolved_refs.from_node_id handles cleanup automatically.)
/**
* Get unresolved references by name (for resolution)
@@ -1295,4 +1397,526 @@ export class QueryBuilder {
this.db.exec('DELETE FROM files');
})();
}
+
+ // ===========================================================================
+ // Centrality (PageRank scores on nodes)
+ // ===========================================================================
+
+ /**
+ * Apply PageRank scores to the nodes table in a single transaction.
+ * Existing scores for ids not in the map are NOT cleared — call
+ * `clearCentrality()` first for a from-scratch recompute.
+ */
+ applyCentralityScores(scores: Map<string, number>): void {
+ if (scores.size === 0) return;
+ const stmt = this.db.prepare('UPDATE nodes SET centrality = ? WHERE id = ?');
+ this.db.transaction(() => {
+ for (const [id, score] of scores) {
+ stmt.run(score, id);
+ }
+ })();
+ // Cached node objects now have stale centrality. Drop the cache;
+ // subsequent reads pull the fresh value.
+ this.nodeCache.clear();
+ }
+
+ /** Reset all centrality values to NULL (fresh-recompute path). */
+ clearCentrality(): void {
+ this.db.exec('UPDATE nodes SET centrality = NULL');
+ this.nodeCache.clear();
+ }
+
+ /**
+ * Get top-N nodes by centrality, descending. Filters out NULL
+ * centrality (= not yet computed). Optional `kind` filter narrows
+ * to one node kind; optional `minCentrality` filters out the long
+ * tail of essentially-zero ranks.
+ */
+ getTopNodesByCentrality(opts: {
+ limit?: number;
+ kind?: NodeKind;
+ minCentrality?: number;
+ } = {}): Node[] {
+ const limit = opts.limit ?? 25;
+ const minCentrality = opts.minCentrality ?? 0;
+ const where: string[] = ['centrality IS NOT NULL', 'centrality >= ?'];
+ const params: (string | number)[] = [minCentrality];
+ if (opts.kind) {
+ where.push('kind = ?');
+ params.push(opts.kind);
+ }
+ const sql = `SELECT * FROM nodes WHERE ${where.join(' AND ')}
+ ORDER BY centrality DESC LIMIT ?`;
+ params.push(limit);
+ const rows = this.db.prepare(sql).all(...params) as NodeRow[];
+ return rows.map(rowToNode);
+ }
+
+ /**
+ * Compute the rank (1-based) of a single node by centrality.
+ * Returns null if the node has no centrality yet.
+ */
+ getCentralityRank(nodeId: string): { rank: number; total: number } | null {
+ const row = this.db
+ .prepare('SELECT centrality FROM nodes WHERE id = ?')
+ .get(nodeId) as { centrality: number | null } | undefined;
+ if (!row || row.centrality === null) return null;
+ const above = this.db
+ .prepare('SELECT COUNT(*) AS c FROM nodes WHERE centrality > ?')
+ .get(row.centrality) as { c: number };
+ const total = this.db
+ .prepare('SELECT COUNT(*) AS c FROM nodes WHERE centrality IS NOT NULL')
+ .get() as { c: number };
+ return { rank: above.c + 1, total: total.c };
+ }
+
+ // ===========================================================================
+ // Per-file churn (mined from git log)
+ // ===========================================================================
+
+ /**
+ * Apply churn deltas to the files table. For each delta:
+ * commit_count += commitCountDelta
+ * last_touched_ts = MAX(existing, lastTouchedTs)
+ * first_seen_ts = COALESCE(existing, firstSeenTs) // sticky
+ *
+ * Files in the delta map but not in the files table (uncommon —
+ * they'd have to be mined-but-never-indexed) are silently skipped.
+ */
+ applyChurnDeltas(
+ deltas: Iterable<{
+ path: string;
+ commitCountDelta: number;
+ lastTouchedTs: number;
+ firstSeenTs: number;
+ }>
+ ): void {
+ const stmt = this.db.prepare(
+ `UPDATE files
+ SET commit_count = commit_count + ?,
+ last_touched_ts = MAX(COALESCE(last_touched_ts, 0), ?),
+ first_seen_ts = COALESCE(first_seen_ts, ?)
+ WHERE path = ?`
+ );
+ this.db.transaction(() => {
+ for (const d of deltas) {
+ stmt.run(d.commitCountDelta, d.lastTouchedTs, d.firstSeenTs, d.path);
+ }
+ })();
+ }
+
+ /** Reset all churn columns; used before a full re-mine. Does not touch `loc`. */
+ clearChurn(): void {
+ this.db.exec(
+ `UPDATE files SET commit_count = 0, last_touched_ts = NULL, first_seen_ts = NULL`
+ );
+ }
+
+ /** Update the on-disk LOC for a single file. Cheap; called per changed file. */
+ updateFileLoc(filePath: string, loc: number): void {
+ this.db.prepare('UPDATE files SET loc = ? WHERE path = ?').run(loc, filePath);
+ }
+
+ /** Bulk LOC update — used during indexAll to refresh LOC for every indexed file. */
+ applyLocUpdates(entries: Iterable<{ path: string; loc: number }>): void {
+ const stmt = this.db.prepare('UPDATE files SET loc = ? WHERE path = ?');
+ this.db.transaction(() => {
+ for (const e of entries) stmt.run(e.loc, e.path);
+ })();
+ }
+
+ getTopFilesByChurn(opts: { limit?: number; minCommits?: number } = {}): FileRecord[] {
+ const limit = opts.limit ?? 25;
+ const minCommits = opts.minCommits ?? 1;
+ const rows = this.db
+ .prepare(
+ `SELECT * FROM files WHERE commit_count >= ?
+ ORDER BY commit_count DESC LIMIT ?`
+ )
+ .all(minCommits, limit) as FileRow[];
+ return rows.map(rowToFileRecord);
+ }
+
+ /**
+ * Hotspots: files ranked by `risk = (Σ centrality of nodes in file) × commit_count`.
+ *
+ * Both inputs are optional in their own right; with neither computed,
+ * every file scores 0 and the ranking is meaningless, so pass
+ * minCommits / minCentrality to filter. Sorting modes:
+ * - 'risk' : the combined score (default; what "hotspot" means)
+ * - 'centrality' : pure structural importance
+ * - 'churn' : pure change frequency
+ */
+ getHotspots(opts: {
+ limit?: number;
+ minCommits?: number;
+ minCentrality?: number;
+ sortBy?: 'risk' | 'centrality' | 'churn';
+ } = {}): Array<{
+ filePath: string;
+ fileCentrality: number;
+ commitCount: number;
+ loc: number;
+ lastTouchedTs: number | null;
+ riskScore: number;
+ }> {
+ const limit = opts.limit ?? 15;
+ const minCommits = opts.minCommits ?? 0;
+ const minCentrality = opts.minCentrality ?? 0;
+ const sortBy = opts.sortBy ?? 'risk';
+
+ const orderBy =
+ sortBy === 'centrality'
+ ? 'fileCentrality DESC'
+ : sortBy === 'churn'
+ ? 'commitCount DESC'
+ : 'riskScore DESC';
+
+ // Aggregate centrality at file level. LEFT JOIN so files without any
+ // indexed nodes (rare — schema-only files) still surface if they have churn.
+ const sql = `
+ SELECT
+ f.path AS filePath,
+ COALESCE(n_agg.fc, 0.0) AS fileCentrality,
+ f.commit_count AS commitCount,
+ f.loc AS loc,
+ f.last_touched_ts AS lastTouchedTs,
+ COALESCE(n_agg.fc, 0.0) * f.commit_count AS riskScore
+ FROM files f
+ LEFT JOIN (
+ SELECT file_path, SUM(centrality) AS fc
+ FROM nodes WHERE centrality IS NOT NULL
+ GROUP BY file_path
+ ) n_agg ON n_agg.file_path = f.path
+ WHERE f.commit_count >= ? AND COALESCE(n_agg.fc, 0.0) >= ?
+ ORDER BY ${orderBy}
+ LIMIT ?
+ `;
+ const rows = this.db.prepare(sql).all(minCommits, minCentrality, limit) as Array<{
+ filePath: string;
+ fileCentrality: number;
+ commitCount: number;
+ loc: number;
+ lastTouchedTs: number | null;
+ riskScore: number;
+ }>;
+ return rows;
+ }
+
+ // ===========================================================================
+ // Symbol-issue attributions (mined from git history)
+ // ===========================================================================
+
+ applyIssueAttributions(
+ rows: Iterable<{
+ nodeId: string;
+ issueNumber: number;
+ commitSha: string;
+ kind: 'modified' | 'added' | 'removed';
+ }>
+ ): void {
+ const stmt = this.db.prepare(
+ `INSERT OR IGNORE INTO symbol_issues (node_id, issue_number, commit_sha, kind)
+ VALUES (?, ?, ?, ?)`
+ );
+ this.db.transaction(() => {
+ for (const r of rows) {
+ stmt.run(r.nodeId, r.issueNumber, r.commitSha, r.kind);
+ }
+ })();
+ }
+
+ clearIssueAttributions(): void {
+ this.db.exec('DELETE FROM symbol_issues');
+ }
+
+ getIssuesForNode(nodeId: string): Array<{
+ issueNumber: number;
+ kind: 'modified' | 'added' | 'removed';
+ commitSha: string;
+ }> {
+ return this.db
+ .prepare(
+ `SELECT issue_number AS issueNumber, kind, commit_sha AS commitSha
+ FROM symbol_issues
+ WHERE node_id = ?
+ ORDER BY issue_number ASC, kind ASC`
+ )
+ .all(nodeId) as Array<{
+ issueNumber: number;
+ kind: 'modified' | 'added' | 'removed';
+ commitSha: string;
+ }>;
+ }
+
+ getNodesForIssue(issueNumber: number): Array<{
+ nodeId: string;
+ kind: 'modified' | 'added' | 'removed';
+ commitSha: string;
+ }> {
+ return this.db
+ .prepare(
+ `SELECT node_id AS nodeId, kind, commit_sha AS commitSha
+ FROM symbol_issues
+ WHERE issue_number = ?
+ ORDER BY node_id ASC`
+ )
+ .all(issueNumber) as Array<{
+ nodeId: string;
+ kind: 'modified' | 'added' | 'removed';
+ commitSha: string;
+ }>;
+ }
+
+ // ===========================================================================
+ // Config references (env vars / feature flags read sites)
+ // ===========================================================================
+
+ applyConfigRefs(
+ rows: Array<{
+ configKind: 'env';
+ configKey: string;
+ sourceNodeId: string | null;
+ filePath: string;
+ line: number;
+ }>
+ ): void {
+ if (rows.length === 0) return;
+ const distinctFiles = new Set(rows.map((r) => r.filePath));
+ const deleteStmt = this.db.prepare('DELETE FROM config_refs WHERE file_path = ?');
+ const insertStmt = this.db.prepare(
+ `INSERT INTO config_refs (config_kind, config_key, source_node_id, file_path, line)
+ VALUES (?, ?, ?, ?, ?)`
+ );
+ this.db.transaction(() => {
+ for (const f of distinctFiles) deleteStmt.run(f);
+ for (const r of rows) {
+ insertStmt.run(r.configKind, r.configKey, r.sourceNodeId, r.filePath, r.line);
+ }
+ })();
+ }
+
+ clearConfigRefs(): void {
+ this.db.exec('DELETE FROM config_refs');
+ }
+
+ deleteConfigRefsForPaths(filePaths: Iterable<string>): void {
+ const stmt = this.db.prepare('DELETE FROM config_refs WHERE file_path = ?');
+ this.db.transaction(() => {
+ for (const p of filePaths) stmt.run(p);
+ })();
+ }
+
+ pruneOrphanedConfigRefs(): void {
+ this.db.exec(
+ `DELETE FROM config_refs WHERE file_path NOT IN (SELECT path FROM files)`
+ );
+ }
+
+ getConfigKeys(opts: { configKind?: 'env'; limit?: number } = {}): Array<{
+ configKey: string;
+ reads: number;
+ distinctFiles: number;
+ }> {
+ const limit = opts.limit ?? 200;
+ const where = opts.configKind ? 'WHERE config_kind = ?' : '';
+ const params = opts.configKind ? [opts.configKind, limit] : [limit];
+ return this.db
+ .prepare(
+ `SELECT config_key AS configKey,
+ COUNT(*) AS reads,
+ COUNT(DISTINCT file_path) AS distinctFiles
+ FROM config_refs
+ ${where}
+ GROUP BY config_key
+ ORDER BY reads DESC, config_key ASC
+ LIMIT ?`
+ )
+ .all(...params) as Array<{ configKey: string; reads: number; distinctFiles: number }>;
+ }
+
+ getConfigRefsByKey(
+ configKey: string,
+ opts: { configKind?: 'env' } = {}
+ ): Array<{
+ filePath: string;
+ line: number;
+ sourceNodeId: string | null;
+ sourceName: string | null;
+ sourceKind: string | null;
+ }> {
+ const kind = opts.configKind ?? 'env';
+ return this.db
+ .prepare(
+ `SELECT cr.file_path AS filePath,
+ cr.line AS line,
+ cr.source_node_id AS sourceNodeId,
+ n.name AS sourceName,
+ n.kind AS sourceKind
+ FROM config_refs cr
+ LEFT JOIN nodes n ON n.id = cr.source_node_id
+ WHERE cr.config_kind = ? AND cr.config_key = ?
+ ORDER BY cr.file_path ASC, cr.line ASC`
+ )
+ .all(kind, configKey) as Array<{
+ filePath: string;
+ line: number;
+ sourceNodeId: string | null;
+ sourceName: string | null;
+ sourceKind: string | null;
+ }>;
+ }
+
+ getConfigKeysForNode(nodeId: string): Array<{ configKey: string; line: number }> {
+ return this.db
+ .prepare(
+ `SELECT config_key AS configKey, line
+ FROM config_refs
+ WHERE source_node_id = ?
+ ORDER BY config_key ASC, line ASC`
+ )
+ .all(nodeId) as Array<{ configKey: string; line: number }>;
+ }
+
+ // ===========================================================================
+ // SQL references (table-name string-literal refs from app code)
+ // ===========================================================================
+
+ applySqlRefs(
+ rows: Array<{
+ tableName: string;
+ op: 'read' | 'write' | 'ddl';
+ sourceNodeId: string | null;
+ filePath: string;
+ line: number;
+ }>
+ ): void {
+ if (rows.length === 0) return;
+ const stmt = this.db.prepare(
+ `INSERT INTO sql_refs (table_name, op, source_node_id, file_path, line)
+ VALUES (?, ?, ?, ?, ?)`
+ );
+ this.db.transaction(() => {
+ for (const r of rows) {
+ stmt.run(r.tableName, r.op, r.sourceNodeId, r.filePath, r.line);
+ }
+ })();
+ }
+
+ replaceAllSqlRefs(
+ rows: Array<{
+ tableName: string;
+ op: 'read' | 'write' | 'ddl';
+ sourceNodeId: string | null;
+ filePath: string;
+ line: number;
+ }>
+ ): void {
+ const insert = this.db.prepare(
+ `INSERT INTO sql_refs (table_name, op, source_node_id, file_path, line)
+ VALUES (?, ?, ?, ?, ?)`
+ );
+ this.db.transaction(() => {
+ this.db.exec('DELETE FROM sql_refs');
+ for (const r of rows) {
+ insert.run(r.tableName, r.op, r.sourceNodeId, r.filePath, r.line);
+ }
+ })();
+ }
+
+ deleteSqlRefsForPaths(filePaths: Iterable<string>): void {
+ const stmt = this.db.prepare('DELETE FROM sql_refs WHERE file_path = ?');
+ this.db.transaction(() => {
+ for (const p of filePaths) stmt.run(p);
+ })();
+ }
+
+ clearSqlRefs(): void {
+ this.db.exec('DELETE FROM sql_refs');
+ }
+
+ pruneOrphanedSqlRefs(): void {
+ this.db.exec(
+ `DELETE FROM sql_refs WHERE file_path NOT IN (SELECT path FROM files)`
+ );
+ }
+
+ getSqlTables(opts: { limit?: number } = {}): Array<{
+ tableName: string;
+ reads: number;
+ writes: number;
+ ddl: number;
+ total: number;
+ }> {
+ const limit = opts.limit ?? 100;
+ return this.db
+ .prepare(
+ `SELECT lower(table_name) AS tableName,
+ SUM(CASE WHEN op = 'read' THEN 1 ELSE 0 END) AS reads,
+ SUM(CASE WHEN op = 'write' THEN 1 ELSE 0 END) AS writes,
+ SUM(CASE WHEN op = 'ddl' THEN 1 ELSE 0 END) AS ddl,
+ COUNT(*) AS total
+ FROM sql_refs
+ GROUP BY lower(table_name)
+ ORDER BY total DESC, tableName ASC
+ LIMIT ?`
+ )
+ .all(limit) as Array<{
+ tableName: string;
+ reads: number;
+ writes: number;
+ ddl: number;
+ total: number;
+ }>;
+ }
+
+ getSqlRefsByTable(
+ tableName: string,
+ opts: { op?: 'read' | 'write' | 'ddl' } = {}
+ ): Array<{
+ op: 'read' | 'write' | 'ddl';
+ filePath: string;
+ line: number;
+ sourceNodeId: string | null;
+ sourceName: string | null;
+ sourceKind: string | null;
+ }> {
+ const params: Array<string> = [tableName.toLowerCase()];
+ let opFilter = '';
+ if (opts.op) {
+ opFilter = ' AND sr.op = ?';
+ params.push(opts.op);
+ }
+ return this.db
+ .prepare(
+ `SELECT sr.op AS op,
+ sr.file_path AS filePath,
+ sr.line AS line,
+ sr.source_node_id AS sourceNodeId,
+ n.name AS sourceName,
+ n.kind AS sourceKind
+ FROM sql_refs sr
+ LEFT JOIN nodes n ON n.id = sr.source_node_id
+ WHERE lower(sr.table_name) = ?${opFilter}
+ ORDER BY sr.file_path ASC, sr.line ASC`
+ )
+ .all(...params) as Array<{
+ op: 'read' | 'write' | 'ddl';
+ filePath: string;
+ line: number;
+ sourceNodeId: string | null;
+ sourceName: string | null;
+ sourceKind: string | null;
+ }>;
+ }
+
+ getSqlTablesForNode(nodeId: string): Array<{ tableName: string; op: string }> {
+ return this.db
+ .prepare(
+ `SELECT DISTINCT lower(table_name) AS tableName, op
+ FROM sql_refs
+ WHERE source_node_id = ?
+ ORDER BY tableName ASC, op ASC`
+ )
+ .all(nodeId) as Array<{ tableName: string; op: string }>;
+ }
}
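
A sketch of how the new churn/centrality surface is meant to be consumed (the reporting call site is hypothetical; nothing in this hunk wires it up):

```ts
// riskScore = (sum of node centrality in file) x commit_count, so both
// the PageRank pass and the git-history mining must have run for
// anything interesting to surface.
const hotspots = qb.getHotspots({ limit: 10, minCommits: 5, sortBy: 'risk' });
for (const h of hotspots) {
  console.log(
    `${h.filePath}  risk=${h.riskScore.toFixed(4)} ` +
      `(centrality=${h.fileCentrality.toFixed(4)}, commits=${h.commitCount}, loc=${h.loc})`
  );
}
```
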
diff --git a/src/db/schema.sql b/src/db/schema.sql
index dd0a9f06..be75f5de 100644
--- a/src/db/schema.sql
+++ b/src/db/schema.sql
@@ -37,7 +37,13 @@ CREATE TABLE IF NOT EXISTS nodes (
is_abstract INTEGER DEFAULT 0,
decorators TEXT, -- JSON array
type_parameters TEXT, -- JSON array
- updated_at INTEGER NOT NULL
+ updated_at INTEGER NOT NULL,
+ centrality REAL DEFAULT NULL, -- PageRank over calls+references; NULL until first compute
+ -- Camel/snake-split tokens of `name`, joined by spaces. The default
+ -- FTS5 tokenizer indexes each as a separate term, so a query for
+ -- `parser` finds `getParser` etc. Populated by buildNameSubwords()
+ -- in src/utils.ts on every insert/update.
+ name_subwords TEXT
);
-- Edges: Relationships between nodes
@@ -63,7 +69,12 @@ CREATE TABLE IF NOT EXISTS files (
modified_at INTEGER NOT NULL,
indexed_at INTEGER NOT NULL,
node_count INTEGER DEFAULT 0,
- errors TEXT -- JSON array
+ errors TEXT, -- JSON array
+ -- Churn signals (mined from git log)
+ commit_count INTEGER NOT NULL DEFAULT 0,
+ loc INTEGER NOT NULL DEFAULT 0,
+ first_seen_ts INTEGER DEFAULT NULL, -- unix seconds
+ last_touched_ts INTEGER DEFAULT NULL -- unix seconds
);
-- Unresolved References: References that need resolution after full indexing
@@ -92,34 +103,42 @@ CREATE INDEX IF NOT EXISTS idx_nodes_file_path ON nodes(file_path);
CREATE INDEX IF NOT EXISTS idx_nodes_language ON nodes(language);
CREATE INDEX IF NOT EXISTS idx_nodes_file_line ON nodes(file_path, start_line);
CREATE INDEX IF NOT EXISTS idx_nodes_lower_name ON nodes(lower(name));
+CREATE INDEX IF NOT EXISTS idx_nodes_centrality ON nodes(centrality DESC);
-- Full-text search index on node names, docstrings, and signatures
+-- The Porter stemmer collapses morphological variants so a query for
+-- `parsing` matches a docstring or subword containing `parser`/`parse`.
+-- This is the largest single quality lift for natural-language queries
+-- (verified empirically: targets that ranked #18-#19 or weren't in the
+-- top 20 jump to the top 5 — see __tests__/search-quality.test.ts).
CREATE VIRTUAL TABLE IF NOT EXISTS nodes_fts USING fts5(
id,
name,
qualified_name,
docstring,
signature,
+ name_subwords,
content='nodes',
- content_rowid='rowid'
+ content_rowid='rowid',
+ tokenize="porter unicode61"
);
-- Triggers to keep FTS index in sync
CREATE TRIGGER IF NOT EXISTS nodes_ai AFTER INSERT ON nodes BEGIN
- INSERT INTO nodes_fts(rowid, id, name, qualified_name, docstring, signature)
- VALUES (NEW.rowid, NEW.id, NEW.name, NEW.qualified_name, NEW.docstring, NEW.signature);
+ INSERT INTO nodes_fts(rowid, id, name, qualified_name, docstring, signature, name_subwords)
+ VALUES (NEW.rowid, NEW.id, NEW.name, NEW.qualified_name, NEW.docstring, NEW.signature, NEW.name_subwords);
END;
CREATE TRIGGER IF NOT EXISTS nodes_ad AFTER DELETE ON nodes BEGIN
- INSERT INTO nodes_fts(nodes_fts, rowid, id, name, qualified_name, docstring, signature)
- VALUES ('delete', OLD.rowid, OLD.id, OLD.name, OLD.qualified_name, OLD.docstring, OLD.signature);
+ INSERT INTO nodes_fts(nodes_fts, rowid, id, name, qualified_name, docstring, signature, name_subwords)
+ VALUES ('delete', OLD.rowid, OLD.id, OLD.name, OLD.qualified_name, OLD.docstring, OLD.signature, OLD.name_subwords);
END;
CREATE TRIGGER IF NOT EXISTS nodes_au AFTER UPDATE ON nodes BEGIN
- INSERT INTO nodes_fts(nodes_fts, rowid, id, name, qualified_name, docstring, signature)
- VALUES ('delete', OLD.rowid, OLD.id, OLD.name, OLD.qualified_name, OLD.docstring, OLD.signature);
- INSERT INTO nodes_fts(rowid, id, name, qualified_name, docstring, signature)
- VALUES (NEW.rowid, NEW.id, NEW.name, NEW.qualified_name, NEW.docstring, NEW.signature);
+ INSERT INTO nodes_fts(nodes_fts, rowid, id, name, qualified_name, docstring, signature, name_subwords)
+ VALUES ('delete', OLD.rowid, OLD.id, OLD.name, OLD.qualified_name, OLD.docstring, OLD.signature, OLD.name_subwords);
+ INSERT INTO nodes_fts(rowid, id, name, qualified_name, docstring, signature, name_subwords)
+ VALUES (NEW.rowid, NEW.id, NEW.name, NEW.qualified_name, NEW.docstring, NEW.signature, NEW.name_subwords);
END;
-- Edge indexes
@@ -129,9 +148,20 @@ CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);
CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source, kind);
CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON edges(target, kind);
+-- Uniqueness for (source, target, kind, line, col). The id column is an
+-- AUTOINCREMENT primary key, so without this index `INSERT OR IGNORE`
+-- would never see a conflict — duplicate edges would silently accumulate
+-- on every re-resolution / re-emission. COALESCE keeps two NULL line/col
+-- values comparable as equal (SQLite treats raw NULLs in a UNIQUE index
+-- as distinct).
+CREATE UNIQUE INDEX IF NOT EXISTS idx_edges_unique
+ ON edges(source, target, kind, COALESCE(line, -1), COALESCE(col, -1));
+
-- File indexes
CREATE INDEX IF NOT EXISTS idx_files_language ON files(language);
CREATE INDEX IF NOT EXISTS idx_files_modified_at ON files(modified_at);
+CREATE INDEX IF NOT EXISTS idx_files_commit_count ON files(commit_count DESC);
+CREATE INDEX IF NOT EXISTS idx_files_last_touched ON files(last_touched_ts DESC);
-- Unresolved refs indexes
CREATE INDEX IF NOT EXISTS idx_unresolved_from_node ON unresolved_refs(from_node_id);
@@ -146,3 +176,61 @@ CREATE TABLE IF NOT EXISTS project_metadata (
value TEXT NOT NULL,
updated_at INTEGER NOT NULL
);
+
+-- Issue → symbol attribution mined from git history.
+-- One row per (node, issue, commit, kind) tuple; kind is 'modified'
+-- (enclosing function changed by hunk), 'added' (declaration on a +
+-- line), or 'removed' (declaration on a - line, dropped at lookup
+-- time when no current node matches).
+CREATE TABLE IF NOT EXISTS symbol_issues (
+ node_id TEXT NOT NULL,
+ issue_number INTEGER NOT NULL,
+ commit_sha TEXT NOT NULL,
+ kind TEXT NOT NULL CHECK (kind IN ('modified','added','removed')),
+ PRIMARY KEY (node_id, issue_number, commit_sha, kind),
+ FOREIGN KEY (node_id) REFERENCES nodes(id) ON DELETE CASCADE
+);
+CREATE INDEX IF NOT EXISTS idx_symbol_issues_node ON symbol_issues(node_id);
+CREATE INDEX IF NOT EXISTS idx_symbol_issues_issue ON symbol_issues(issue_number);
+
+-- Config references: read sites for env vars / feature flags / etc.
+-- One row per syntactic occurrence in source. config_kind narrows to
+-- 'env' (process.env, os.getenv, ...) for v1; future kinds add YAML
+-- keys, LaunchDarkly flags, etc. source_node_id may be NULL for
+-- top-level reads that aren't inside a function/method.
+CREATE TABLE IF NOT EXISTS config_refs (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ config_kind TEXT NOT NULL,
+ config_key TEXT NOT NULL,
+ source_node_id TEXT,
+ file_path TEXT NOT NULL,
+ line INTEGER NOT NULL,
+ FOREIGN KEY (source_node_id) REFERENCES nodes(id) ON DELETE CASCADE
+);
+CREATE INDEX IF NOT EXISTS idx_config_refs_key
+ ON config_refs(config_kind, config_key);
+CREATE INDEX IF NOT EXISTS idx_config_refs_node
+ ON config_refs(source_node_id);
+CREATE INDEX IF NOT EXISTS idx_config_refs_file
+ ON config_refs(file_path);
+
+-- SQL references: per-call-site links from app code to a table name.
+-- One row per syntactic occurrence in source. op is 'read' (SELECT,
+-- FROM in non-DDL), 'write' (INSERT/UPDATE/DELETE), or 'ddl'
+-- (CREATE TABLE / ALTER TABLE / DROP TABLE -- rare in app code but
+-- catches migration scripts).
+CREATE TABLE IF NOT EXISTS sql_refs (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ table_name TEXT NOT NULL,
+ op TEXT NOT NULL CHECK (op IN ('read','write','ddl')),
+ source_node_id TEXT,
+ file_path TEXT NOT NULL,
+ line INTEGER NOT NULL,
+ FOREIGN KEY (source_node_id) REFERENCES nodes(id) ON DELETE CASCADE
+);
+CREATE INDEX IF NOT EXISTS idx_sql_refs_table
+ ON sql_refs(lower(table_name));
+CREATE INDEX IF NOT EXISTS idx_sql_refs_node
+ ON sql_refs(source_node_id);
+CREATE INDEX IF NOT EXISTS idx_sql_refs_file
+ ON sql_refs(file_path);
diff --git a/src/default-config.ts b/src/default-config.ts
new file mode 100644
index 00000000..34769609
--- /dev/null
+++ b/src/default-config.ts
@@ -0,0 +1,199 @@
+/**
+ * Default project configuration.
+ *
+ * Lives in its own file (separate from `types.ts`) because the
+ * `include` glob list is derived from the language registry — and
+ * the registry transitively imports `types.ts` via per-language
+ * files, which would create an evaluation cycle if `default-config`
+ * were itself imported by `types.ts` eagerly.
+ *
+ * **Lazy include resolution.** The `include` array is built on
+ * first access via a property getter, not at module load. By the
+ * time anything reads `DEFAULT_CONFIG.include`, the registry has
+ * fully evaluated, so all language definitions are available.
+ */
+
+import type { CodeGraphConfig } from './types';
+import { getLanguageDefs } from './extraction/languages/registry';
+
+let _includeCache: string[] | null = null;
+function buildIncludeGlobs(): string[] {
+ if (_includeCache) return _includeCache;
+ const seen = new Set<string>();
+ const out: string[] = [];
+ for (const def of getLanguageDefs()) {
+ for (const glob of def.includeGlobs) {
+ if (seen.has(glob)) continue;
+ seen.add(glob);
+ out.push(glob);
+ }
+ }
+ _includeCache = out;
+ return out;
+}
+
+const baseConfig: CodeGraphConfig = {
+ version: 1,
+ rootDir: '.',
+ include: [], // populated lazily via the getter below
+ exclude: [
+ // Version control
+ '**/.git/**',
+
+ // Dependencies
+ '**/node_modules/**',
+ '**/vendor/**',
+ '**/Pods/**',
+
+ // Generic build outputs
+ '**/dist/**',
+ '**/build/**',
+ '**/out/**',
+ '**/bin/**',
+ '**/obj/**',
+ '**/target/**',
+
+ // JavaScript/TypeScript
+ '**/*.min.js',
+ '**/*.bundle.js',
+ '**/.next/**',
+ '**/.nuxt/**',
+ '**/.svelte-kit/**',
+ '**/.output/**',
+ '**/.turbo/**',
+ '**/.cache/**',
+ '**/.parcel-cache/**',
+ '**/.vite/**',
+ '**/.astro/**',
+ '**/.docusaurus/**',
+ '**/.gatsby/**',
+ '**/.webpack/**',
+ '**/.nx/**',
+ '**/.yarn/cache/**',
+ '**/.pnpm-store/**',
+ '**/storybook-static/**',
+
+ // React Native / Expo
+ '**/.expo/**',
+ '**/web-build/**',
+ '**/ios/Pods/**',
+ '**/ios/build/**',
+ '**/android/build/**',
+ '**/android/.gradle/**',
+
+ // Python
+ '**/__pycache__/**',
+ '**/.venv/**',
+ '**/venv/**',
+ '**/site-packages/**',
+ '**/dist-packages/**',
+ '**/.pytest_cache/**',
+ '**/.mypy_cache/**',
+ '**/.ruff_cache/**',
+ '**/.tox/**',
+ '**/.nox/**',
+ '**/*.egg-info/**',
+ '**/.eggs/**',
+
+ // Go
+ '**/go/pkg/mod/**',
+
+ // Rust
+ '**/target/debug/**',
+ '**/target/release/**',
+
+ // Java/Kotlin/Gradle
+ '**/.gradle/**',
+ '**/.m2/**',
+ '**/generated-sources/**',
+ '**/.kotlin/**',
+
+ // Dart/Flutter
+ '**/.dart_tool/**',
+
+ // C#/.NET
+ '**/.vs/**',
+ '**/.nuget/**',
+ '**/artifacts/**',
+ '**/publish/**',
+
+ // C/C++
+ '**/cmake-build-*/**',
+ '**/CMakeFiles/**',
+ '**/bazel-*/**',
+ '**/vcpkg_installed/**',
+ '**/.conan/**',
+ '**/Debug/**',
+ '**/Release/**',
+ '**/x64/**',
+ '**/.pio/**', // PlatformIO (IoT/embedded build artifacts and library deps)
+
+ // Electron
+ '**/release/**',
+ '**/*.app/**',
+ '**/*.asar',
+
+ // Swift/iOS/Xcode
+ '**/DerivedData/**',
+ '**/.build/**',
+ '**/.swiftpm/**',
+ '**/xcuserdata/**',
+ '**/Carthage/Build/**',
+ '**/SourcePackages/**',
+
+ // Delphi/Pascal
+ '**/__history/**',
+ '**/__recovery/**',
+ '**/*.dcu',
+
+ // PHP
+ '**/.composer/**',
+ '**/storage/framework/**',
+ '**/bootstrap/cache/**',
+
+ // Ruby
+ '**/.bundle/**',
+ '**/tmp/cache/**',
+ '**/public/assets/**',
+ '**/public/packs/**',
+ '**/.yardoc/**',
+
+ // Testing/Coverage
+ '**/coverage/**',
+ '**/htmlcov/**',
+ '**/.nyc_output/**',
+ '**/test-results/**',
+ '**/.coverage/**',
+
+ // IDE/Editor
+ '**/.idea/**',
+
+ // Logs and temp
+ '**/logs/**',
+ '**/tmp/**',
+ '**/temp/**',
+
+ // Documentation build output
+ '**/_build/**',
+ '**/docs/_build/**',
+ '**/site/**',
+ ],
+ languages: [],
+ frameworks: [],
+ maxFileSize: 1024 * 1024, // 1MB
+ extractDocstrings: true,
+ trackCallSites: true,
+ enableCentrality: true,
+ enableChurn: true,
+ enableIssueHistory: true,
+ enableConfigRefs: true,
+ enableSqlRefs: true,
+};
+
+Object.defineProperty(baseConfig, 'include', {
+ get: () => buildIncludeGlobs(),
+ enumerable: true,
+ configurable: true,
+});
+
+export const DEFAULT_CONFIG: CodeGraphConfig = baseConfig;
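+
+// Example (illustrative): the first read of `include` runs the getter and
+// caches the result. The '**/*.ts' glob is an assumption about what the
+// TypeScript language def registers.
+//
+//   import { DEFAULT_CONFIG } from './default-config';
+//   const globs = DEFAULT_CONFIG.include; // built from the registry, then cached
+//   console.log(globs.includes('**/*.ts')); // presumably true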
diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts
index df264fb3..5c2aec09 100644
--- a/src/extraction/grammars.ts
+++ b/src/extraction/grammars.ts
@@ -4,77 +4,63 @@
* Uses web-tree-sitter (WASM) for universal cross-platform support.
* Grammars are loaded lazily — only languages actually present in the project
* are compiled, keeping V8 WASM memory pressure low on large codebases.
+ *
+ * As of the language-registry refactor, all per-language metadata
+ * (WASM filenames, file extensions, display names, vendored flag)
+ * lives in `./languages/<lang>.ts` and is auto-collected by
+ * `./languages/registry.ts`. The exports kept here
+ * (`EXTENSION_MAP`, `getSupportedLanguages`, `getLanguageDisplayName`)
+ * remain for backward compat but are derived from the registry.
*/
import * as path from 'path';
import { Parser, Language as WasmLanguage } from 'web-tree-sitter';
import { Language } from '../types';
+import { getLanguageDefs, getLanguageDefByExtension, getLanguageDefByName } from './languages/registry';
export type GrammarLanguage = Exclude<Language, 'unknown' | 'svelte' | 'liquid'>;
/**
- * WASM filename map — maps each language to its .wasm grammar file
- * in the tree-sitter-wasms package.
+ * File extension → Language mapping, computed lazily on first read.
+ *
+ * Cannot be a top-level IIFE: the registry transitively pulls in
+ * `tree-sitter.ts` (via custom-extractor language defs), which
+ * imports this file — building the map at module load would TDZ
+ * against `ALL_DEFS` in the registry. Use the `getExtensionMap()`
+ * function for an explicit lazy entry point, or read
+ * `EXTENSION_MAP` (a Proxy that materialises on first property
+ * access).
*/
-const WASM_GRAMMAR_FILES: Record<GrammarLanguage, string> = {
- typescript: 'tree-sitter-typescript.wasm',
- tsx: 'tree-sitter-tsx.wasm',
- javascript: 'tree-sitter-javascript.wasm',
- jsx: 'tree-sitter-javascript.wasm',
- python: 'tree-sitter-python.wasm',
- go: 'tree-sitter-go.wasm',
- rust: 'tree-sitter-rust.wasm',
- java: 'tree-sitter-java.wasm',
- c: 'tree-sitter-c.wasm',
- cpp: 'tree-sitter-cpp.wasm',
- csharp: 'tree-sitter-c_sharp.wasm',
- php: 'tree-sitter-php.wasm',
- ruby: 'tree-sitter-ruby.wasm',
- swift: 'tree-sitter-swift.wasm',
- kotlin: 'tree-sitter-kotlin.wasm',
- dart: 'tree-sitter-dart.wasm',
- pascal: 'tree-sitter-pascal.wasm',
-};
+let _extensionMapCache: Record<string, Language> | null = null;
+export function getExtensionMap(): Record<string, Language> {
+ if (_extensionMapCache) return _extensionMapCache;
+ const out: Record<string, Language> = {};
+ for (const def of getLanguageDefs()) {
+ for (const ext of def.extensions) {
+ out[ext.toLowerCase()] = def.name as Language;
+ }
+ }
+ _extensionMapCache = out;
+ return out;
+}
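+
+// Illustrative shape of a registry def, inferred from the fields read here
+// and in loadGrammarsForLanguages below (not the actual exported type; the
+// includeGlobs value is a guess):
+//   { name: 'pascal', displayName: 'Pascal / Delphi',
+//     extensions: ['.pas', '.dpr'], includeGlobs: ['**/*.pas'],
+//     grammar: { wasmFile: 'tree-sitter-pascal.wasm', vendored: true } }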
/**
- * File extension to Language mapping
+ * Backward-compat: a Proxy that lazy-builds the extension map on
+ * first property access. Existing callers can keep doing
+ * `EXTENSION_MAP['.ts']` without changes.
*/
-export const EXTENSION_MAP: Record<string, Language> = {
- '.ts': 'typescript',
- '.tsx': 'tsx',
- '.js': 'javascript',
- '.mjs': 'javascript',
- '.cjs': 'javascript',
- '.jsx': 'jsx',
- '.py': 'python',
- '.pyw': 'python',
- '.go': 'go',
- '.rs': 'rust',
- '.java': 'java',
- '.c': 'c',
- '.h': 'c', // Could also be C++, defaulting to C
- '.cpp': 'cpp',
- '.cc': 'cpp',
- '.cxx': 'cpp',
- '.hpp': 'cpp',
- '.hxx': 'cpp',
- '.cs': 'csharp',
- '.php': 'php',
- '.rb': 'ruby',
- '.rake': 'ruby',
- '.swift': 'swift',
- '.kt': 'kotlin',
- '.kts': 'kotlin',
- '.dart': 'dart',
- '.liquid': 'liquid',
- '.svelte': 'svelte',
- '.pas': 'pascal',
- '.dpr': 'pascal',
- '.dpk': 'pascal',
- '.lpr': 'pascal',
- '.dfm': 'pascal',
- '.fmx': 'pascal',
-};
+export const EXTENSION_MAP: Record<string, Language> = new Proxy({} as Record<string, Language>, {
+ get(_t, key: string) { return getExtensionMap()[key]; },
+ has(_t, key: string) { return key in getExtensionMap(); },
+ ownKeys() { return Object.keys(getExtensionMap()); },
+ getOwnPropertyDescriptor(_t, key: string) {
+ const map = getExtensionMap();
+ if (key in map) {
+ return { configurable: true, enumerable: true, writable: false, value: map[key] };
+ }
+ return undefined;
+ },
+});
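+
+// Example (illustrative): all three access styles go through the Proxy traps.
+//   EXTENSION_MAP['.py']        // 'python', via `get` (map built on first access)
+//   Object.keys(EXTENSION_MAP)  // every known extension, via `ownKeys`
+//   '.rs' in EXTENSION_MAP      // true, via `has`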
/**
* Caches for loaded grammars and parsers
@@ -108,21 +94,28 @@ export async function loadGrammarsForLanguages(languages: Language[]): Promise<void>
- lang in WASM_GRAMMAR_FILES &&
- !languageCache.has(lang) &&
- !unavailableGrammarErrors.has(lang)
- );
+ // Deduplicate; filter to languages that have a tree-sitter grammar
+ // (registry's `def.grammar` field) and aren't already loaded.
+ const seen = new Set<Language>();
+ const toLoad: Array<{ lang: Language; wasmFile: string; vendored: boolean }> = [];
+ for (const lang of languages) {
+ if (seen.has(lang)) continue;
+ seen.add(lang);
+ if (languageCache.has(lang) || unavailableGrammarErrors.has(lang)) continue;
+ const def = getLanguageDefByName(lang);
+ if (!def?.grammar) continue;
+ toLoad.push({
+ lang,
+ wasmFile: def.grammar.wasmFile,
+ vendored: def.grammar.vendored === true,
+ });
+ }
// Load grammars sequentially to avoid web-tree-sitter WASM race condition on Node 20+
// See: https://github.com/tree-sitter/tree-sitter/issues/2338
- for (const lang of toLoad) {
- const wasmFile = WASM_GRAMMAR_FILES[lang];
+ for (const { lang, wasmFile, vendored } of toLoad) {
try {
- // Pascal ships its own WASM (not in tree-sitter-wasms)
- const wasmPath = lang === 'pascal'
+ const wasmPath = vendored
? path.join(__dirname, 'wasm', wasmFile)
: require.resolve(`tree-sitter-wasms/out/${wasmFile}`);
const language = await WasmLanguage.load(wasmPath);
@@ -140,7 +133,9 @@ export async function loadGrammarsForLanguages(languages: Language[]): Promise<void> {
- const allLanguages = Object.keys(WASM_GRAMMAR_FILES) as GrammarLanguage[];
+ const allLanguages = getLanguageDefs()
+ .filter((d) => d.grammar)
+ .map((d) => d.name as Language);
await loadGrammarsForLanguages(allLanguages);
}
@@ -176,7 +171,8 @@ export function getParser(language: Language): Parser | null {
*/
export function detectLanguage(filePath: string, source?: string): Language {
const ext = filePath.substring(filePath.lastIndexOf('.')).toLowerCase();
- const lang = EXTENSION_MAP[ext] || 'unknown';
+ const def = getLanguageDefByExtension(ext);
+ const lang = (def?.name as Language) ?? 'unknown';
// .h files could be C or C++ — check source content for C++ features
if (lang === 'c' && ext === '.h' && source) {
@@ -196,29 +192,30 @@ function looksLikeCpp(source: string): boolean {
}
/**
- * Check if a language is supported (has a grammar defined).
- * Returns true if the grammar exists, even if not yet loaded.
+ * Check if a language is supported (has a grammar or custom extractor).
+ * Returns true if a registry entry exists, even if its grammar isn't loaded.
*/
export function isLanguageSupported(language: Language): boolean {
- if (language === 'svelte') return true; // custom extractor (script block delegation)
- if (language === 'liquid') return true; // custom regex extractor
if (language === 'unknown') return false;
- return language in WASM_GRAMMAR_FILES;
+ return getLanguageDefByName(language) !== undefined;
}
/**
* Check if a grammar has been loaded and is ready for parsing.
+ * Custom-extractor languages (no `grammar` field) are always "ready".
*/
export function isGrammarLoaded(language: Language): boolean {
- if (language === 'svelte' || language === 'liquid') return true;
+ const def = getLanguageDefByName(language);
+ if (!def) return false;
+ if (!def.grammar) return true; // custom extractor — always available
return languageCache.has(language);
}
/**
- * Get all supported languages (those with grammar definitions).
+ * Get all supported languages from the registry.
*/
export function getSupportedLanguages(): Language[] {
- return [...(Object.keys(WASM_GRAMMAR_FILES) as GrammarLanguage[]), 'svelte', 'liquid'];
+ return getLanguageDefs().map((d) => d.name as Language);
}
/**
@@ -237,54 +234,33 @@ export function resetParser(language: Language): void {
}
/**
- * Clear parser/grammar caches (useful for testing)
+ * Clear parser cache (useful for testing).
+ *
+ * Note: `languageCache` is intentionally NOT cleared — the WASM
+ * `Language` modules are expensive to load and stay cached so a
+ * subsequent `getParser` call can rebuild a fresh `Parser` instance
+ * without re-reading the .wasm file. To fully re-init, set
+ * `parserInitialized = false` and call `initGrammars()` again.
*/
export function clearParserCache(): void {
for (const parser of parserCache.values()) {
- parser.delete();
+ try { parser.delete(); } catch { /* ignore */ }
}
parserCache.clear();
- // Note: languageCache is NOT cleared — WASM languages persist.
- // To fully re-init, set parserInitialized = false and call initGrammars() again.
unavailableGrammarErrors.clear();
}
/**
- * Report grammars that failed to load.
+ * Get unavailable grammar errors (for diagnostics)
*/
-export function getUnavailableGrammarErrors(): Partial<Record<Language, string>> {
- const out: Partial<Record<Language, string>> = {};
- for (const [language, message] of unavailableGrammarErrors.entries()) {
- out[language] = message;
- }
- return out;
+export function getUnavailableGrammarErrors(): Record<string, string> {
+ return Object.fromEntries(unavailableGrammarErrors);
}
/**
- * Get language display name
+ * Human-readable display name (e.g. "TypeScript", "Pascal / Delphi").
+ * Returns the canonical name unchanged if no display name is registered.
*/
export function getLanguageDisplayName(language: Language): string {
- const names: Record<Language, string> = {
- typescript: 'TypeScript',
- javascript: 'JavaScript',
- tsx: 'TypeScript (TSX)',
- jsx: 'JavaScript (JSX)',
- python: 'Python',
- go: 'Go',
- rust: 'Rust',
- java: 'Java',
- c: 'C',
- cpp: 'C++',
- csharp: 'C#',
- php: 'PHP',
- ruby: 'Ruby',
- swift: 'Swift',
- kotlin: 'Kotlin',
- dart: 'Dart',
- svelte: 'Svelte',
- liquid: 'Liquid',
- pascal: 'Pascal / Delphi',
- unknown: 'Unknown',
- };
- return names[language] || language;
+ return getLanguageDefByName(language)?.displayName ?? language;
}
diff --git a/src/extraction/hcl-extractor.ts b/src/extraction/hcl-extractor.ts
new file mode 100644
index 00000000..3d810c88
--- /dev/null
+++ b/src/extraction/hcl-extractor.ts
@@ -0,0 +1,587 @@
+import type { Node as SyntaxNode } from 'web-tree-sitter';
+import { Node, Edge, ExtractionResult, ExtractionError, UnresolvedReference, NodeKind } from '../types';
+import { generateNodeId, getNodeText } from './tree-sitter-helpers';
+import { getParser } from './grammars';
+
+/**
+ * HclExtractor — extracts a Terraform/HCL file into the graph.
+ *
+ * HCL is a declarative configuration language: there are no functions,
+ * classes, or methods. The unit of structure is the **block**:
+ *
+ * [