161 changes: 161 additions & 0 deletions __tests__/db-perf.test.ts
@@ -0,0 +1,161 @@
/**
* DB Performance / Correctness Tests
*
* Regression tests for three changes:
* 1. Batch `getNodesByIds` collapses graph-traversal N+1 reads.
* 2. `insertNode` invalidates the LRU cache so INSERT OR REPLACE
* doesn't serve a stale cached row on next `getNodeById`.
* 3. `runMaintenance` runs `PRAGMA optimize` + `wal_checkpoint(PASSIVE)`
* after indexAll/sync without throwing.
*/

import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { DatabaseConnection } from '../src/db';
import { QueryBuilder } from '../src/db/queries';
import { Node } from '../src/types';

function makeNode(id: string, name = id): Node {
return {
id,
kind: 'function',
name,
qualifiedName: name,
filePath: 'a.ts',
language: 'typescript',
startLine: 1,
endLine: 1,
startColumn: 0,
endColumn: 0,
updatedAt: Date.now(),
};
}

describe('getNodesByIds (batch lookup)', () => {
let dir: string;
let db: DatabaseConnection;
let q: QueryBuilder;

beforeEach(() => {
dir = fs.mkdtempSync(path.join(os.tmpdir(), 'db-perf-batch-'));
db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
q = new QueryBuilder(db.getDb());
});

afterEach(() => {
db.close();
if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
});

it('returns a Map keyed by id, with one entry per existing node', () => {
q.insertNodes([makeNode('n1'), makeNode('n2'), makeNode('n3')]);
const out = q.getNodesByIds(['n1', 'n2', 'n3']);
expect(out.size).toBe(3);
expect(out.get('n1')!.name).toBe('n1');
expect(out.get('n3')!.name).toBe('n3');
});

it('omits missing IDs from the result map (no nulls, no exceptions)', () => {
q.insertNodes([makeNode('n1'), makeNode('n2')]);
const out = q.getNodesByIds(['n1', 'missing', 'n2']);
expect(out.size).toBe(2);
expect(out.has('missing')).toBe(false);
expect(out.has('n1')).toBe(true);
expect(out.has('n2')).toBe(true);
});

it('handles an empty input array', () => {
expect(q.getNodesByIds([]).size).toBe(0);
});

it('handles batches over the SQLite parameter limit (chunking)', () => {
// Insert 1500 nodes; the helper chunks at 500 internally.
const nodes = Array.from({ length: 1500 }, (_, i) => makeNode(`n${i}`));
q.insertNodes(nodes);
const ids = nodes.map((n) => n.id);
const out = q.getNodesByIds(ids);
expect(out.size).toBe(1500);
// Spot-check a few from the first / middle / last chunk.
expect(out.has('n0')).toBe(true);
expect(out.has('n750')).toBe(true);
expect(out.has('n1499')).toBe(true);
});

it('serves cache hits from memory and queries only the misses', () => {
q.insertNodes([makeNode('n1'), makeNode('n2'), makeNode('n3')]);
// Warm the cache for n1 only.
q.getNodeById('n1');
// Replace the underlying row to make a miss-vs-cache-hit detectable.
db.getDb().prepare('UPDATE nodes SET name = ? WHERE id = ?').run('changed', 'n1');
const out = q.getNodesByIds(['n1', 'n2']);
// The cached n1 (still 'n1', not 'changed') must be returned.
expect(out.get('n1')!.name).toBe('n1');
expect(out.get('n2')!.name).toBe('n2');
});
});

describe('insertNode cache invalidation', () => {
let dir: string;
let db: DatabaseConnection;
let q: QueryBuilder;

beforeEach(() => {
dir = fs.mkdtempSync(path.join(os.tmpdir(), 'db-perf-cache-'));
db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
q = new QueryBuilder(db.getDb());
});

afterEach(() => {
db.close();
if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
});

it('does not serve a stale cached node after INSERT OR REPLACE', () => {
// Regression: insertNode (which uses INSERT OR REPLACE) used to skip
// cache invalidation, so the next getNodeById returned the pre-replace
// version until LRU eviction.
const original = makeNode('n1', 'oldName');
q.insertNode(original);
const beforeReplace = q.getNodeById('n1');
expect(beforeReplace!.name).toBe('oldName');

// Replace via insertNode (the bug path).
q.insertNode({ ...original, name: 'newName', updatedAt: Date.now() });
const afterReplace = q.getNodeById('n1');
expect(afterReplace!.name).toBe('newName');
});
});

describe('runMaintenance', () => {
let dir: string;
let db: DatabaseConnection;

beforeEach(() => {
dir = fs.mkdtempSync(path.join(os.tmpdir(), 'db-perf-maint-'));
db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
});

afterEach(() => {
db.close();
if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
});

it('runs without throwing on a fresh database', () => {
expect(() => db.runMaintenance()).not.toThrow();
});

it('runs without throwing after writes', () => {
const q = new QueryBuilder(db.getDb());
q.insertNodes([makeNode('n1'), makeNode('n2')]);
expect(() => db.runMaintenance()).not.toThrow();
});

it('swallows failures rather than propagating (best-effort)', () => {
// Close the DB so the underlying handle would normally throw on any
// exec(). runMaintenance must still not propagate.
db.close();
expect(() => db.runMaintenance()).not.toThrow();
});
});
30 changes: 30 additions & 0 deletions src/db/index.ts
@@ -152,6 +152,36 @@ export class DatabaseConnection
this.db.exec('ANALYZE');
}

/**
* Lightweight, non-blocking maintenance to run after bulk writes
* (indexAll, sync). Two operations:
*
* - `PRAGMA optimize` — incremental ANALYZE; SQLite only re-analyzes
* tables whose row counts changed materially since the last
* ANALYZE. Without it, the query planner has no statistics on the
* freshly-bulk-loaded tables and can pick suboptimal indexes.
*
* - `PRAGMA wal_checkpoint(PASSIVE)` — fold pending WAL pages back
* into the main database file so the WAL file doesn't grow
* unboundedly between automatic checkpoints (auto-fires at 1000
* pages by default; large indexAll runs blow past that).
*
* Failures in either operation are silently swallowed; this is a
* best-effort optimization, never load-bearing for correctness.
*/
runMaintenance(): void {
try {
this.db.exec('PRAGMA optimize');
} catch {
// ignore
}
try {
this.db.exec('PRAGMA wal_checkpoint(PASSIVE)');
} catch {
// ignore (e.g., not in WAL mode)
}
}

/**
* Close the database connection
*/
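A minimal sketch of how the new hook is intended to be called, per the doc comment above. `Indexer` and `indexAll` are hypothetical stand-ins for the bulk-write callers; only `DatabaseConnection.runMaintenance` comes from this diff:

// Hypothetical caller (Indexer is an assumed interface, not part of this PR).
interface Indexer {
  indexAll(): Promise<void>;
}

async function indexAndMaintain(db: DatabaseConnection, indexer: Indexer): Promise<void> {
  await indexer.indexAll(); // bulk writes push the WAL past the 1000-page autocheckpoint
  db.runMaintenance();      // PRAGMA optimize + wal_checkpoint(PASSIVE); never throws
}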
59 changes: 59 additions & 0 deletions src/db/queries.ts
@@ -223,6 +223,12 @@ export class QueryBuilder
return;
}

// INSERT OR REPLACE may overwrite a node we have cached. Drop the
// stale entry so the next getNodeById sees the new row, not the old
// one (matches the cache-invalidation pattern used by updateNode and
// deleteNode below).
this.nodeCache.delete(node.id);

try {
this.stmts.insertNode.run({
id: node.id,
@@ -379,6 +385,59 @@
return node;
}

/**
* Batch lookup: fetch many nodes by ID in a single SQL round-trip
* (one IN-list query per 500-ID chunk for oversized batches).
*
* Replaces the N+1 pattern in graph traversal where every edge would
* trigger its own `getNodeById` call. For a function with 50 callers
* this collapses 50 point reads into one IN-list query (~10-50x
* faster end-to-end).
*
* Returns a Map keyed by id so callers can preserve their own ordering
* (typically the order edges were returned from the graph). Missing IDs
* are simply absent from the map.
*
* Cache-aware: ids already in the LRU cache are served from memory and
* the SQL query only touches the misses.
*/
getNodesByIds(ids: readonly string[]): Map<string, Node> {
const out = new Map<string, Node>();
if (ids.length === 0) return out;

// Serve cache hits first; build the miss list for SQL.
const misses: string[] = [];
for (const id of ids) {
const cached = this.nodeCache.get(id);
if (cached !== undefined) {
// LRU touch
this.nodeCache.delete(id);
this.nodeCache.set(id, cached);
out.set(id, cached);
} else {
misses.push(id);
}
}
if (misses.length === 0) return out;

// Chunk under SQLite's parameter limit (default 999, raised to 32766
// in better-sqlite3 builds — chunk at 500 for safety across both
// backends and to keep the query plan simple).
const CHUNK = 500;
for (let i = 0; i < misses.length; i += CHUNK) {
const chunk = misses.slice(i, i + CHUNK);
const placeholders = chunk.map(() => '?').join(',');
const rows = this.db
.prepare(`SELECT * FROM nodes WHERE id IN (${placeholders})`)
.all(...chunk) as NodeRow[];
for (const row of rows) {
const node = rowToNode(row);
out.set(node.id, node);
this.cacheNode(node);
}
}
return out;
}

/**
* Add a node to the cache, evicting oldest if needed
*/
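For context on the N+1 claim in the doc comment, a sketch of the traversal call site before and after the batch helper. `getEdgesFrom` and the `Edge` shape are assumptions for illustration; `getNodeById`, `getNodesByIds`, `QueryBuilder`, and `Node` come from this diff:

// Assumed edge shape for illustration only.
interface Edge {
  sourceId: string;
}

// Before: one point read per edge (the N+1 pattern).
function callersOld(q: QueryBuilder, edges: Edge[]): Node[] {
  return edges
    .map((e) => q.getNodeById(e.sourceId)) // 50 callers => 50 queries
    .filter((n): n is Node => n != null);
}

// After: one IN-list query for the whole batch. The returned Map lets
// the caller keep edge order, and missing IDs are simply absent.
function callersNew(q: QueryBuilder, edges: Edge[]): Node[] {
  const byId = q.getNodesByIds(edges.map((e) => e.sourceId));
  return edges
    .map((e) => byId.get(e.sourceId))
    .filter((n): n is Node => n !== undefined);
}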