Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion __tests__/foundation.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ describe('Database Connection', () => {

const version = db.getSchemaVersion();
expect(version).not.toBeNull();
expect(version?.version).toBe(3);
expect(version?.version).toBe(4);

db.close();
});
Expand Down
78 changes: 77 additions & 1 deletion __tests__/pr19-improvements.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ describe('Best-Candidate Resolution', () => {
describe('Schema v2 Migration', () => {
it.skipIf(!HAS_SQLITE)('should have correct current schema version', async () => {
const { CURRENT_SCHEMA_VERSION } = await import('../src/db/migrations');
expect(CURRENT_SCHEMA_VERSION).toBe(3);
expect(CURRENT_SCHEMA_VERSION).toBe(4);
});

it.skipIf(!HAS_SQLITE)('should have migration for version 2', async () => {
Expand All @@ -308,6 +308,82 @@ describe('Schema v2 Migration', () => {
});
});

// =============================================================================
// Schema v4 Migration: drop redundant edge indexes
// =============================================================================

/**
 * Verifies schema v4: the narrow `idx_edges_source` / `idx_edges_target`
 * indexes must be absent both on a freshly initialised database and after
 * upgrading a v3 database, while the composite `(source, kind)` /
 * `(target, kind)` indexes stay in place.
 *
 * Every test closes its database handle in a `finally` block so that a failing
 * assertion cannot leave the SQLite file open while `afterEach` removes the
 * temp directory.
 */
describe('Schema v4 Migration: drop redundant edge indexes', () => {
  let tempDir: string;

  beforeEach(() => {
    tempDir = createTempDir();
  });

  afterEach(() => {
    cleanupTempDir(tempDir);
  });

  it.skipIf(!HAS_SQLITE)('fresh DB does not create idx_edges_source / idx_edges_target', async () => {
    const { DatabaseConnection } = await import('../src/db/index');
    const db = DatabaseConnection.initialize(path.join(tempDir, 'fresh.db'));

    try {
      const indexes = db.getDb()
        .prepare("SELECT name FROM sqlite_master WHERE type = 'index' AND tbl_name = 'edges'")
        .all() as Array<{ name: string }>;
      const names = indexes.map((r) => r.name);

      expect(names).not.toContain('idx_edges_source');
      expect(names).not.toContain('idx_edges_target');
      // The kind-prefixed indexes that cover the dropped ones must remain.
      expect(names).toContain('idx_edges_source_kind');
      expect(names).toContain('idx_edges_target_kind');
    } finally {
      // Release the file handle even when an expectation above throws.
      db.close();
    }
  });

  it.skipIf(!HAS_SQLITE)('upgrade path drops both narrow indexes if present', async () => {
    const dbPath = path.join(tempDir, 'upgrade.db');
    const { createDatabase } = await import('../src/db/sqlite-adapter');
    const adapter = createDatabase(dbPath);

    try {
      // Simulate a v3 database: minimal edges table + the two narrow indexes,
      // with schema_versions recording version 3 as the latest applied.
      adapter.exec(`
CREATE TABLE edges (
id INTEGER PRIMARY KEY,
source TEXT NOT NULL,
target TEXT NOT NULL,
kind TEXT NOT NULL
);
CREATE INDEX idx_edges_source ON edges(source);
CREATE INDEX idx_edges_target ON edges(target);
CREATE INDEX idx_edges_source_kind ON edges(source, kind);
CREATE INDEX idx_edges_target_kind ON edges(target, kind);
CREATE TABLE schema_versions (
version INTEGER PRIMARY KEY,
applied_at INTEGER NOT NULL,
description TEXT
);
INSERT INTO schema_versions (version, applied_at, description) VALUES (3, 0, 'v3');
`);

      const { runMigrations, getCurrentVersion } = await import('../src/db/migrations');
      runMigrations(adapter, getCurrentVersion(adapter));

      const indexes = adapter
        .prepare("SELECT name FROM sqlite_master WHERE type = 'index' AND tbl_name = 'edges'")
        .all() as Array<{ name: string }>;
      const names = indexes.map((r) => r.name);

      expect(names).not.toContain('idx_edges_source');
      expect(names).not.toContain('idx_edges_target');
      expect(names).toContain('idx_edges_source_kind');
      expect(names).toContain('idx_edges_target_kind');
      expect(getCurrentVersion(adapter)).toBe(4);
    } finally {
      // Release the file handle even when an expectation above throws.
      adapter.close();
    }
  });
});

// =============================================================================
// Database Layer: Batch Insert, getAllNodes, Pragmas
// =============================================================================
Expand Down
119 changes: 119 additions & 0 deletions scripts/spikes/spike-edge-indexes.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#!/usr/bin/env node
/**
* Spike: redundant edge indexes
*
* Drops `idx_edges_source` and `idx_edges_target` and measures
* the impact on:
* - DB size
* - Bulk-insert throughput
* - Latency for `WHERE source = ?` and `WHERE target = ?`
* (the two queries that previously hit the dropped indexes)
*
* The hypothesis: SQLite covers source-only / target-only lookups
* via the wider `(source, kind)` and `(target, kind)` composite
* indexes through left-prefix scan, so dropping the narrow ones
* costs nothing on the read side but saves space and write time.
*
* Synthesises 50K nodes / 250K edges so the measurement scales to
* what real users will hit; codegraph's own DB at ~2K nodes is too
* small for index choices to surface.
*/
import Database from 'better-sqlite3';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';

// Number of synthetic rows inserted into the `nodes` table.
const NODES = 50_000;
// Outgoing edges generated for each node, so NODES * EDGES_PER_NODE edges in total.
const EDGES_PER_NODE = 5;

/**
 * Milliseconds elapsed since `start`.
 *
 * @param start a reading previously taken from `process.hrtime.bigint()` (nanoseconds)
 * @returns elapsed time as a floating-point number of milliseconds
 */
function ms(start) {
  const elapsedNs = process.hrtime.bigint() - start;
  return Number(elapsedNs) / 1_000_000;
}
/**
 * Format a number for the report: two decimals when it is below 10,
 * otherwise rounded to a whole number.
 *
 * @param n the value to format
 * @returns the formatted string
 */
function fmt(n) {
  const decimals = n < 10 ? 2 : 0;
  return n.toFixed(decimals);
}

// Announce the run and the size of the synthetic data set about to be built.
const totalEdges = NODES * EDGES_PER_NODE;
console.log('\n=== Spike: redundant edge indexes ===\n');
console.log(`Synthesizing ${NODES.toLocaleString()} nodes, ${totalEdges.toLocaleString()} edges...`);

/**
 * Create a throwaway SQLite database in the OS temp directory, fill it with
 * the synthetic graph (NODES nodes, EDGES_PER_NODE edges each) inside a single
 * transaction, and report its size and bulk-insert time.
 *
 * @param {{ withRedundant: boolean }} options when `withRedundant` is true the
 *   narrow `idx_edges_source` / `idx_edges_target` indexes are created in
 *   addition to the composite ones; otherwise only the composite ones exist.
 * @returns an object with the still-open connection (`db`), its file path
 *   (`dbPath`), the database file size in bytes (`size`) and the wall time of
 *   the insert transaction in milliseconds (`insertMs`). The caller is
 *   responsible for closing `db` and deleting `dbPath`.
 */
function buildEdgesDb({ withRedundant }) {
  const dbPath = path.join(os.tmpdir(), `spike-edges-${Date.now()}-${Math.random()}.db`);
  const db = new Database(dbPath);
  db.pragma('journal_mode = WAL');
  db.pragma('synchronous = NORMAL');
  db.pragma('cache_size = -64000');
  db.exec(`
CREATE TABLE nodes (id TEXT PRIMARY KEY, kind TEXT NOT NULL, name TEXT NOT NULL);
CREATE TABLE edges (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source TEXT NOT NULL, target TEXT NOT NULL, kind TEXT NOT NULL,
line INTEGER, col INTEGER
);
CREATE INDEX idx_edges_kind ON edges(kind);
CREATE INDEX idx_edges_source_kind ON edges(source, kind);
CREATE INDEX idx_edges_target_kind ON edges(target, kind);
`);
  if (withRedundant) {
    // Indexes are created before the inserts so their maintenance cost is
    // part of the measured bulk-insert time.
    db.exec(`
CREATE INDEX idx_edges_source ON edges(source);
CREATE INDEX idx_edges_target ON edges(target);
`);
  }

  const insNode = db.prepare('INSERT INTO nodes (id, kind, name) VALUES (?, ?, ?)');
  const insEdge = db.prepare('INSERT INTO edges (source, target, kind, line, col) VALUES (?, ?, ?, ?, ?)');
  const KINDS = ['calls', 'imports', 'references', 'type_of', 'extends', 'instantiates'];
  const tStart = process.hrtime.bigint();
  db.transaction(() => {
    for (let i = 0; i < NODES; i++) {
      insNode.run(`n${i}`, 'function', `name${i}`);
    }
    for (let i = 0; i < NODES; i++) {
      const src = `n${i}`;
      for (let j = 0; j < EDGES_PER_NODE; j++) {
        // Each node points at its next EDGES_PER_NODE neighbours, wrapping around.
        const tgt = `n${(i + j + 1) % NODES}`;
        const kind = KINDS[j % KINDS.length];
        insEdge.run(src, tgt, kind, i, j);
      }
    }
  })();
  const insertMs = ms(tStart);
  db.exec('PRAGMA optimize');
  // In WAL mode committed pages may still sit in the `-wal` sidecar file.
  // Checkpoint them into the main file so the statSync() below measures the
  // whole database rather than whatever happened to be flushed so far.
  db.pragma('wal_checkpoint(TRUNCATE)');

  return { db, dbPath, size: fs.statSync(dbPath).size, insertMs };
}

// Build both variants: the baseline keeps the narrow source/target indexes,
// the stripped one relies on the composite indexes only.
const baseline = buildEdgesDb({ withRedundant: true });
const stripped = buildEdgesDb({ withRedundant: false });

// Derive the figures first, then print the size / bulk-insert comparison.
const baselineMb = (baseline.size / 1024 / 1024).toFixed(1);
const strippedMb = (stripped.size / 1024 / 1024).toFixed(1);
const bytesSaved = baseline.size - stripped.size;
const sizeDelta = (bytesSaved / baseline.size * 100).toFixed(1);
const insertSpeedup = (baseline.insertMs / stripped.insertMs).toFixed(2);

console.log('');
console.log(` baseline (with redundant): size=${baselineMb} MB · bulk insert=${fmt(baseline.insertMs)}ms`);
console.log(` stripped : size=${strippedMb} MB · bulk insert=${fmt(stripped.insertMs)}ms`);
console.log(` Δ size: -${sizeDelta}% · Δ bulk insert: ${insertSpeedup}× faster without redundant indexes`);

/**
 * Measure the average latency of the source-only and target-only edge lookups
 * against `db`, print one summary line prefixed with `label`, and return the
 * two averages.
 *
 * @param db an open database connection exposing `prepare()`
 * @param label name of the variant being measured, used in the printed line
 * @returns `{ sourceMs, targetMs }`, the average milliseconds per query
 */
function timeQueries(db, label) {
  const iterations = 500;
  const bySource = db.prepare('SELECT COUNT(*) FROM edges WHERE source = ?');
  const byTarget = db.prepare('SELECT COUNT(*) FROM edges WHERE target = ?');

  // Run `statement` once per iteration with node ids n0, n1, ... and return
  // the mean wall time of a single execution.
  const averageMs = (statement) => {
    const startedAt = process.hrtime.bigint();
    let i = 0;
    while (i < iterations) {
      statement.get(`n${i % NODES}`);
      i += 1;
    }
    return ms(startedAt) / iterations;
  };

  const sourceMs = averageMs(bySource);
  const targetMs = averageMs(byTarget);
  console.log(` ${label}: WHERE source=? avg ${fmt(sourceMs)}ms · WHERE target=? avg ${fmt(targetMs)}ms`);
  return { sourceMs, targetMs };
}
// Time the two lookups on each variant and print the stripped/baseline ratio.
console.log('');
const baseQ = timeQueries(baseline.db, 'baseline');
const strQ = timeQueries(stripped.db, 'stripped');
const sourceRatio = (strQ.sourceMs / baseQ.sourceMs).toFixed(2);
const targetRatio = (strQ.targetMs / baseQ.targetMs).toFixed(2);
console.log(` query speed delta: source ${sourceRatio}× · target ${targetRatio}× (>1 = stripped slower)`);

// Print the query plan for the source-only lookup on the stripped DB so the
// output shows whether an index is still used once the narrow ones are gone.
const plan = stripped.db
  .prepare('EXPLAIN QUERY PLAN SELECT COUNT(*) FROM edges WHERE source = ?')
  .all('n0');
console.log('');
console.log(' EXPLAIN (stripped, source=?):');
plan.forEach((step) => {
  console.log(` ${step.detail}`);
});

// Close both connections first, then remove the temporary database files.
const runs = [baseline, stripped];
for (const run of runs) run.db.close();
for (const run of runs) fs.unlinkSync(run.dbPath);

console.log('\n=== Done ===\n');
19 changes: 18 additions & 1 deletion src/db/migrations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import { SqliteDatabase } from './sqlite-adapter';
/**
* Current schema version
*/
export const CURRENT_SCHEMA_VERSION = 3;
export const CURRENT_SCHEMA_VERSION = 4;

/**
* Migration definition
Expand Down Expand Up @@ -54,6 +54,23 @@ const migrations: Migration[] = [
`);
},
},
{
// Migration v4: drop the single-column edge indexes.
//
// idx_edges_source and idx_edges_target are fully covered by the
// wider idx_edges_source_kind and idx_edges_target_kind indexes via
// SQLite's left-prefix scan. Keeping the narrow ones costs ~17-22%
// of DB size and ~1.3x bulk-insert time without giving any query
// that the kind-prefixed indexes don't already cover.
//
// The plan quoted here (SEARCH edges USING COVERING INDEX
// idx_edges_source_kind) is what the spike prints for its
// `SELECT COUNT(*) FROM edges WHERE source = ?` probe.
// See scripts/spikes/spike-edge-indexes.mjs for the reproducer.
//
// Both statements use IF EXISTS, so the migration also succeeds on a
// database where either index is already absent.
version: 4,
description: 'Drop redundant idx_edges_source and idx_edges_target indexes',
up: (db) => {
db.exec(`
DROP INDEX IF EXISTS idx_edges_source;
DROP INDEX IF EXISTS idx_edges_target;
`);
},
},
];

/**
Expand Down
5 changes: 3 additions & 2 deletions src/db/schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,9 @@ CREATE TRIGGER IF NOT EXISTS nodes_au AFTER UPDATE ON nodes BEGIN
END;

-- Edge indexes
CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source);
CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target);
-- Note: narrow source/target indexes are intentionally omitted — the
-- (source, kind) and (target, kind) composite indexes below cover
-- source-only and target-only lookups via SQLite's left-prefix scan.
CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);
CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source, kind);
CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON edges(target, kind);
Expand Down