Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions __tests__/embeddings.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import {
cosineNormalised,
reciprocalRankFusion,
topKByCosine,
topKByCosineMatrix,
EmbeddingCache,
} from '../src/llm/embeddings';

const EMBED_DIM = 8;
Expand Down Expand Up @@ -157,6 +159,84 @@ describe('embedding helpers', () => {
const sorted = [...fused.entries()].sort((a, b) => b[1] - a[1]).map(([id]) => id);
expect(sorted[0]).toBe('y');
});

// Parity check: ranking from the packed matrix must equal ranking from per-node byte blobs.
it('topKByCosineMatrix matches topKByCosine on the same data', () => {
  const queryVec = l2(Float32Array.from([1, 0, 0, 0, 0, 0, 0, 0]));
  const fixtures = [
    { id: 'a', v: l2(Float32Array.from([0.9, 0.1, 0, 0, 0, 0, 0, 0])) },
    { id: 'b', v: l2(Float32Array.from([0, 1, 0, 0, 0, 0, 0, 0])) },
    { id: 'c', v: l2(Float32Array.from([0.5, 0.5, 0, 0, 0, 0, 0, 0])) },
  ];

  // Same vectors in two layouts: one byte-encoded embedding per node ...
  const byteCandidates = fixtures.map(({ id, v }) => ({
    nodeId: id,
    embedding: vectorToBytes(v),
  }));
  // ... and one flat Float32Array holding each vector at offset row * EMBED_DIM.
  const packed = new Float32Array(fixtures.length * EMBED_DIM);
  const rowIds = fixtures.map(({ id }) => id);
  fixtures.forEach(({ v }, row) => {
    packed.set(v, row * EMBED_DIM);
  });

  const rankedFromBytes = topKByCosine(queryVec, byteCandidates, 3).map((hit) => hit.nodeId);
  const rankedFromMatrix = topKByCosineMatrix(queryVec, packed, rowIds, EMBED_DIM, 3).map(
    (hit) => hit.nodeId,
  );
  expect(rankedFromMatrix).toEqual(rankedFromBytes);
});

// Exercises the cache lifecycle: miss -> hit -> invalidate -> model switch,
// using the number of fetcher invocations as the observable signal.
it('EmbeddingCache returns the same result on hit and miss; invalidate forces refetch', () => {
  const stored = vectorToBytes(l2(Float32Array.from([1, 0, 0, 0, 0, 0, 0, 0])));
  let fetchCount = 0;
  const source = {
    getAllEmbeddings(_model: string) {
      fetchCount += 1;
      return [{ nodeId: 'a', embedding: stored }];
    },
  };

  const cache = new EmbeddingCache();

  // Two reads for the same model: only the first one reaches the fetcher,
  // and the second returns the very same object.
  const first = cache.get(source, 'm');
  const second = cache.get(source, 'm');
  expect(fetchCount).toBe(1);
  expect(first).toBe(second);
  expect(first.ids).toEqual(['a']);
  expect(first.dim).toBe(EMBED_DIM);

  // After an explicit invalidate the next read fetches again.
  cache.invalidate();
  cache.get(source, 'm');
  expect(fetchCount).toBe(2);

  // Switching models also forces a refetch.
  cache.get(source, 'other-model');
  expect(fetchCount).toBe(3);
});

// A row whose vector length differs from the first row's must be left out
// of both the id list and the packed matrix.
it('EmbeddingCache skips rows whose dimension does not match the first row', () => {
  // EMBED_DIM-length vector: establishes the expected dimension.
  const fullWidth = vectorToBytes(l2(Float32Array.from([1, 0, 0, 0, 0, 0, 0, 0])));
  // Different shape: 4-dim vector. Should be skipped.
  const tooShort = Buffer.from(new Float32Array([1, 0, 0, 0]).buffer);

  const rows = [
    { nodeId: 'good', embedding: fullWidth },
    { nodeId: 'bad', embedding: tooShort },
    { nodeId: 'good2', embedding: fullWidth },
  ];
  const source = {
    getAllEmbeddings: (_model: string) => rows,
  };

  const result = new EmbeddingCache().get(source, 'm');

  expect(result.ids).toEqual(['good', 'good2']);
  expect(result.matrix.length).toBe(2 * EMBED_DIM);
  expect(result.dim).toBe(EMBED_DIM);
});

// An empty fetch result is cached like any other: the second read for the
// same model must not invoke the fetcher again.
it('EmbeddingCache returns an empty result without calling the fetcher again on hit', () => {
  let fetchCount = 0;
  const source = {
    getAllEmbeddings(_model: string) {
      fetchCount += 1;
      return [];
    },
  };

  const cache = new EmbeddingCache();
  const initial = cache.get(source, 'm');
  expect(initial.ids).toEqual([]);
  expect(initial.dim).toBe(0);

  cache.get(source, 'm');
  expect(fetchCount).toBe(1);
});
});

describe('CodeGraph hybrid search & similar', () => {
Expand Down
2 changes: 1 addition & 1 deletion __tests__/foundation.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ describe('Database Connection', () => {

const version = db.getSchemaVersion();
expect(version).not.toBeNull();
expect(version?.version).toBe(14);
expect(version?.version).toBe(16);

db.close();
});
Expand Down
148 changes: 148 additions & 0 deletions __tests__/migrations-015-016.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
/**
* Migration 015 (drop idx_co_changes_a) and 016 (split embeddings).
*
* - 015 verifies the redundant `idx_co_changes_a` index is absent on a
* fresh DB while `idx_co_changes_b` is kept (the upgrade path is not
* exercised in this file); the wider PK still covers
* `WHERE file_a = ?` lookups.
* - 016 verifies embeddings move from `symbol_summaries.embedding`
* into a dedicated `symbol_embeddings` table, the old columns
* are dropped, and existing data is preserved verbatim.
*/
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { createDatabase } from '../src/db/sqlite-adapter';
import { runMigrations, getCurrentVersion } from '../src/db/migrations';
import { DatabaseConnection } from '../src/db';

/**
 * Creates a fresh, uniquely named scratch directory under the OS temp dir.
 *
 * @returns Path of the newly created directory.
 */
function tempDir(): string {
  const prefix = path.join(os.tmpdir(), 'codegraph-mig-015-016-');
  return fs.mkdtempSync(prefix);
}

/**
 * Recursively deletes `dir` and everything beneath it.
 * Does nothing when the path does not exist.
 *
 * @param dir Directory to remove.
 */
function cleanup(dir: string): void {
  if (!fs.existsSync(dir)) {
    return;
  }
  fs.rmSync(dir, { recursive: true, force: true });
}

// Migration 015: a freshly initialised database must not carry the
// `idx_co_changes_a` index but must still have `idx_co_changes_b`.
describe('Migration 015 — drop idx_co_changes_a', () => {
  let dir: string;

  // Each test gets its own scratch directory, removed afterwards.
  beforeEach(() => {
    dir = tempDir();
  });
  afterEach(() => {
    cleanup(dir);
  });

  it('fresh DB does not contain idx_co_changes_a, but keeps idx_co_changes_b', () => {
    const db = DatabaseConnection.initialize(path.join(dir, 'fresh.db'));
    try {
      // List every index registered against the co_changes table.
      const rows = db
        .getDb()
        .prepare("SELECT name FROM sqlite_master WHERE type = 'index' AND tbl_name = 'co_changes'")
        .all() as Array<{ name: string }>;
      const indexNames = rows.map(({ name }) => name);

      expect(indexNames).not.toContain('idx_co_changes_a');
      expect(indexNames).toContain('idx_co_changes_b');
    } finally {
      db.close();
    }
  });
});

// Migration 016: embeddings move out of `symbol_summaries` into the
// dedicated `symbol_embeddings` table. Covers both the upgrade path from a
// simulated v14 database and the schema of a freshly initialised database.
describe('Migration 016 — split embeddings into symbol_embeddings table', () => {
  let dir: string;
  // Each test gets its own scratch directory, removed afterwards.
  beforeEach(() => { dir = tempDir(); });
  afterEach(() => cleanup(dir));

  it('moves existing embedding rows; drops the inline columns', () => {
    const dbPath = path.join(dir, 'upgrade.db');
    const adapter = createDatabase(dbPath);

    // Everything after opening the adapter runs under try/finally so the
    // handle is closed even when an assertion throws; otherwise the open
    // database file can outlive the test and interfere with afterEach cleanup.
    try {
      // Simulate a v14 database: just enough of the relevant schema.
      adapter.exec(`
        CREATE TABLE nodes (id TEXT PRIMARY KEY);
        INSERT INTO nodes (id) VALUES ('n1'), ('n2'), ('n3');
        CREATE TABLE symbol_summaries (
          node_id TEXT PRIMARY KEY,
          content_hash TEXT NOT NULL,
          summary TEXT NOT NULL,
          model TEXT NOT NULL,
          generated_at INTEGER NOT NULL,
          embedding BLOB,
          embedding_model TEXT,
          role TEXT,
          role_model TEXT,
          FOREIGN KEY (node_id) REFERENCES nodes(id) ON DELETE CASCADE
        );
        CREATE INDEX idx_summaries_embedding_model ON symbol_summaries(embedding_model);
        CREATE TABLE schema_versions (
          version INTEGER PRIMARY KEY,
          applied_at INTEGER NOT NULL,
          description TEXT
        );
        INSERT INTO schema_versions (version, applied_at, description) VALUES (14, 0, 'v14');
      `);

      // n1 has both summary and embedding; n2 has summary only;
      // n3 has summary + embedding from a stale model — all rows are
      // copied into symbol_embeddings so long as embedding_model is set.
      const buf1 = Buffer.from(new Float32Array([1, 0, 0]).buffer);
      const buf3 = Buffer.from(new Float32Array([0, 1, 0]).buffer);
      adapter.prepare(`
        INSERT INTO symbol_summaries
          (node_id, content_hash, summary, model, generated_at, embedding, embedding_model)
        VALUES
          ('n1', 'h1', 's1', 'chat-m', 100, ?, 'embed-m'),
          ('n2', 'h2', 's2', 'chat-m', 100, NULL, NULL),
          ('n3', 'h3', 's3', 'chat-m', 100, ?, 'old-embed-m')
      `).run(buf1, buf3);

      runMigrations(adapter, getCurrentVersion(adapter));

      // Old columns gone
      const cols = adapter.prepare("PRAGMA table_info('symbol_summaries')").all() as Array<{ name: string }>;
      const colNames = cols.map((c) => c.name);
      expect(colNames).not.toContain('embedding');
      expect(colNames).not.toContain('embedding_model');

      // New table has exactly the rows that had embedding_model set (n2 is absent)
      const moved = adapter
        .prepare('SELECT node_id, embedding_model FROM symbol_embeddings ORDER BY node_id')
        .all() as Array<{ node_id: string; embedding_model: string }>;
      expect(moved).toEqual([
        { node_id: 'n1', embedding_model: 'embed-m' },
        { node_id: 'n3', embedding_model: 'old-embed-m' },
      ]);

      // Embedding bytes preserved verbatim for every migrated row
      const expectedBytes: Array<[string, Buffer]> = [
        ['n1', buf1],
        ['n3', buf3],
      ];
      for (const [nodeId, bytes] of expectedBytes) {
        const row = adapter
          .prepare('SELECT embedding FROM symbol_embeddings WHERE node_id = ?')
          .get(nodeId) as { embedding: Buffer };
        expect(Buffer.from(row.embedding).equals(bytes)).toBe(true);
      }

      // Index on the new table
      const idx = adapter
        .prepare("SELECT name FROM sqlite_master WHERE type = 'index' AND tbl_name = 'symbol_embeddings'")
        .all() as Array<{ name: string }>;
      expect(idx.map((r) => r.name)).toContain('idx_embeddings_model');

      expect(getCurrentVersion(adapter)).toBeGreaterThanOrEqual(16);
    } finally {
      adapter.close();
    }
  });

  it('fresh DB has symbol_embeddings table and no embedding columns on symbol_summaries', () => {
    const db = DatabaseConnection.initialize(path.join(dir, 'fresh.db'));
    try {
      const cols = db.getDb()
        .prepare("PRAGMA table_info('symbol_summaries')")
        .all() as Array<{ name: string }>;
      const colNames = cols.map((c) => c.name);
      expect(colNames).not.toContain('embedding');
      expect(colNames).not.toContain('embedding_model');

      // Exactly one table named symbol_embeddings must exist.
      const tables = db.getDb()
        .prepare("SELECT name FROM sqlite_master WHERE type = 'table' AND name = 'symbol_embeddings'")
        .all() as Array<{ name: string }>;
      expect(tables.length).toBe(1);
    } finally {
      db.close();
    }
  });
});
2 changes: 1 addition & 1 deletion __tests__/pr19-improvements.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ describe('Best-Candidate Resolution', () => {
describe('Schema v2 Migration', () => {
it.skipIf(!HAS_SQLITE)('should have correct current schema version', async () => {
const { CURRENT_SCHEMA_VERSION } = await import('../src/db/migrations');
expect(CURRENT_SCHEMA_VERSION).toBe(14);
expect(CURRENT_SCHEMA_VERSION).toBe(16);
});

it.skipIf(!HAS_SQLITE)('should have migration for version 2', async () => {
Expand Down
Loading