Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
3ca2f51
feat: index files inside git submodules
andreinknv Apr 26, 2026
05eb7a5
fix: correctness bugs found in audit
andreinknv Apr 26, 2026
4f66d23
fix: defense-in-depth hardening
andreinknv Apr 26, 2026
115d955
fix(sync): detect HEAD-moving git operations to prevent stale index
andreinknv Apr 26, 2026
41b67fa
fix: extraction/resolution accuracy (BOM, comment-strip retry, framew…
andreinknv Apr 26, 2026
064866c
fix: also strip comments before C# minimal-API route regex
andreinknv Apr 26, 2026
8e538bd
fix(db): enforce UNIQUE on edges so INSERT OR IGNORE actually dedupes
andreinknv Apr 26, 2026
98d6c31
fix(scan): honor .codegraphignore on the git fast path
andreinknv Apr 26, 2026
6813d08
feat(search): subword tokens + Porter stemmer + stopword filter for FTS
andreinknv Apr 26, 2026
d28f307
feat(cochange): file-level co-change graph mined from git history
andreinknv Apr 26, 2026
fc03e3b
feat(graph): convention-based tests-as-edges (test file → subject file)
andreinknv Apr 26, 2026
5a45ef2
feat(search): per-file diversification so top-K isn't one class's met…
andreinknv Apr 26, 2026
7b8c279
perf(db): batch node lookups, fix insertNode cache, auto-ANALYZE afte…
andreinknv Apr 26, 2026
1535649
test(watcher): fix fs.watch flake by adding settle delay before file …
andreinknv Apr 26, 2026
0a3b32a
refactor: per-language registry — eliminate cross-PR conflict surface
andreinknv Apr 27, 2026
e43a618
fix(language-registry): TreeSitterExtractor reads from def.grammar.ex…
andreinknv Apr 27, 2026
7a9b997
refactor: per-tool MCP registry — eliminate tools[] + case-switch con…
andreinknv Apr 27, 2026
4b93224
refactor: file-based migrations — eliminate version-collision bug class
andreinknv Apr 27, 2026
20c4a3e
refactor: index-hook framework — eliminate per-pass CodeGraph mutations
andreinknv Apr 27, 2026
6d37c02
Merge branch 'refactor/mcp-tool-registry' into battle-test/all-shipped
andreinknv Apr 27, 2026
53a6358
Merge branch 'refactor/migration-files' into battle-test/all-shipped
andreinknv Apr 27, 2026
d807e23
Merge branch 'refactor/index-hooks' into battle-test/all-shipped
andreinknv Apr 27, 2026
38887ee
feat: PR #112 (centrality + churn + hotspots) on top of refactors
andreinknv Apr 27, 2026
e85ebd0
feat: PR #113 (issue-history) on top of refactors
andreinknv Apr 27, 2026
f8fc536
feat: PR #114 (config-refs) on top of refactors
andreinknv Apr 27, 2026
7c3af0e
feat: PR #115 (sql-refs) on top of refactors
andreinknv Apr 27, 2026
d3120f5
Merge branch 'fix/watcher-test-flake' into battle-test/all-shipped
andreinknv Apr 27, 2026
c3c445d
Merge branch 'fix/edges-unique-constraint' into battle-test/all-shipped
andreinknv Apr 27, 2026
3aef004
Merge branch 'fix/codegraphignore-git-fast-path' into battle-test/all…
andreinknv Apr 27, 2026
e956c33
Merge branch 'fix/sync-detect-head-movement' into battle-test/all-shi…
andreinknv Apr 27, 2026
8d6b9ce
Merge branch 'fix/extraction-resolution-accuracy' into battle-test/al…
andreinknv Apr 27, 2026
125e763
Merge branch 'fix/audit-correctness' into battle-test/all-shipped
andreinknv Apr 27, 2026
a0892e8
Merge branch 'fix/audit-hardening' into battle-test/all-shipped
andreinknv Apr 27, 2026
b65667a
Merge branch 'feat/db-perf-and-cache-fix' into battle-test/all-shipped
andreinknv Apr 27, 2026
2db150d
Merge branch 'feat/fts-search-quality' into battle-test/all-shipped
andreinknv Apr 27, 2026
052f5b0
Merge branch 'feat/search-diversify' into battle-test/all-shipped
andreinknv Apr 27, 2026
b0f09ac
feat: PR #92 (HCL/Terraform) ported onto language registry
andreinknv Apr 27, 2026
b78b99a
feat: PR #94 (R language) ported onto language registry
andreinknv Apr 27, 2026
d91575d
feat: PR #95 (SQL language) ported onto language registry
andreinknv Apr 27, 2026
ea3f628
Merge branch 'feat/index-submodules' into battle-test/all-shipped
andreinknv Apr 27, 2026
8255db1
Merge branch 'feat/cochange-graph' into battle-test/all-shipped
andreinknv Apr 27, 2026
52d85be
Merge branch 'feat/tests-edges' into battle-test/all-shipped
andreinknv Apr 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ SQLite database with:

### Supported Languages

TypeScript, JavaScript, TSX, JSX, Svelte, Python, Go, Rust, Java, C, C++, C#, PHP, Ruby, Swift, Kotlin, Dart, Liquid, Pascal
TypeScript, JavaScript, TSX, JSX, Svelte, Python, Go, Rust, Java, C, C++, C#, PHP, Ruby, Swift, Kotlin, Dart, Liquid, Pascal, R

### Node and Edge Types

Expand Down
134 changes: 134 additions & 0 deletions __tests__/centrality.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import { describe, it, expect } from 'vitest';
import { computePageRank, PR_DAMPING, PR_ITERATIONS } from '../src/centrality';

/** Wrap bare string ids into the minimal `{ id }` node shape computePageRank expects. */
function asNodes(ids: string[]) {
  return Array.from(ids, (id) => ({ id }));
}

describe('computePageRank', () => {
  // Convenience accessors shared by the assertions below.
  const rank = (res: ReturnType<typeof computePageRank>, id: string): number =>
    res.scores.get(id)!;
  const totalMass = (res: ReturnType<typeof computePageRank>): number => {
    let sum = 0;
    res.scores.forEach((v) => {
      sum += v;
    });
    return sum;
  };

  it('returns empty result for an empty graph', () => {
    const result = computePageRank([], []);
    expect(result.scores.size).toBe(0);
    expect(result.iterations).toBe(0);
  });

  it('assigns uniform rank to N isolated nodes', () => {
    const result = computePageRank(asNodes(['a', 'b', 'c', 'd']), []);
    expect(result.scores.size).toBe(4);
    // 4 isolated nodes — all dangling — should each end up with 1/N.
    result.scores.forEach((score) => {
      expect(score).toBeCloseTo(0.25, 6);
    });
  });

  it('rewards being reached (sinks accumulate rank)', () => {
    // a -> b -> c. c has no outgoing, so it accumulates the most.
    const result = computePageRank(asNodes(['a', 'b', 'c']), [
      { source: 'a', target: 'b' },
      { source: 'b', target: 'c' },
    ]);
    expect(rank(result, 'c')).toBeGreaterThan(rank(result, 'b'));
    expect(rank(result, 'b')).toBeGreaterThan(rank(result, 'a'));
  });

  it('star: hub ranks above all leaves; leaves are equal', () => {
    const leaves = ['l1', 'l2', 'l3', 'l4', 'l5', 'l6', 'l7', 'l8', 'l9'];
    const result = computePageRank(
      asNodes([...leaves, 'hub']),
      leaves.map((leaf) => ({ source: leaf, target: 'hub' }))
    );
    const hub = rank(result, 'hub');
    leaves.forEach((leaf) => {
      expect(hub).toBeGreaterThan(rank(result, leaf));
    });
    // Leaves are symmetric — should be within 1e-9.
    const reference = rank(result, leaves[0]);
    leaves.slice(1).forEach((leaf) => {
      expect(rank(result, leaf)).toBeCloseTo(reference, 9);
    });
  });

  it('cycle: all nodes have approximately equal rank', () => {
    const result = computePageRank(asNodes(['a', 'b', 'c']), [
      { source: 'a', target: 'b' },
      { source: 'b', target: 'c' },
      { source: 'c', target: 'a' },
    ]);
    // Symmetric → all equal at convergence.
    expect(rank(result, 'a')).toBeCloseTo(rank(result, 'b'), 6);
    expect(rank(result, 'b')).toBeCloseTo(rank(result, 'c'), 6);
  });

  it('total rank sums to ~1 (mass is conserved)', () => {
    const result = computePageRank(asNodes(['a', 'b', 'c', 'd', 'e']), [
      { source: 'a', target: 'b' },
      { source: 'b', target: 'c' },
      { source: 'd', target: 'c' },
      { source: 'e', target: 'd' },
      { source: 'a', target: 'e' },
    ]);
    expect(totalMass(result)).toBeCloseTo(1, 6);
  });

  it('preserves mass across two disconnected components', () => {
    const result = computePageRank(asNodes(['a', 'b', 'c', 'd']), [
      { source: 'a', target: 'b' },
      { source: 'c', target: 'd' },
    ]);
    expect(totalMass(result)).toBeCloseTo(1, 6);
    // Within each component, the sink ranks above the source.
    expect(rank(result, 'b')).toBeGreaterThan(rank(result, 'a'));
    expect(rank(result, 'd')).toBeGreaterThan(rank(result, 'c'));
  });

  it('drops edges referencing unknown nodes', () => {
    // 'ghost' is not in the node set — that edge should be ignored,
    // not crash and not pollute scores.
    const result = computePageRank(asNodes(['a', 'b']), [
      { source: 'a', target: 'b' },
      { source: 'a', target: 'ghost' },
      { source: 'ghost', target: 'b' },
    ]);
    expect(result.scores.size).toBe(2);
    expect(rank(result, 'b')).toBeGreaterThan(rank(result, 'a'));
    expect(totalMass(result)).toBeCloseTo(1, 6);
  });

  it('reports iteration count and duration', () => {
    const result = computePageRank(asNodes(['a', 'b']), [{ source: 'a', target: 'b' }]);
    expect(result.iterations).toBe(PR_ITERATIONS);
    expect(result.durationMs).toBeGreaterThanOrEqual(0);
  });

  it('damping constant is the textbook 0.85', () => {
    // Sentinel — protects against accidental tuning that would invalidate
    // the spike findings the PR was justified on.
    expect(PR_DAMPING).toBe(0.85);
  });
});
208 changes: 208 additions & 0 deletions __tests__/churn.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { execFileSync } from 'child_process';
import {
mineChurn,
getGitHead,
readFileLoc,
MAX_FILES_PER_COMMIT,
LAST_MINED_CHURN_HEAD_KEY,
} from '../src/churn';

// Probe once for a usable `git` binary; the git-backed suites below are
// skipped entirely when it is absent.
let HAS_GIT = (() => {
  try {
    execFileSync('git', ['--version'], { stdio: 'ignore' });
    return true;
  } catch {
    return false;
  }
})();

// Per-test scratch repository, created in beforeEach and removed in afterEach.
let tempDir: string;

/**
 * Run `git` inside the scratch repo with a pinned, deterministic identity.
 * Author/committer dates are forwarded from process.env so commitAt() can
 * pin a timestamp per commit.
 */
function git(...args: string[]): string {
  const env = {
    ...process.env,
    GIT_AUTHOR_NAME: 'Test',
    GIT_AUTHOR_EMAIL: 'test@example.com',
    GIT_COMMITTER_NAME: 'Test',
    GIT_COMMITTER_EMAIL: 'test@example.com',
    GIT_AUTHOR_DATE: process.env.GIT_AUTHOR_DATE,
    GIT_COMMITTER_DATE: process.env.GIT_COMMITTER_DATE,
  };
  const output = execFileSync('git', args, {
    cwd: tempDir,
    encoding: 'utf-8',
    env,
    stdio: ['pipe', 'pipe', 'pipe'],
  });
  return output.trim();
}

/**
 * Write the given paths and commit them with both author and committer
 * dates pinned to `date`, so mined timestamps are deterministic.
 *
 * The date env vars are cleared in a `finally` so a failing `git commit`
 * cannot leak a pinned date into subsequent commits within the same test
 * (previously they were only deleted on the success path).
 */
function commitAt(date: string, paths: string[], content?: string) {
  for (const p of paths) {
    const abs = path.join(tempDir, p);
    fs.mkdirSync(path.dirname(abs), { recursive: true });
    fs.writeFileSync(abs, content ?? `data for ${p} at ${date}\n`);
  }
  git('add', ...paths);
  // Pin both author and committer dates so timestamps are deterministic.
  process.env.GIT_AUTHOR_DATE = date;
  process.env.GIT_COMMITTER_DATE = date;
  try {
    git('commit', '-m', `commit at ${date}`);
  } finally {
    delete process.env.GIT_AUTHOR_DATE;
    delete process.env.GIT_COMMITTER_DATE;
  }
}

beforeEach(() => {
  // Fresh scratch directory per test; repo initialization is skipped when
  // no git binary is available (those suites are skipIf'd anyway).
  tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-churn-'));
  if (!HAS_GIT) return;
  git('init', '-q', '-b', 'main');
  git('config', 'commit.gpgsign', 'false');
});

afterEach(() => {
  // Belt-and-braces: clear any pinned dates a failing test left behind,
  // then remove the scratch repo.
  for (const key of ['GIT_AUTHOR_DATE', 'GIT_COMMITTER_DATE']) {
    delete process.env[key];
  }
  fs.rmSync(tempDir, { recursive: true, force: true });
});

describe.skipIf(!HAS_GIT)('mineChurn', () => {
  it('returns empty + null head when not in a git repo', () => {
    const nonGit = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-nogit-'));
    try {
      const r = mineChurn(nonGit, new Set(['foo.ts']), null);
      expect(r.currentHead).toBeNull();
      expect(r.deltas.size).toBe(0);
      expect(r.needsFullRescan).toBe(false);
    } finally {
      fs.rmSync(nonGit, { recursive: true, force: true });
    }
  });

  it('counts commits per indexed file, ignores files not in index', () => {
    // Only the commit *count* matters here, so local-time dates are fine.
    commitAt('2025-01-01T00:00:00', ['a.ts', 'b.ts']);
    commitAt('2025-01-02T00:00:00', ['a.ts']);
    commitAt('2025-01-03T00:00:00', ['a.ts', 'b.ts', 'c.ts']);

    const r = mineChurn(tempDir, new Set(['a.ts', 'b.ts']), null);
    expect(r.deltas.get('a.ts')?.commitCountDelta).toBe(3);
    expect(r.deltas.get('b.ts')?.commitCountDelta).toBe(2);
    // c.ts was committed but is not in the indexed set — no delta.
    expect(r.deltas.has('c.ts')).toBe(false);
  });

  it('records first-seen / last-touched as min/max of commit timestamps', () => {
    commitAt('2025-01-01T00:00:00Z', ['a.ts']);
    commitAt('2025-06-01T00:00:00Z', ['a.ts']);
    commitAt('2025-12-01T00:00:00Z', ['a.ts']);

    const r = mineChurn(tempDir, new Set(['a.ts']), null);
    const d = r.deltas.get('a.ts')!;
    // 2025-01-01 UTC = 1735689600
    expect(d.firstSeenTs).toBe(1735689600);
    // 2025-12-01 UTC = 1764547200
    expect(d.lastTouchedTs).toBe(1764547200);
  });

  it('skips commits touching more than MAX_FILES_PER_COMMIT files', () => {
    const bigBatch: string[] = [];
    for (let i = 0; i < MAX_FILES_PER_COMMIT + 1; i++) bigBatch.push(`f${i}.ts`);
    commitAt('2025-01-01T00:00:00Z', bigBatch);
    // Then a normal commit on one of the same files.
    commitAt('2025-02-01T00:00:00Z', ['f0.ts']);

    const r = mineChurn(tempDir, new Set(bigBatch), null);
    // First commit was skipped; only the second one should count.
    expect(r.deltas.get('f0.ts')?.commitCountDelta).toBe(1);
    // Files only seen in the skipped commit produce no delta at all.
    // Use the batch's last file (f{MAX_FILES_PER_COMMIT}) rather than a
    // hard-coded name so the assertion stays meaningful if the constant
    // is ever tuned.
    expect(r.deltas.has(`f${MAX_FILES_PER_COMMIT}.ts`)).toBe(false);
  });

  it('incremental mining returns only commits since the given sha', () => {
    commitAt('2025-01-01T00:00:00Z', ['a.ts']);
    const sha1 = getGitHead(tempDir)!;
    commitAt('2025-01-02T00:00:00Z', ['a.ts']);
    commitAt('2025-01-03T00:00:00Z', ['a.ts']);

    const incr = mineChurn(tempDir, new Set(['a.ts']), sha1);
    // Only the two commits *after* sha1 should be counted.
    expect(incr.deltas.get('a.ts')?.commitCountDelta).toBe(2);
    expect(incr.needsFullRescan).toBe(false);
  });

  it('returns needsFullRescan=true when sinceSha is unreachable', () => {
    commitAt('2025-01-01T00:00:00Z', ['a.ts']);
    // All-zero sha cannot exist in the repo → history walk must fail safe.
    const fakeSha = '0'.repeat(40);
    const r = mineChurn(tempDir, new Set(['a.ts']), fakeSha);
    expect(r.needsFullRescan).toBe(true);
    expect(r.deltas.size).toBe(0);
    expect(r.currentHead).not.toBeNull();
  });

  it('returns empty deltas when sinceSha equals current head (no-op)', () => {
    commitAt('2025-01-01T00:00:00Z', ['a.ts']);
    const head = getGitHead(tempDir)!;
    const r = mineChurn(tempDir, new Set(['a.ts']), head);
    expect(r.currentHead).toBe(head);
    expect(r.deltas.size).toBe(0);
    expect(r.needsFullRescan).toBe(false);
  });

  it('handles paths with spaces and unicode safely (NUL-delimited)', () => {
    commitAt('2025-01-01T00:00:00Z', ['name with space.ts']);
    commitAt('2025-01-02T00:00:00Z', ['ünïcødë.ts']);

    const r = mineChurn(
      tempDir,
      new Set(['name with space.ts', 'ünïcødë.ts']),
      null
    );
    expect(r.deltas.get('name with space.ts')?.commitCountDelta).toBe(1);
    expect(r.deltas.get('ünïcødë.ts')?.commitCountDelta).toBe(1);
  });

  it('LAST_MINED_CHURN_HEAD_KEY is stable (used as project_metadata key)', () => {
    // Sentinel: this string is persisted in the DB; renaming it would
    // silently orphan previously stored state.
    expect(LAST_MINED_CHURN_HEAD_KEY).toBe('last_mined_churn_head');
  });
});

describe('readFileLoc', () => {
  /** Run `fn` inside a throwaway directory that is always cleaned up. */
  function inTempDir(fn: (dir: string) => void): void {
    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-loc-'));
    try {
      fn(dir);
    } finally {
      fs.rmSync(dir, { recursive: true, force: true });
    }
  }

  it('returns 0 for an empty file', () => {
    inTempDir((dir) => {
      fs.writeFileSync(path.join(dir, 'empty.txt'), '');
      expect(readFileLoc(dir, 'empty.txt')).toBe(0);
    });
  });

  it('counts newline-terminated lines', () => {
    inTempDir((dir) => {
      fs.writeFileSync(path.join(dir, 'x.txt'), 'a\nb\nc\n');
      expect(readFileLoc(dir, 'x.txt')).toBe(3);
    });
  });

  it('counts a final no-newline chunk as one extra line', () => {
    inTempDir((dir) => {
      fs.writeFileSync(path.join(dir, 'x.txt'), 'a\nb\nc');
      expect(readFileLoc(dir, 'x.txt')).toBe(3);
    });
  });

  it('returns 0 for a missing file (does not throw)', () => {
    inTempDir((dir) => {
      expect(readFileLoc(dir, 'no-such-file.txt')).toBe(0);
    });
  });
});
Loading