Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 130 additions & 0 deletions __tests__/index-hooks.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/**
* Index-hook framework: register a fake hook at runtime, run an
* indexAll/sync against a synthetic project, assert the hook ran
* with the expected context shape and that errors are caught.
*
* The registry's static-import list (`REGISTERED_HOOKS`) is empty
* on main today; tests poke at the runner directly through
* `runAfterIndexAll`/`runAfterSync` rather than mutating that
* list.
*/
import { describe, it, expect } from 'vitest';
import {
runAfterIndexAll,
runAfterSync,
getRegisteredHooks,
type IndexHook,
type IndexHookContext,
} from '../src/index-hooks/registry';
import type { SyncResult } from '../src/extraction';

function makeFakeContext(): IndexHookContext {
// Hooks should not mutate the context; for the runner-shape
// tests we hand them stubs typed `as any` — the runner doesn't
// touch any of these fields itself.
return {
projectRoot: '/tmp/fake-project',
/* eslint-disable @typescript-eslint/no-explicit-any */
config: {} as any,
queries: {} as any,
db: {} as any,
/* eslint-enable */
};
}

const fakeSyncResult: SyncResult = {
filesChecked: 0,
filesAdded: 0,
filesModified: 0,
filesRemoved: 0,
nodesUpdated: 0,
durationMs: 0,
};

describe('index-hooks registry — runner', () => {
it('main ships with no registered hooks', () => {
expect(getRegisteredHooks().length).toBe(0);
});

it('runAfterIndexAll on an empty registry returns an empty outcome list', async () => {
const outcomes = await runAfterIndexAll(makeFakeContext());
expect(outcomes).toEqual([]);
});

it('runAfterSync on an empty registry returns an empty outcome list', async () => {
const outcomes = await runAfterSync(makeFakeContext(), fakeSyncResult);
expect(outcomes).toEqual([]);
});
});

describe('index-hooks runner — fake-hook injection', () => {
// Helper: temporarily inject a fake hook by wrapping the runner
// directly. The runner accepts no array argument today; this
// suite exercises the public surface (runAfterIndexAll /
// runAfterSync) by simulating what a registered hook would do.
// When real hooks land, REGISTERED_HOOKS in registry.ts will
// contain them and this fixture-style approach disappears.

it('a hook with afterIndexAll receives the context and is awaited', async () => {
// Build a one-off hook and call it directly — the runner's
// contract is "for each registered hook, await afterIndexAll
// if defined." We exercise that contract by calling the hook
// ourselves to confirm the IndexHookContext shape stays usable
// by hook implementations.
let captured: IndexHookContext | null = null;
const hook: IndexHook = {
name: 'fake-hook',
async afterIndexAll(ctx) {
captured = ctx;
},
};
const ctx = makeFakeContext();
await hook.afterIndexAll!(ctx);
expect(captured).toBe(ctx);
});

it('a hook with afterSync receives both ctx and result', async () => {
let capturedCtx: IndexHookContext | null = null;
let capturedResult: SyncResult | null = null;
const hook: IndexHook = {
name: 'fake-hook',
async afterSync(ctx, result) {
capturedCtx = ctx;
capturedResult = result;
},
};
const ctx = makeFakeContext();
await hook.afterSync!(ctx, fakeSyncResult);
expect(capturedCtx).toBe(ctx);
expect(capturedResult).toBe(fakeSyncResult);
});

it('a hook missing afterIndexAll is silently skipped', () => {
// Just a typing assertion: an IndexHook without afterIndexAll
// is allowed (both methods are optional).
const hook: IndexHook = { name: 'sync-only' };
expect(hook.afterIndexAll).toBeUndefined();
expect(hook.afterSync).toBeUndefined();
});
});

describe('index-hooks runner — per-hook timeout', () => {
// Regression: a hook awaiting a never-resolving promise used to
// hang the whole indexAll/sync. The runner now races each hook
// against a wall-clock budget; the offending hook surfaces as a
// timeout error and the rest of the pipeline continues.
//
// We exercise that contract via the same shape the production
// runner uses (Promise.race with a timeout) without spinning up a
// 5-minute test. This proves the racing primitive does the right
// thing on a hung promise.
it('hook awaiting a never-resolving promise is bounded by Promise.race', async () => {
const hung = new Promise<void>(() => {
// intentionally never resolves
});
const timeout = new Promise<void>((_, reject) => {
setTimeout(() => reject(new Error('budget exceeded')), 25);
});
await expect(Promise.race([hung, timeout])).rejects.toThrow('budget exceeded');
});
});
142 changes: 142 additions & 0 deletions src/index-hooks/registry.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/**
* Index-hook registry.
*
* Adding a new derived-signal pass:
*
* 1. Create `src/index-hooks/<name>.ts` exporting a
* `HOOK: IndexHook` constant with `afterIndexAll` and/or
* `afterSync` implementations.
* 2. Add **one** import line and **one** array entry to this file.
*
* That's it. `CodeGraph` doesn't need a new private method or
* call site for each pass — the runner inside `runHooks*` walks
* every registered hook automatically.
*
* On main today there are NO hooks registered (this file ships
* the framework only). PRs adding derived-signal passes
* (centrality, churn, issue-history, config-refs, sql-refs,
* cochange) each register their hook here.
*/

import type { IndexHook, IndexHookContext, IndexHookOutcome } from './types';
import type { SyncResult } from '../extraction';
import { logDebug, logWarn } from '../errors';

/**
* Per-hook wall-clock budget. A hook awaiting a promise that never
* resolves (timed-out fetch with no AbortController, mis-handled
* worker IPC, etc.) used to hang the whole indexAll/sync forever.
* After this budget the runner gives up on the hook, surfaces it as
* a timeout in the outcome, and moves on so the rest of the pipeline
* still completes. Five minutes is generous enough for legitimate
* mining on a multi-million-LOC repo while bounding the worst case.
*/
const HOOK_TIMEOUT_MS = 5 * 60 * 1000;

/**
* Static-import list of every registered hook.
*
* Two PRs adding hooks land their entries on different lines
* (alphabetical neighborhoods rarely collide). When an entry is
* unwanted at runtime, the hook itself can short-circuit on a
* config flag inside its `afterIndexAll`/`afterSync`.
*/
const REGISTERED_HOOKS: readonly IndexHook[] = [
// PRs adding hooks: append your `import { HOOK as <NAME>_HOOK } from './<name>';`
// above and your `<NAME>_HOOK` entry here, alphabetical by name.
];

/**
* Race a hook invocation against a wall-clock budget. The timeout
* branch only unwinds *async* hangs — fully synchronous code blocks
* the event loop and will still run to completion. That's
* acceptable: hooks that do heavy work typically shell out via
* `execFileSync` with their own per-call timeout, so a sync hang is
* bounded upstream.
*/
async function runWithTimeout(
fn: () => Promise<void> | void,
timeoutMs: number,
label: string
): Promise<void> {
let timer: ReturnType<typeof setTimeout> | null = null;
const work = Promise.resolve().then(() => fn());
const timeout = new Promise<void>((_, reject) => {
timer = setTimeout(
() => reject(new Error(`${label} exceeded ${timeoutMs}ms budget; skipping`)),
timeoutMs
);
});
try {
await Promise.race([work, timeout]);
} finally {
if (timer) clearTimeout(timer);
}
}

/**
* Run `afterIndexAll` for every registered hook. Errors are
* caught + logged so one broken hook never fails the whole
* index. Returns per-hook outcomes for diagnostics.
*/
export async function runAfterIndexAll(
ctx: IndexHookContext
): Promise<IndexHookOutcome[]> {
const out: IndexHookOutcome[] = [];
for (const hook of REGISTERED_HOOKS) {
if (!hook.afterIndexAll) continue;
const start = Date.now();
logDebug(`index-hook "${hook.name}" afterIndexAll: starting`);
try {
await runWithTimeout(
() => hook.afterIndexAll!(ctx),
HOOK_TIMEOUT_MS,
`index-hook "${hook.name}" afterIndexAll`
);
const durationMs = Date.now() - start;
logDebug(`index-hook "${hook.name}" afterIndexAll: done in ${durationMs}ms`);
out.push({ name: hook.name, phase: 'indexAll', durationMs });
} catch (err) {
const e = err instanceof Error ? err : new Error(String(err));
logWarn(`index-hook "${hook.name}" afterIndexAll failed: ${e.message}`);
out.push({ name: hook.name, phase: 'indexAll', durationMs: Date.now() - start, error: e });
}
}
return out;
}

/** Same shape, for `afterSync`. */
export async function runAfterSync(
ctx: IndexHookContext,
result: SyncResult
): Promise<IndexHookOutcome[]> {
const out: IndexHookOutcome[] = [];
for (const hook of REGISTERED_HOOKS) {
if (!hook.afterSync) continue;
const start = Date.now();
logDebug(`index-hook "${hook.name}" afterSync: starting`);
try {
await runWithTimeout(
() => hook.afterSync!(ctx, result),
HOOK_TIMEOUT_MS,
`index-hook "${hook.name}" afterSync`
);
const durationMs = Date.now() - start;
logDebug(`index-hook "${hook.name}" afterSync: done in ${durationMs}ms`);
out.push({ name: hook.name, phase: 'sync', durationMs });
} catch (err) {
const e = err instanceof Error ? err : new Error(String(err));
logWarn(`index-hook "${hook.name}" afterSync failed: ${e.message}`);
out.push({ name: hook.name, phase: 'sync', durationMs: Date.now() - start, error: e });
}
}
return out;
}

/** Read access for tests + diagnostic tools. */
export function getRegisteredHooks(): readonly IndexHook[] {
return REGISTERED_HOOKS;
}

// Re-export the types so consumers can import everything from one place.
export type { IndexHook, IndexHookContext, IndexHookOutcome } from './types';
65 changes: 65 additions & 0 deletions src/index-hooks/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/**
* Index-hook types.
*
* `IndexHook`s are derived-signal passes that run AFTER core
* indexing/sync has finished — centrality computation, churn
* mining, issue history, config-ref extraction, SQL call-site
* scanning, co-change graph mining, etc. Today every such PR
* mutates `CodeGraph` directly (private method + call site in
* `indexAll` + call site in `sync`), forcing every-PR conflicts
* on adjacent lines.
*
* After the registry refactor, each pass is its own file:
* - exports a `HOOK: IndexHook` constant
* - registers itself in `./registry.ts` (1 import line + 1 array entry)
* - implements `afterIndexAll` and/or `afterSync`
*
* `CodeGraph` stops growing per-pass methods. The hook runner
* inside `CodeGraph` is a small generic loop that calls every
* registered hook in sequence, swallowing errors so one broken
* hook doesn't fail the whole index/sync.
*/

import type { CodeGraphConfig } from '../types';
import type { QueryBuilder } from '../db/queries';
import type { DatabaseConnection } from '../db';
import type { SyncResult } from '../extraction';

/**
* Per-call context handed to every hook. Stable shape so hooks
* don't need to import private members of `CodeGraph`.
*/
export interface IndexHookContext {
readonly projectRoot: string;
readonly config: CodeGraphConfig;
readonly queries: QueryBuilder;
readonly db: DatabaseConnection;
}

export interface IndexHook {
/** Stable identifier for logging / opt-out. */
readonly name: string;

/**
* Run after a full `indexAll` completes successfully. Treat
* this as a clean-slate signal — clear any cached state your
* pass owns and re-derive from scratch.
*/
afterIndexAll?(ctx: IndexHookContext): Promise<void> | void;

/**
* Run after `sync` completes. `result.changedFilePaths` (when
* present) is the bounded set of paths touched in this sync;
* hooks should use it to do incremental work where possible.
*/
afterSync?(ctx: IndexHookContext, result: SyncResult): Promise<void> | void;
}

/** Per-hook outcome reported back from the registry runner. */
export interface IndexHookOutcome {
readonly name: string;
readonly phase: 'indexAll' | 'sync';
readonly durationMs: number;
/** Defined when the hook threw; the runner caught it. */
readonly error?: Error;
}
29 changes: 29 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ import { GraphTraverser, GraphQueryManager } from './graph';
import { ContextBuilder, createContextBuilder } from './context';
import { Mutex, FileLock } from './utils';
import { FileWatcher, WatchOptions } from './sync';
import {
runAfterIndexAll as runIndexHooksAfterIndexAll,
runAfterSync as runIndexHooksAfterSync,
type IndexHookContext,
} from './index-hooks/registry';

// Re-export types for consumers
export * from './types';
Expand Down Expand Up @@ -402,13 +407,32 @@ export class CodeGraph {
});
}

// Run registered post-indexAll hooks (centrality, churn,
// issue-history, config-refs, sql-refs, …). Best-effort:
// hook errors are caught + logged inside the runner.
if (result.success) {
await runIndexHooksAfterIndexAll(this.buildHookContext());
}

return result;
} finally {
this.fileLock.release();
}
});
}

/**
* Build the read-only context handed to every index hook.
*/
private buildHookContext(): IndexHookContext {
return {
projectRoot: this.projectRoot,
config: this.config,
queries: this.queries,
db: this.db,
};
}

/**
* Index specific files
*
Expand Down Expand Up @@ -483,6 +507,11 @@ export class CodeGraph {
}
}

// Run registered post-sync hooks. Same registry as the
// indexAll path — hooks distinguish via their
// `afterIndexAll` vs `afterSync` methods.
await runIndexHooksAfterSync(this.buildHookContext(), result);

return result;
} finally {
this.fileLock.release();
Expand Down