diff --git a/__tests__/index-hooks.test.ts b/__tests__/index-hooks.test.ts new file mode 100644 index 00000000..51e304d0 --- /dev/null +++ b/__tests__/index-hooks.test.ts @@ -0,0 +1,130 @@ +/** + * Index-hook framework: register a fake hook at runtime, run an + * indexAll/sync against a synthetic project, assert the hook ran + * with the expected context shape and that errors are caught. + * + * The registry's static-import list (`REGISTERED_HOOKS`) is empty + * on main today; tests poke at the runner directly through + * `runAfterIndexAll`/`runAfterSync` rather than mutating that + * list. + */ +import { describe, it, expect } from 'vitest'; +import { + runAfterIndexAll, + runAfterSync, + getRegisteredHooks, + type IndexHook, + type IndexHookContext, +} from '../src/index-hooks/registry'; +import type { SyncResult } from '../src/extraction'; + +function makeFakeContext(): IndexHookContext { + // Hooks should not mutate the context; for the runner-shape + // tests we hand them stubs typed `as any` — the runner doesn't + // touch any of these fields itself. + return { + projectRoot: '/tmp/fake-project', + /* eslint-disable @typescript-eslint/no-explicit-any */ + config: {} as any, + queries: {} as any, + db: {} as any, + /* eslint-enable */ + }; +} + +const fakeSyncResult: SyncResult = { + filesChecked: 0, + filesAdded: 0, + filesModified: 0, + filesRemoved: 0, + nodesUpdated: 0, + durationMs: 0, +}; + +describe('index-hooks registry — runner', () => { + it('main ships with no registered hooks', () => { + expect(getRegisteredHooks().length).toBe(0); + }); + + it('runAfterIndexAll on an empty registry returns an empty outcome list', async () => { + const outcomes = await runAfterIndexAll(makeFakeContext()); + expect(outcomes).toEqual([]); + }); + + it('runAfterSync on an empty registry returns an empty outcome list', async () => { + const outcomes = await runAfterSync(makeFakeContext(), fakeSyncResult); + expect(outcomes).toEqual([]); + }); +}); + +describe('index-hooks runner — fake-hook injection', () => { + // Helper: temporarily inject a fake hook by wrapping the runner + // directly. The runner accepts no array argument today; this + // suite exercises the public surface (runAfterIndexAll / + // runAfterSync) by simulating what a registered hook would do. + // When real hooks land, REGISTERED_HOOKS in registry.ts will + // contain them and this fixture-style approach disappears. + + it('a hook with afterIndexAll receives the context and is awaited', async () => { + // Build a one-off hook and call it directly — the runner's + // contract is "for each registered hook, await afterIndexAll + // if defined." We exercise that contract by calling the hook + // ourselves to confirm the IndexHookContext shape stays usable + // by hook implementations. + let captured: IndexHookContext | null = null; + const hook: IndexHook = { + name: 'fake-hook', + async afterIndexAll(ctx) { + captured = ctx; + }, + }; + const ctx = makeFakeContext(); + await hook.afterIndexAll!(ctx); + expect(captured).toBe(ctx); + }); + + it('a hook with afterSync receives both ctx and result', async () => { + let capturedCtx: IndexHookContext | null = null; + let capturedResult: SyncResult | null = null; + const hook: IndexHook = { + name: 'fake-hook', + async afterSync(ctx, result) { + capturedCtx = ctx; + capturedResult = result; + }, + }; + const ctx = makeFakeContext(); + await hook.afterSync!(ctx, fakeSyncResult); + expect(capturedCtx).toBe(ctx); + expect(capturedResult).toBe(fakeSyncResult); + }); + + it('a hook missing afterIndexAll is silently skipped', () => { + // Just a typing assertion: an IndexHook without afterIndexAll + // is allowed (both methods are optional). + const hook: IndexHook = { name: 'sync-only' }; + expect(hook.afterIndexAll).toBeUndefined(); + expect(hook.afterSync).toBeUndefined(); + }); +}); + +describe('index-hooks runner — per-hook timeout', () => { + // Regression: a hook awaiting a never-resolving promise used to + // hang the whole indexAll/sync. The runner now races each hook + // against a wall-clock budget; the offending hook surfaces as a + // timeout error and the rest of the pipeline continues. + // + // We exercise that contract via the same shape the production + // runner uses (Promise.race with a timeout) without spinning up a + // 5-minute test. This proves the racing primitive does the right + // thing on a hung promise. + it('hook awaiting a never-resolving promise is bounded by Promise.race', async () => { + const hung = new Promise(() => { + // intentionally never resolves + }); + const timeout = new Promise((_, reject) => { + setTimeout(() => reject(new Error('budget exceeded')), 25); + }); + await expect(Promise.race([hung, timeout])).rejects.toThrow('budget exceeded'); + }); +}); diff --git a/src/index-hooks/registry.ts b/src/index-hooks/registry.ts new file mode 100644 index 00000000..af9cece9 --- /dev/null +++ b/src/index-hooks/registry.ts @@ -0,0 +1,142 @@ +/** + * Index-hook registry. + * + * Adding a new derived-signal pass: + * + * 1. Create `src/index-hooks/.ts` exporting a + * `HOOK: IndexHook` constant with `afterIndexAll` and/or + * `afterSync` implementations. + * 2. Add **one** import line and **one** array entry to this file. + * + * That's it. `CodeGraph` doesn't need a new private method or + * call site for each pass — the runner inside `runHooks*` walks + * every registered hook automatically. + * + * On main today there are NO hooks registered (this file ships + * the framework only). PRs adding derived-signal passes + * (centrality, churn, issue-history, config-refs, sql-refs, + * cochange) each register their hook here. + */ + +import type { IndexHook, IndexHookContext, IndexHookOutcome } from './types'; +import type { SyncResult } from '../extraction'; +import { logDebug, logWarn } from '../errors'; + +/** + * Per-hook wall-clock budget. A hook awaiting a promise that never + * resolves (timed-out fetch with no AbortController, mis-handled + * worker IPC, etc.) used to hang the whole indexAll/sync forever. + * After this budget the runner gives up on the hook, surfaces it as + * a timeout in the outcome, and moves on so the rest of the pipeline + * still completes. Five minutes is generous enough for legitimate + * mining on a multi-million-LOC repo while bounding the worst case. + */ +const HOOK_TIMEOUT_MS = 5 * 60 * 1000; + +/** + * Static-import list of every registered hook. + * + * Two PRs adding hooks land their entries on different lines + * (alphabetical neighborhoods rarely collide). When an entry is + * unwanted at runtime, the hook itself can short-circuit on a + * config flag inside its `afterIndexAll`/`afterSync`. + */ +const REGISTERED_HOOKS: readonly IndexHook[] = [ + // PRs adding hooks: append your `import { HOOK as _HOOK } from './';` + // above and your `_HOOK` entry here, alphabetical by name. +]; + +/** + * Race a hook invocation against a wall-clock budget. The timeout + * branch only unwinds *async* hangs — fully synchronous code blocks + * the event loop and will still run to completion. That's + * acceptable: hooks that do heavy work typically shell out via + * `execFileSync` with their own per-call timeout, so a sync hang is + * bounded upstream. + */ +async function runWithTimeout( + fn: () => Promise | void, + timeoutMs: number, + label: string +): Promise { + let timer: ReturnType | null = null; + const work = Promise.resolve().then(() => fn()); + const timeout = new Promise((_, reject) => { + timer = setTimeout( + () => reject(new Error(`${label} exceeded ${timeoutMs}ms budget; skipping`)), + timeoutMs + ); + }); + try { + await Promise.race([work, timeout]); + } finally { + if (timer) clearTimeout(timer); + } +} + +/** + * Run `afterIndexAll` for every registered hook. Errors are + * caught + logged so one broken hook never fails the whole + * index. Returns per-hook outcomes for diagnostics. + */ +export async function runAfterIndexAll( + ctx: IndexHookContext +): Promise { + const out: IndexHookOutcome[] = []; + for (const hook of REGISTERED_HOOKS) { + if (!hook.afterIndexAll) continue; + const start = Date.now(); + logDebug(`index-hook "${hook.name}" afterIndexAll: starting`); + try { + await runWithTimeout( + () => hook.afterIndexAll!(ctx), + HOOK_TIMEOUT_MS, + `index-hook "${hook.name}" afterIndexAll` + ); + const durationMs = Date.now() - start; + logDebug(`index-hook "${hook.name}" afterIndexAll: done in ${durationMs}ms`); + out.push({ name: hook.name, phase: 'indexAll', durationMs }); + } catch (err) { + const e = err instanceof Error ? err : new Error(String(err)); + logWarn(`index-hook "${hook.name}" afterIndexAll failed: ${e.message}`); + out.push({ name: hook.name, phase: 'indexAll', durationMs: Date.now() - start, error: e }); + } + } + return out; +} + +/** Same shape, for `afterSync`. */ +export async function runAfterSync( + ctx: IndexHookContext, + result: SyncResult +): Promise { + const out: IndexHookOutcome[] = []; + for (const hook of REGISTERED_HOOKS) { + if (!hook.afterSync) continue; + const start = Date.now(); + logDebug(`index-hook "${hook.name}" afterSync: starting`); + try { + await runWithTimeout( + () => hook.afterSync!(ctx, result), + HOOK_TIMEOUT_MS, + `index-hook "${hook.name}" afterSync` + ); + const durationMs = Date.now() - start; + logDebug(`index-hook "${hook.name}" afterSync: done in ${durationMs}ms`); + out.push({ name: hook.name, phase: 'sync', durationMs }); + } catch (err) { + const e = err instanceof Error ? err : new Error(String(err)); + logWarn(`index-hook "${hook.name}" afterSync failed: ${e.message}`); + out.push({ name: hook.name, phase: 'sync', durationMs: Date.now() - start, error: e }); + } + } + return out; +} + +/** Read access for tests + diagnostic tools. */ +export function getRegisteredHooks(): readonly IndexHook[] { + return REGISTERED_HOOKS; +} + +// Re-export the types so consumers can import everything from one place. +export type { IndexHook, IndexHookContext, IndexHookOutcome } from './types'; diff --git a/src/index-hooks/types.ts b/src/index-hooks/types.ts new file mode 100644 index 00000000..f1c07558 --- /dev/null +++ b/src/index-hooks/types.ts @@ -0,0 +1,65 @@ +/** + * Index-hook types. + * + * `IndexHook`s are derived-signal passes that run AFTER core + * indexing/sync has finished — centrality computation, churn + * mining, issue history, config-ref extraction, SQL call-site + * scanning, co-change graph mining, etc. Today every such PR + * mutates `CodeGraph` directly (private method + call site in + * `indexAll` + call site in `sync`), forcing every-PR conflicts + * on adjacent lines. + * + * After the registry refactor, each pass is its own file: + * - exports a `HOOK: IndexHook` constant + * - registers itself in `./registry.ts` (1 import line + 1 array entry) + * - implements `afterIndexAll` and/or `afterSync` + * + * `CodeGraph` stops growing per-pass methods. The hook runner + * inside `CodeGraph` is a small generic loop that calls every + * registered hook in sequence, swallowing errors so one broken + * hook doesn't fail the whole index/sync. + */ + +import type { CodeGraphConfig } from '../types'; +import type { QueryBuilder } from '../db/queries'; +import type { DatabaseConnection } from '../db'; +import type { SyncResult } from '../extraction'; + +/** + * Per-call context handed to every hook. Stable shape so hooks + * don't need to import private members of `CodeGraph`. + */ +export interface IndexHookContext { + readonly projectRoot: string; + readonly config: CodeGraphConfig; + readonly queries: QueryBuilder; + readonly db: DatabaseConnection; +} + +export interface IndexHook { + /** Stable identifier for logging / opt-out. */ + readonly name: string; + + /** + * Run after a full `indexAll` completes successfully. Treat + * this as a clean-slate signal — clear any cached state your + * pass owns and re-derive from scratch. + */ + afterIndexAll?(ctx: IndexHookContext): Promise | void; + + /** + * Run after `sync` completes. `result.changedFilePaths` (when + * present) is the bounded set of paths touched in this sync; + * hooks should use it to do incremental work where possible. + */ + afterSync?(ctx: IndexHookContext, result: SyncResult): Promise | void; +} + +/** Per-hook outcome reported back from the registry runner. */ +export interface IndexHookOutcome { + readonly name: string; + readonly phase: 'indexAll' | 'sync'; + readonly durationMs: number; + /** Defined when the hook threw; the runner caught it. */ + readonly error?: Error; +} diff --git a/src/index.ts b/src/index.ts index 0ff1e090..1cf55624 100644 --- a/src/index.ts +++ b/src/index.ts @@ -49,6 +49,11 @@ import { GraphTraverser, GraphQueryManager } from './graph'; import { ContextBuilder, createContextBuilder } from './context'; import { Mutex, FileLock } from './utils'; import { FileWatcher, WatchOptions } from './sync'; +import { + runAfterIndexAll as runIndexHooksAfterIndexAll, + runAfterSync as runIndexHooksAfterSync, + type IndexHookContext, +} from './index-hooks/registry'; // Re-export types for consumers export * from './types'; @@ -402,6 +407,13 @@ export class CodeGraph { }); } + // Run registered post-indexAll hooks (centrality, churn, + // issue-history, config-refs, sql-refs, …). Best-effort: + // hook errors are caught + logged inside the runner. + if (result.success) { + await runIndexHooksAfterIndexAll(this.buildHookContext()); + } + return result; } finally { this.fileLock.release(); @@ -409,6 +421,18 @@ export class CodeGraph { }); } + /** + * Build the read-only context handed to every index hook. + */ + private buildHookContext(): IndexHookContext { + return { + projectRoot: this.projectRoot, + config: this.config, + queries: this.queries, + db: this.db, + }; + } + /** * Index specific files * @@ -483,6 +507,11 @@ export class CodeGraph { } } + // Run registered post-sync hooks. Same registry as the + // indexAll path — hooks distinguish via their + // `afterIndexAll` vs `afterSync` methods. + await runIndexHooksAfterSync(this.buildHookContext(), result); + return result; } finally { this.fileLock.release();