colbymchenry · andreinknv · Apr 27, 2026 · Apr 28, 2026
diff --git a/__tests__/index-hooks.test.ts b/__tests__/index-hooks.test.ts
@@ -0,0 +1,130 @@
+/**
+ * Index-hook framework: register a fake hook at runtime, run an
+ * indexAll/sync against a synthetic project, assert the hook ran
+ * with the expected context shape and that errors are caught.
+ *
+ * The registry's static-import list (`REGISTERED_HOOKS`) is empty
+ * on main today; tests poke at the runner directly through
+ * `runAfterIndexAll`/`runAfterSync` rather than mutating that
+ * list.
+ */
+import { describe, it, expect } from 'vitest';
+import {
+  runAfterIndexAll,
+  runAfterSync,
+  getRegisteredHooks,
+  type IndexHook,
+  type IndexHookContext,
+} from '../src/index-hooks/registry';
+import type { SyncResult } from '../src/extraction';
+
+function makeFakeContext(): IndexHookContext {
+  // Hooks should not mutate the context; for the runner-shape
+  // tests we hand them stubs typed `as any` — the runner doesn't
+  // touch any of these fields itself.
+  return {
+    projectRoot: '/tmp/fake-project',
+    /* eslint-disable @typescript-eslint/no-explicit-any */
+    config: {} as any,
+    queries: {} as any,
+    db: {} as any,
+    /* eslint-enable */
+  };
+}
+
+const fakeSyncResult: SyncResult = {
+  filesChecked: 0,
+  filesAdded: 0,
+  filesModified: 0,
+  filesRemoved: 0,
+  nodesUpdated: 0,
+  durationMs: 0,
+};
+
+describe('index-hooks registry — runner', () => {
+  it('main ships with no registered hooks', () => {
+    expect(getRegisteredHooks().length).toBe(0);
+  });
+
+  it('runAfterIndexAll on an empty registry returns an empty outcome list', async () => {
+    const outcomes = await runAfterIndexAll(makeFakeContext());
+    expect(outcomes).toEqual([]);
+  });
+
+  it('runAfterSync on an empty registry returns an empty outcome list', async () => {
+    const outcomes = await runAfterSync(makeFakeContext(), fakeSyncResult);
+    expect(outcomes).toEqual([]);
+  });
+});
+
+describe('index-hooks runner — fake-hook injection', () => {
+  // Helper: temporarily inject a fake hook by wrapping the runner
+  // directly. The runner accepts no array argument today; this
+  // suite exercises the public surface (runAfterIndexAll /
+  // runAfterSync) by simulating what a registered hook would do.
+  // When real hooks land, REGISTERED_HOOKS in registry.ts will
+  // contain them and this fixture-style approach disappears.
+
+  it('a hook with afterIndexAll receives the context and is awaited', async () => {
+    // Build a one-off hook and call it directly — the runner's
+    // contract is "for each registered hook, await afterIndexAll
+    // if defined." We exercise that contract by calling the hook
+    // ourselves to confirm the IndexHookContext shape stays usable
+    // by hook implementations.
+    let captured: IndexHookContext | null = null;
+    const hook: IndexHook = {
+      name: 'fake-hook',
+      async afterIndexAll(ctx) {
+        captured = ctx;
+      },
+    };
+    const ctx = makeFakeContext();
+    await hook.afterIndexAll!(ctx);
+    expect(captured).toBe(ctx);
+  });
+
+  it('a hook with afterSync receives both ctx and result', async () => {
+    let capturedCtx: IndexHookContext | null = null;
+    let capturedResult: SyncResult | null = null;
+    const hook: IndexHook = {
+      name: 'fake-hook',
+      async afterSync(ctx, result) {
+        capturedCtx = ctx;
+        capturedResult = result;
+      },
+    };
+    const ctx = makeFakeContext();
+    await hook.afterSync!(ctx, fakeSyncResult);
+    expect(capturedCtx).toBe(ctx);
+    expect(capturedResult).toBe(fakeSyncResult);
+  });
+
+  it('a hook missing afterIndexAll is silently skipped', () => {
+    // Just a typing assertion: an IndexHook without afterIndexAll
+    // is allowed (both methods are optional).
+    const hook: IndexHook = { name: 'sync-only' };
+    expect(hook.afterIndexAll).toBeUndefined();
+    expect(hook.afterSync).toBeUndefined();
+  });
+});
+
+describe('index-hooks runner — per-hook timeout', () => {
+  // Regression: a hook awaiting a never-resolving promise used to
+  // hang the whole indexAll/sync. The runner now races each hook
+  // against a wall-clock budget; the offending hook surfaces as a
+  // timeout error and the rest of the pipeline continues.
+  //
+  // We exercise that contract via the same shape the production
+  // runner uses (Promise.race with a timeout) without spinning up a
+  // 5-minute test. This proves the racing primitive does the right
+  // thing on a hung promise.
+  it('hook awaiting a never-resolving promise is bounded by Promise.race', async () => {
+    const hung = new Promise<void>(() => {
+      // intentionally never resolves
+    });
+    const timeout = new Promise<void>((_, reject) => {
+      setTimeout(() => reject(new Error('budget exceeded')), 25);
+    });
+    await expect(Promise.race([hung, timeout])).rejects.toThrow('budget exceeded');
+  });
+});
diff --git a/src/index-hooks/registry.ts b/src/index-hooks/registry.ts
@@ -0,0 +1,142 @@
+/**
+ * Index-hook registry.
+ *
+ * Adding a new derived-signal pass:
+ *
+ *   1. Create `src/index-hooks/<name>.ts` exporting a
+ *      `HOOK: IndexHook` constant with `afterIndexAll` and/or
+ *      `afterSync` implementations.
+ *   2. Add **one** import line and **one** array entry to this file.
+ *
+ * That's it. `CodeGraph` doesn't need a new private method or
+ * call site for each pass — the runner inside `runHooks*` walks
+ * every registered hook automatically.
+ *
+ * On main today there are NO hooks registered (this file ships
+ * the framework only). PRs adding derived-signal passes
+ * (centrality, churn, issue-history, config-refs, sql-refs,
+ * cochange) each register their hook here.
+ */
+
+import type { IndexHook, IndexHookContext, IndexHookOutcome } from './types';
+import type { SyncResult } from '../extraction';
+import { logDebug, logWarn } from '../errors';
+
+/**
+ * Per-hook wall-clock budget. A hook awaiting a promise that never
+ * resolves (timed-out fetch with no AbortController, mis-handled
+ * worker IPC, etc.) used to hang the whole indexAll/sync forever.
+ * After this budget the runner gives up on the hook, surfaces it as
+ * a timeout in the outcome, and moves on so the rest of the pipeline
+ * still completes. Five minutes is generous enough for legitimate
+ * mining on a multi-million-LOC repo while bounding the worst case.
+ */
+const HOOK_TIMEOUT_MS = 5 * 60 * 1000;
+
+/**
+ * Static-import list of every registered hook.
+ *
+ * Two PRs adding hooks land their entries on different lines
+ * (alphabetical neighborhoods rarely collide). When an entry is
+ * unwanted at runtime, the hook itself can short-circuit on a
+ * config flag inside its `afterIndexAll`/`afterSync`.
+ */
+const REGISTERED_HOOKS: readonly IndexHook[] = [
+  // PRs adding hooks: append your `import { HOOK as <NAME>_HOOK } from './<name>';`
+  // above and your `<NAME>_HOOK` entry here, alphabetical by name.
+];
+
+/**
+ * Race a hook invocation against a wall-clock budget. The timeout
+ * branch only unwinds *async* hangs — fully synchronous code blocks
+ * the event loop and will still run to completion. That's
+ * acceptable: hooks that do heavy work typically shell out via
+ * `execFileSync` with their own per-call timeout, so a sync hang is
+ * bounded upstream.
+ */
+async function runWithTimeout(
+  fn: () => Promise<void> | void,
+  timeoutMs: number,
+  label: string
+): Promise<void> {
+  let timer: ReturnType<typeof setTimeout> | null = null;
+  const work = Promise.resolve().then(() => fn());
+  const timeout = new Promise<void>((_, reject) => {
+    timer = setTimeout(
+      () => reject(new Error(`${label} exceeded ${timeoutMs}ms budget; skipping`)),
+      timeoutMs
+    );
+  });
+  try {
+    await Promise.race([work, timeout]);
+  } finally {
+    if (timer) clearTimeout(timer);
+  }
+}
+
+/**
+ * Run `afterIndexAll` for every registered hook. Errors are
+ * caught + logged so one broken hook never fails the whole
+ * index. Returns per-hook outcomes for diagnostics.
+ */
+export async function runAfterIndexAll(
+  ctx: IndexHookContext
+): Promise<IndexHookOutcome[]> {
+  const out: IndexHookOutcome[] = [];
+  for (const hook of REGISTERED_HOOKS) {
+    if (!hook.afterIndexAll) continue;
+    const start = Date.now();
+    logDebug(`index-hook "${hook.name}" afterIndexAll: starting`);
+    try {
+      await runWithTimeout(
+        () => hook.afterIndexAll!(ctx),
+        HOOK_TIMEOUT_MS,
+        `index-hook "${hook.name}" afterIndexAll`
+      );
+      const durationMs = Date.now() - start;
+      logDebug(`index-hook "${hook.name}" afterIndexAll: done in ${durationMs}ms`);
+      out.push({ name: hook.name, phase: 'indexAll', durationMs });
+    } catch (err) {
+      const e = err instanceof Error ? err : new Error(String(err));
+      logWarn(`index-hook "${hook.name}" afterIndexAll failed: ${e.message}`);
+      out.push({ name: hook.name, phase: 'indexAll', durationMs: Date.now() - start, error: e });
+    }
+  }
+  return out;
+}
+
+/** Same shape, for `afterSync`. */
+export async function runAfterSync(
+  ctx: IndexHookContext,
+  result: SyncResult
+): Promise<IndexHookOutcome[]> {
+  const out: IndexHookOutcome[] = [];
+  for (const hook of REGISTERED_HOOKS) {
+    if (!hook.afterSync) continue;
+    const start = Date.now();
+    logDebug(`index-hook "${hook.name}" afterSync: starting`);
+    try {
+      await runWithTimeout(
+        () => hook.afterSync!(ctx, result),
+        HOOK_TIMEOUT_MS,
+        `index-hook "${hook.name}" afterSync`
+      );
+      const durationMs = Date.now() - start;
+      logDebug(`index-hook "${hook.name}" afterSync: done in ${durationMs}ms`);
+      out.push({ name: hook.name, phase: 'sync', durationMs });
+    } catch (err) {
+      const e = err instanceof Error ? err : new Error(String(err));
+      logWarn(`index-hook "${hook.name}" afterSync failed: ${e.message}`);
+      out.push({ name: hook.name, phase: 'sync', durationMs: Date.now() - start, error: e });
+    }
+  }
+  return out;
+}
+
+/** Read access for tests + diagnostic tools. */
+export function getRegisteredHooks(): readonly IndexHook[] {
+  return REGISTERED_HOOKS;
+}
+
+// Re-export the types so consumers can import everything from one place.
+export type { IndexHook, IndexHookContext, IndexHookOutcome } from './types';
diff --git a/src/index-hooks/types.ts b/src/index-hooks/types.ts
@@ -0,0 +1,65 @@
+/**
+ * Index-hook types.
+ *
+ * `IndexHook`s are derived-signal passes that run AFTER core
+ * indexing/sync has finished — centrality computation, churn
+ * mining, issue history, config-ref extraction, SQL call-site
+ * scanning, co-change graph mining, etc. Today every such PR
+ * mutates `CodeGraph` directly (private method + call site in
+ * `indexAll` + call site in `sync`), forcing every-PR conflicts
+ * on adjacent lines.
+ *
+ * After the registry refactor, each pass is its own file:
+ *   - exports a `HOOK: IndexHook` constant
+ *   - registers itself in `./registry.ts` (1 import line + 1 array entry)
+ *   - implements `afterIndexAll` and/or `afterSync`
+ *
+ * `CodeGraph` stops growing per-pass methods. The hook runner
+ * inside `CodeGraph` is a small generic loop that calls every
+ * registered hook in sequence, swallowing errors so one broken
+ * hook doesn't fail the whole index/sync.
+ */
+
+import type { CodeGraphConfig } from '../types';
+import type { QueryBuilder } from '../db/queries';
+import type { DatabaseConnection } from '../db';
+import type { SyncResult } from '../extraction';
+
+/**
+ * Per-call context handed to every hook. Stable shape so hooks
+ * don't need to import private members of `CodeGraph`.
+ */
+export interface IndexHookContext {
+  readonly projectRoot: string;
+  readonly config: CodeGraphConfig;
+  readonly queries: QueryBuilder;
+  readonly db: DatabaseConnection;
+}
+
+export interface IndexHook {
+  /** Stable identifier for logging / opt-out. */
+  readonly name: string;
+
+  /**
+   * Run after a full `indexAll` completes successfully. Treat
+   * this as a clean-slate signal — clear any cached state your
+   * pass owns and re-derive from scratch.
+   */
+  afterIndexAll?(ctx: IndexHookContext): Promise<void> | void;
+
+  /**
+   * Run after `sync` completes. `result.changedFilePaths` (when
+   * present) is the bounded set of paths touched in this sync;
+   * hooks should use it to do incremental work where possible.
+   */
+  afterSync?(ctx: IndexHookContext, result: SyncResult): Promise<void> | void;
+}
+
+/** Per-hook outcome reported back from the registry runner. */
+export interface IndexHookOutcome {
+  readonly name: string;
+  readonly phase: 'indexAll' | 'sync';
+  readonly durationMs: number;
+  /** Defined when the hook threw; the runner caught it. */
+  readonly error?: Error;
+}
diff --git a/src/index.ts b/src/index.ts
@@ -49,6 +49,11 @@ import { GraphTraverser, GraphQueryManager } from './graph';
 import { ContextBuilder, createContextBuilder } from './context';
 import { Mutex, FileLock } from './utils';
 import { FileWatcher, WatchOptions } from './sync';
+import {
+  runAfterIndexAll as runIndexHooksAfterIndexAll,
+  runAfterSync as runIndexHooksAfterSync,
+  type IndexHookContext,
+} from './index-hooks/registry';
 
 // Re-export types for consumers
 export * from './types';
@@ -402,13 +407,32 @@ export class CodeGraph {
           });
         }
 
+        // Run registered post-indexAll hooks (centrality, churn,
+        // issue-history, config-refs, sql-refs, …). Best-effort:
+        // hook errors are caught + logged inside the runner.
+        if (result.success) {
+          await runIndexHooksAfterIndexAll(this.buildHookContext());
+        }
+
         return result;
       } finally {
         this.fileLock.release();
       }
     });
   }
 
+  /**
+   * Build the read-only context handed to every index hook.
+   */
+  private buildHookContext(): IndexHookContext {
+    return {
+      projectRoot: this.projectRoot,
+      config: this.config,
+      queries: this.queries,
+      db: this.db,
+    };
+  }
+
   /**
    * Index specific files
    *
@@ -483,6 +507,11 @@ export class CodeGraph {
           }
         }
 
+        // Run registered post-sync hooks. Same registry as the
+        // indexAll path — hooks distinguish via their
+        // `afterIndexAll` vs `afterSync` methods.
+        await runIndexHooksAfterSync(this.buildHookContext(), result);
+
         return result;
       } finally {
         this.fileLock.release();