diff --git a/docs/concepts/activity-and-traces.mdx b/docs/concepts/activity-and-traces.mdx index 5e50588..56ba37e 100644 --- a/docs/concepts/activity-and-traces.mdx +++ b/docs/concepts/activity-and-traces.mdx @@ -198,9 +198,15 @@ WAL, online prune doesn't block live writes for long. no `llm_exchange` entries — the proxy doesn't speak HPACK yet. In practice the Anthropic SDK defaults to HTTP/1.1 for `/v1/messages`, so this is rarely hit. -- **Anthropic parser only.** OpenAI / Gemini / Mistral land as +- **Anthropic parser only.** OpenAI / Azure OpenAI traffic IS + captured (those hosts are on the MITM allowlist) but lands as `opaque_http`. Codex traces today fall in this bucket — adding - typed parsers is a follow-up. + typed parsers for OpenAI's Chat Completions / Responses APIs + is a follow-up. Gemini / Mistral / Bedrock are not currently + on the allowlist — their traffic passes through unmodified + and produces no activity events. See + [tracing.mdx → Host allowlist](/docs/tracing#host-allowlist) + for how to extend. - **Uploader queue cap.** The uploader caps in-flight at 1000 events / 1 MB and evicts oldest-first under sustained broker unreachability. Events dropped here won't appear later. diff --git a/docs/tracing.mdx b/docs/tracing.mdx index 88cfffb..78a0bdc 100644 --- a/docs/tracing.mdx +++ b/docs/tracing.mdx @@ -15,13 +15,30 @@ without embedding observability hooks into the agent itself. The runner runs **upstream** of the agent process and intercepts its network traffic at the TLS layer via a **loopback MITM TLS -proxy** with a per-session local CA. Every HTTPS request the -agent makes is transparently decrypted by the proxy, observed as -plaintext, re-encrypted toward the real upstream, and passed -through. From the upstream's point of view we are a normal TLS -client doing standard SNI + cert validation — it can't tell us -apart from any other user-agent, which means OAuth flows, token -refreshes, streaming responses, and SSE all work identically. 
+proxy** with a per-session local CA. HTTPS requests to **known +LLM-provider hosts** (Anthropic, OpenAI, Azure OpenAI — see the +allowlist below) are transparently decrypted by the proxy, +observed as plaintext, re-encrypted toward the real upstream, +and passed through. From the upstream's point of view we are a +normal TLS client doing standard SNI + cert validation — it can't +tell us apart from any other user-agent, which means OAuth flows, +token refreshes, streaming responses, and SSE all work +identically. + +Traffic to any **other** host (GitHub, npm, package registries, +agent-spawned `curl`/`git`/`python` calls, telemetry endpoints, +etc.) is **not** decrypted. The proxy still routes those +connections — it has to, because `HTTPS_PROXY` is set on the +agent child — but it passes the TCP bytes through as a raw +tunnel. The agent's TLS client negotiates directly with the real +upstream cert and the agent's system trust store is what +validates it; the runner never sees plaintext. + +This is the honest privacy claim: **ac7 only decrypts traffic to +LLM provider hosts on a maintained allowlist.** Adding a host +requires editing +[`known-hosts.ts`](https://github.com/anthropics/agentc7) (and +ideally a parser for whatever shape lives behind it). Zero external tools. No tshark. No pcap. No SSLKEYLOGFILE shenanigans. Just Node's built-in `crypto` + `tls` + a small @@ -124,6 +141,39 @@ Node-only and would confuse reqwest. For codex specifics see [runners/codex](/docs/runners/codex). +## The host allowlist + +The proxy is host-agnostic: every CONNECT lands in the same code +path. The decision of "decrypt this session" vs "pass it through +unmodified" is gated by a single predicate, `isKnownLlmHost`, +exported from `runtime/trace/known-hosts.ts`. Production wires +that predicate into the proxy at startup; tests can substitute +any predicate they want to drive both code paths. 
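A minimal sketch of what such a predicate might look like — the shape is assumed, not the real `known-hosts.ts` source; it uses the three patterns listed just below, and the names `isKnownLlmHost` / `KNOWN_LLM_HOST_PATTERNS` are the ones this patch references elsewhere:

```typescript
// Sketch of the allowlist predicate — assumed shape, not the actual
// module. Patterns are anchored at the apex, so lookalike hosts
// (e.g. "evilanthropic.com") do not match.
const KNOWN_LLM_HOST_PATTERNS: readonly RegExp[] = [
  /(?:^|\.)anthropic\.com$/i,
  /(?:^|\.)openai\.com$/i,
  /(?:^|\.)openai\.azure\.com$/i,
];

export function isKnownLlmHost(host: string): boolean {
  return KNOWN_LLM_HOST_PATTERNS.some((re) => re.test(host));
}
```

The proxy would consult this once per CONNECT: `true` routes the session into TLS termination, `false` into the raw pass-through tunnel.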
+ +Current allowlist patterns (regex, case-insensitive, +anchored against the apex): + +``` +(?:^|\.)anthropic\.com$ # api.anthropic.com, console.anthropic.com, auth.anthropic.com +(?:^|\.)openai\.com$ # api.openai.com, auth.openai.com +(?:^|\.)openai\.azure\.com$ # .openai.azure.com +``` + +The bar for adding a host: the agent (or its bundled tools) +makes inference-related calls to it — model invocations, token +refresh against the same provider, provider-specific telemetry +that the trace pipeline knows how to parse. Hosts we just +"happen to see" (a package registry the agent installs from, a +GitHub API call from an MCP tool) do not belong here. + +Non-allowlisted CONNECTs produce **no activity events** today. +At the proxy layer the runner still observes session metadata +(host, port, byte counts, duration) via the `onSessionEnd` +callback, but this data isn't currently surfaced into the +activity stream — promoting it to a new `network_session` event +kind is a candidate follow-up if presence visibility for non-LLM +hosts becomes valuable. + ## How the MITM works When the agent issues `CONNECT api.anthropic.com:443` through the @@ -250,23 +300,32 @@ the work. ac7 mitigates this with defense in depth: only to `127.0.0.1` on a random ephemeral port. The CA is generated fresh per runner process; its cert is written with `0o600`; its private key never touches disk. -2. **Redaction at parse time.** Secrets are replaced with +2. **Scoped to an LLM-host allowlist.** TLS termination only + fires for hosts on the `known-hosts.ts` allowlist. Everything + else passes through as a raw TCP tunnel — the agent's TLS + client validates the real upstream cert against the agent's + system trust store and the runner never observes plaintext. 
+ This is also why setting `SSL_CERT_FILE` / `REQUESTS_CA_BUNDLE` + / `CURL_CA_BUNDLE` / `GIT_SSL_CAINFO` is deliberately *not* + done: they would replace the system trust store with our + single-CA pem and break every non-allowlisted HTTPS call. +3. **Redaction at parse time.** Secrets are replaced with `[REDACTED]` before entries leave the runner. The server never sees the plaintext token. -3. **Permission-gated view.** Only members with `activity.read` +4. **Permission-gated view.** Only members with `activity.read` (or the captured member themselves) can read the activity stream. Watchers, originators, and assignees of OTHER members' objectives all get 403 on the GET endpoint. -4. **CA cert deleted on runner exit.** The cert PEM is unlinked +5. **CA cert deleted on runner exit.** The cert PEM is unlinked on every exit path (normal, SIGINT, SIGTERM, uncaughtException). -5. **`.mcp.json` restored on every exit** (claude-code only) — +6. **`.mcp.json` restored on every exit** (claude-code only) — the original is backed up and restored idempotently. -6. **Ephemeral CODEX_HOME removed on exit** (codex only) — the +7. **Ephemeral CODEX_HOME removed on exit** (codex only) — the entire temp directory is `rm -rf`'d, including the symlink to the user's `~/.codex/auth.json` (the symlink is removed; the real file isn't). -7. **Upload is best-effort.** If the upload fails, the runner +8. **Upload is best-effort.** If the upload fails, the runner logs and moves on. It does NOT retry past the queue cap, and it does NOT persist the trace to disk. @@ -344,11 +403,15 @@ audit requirements. ALPN) produce no `llm_exchange` events. Adding an HPACK-aware parser is a follow-up. In practice the Anthropic SDK defaults to HTTP/1.1 for `/v1/messages`, so this is rarely hit. -- **Anthropic parser only.** Other LLM providers (OpenAI, - Gemini, Mistral) land as `opaque_http`. 
**Codex traces today - fall in this bucket** — adding a typed OpenAI parser so codex - traces render the same way claude-code traces do is a - follow-up. +- **Anthropic parser only.** OpenAI / Azure OpenAI traffic IS + decrypted (those hosts are on the allowlist) but lands as + `opaque_http` because there's no structured parser for the + Chat Completions / Responses API shapes yet. **Codex traces + today fall in this bucket** — adding a typed OpenAI parser so + codex traces render the same way claude-code traces do is a + follow-up. Other providers (Gemini, Mistral, Bedrock) are + passed through unmodified (not on the allowlist); adding them + is a code change to `known-hosts.ts` plus, ideally, a parser. - **Uploader queue cap.** The uploader caps in-flight at 1000 events / 1 MB and evicts oldest-first under sustained broker unreachability. Events dropped here won't appear in the UI. diff --git a/packages/cli/src/commands/claude-code.ts b/packages/cli/src/commands/claude-code.ts index 1812e21..98dae34 100644 --- a/packages/cli/src/commands/claude-code.ts +++ b/packages/cli/src/commands/claude-code.ts @@ -39,8 +39,10 @@ import { resolve } from 'node:path'; import { DEFAULT_PORT, ENV } from '@agentc7/sdk/protocol'; import { ClaudeCodeAdapterError, + type ClaudeSettingsHandle, findClaudeBinary, type McpConfigHandle, + prepareClaudeSettings, prepareMcpConfig, } from '../runtime/agents/claude-code.js'; import { HUD_HEIGHT, startHud } from '../runtime/hud.js'; @@ -226,6 +228,7 @@ export async function runClaudeCodeCommand(input: ClaudeCodeCommandInput): Promi // here tears down the runner before propagating so we don't leave // an orphaned IPC socket. let mcpHandle: McpConfigHandle; + let settingsHandle: ClaudeSettingsHandle | null = null; // Auto-detect the bridge command from the currently-running cli // process. 
`process.execPath` is the node binary; `process.argv[1]` // is the absolute path to the cli's entry script (dist/index.js in @@ -277,6 +280,26 @@ export async function runClaudeCodeCommand(input: ClaudeCodeCommandInput): Promi } log('claude-code: .mcp.json prepared', { path: mcpHandle.path }); + // 3b. If tracing is enabled, write a `.claude/settings.json` hook + // config so PreToolUse / PostToolUse events drive the busy + // signal. Skipped under `--no-trace` since there's no hook + // endpoint to point at. Failures here are non-fatal — they + // only degrade the busy-signal accuracy during tool runs, not + // correctness of the agent itself. + if (runner.traceHost) { + try { + settingsHandle = prepareClaudeSettings({ + cwd, + hookUrl: runner.traceHost.hookEndpointUrl, + }); + log('claude-code: .claude/settings.json prepared', { path: settingsHandle.path }); + } catch (err) { + log('claude-code: .claude/settings.json prepare failed (busy hooks disabled)', { + error: err instanceof Error ? err.message : String(err), + }); + } + } + // 4. Spawn claude. In interactive sessions we route through a // node-pty relay so we can (a) reserve the bottom `HUD_HEIGHT` // rows for the ac7 status strip and (b) own the stream for @@ -302,6 +325,15 @@ export async function runClaudeCodeCommand(input: ClaudeCodeCommandInput): Promi error: err instanceof Error ? err.message : String(err), }); } + if (settingsHandle) { + try { + settingsHandle.restore(); + } catch (err) { + log('claude-code: settings.json restore threw', { + error: err instanceof Error ? err.message : String(err), + }); + } + } await runner.shutdown(reason).catch((err) => { log('claude-code: runner shutdown threw', { error: err instanceof Error ? 
err.message : String(err), @@ -435,6 +467,11 @@ export async function runClaudeCodeCommand(input: ClaudeCodeCommandInput): Promi } catch { /* ignore */ } + try { + settingsHandle?.restore(); + } catch { + /* ignore */ + } }; process.on('uncaughtException', onUncaught); process.on('unhandledRejection', onUncaught); diff --git a/packages/cli/src/commands/codex.ts b/packages/cli/src/commands/codex.ts index 51d2039..f14ac31 100644 --- a/packages/cli/src/commands/codex.ts +++ b/packages/cli/src/commands/codex.ts @@ -184,6 +184,10 @@ export async function runCodexCommand(input: CodexCommandInput): Promise cwd, model: input.model, presence, + // Share the trace host's busy signal so codex tool-lifecycle + // notifications and MITM-derived LLM bumps both feed one + // observable. Null when --no-trace. + busy: runner.traceHost?.busy, log, }); } catch (err) { diff --git a/packages/cli/src/runtime/agents/claude-code.ts b/packages/cli/src/runtime/agents/claude-code.ts index 74f1361..a7f627c 100644 --- a/packages/cli/src/runtime/agents/claude-code.ts +++ b/packages/cli/src/runtime/agents/claude-code.ts @@ -41,6 +41,7 @@ import { existsSync, constants as FS, fsyncSync, + mkdirSync, mkdtempSync, openSync, readFileSync, @@ -299,6 +300,254 @@ export function prepareMcpConfig(options: PrepareMcpConfigOptions): McpConfigHan return { path: mcpConfigPath, restore }; } +/** + * Shape of `.claude/settings.json`. Only `hooks` is modeled; everything + * else is preserved verbatim during merge/restore. + * + * Claude Code accepts `hooks` as a map from event name → array of hook + * matchers. For the busy-signal feeder we use `type: "http"` so each + * event becomes a localhost POST rather than a process fork. 
+ */
+interface ClaudeSettingsConfig {
+  hooks?: Record<string, ClaudeHookMatcher[]>;
+  [k: string]: unknown;
+}
+
+interface ClaudeHookMatcher {
+  matcher?: string;
+  hooks: ClaudeHookEntry[];
+}
+
+interface ClaudeHookEntry {
+  type: 'command' | 'http';
+  command?: string;
+  url?: string;
+  [k: string]: unknown;
+}
+
+export interface PrepareClaudeSettingsOptions {
+  /** Directory containing `.claude/settings.json`. Usually the project cwd. */
+  cwd: string;
+  /**
+   * Full URL the Claude Code harness should POST to for each hook
+   * event. Comes from `TraceHost.hookEndpointUrl`. The same URL handles
+   * PreToolUse / PostToolUse / PostToolUseFailure — the runner routes by
+   * `hook_event_name` in the payload.
+   */
+  hookUrl: string;
+}
+
+export interface ClaudeSettingsHandle {
+  readonly path: string;
+  /**
+   * Restore `.claude/settings.json` to its pre-run state. If the file
+   * didn't exist before we touched it, delete it (and remove the
+   * `.claude/` dir if we created it). Idempotent.
+   */
+  restore(): void;
+}
+
+/**
+ * Marker key we add under each ac7-managed hook entry so a later
+ * restore (or stale state from a previous crash) can identify our
+ * entries unambiguously even if the individual contributor later edits
+ * the file.
+ */
+const AC7_HOOK_MARKER = 'x_ac7_busy_feeder';
+
+/**
+ * Merge our HTTP hook entries into `.claude/settings.json`, backing up
+ * the existing file first. Returns a handle whose `.restore()` undoes
+ * the modification.
+ *
+ * The hook config writes one entry per relevant lifecycle event
+ * (PreToolUse, PostToolUse, PostToolUseFailure) pointing at the
+ * loopback URL. Each entry is tagged with `x_ac7_busy_feeder: true`
+ * so we don't accidentally drop unrelated hooks the user has
+ * configured.
+ * + * Failure modes that leave the existing file UNTOUCHED: + * - file exists but is not valid JSON + * - backup write fails + * - staging temp file write fails (before rename) + */ +export function prepareClaudeSettings(options: PrepareClaudeSettingsOptions): ClaudeSettingsHandle { + const claudeDir = resolve(options.cwd, '.claude'); + const settingsPath = join(claudeDir, 'settings.json'); + const dirExistedBefore = existsSync(claudeDir); + const existedBefore = existsSync(settingsPath); + + // Parse before we touch anything. Invalid JSON → throw with a clear + // message rather than overwriting the user's file. + let originalBytes: string | null = null; + let existingConfig: ClaudeSettingsConfig = {}; + if (existedBefore) { + originalBytes = readFileSync(settingsPath, 'utf8'); + try { + const parsed = JSON.parse(originalBytes); + if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) { + existingConfig = parsed as ClaudeSettingsConfig; + } else { + throw new Error('top-level value is not an object'); + } + } catch (err) { + throw new ClaudeCodeAdapterError( + `refusing to modify ${settingsPath}: existing file is not a valid JSON object ` + + `(${err instanceof Error ? err.message : String(err)}). ` + + `Fix or delete the file, then re-run.`, + ); + } + } + + // Backup BEFORE writing the target — same invariant as prepareMcpConfig. + const backupDir = mkdtempSync(join(tmpdir(), 'ac7-runner-')); + const backupPath = join(backupDir, 'claude-settings.json.bak'); + let backupWritten = false; + if (existedBefore && originalBytes !== null) { + try { + atomicWrite(backupPath, originalBytes); + backupWritten = true; + } catch (err) { + try { + rmdirSync(backupDir); + } catch { + /* ignore */ + } + throw new ClaudeCodeAdapterError( + `failed to write backup of ${settingsPath}: ${ + err instanceof Error ? err.message : String(err) + }`, + ); + } + } + + // Build the merged config. 
Preserve all existing keys, including any
+  // hooks the user already configured for events we don't touch.
+  const existingHooks: Record<string, ClaudeHookMatcher[]> =
+    existingConfig.hooks && typeof existingConfig.hooks === 'object'
+      ? { ...existingConfig.hooks }
+      : {};
+
+  const ac7Entry: ClaudeHookEntry = {
+    type: 'http',
+    url: options.hookUrl,
+    [AC7_HOOK_MARKER]: true,
+  };
+
+  for (const event of ['PreToolUse', 'PostToolUse', 'PostToolUseFailure']) {
+    const existing = Array.isArray(existingHooks[event]) ? existingHooks[event] : [];
+    // Drop any prior ac7 entries (e.g. from a previous crash that
+    // didn't restore cleanly) so we don't accumulate duplicates.
+    const cleaned = existing.map((matcher) => ({
+      ...matcher,
+      hooks: (matcher.hooks ?? []).filter(
+        (h) => !(typeof h === 'object' && h !== null && AC7_HOOK_MARKER in h),
+      ),
+    }));
+    // Match-all matcher carrying just our hook entry.
+    cleaned.push({ matcher: '*', hooks: [ac7Entry] });
+    existingHooks[event] = cleaned;
+  }
+
+  const mergedConfig: ClaudeSettingsConfig = {
+    ...existingConfig,
+    hooks: existingHooks,
+  };
+
+  // Ensure the .claude directory exists before writing. atomicWrite
+  // does a same-directory rename, so the dir has to be in place first.
+  if (!dirExistedBefore) {
+    try {
+      mkdirSync(claudeDir, { recursive: true });
+    } catch (err) {
+      if (backupWritten) {
+        try {
+          unlinkSync(backupPath);
+        } catch {
+          /* ignore */
+        }
+      }
+      try {
+        rmdirSync(backupDir);
+      } catch {
+        /* ignore */
+      }
+      throw new ClaudeCodeAdapterError(
+        `failed to create ${claudeDir}: ${err instanceof Error ? err.message : String(err)}`,
+      );
+    }
+  }
+
+  try {
+    atomicWrite(settingsPath, `${JSON.stringify(mergedConfig, null, 2)}\n`);
+  } catch (err) {
+    if (backupWritten) {
+      try {
+        unlinkSync(backupPath);
+      } catch {
+        /* ignore */
+      }
+    }
+    try {
+      rmdirSync(backupDir);
+    } catch {
+      /* ignore */
+    }
+    throw new ClaudeCodeAdapterError(
+      `failed to write ${settingsPath}: ${err instanceof Error ?
err.message : String(err)}`, + ); + } + + let restored = false; + const restore = (): void => { + if (restored) return; + restored = true; + try { + if (existedBefore && originalBytes !== null) { + atomicWrite(settingsPath, originalBytes); + } else { + try { + unlinkSync(settingsPath); + } catch (err) { + const code = (err as NodeJS.ErrnoException).code; + if (code !== 'ENOENT') throw err; + } + // If we created the .claude/ dir for our own settings file + // and it's still empty, clean it up. If the individual + // contributor added unrelated files we leave it alone. + if (!dirExistedBefore) { + try { + rmdirSync(claudeDir); + } catch { + // Directory not empty or some other error — leave it. + } + } + } + } catch (err) { + process.stderr.write( + `ac7: warning: failed to restore ${settingsPath} from backup ${backupPath}: ${ + err instanceof Error ? err.message : String(err) + }\n` + ` The backup file is still at ${backupPath} — you can copy it back manually.\n`, + ); + return; + } + if (backupWritten) { + try { + unlinkSync(backupPath); + } catch { + /* ignore */ + } + } + try { + rmdirSync(backupDir); + } catch { + /* ignore */ + } + }; + + return { path: settingsPath, restore }; +} + /** * Atomically write `body` to `path`. 
Same pattern as the server's * slot-config writer: open a temp file in the same directory with diff --git a/packages/cli/src/runtime/agents/codex/adapter.ts b/packages/cli/src/runtime/agents/codex/adapter.ts index b133cf8..4d41989 100644 --- a/packages/cli/src/runtime/agents/codex/adapter.ts +++ b/packages/cli/src/runtime/agents/codex/adapter.ts @@ -31,7 +31,9 @@ import { existsSync } from 'node:fs'; import type { BriefingResponse } from '@agentc7/sdk/types'; import { CLI_VERSION } from '../../../version.js'; import type { Presence } from '../../presence.js'; +import type { BusySignal } from '../../trace/busy.js'; import type { TraceHost } from '../../trace/host.js'; +import { attachCodexBusySniff, type CodexBusySniff } from './busy-sniff.js'; import type { CodexChannelSink } from './channel-sink.js'; import { createCodexChannelSink } from './channel-sink.js'; import { setupCodexHome } from './codex-home.js'; @@ -108,6 +110,15 @@ export interface CodexSpawnOptions { model?: string; /** Presence signal — flipped by status notifications. */ presence: Presence; + /** + * Busy signal — bumped on each tool-execution `item/started` and + * decremented on the matching `item/completed`. Optional: when + * absent (e.g. `--no-trace`), tool-lifecycle events still get + * logged but don't drive the indicator. Pass the same signal the + * trace host uses so LLM-call and tool-execution bumps share one + * 0↔busy transition contract. + */ + busy?: BusySignal; /** Logger, structured JSON to stderr by default. 
*/
  log: (msg: string, ctx?: Record<string, unknown>) => void;
}
@@ -311,6 +322,13 @@ export async function spawnCodex(opts: CodexSpawnOptions): Promise {
    opts.log('codex: error notification', params as Record<string, unknown>);
  });
@@ -344,6 +362,10 @@ export async function spawnCodex(opts: CodexSpawnOptions): Promise = new Set([
+  'commandExecution',
+  'fileChange',
+  'mcpToolCall',
+]);
+
+export interface CodexBusySniffOptions {
+  rpc: JsonRpcClient;
+  busy: BusySignal;
+  log?: (msg: string, ctx?: Record<string, unknown>) => void;
+}
+
+export interface CodexBusySniff {
+  /**
+   * Drain any handles still in flight. Safe to call multiple times —
+   * second call is a no-op since the underlying map is empty after
+   * the first.
+   */
+  drain(): void;
+  /**
+   * Outstanding tool handle count. Useful for assertions.
+   */
+  readonly inFlight: number;
+}
+
+export function attachCodexBusySniff(options: CodexBusySniffOptions): CodexBusySniff {
+  const { rpc, busy } = options;
+  const log = options.log ?? (() => {});
+
+  // Per-item busy handles, keyed by item.id. Codex's `item/completed`
+  // notifications are normally reliable, but a turn interrupt or
+  // transport error can drop them; the turnCompleted handler below
+  // sweeps any leftovers so busy can't stay wedged.
+  const toolHandles = new Map<string, { finish: () => void }>();
+
+  const drainAll = (reason: string): void => {
+    if (toolHandles.size === 0) return;
+    log('codex-busy-sniff: draining tool handles', { count: toolHandles.size, reason });
+    for (const handle of toolHandles.values()) handle.finish();
+    toolHandles.clear();
+  };
+
+  rpc.onNotification(NOTIFICATIONS.itemStarted, (params) => {
+    const p = params as ItemStartedNotification;
+    if (!p?.item?.type || !p.item.id) return;
+    if (!TOOL_ITEM_TYPES.has(p.item.type)) return;
+    // Duplicate item/started for the same id is a no-op — preserve
+    // the first handle so the matching item/completed still drains
+    // exactly one.
+ if (!toolHandles.has(p.item.id)) { + toolHandles.set(p.item.id, busy.start('tool_inflight')); + } + }); + + rpc.onNotification(NOTIFICATIONS.itemCompleted, (params) => { + const p = params as ItemCompletedNotification; + if (!p?.item?.id) return; + const handle = toolHandles.get(p.item.id); + if (handle) { + handle.finish(); + toolHandles.delete(p.item.id); + } + }); + + rpc.onNotification(NOTIFICATIONS.turnCompleted, () => { + drainAll('turn-completed'); + }); + + return { + drain(): void { + drainAll('explicit-drain'); + }, + get inFlight() { + return toolHandles.size; + }, + }; +} diff --git a/packages/cli/src/runtime/trace/anthropic.ts b/packages/cli/src/runtime/trace/anthropic.ts index 55cb766..4c9e23a 100644 --- a/packages/cli/src/runtime/trace/anthropic.ts +++ b/packages/cli/src/runtime/trace/anthropic.ts @@ -25,6 +25,7 @@ * a crashed upload. */ +import { isKnownLlmHost } from './known-hosts.js'; import { redactHeaders, redactJson } from './redact.js'; export interface HttpRequestRecord { @@ -113,6 +114,17 @@ export interface OpaqueHttpEntry { const ANTHROPIC_HOST_RE = /(?:^|\.)anthropic\.com$/i; const MESSAGES_PATH_RE = /\/v1\/messages(?:\?|$)/; + +// Sanity check: the structured decoder targets Anthropic's apex, which +// must appear on the trace MITM allowlist. If a future edit drops +// anthropic.com from `known-hosts.ts`, the decoder would silently stop +// firing — catch that at module load instead. +if (!isKnownLlmHost('api.anthropic.com')) { + throw new Error( + 'trace/anthropic: anthropic.com is missing from KNOWN_LLM_HOST_PATTERNS — ' + + 'the structured /v1/messages decoder can only run on MITM-decrypted traffic.', + ); +} const OPAQUE_BODY_PREVIEW_BYTES = 4096; /** diff --git a/packages/cli/src/runtime/trace/busy.ts b/packages/cli/src/runtime/trace/busy.ts index 31c8328..8f562c4 100644 --- a/packages/cli/src/runtime/trace/busy.ts +++ b/packages/cli/src/runtime/trace/busy.ts @@ -1,39 +1,159 @@ /** * "Agent is working" signal for the runner. 
* - * Tracks an integer count of in-flight upstream HTTP requests captured - * by the MITM proxy. The runner reports `busy = count > 0` to the - * broker, which surfaces it on `/roster` so the web UI can render a - * spinner next to the agent's name. + * Tracks in-flight work the agent is doing across multiple independent + * sources. The runner reports `busy = anyCount > 0` to the broker, + * which surfaces it on `/roster` so the web UI can render a spinner + * next to the agent's name. * - * Why a count rather than a bool: many concurrent calls are normal - * (parallel tool fan-out), and using a count means we don't accidentally - * flip "not busy" mid-burst when one of N calls finishes. Subscribers - * still see only the boolean transitions, so the UI never thrashes. + * The signal is multi-sourced because no single observation point sees + * everything an agent does: * - * Listeners are notified on every transition between 0 and >0 (and - * vice versa). Subsequent increments while already busy don't fire — - * the public observable is the boolean, not the count. + * - `llm_inflight` — bumped by the MITM trace pipeline whenever a + * request to an allowlisted LLM-provider host is in flight. Lights + * up while the model is generating between tool calls. This is the + * historical bumper and the one that still fires today. + * - `tool_inflight` — bumped by per-runner integrations watching + * tool lifecycle events (claude-code hooks, codex app-server + * `item/started`/`item/completed` notifications). Lights up during + * bash, file-edit, MCP-tool, and other tool execution windows that + * the LLM bump alone wouldn't cover. + * + * Why per-source counters: any single feeder can stall (a misbehaving + * hook that never decrements, a JSON-RPC stream that drops a + * notification). With separate counters, a stuck source can't poison + * the others — and `getSourceCounts()` lets diagnostics tell us which + * one is wedged. 
The public observable stays a single boolean so the + * UI never has to merge state. + * + * Why a count rather than a bool per source: many concurrent in-flight + * units are normal (parallel tool fan-out, streaming LLM requests). + * Using a count means we don't accidentally flip "not busy" mid-burst + * when one of N completes. Subscribers still see only the boolean + * transitions, so the UI never thrashes. + * + * Listeners are notified on every transition between "any source has + * work" and "no source has work". Subsequent increments while already + * busy don't fire. + * + * Defense in depth — handles that never see their `finish()` call: + * + * - Each handle has an auto-finish timer (see `DEFAULT_MAX_AGE_MS`). + * If `finish()` hasn't run by then we force it. A keep-alive socket + * surviving a TUI interrupt, a dropped tool-lifecycle notification, + * or a parser failing to complete an exchange would all otherwise + * wedge the indicator. We'd rather flicker idle after a long + * legitimate operation than show busy forever. + * - `forceFinishAll()` drains every live handle and is called from + * `TraceHost.close()` after sub-systems shut down — a teardown-time + * safety net for any handle a sub-system's own cleanup missed. */ +export type BusySource = 'llm_inflight' | 'tool_inflight'; + +const ALL_SOURCES: readonly BusySource[] = ['llm_inflight', 'tool_inflight']; + +/** + * Per-source upper bound on how long a single handle is allowed to + * stay open before the safety net force-finishes it. + * + * - `llm_inflight` — 5 minutes. Extended-thinking turns and long + * batch generations can take ~1-2 minutes legitimately; 5 minutes + * leaves plenty of headroom while still bounding the stuck case. + * - `tool_inflight` — 15 minutes. Tool calls can include long bash + * commands (npm install, docker build, large checkouts). Beyond + * 15 minutes we'd rather risk flickering than show stuck-busy + * forever. 
+ * + * Callers with legitimate need for a different cap (or no cap) can + * pass `maxAgeMs` explicitly to `start()`. Use `Infinity` to disable + * the timer entirely; non-positive / non-finite values fall back to + * the source default. + */ +export const DEFAULT_MAX_AGE_MS: Readonly> = { + llm_inflight: 5 * 60_000, + tool_inflight: 15 * 60_000, +}; + +export interface BusySignalOptions { + /** + * Optional logger for non-routine events: handle auto-finished by + * the max-age timer, handles drained via `forceFinishAll()`, etc. + * The signal is normally silent on the happy path. + */ + log?: (msg: string, ctx?: Record) => void; +} + +export interface BusyStartOptions { + /** + * Hard cap on the handle's lifetime in milliseconds. If `finish()` + * isn't called by then we force-finish, log a warning, and drop the + * count. Defaults to `DEFAULT_MAX_AGE_MS[source]`. Pass `Infinity` + * to disable the safety net for this handle. + */ + maxAgeMs?: number; +} + +export interface BusyHandle { + finish(): void; +} + export interface BusySignal { - /** Current count of in-flight requests. */ + /** Sum of in-flight counts across all sources. */ readonly count: number; - /** Whether at least one request is in flight (`count > 0`). */ + /** Whether at least one source has work in flight. */ readonly busy: boolean; - /** Mark a new request as started. Returns a handle that decrements on `finish()`. */ - start(): { finish: () => void }; + /** + * Mark a new unit of work as started. Returns a handle that + * decrements on `finish()`. Defaults to `llm_inflight` so existing + * MITM bumper code that doesn't pass a source remains correct. + * + * Each handle auto-finishes after `maxAgeMs` if `finish()` hasn't + * been called — see the file-level comment on defense in depth. + */ + start(source?: BusySource, options?: BusyStartOptions): BusyHandle; /** * Subscribe to busy-state changes. Listener fires immediately with * the current state, then on every transition. 
Returns an unsubscribe * function. */ subscribe(listener: (busy: boolean) => void): () => void; + /** + * Diagnostics: read the live per-source counts. Useful when a + * subscriber suspects one source is stuck — see which counter + * refuses to drain. + */ + getSourceCounts(): Readonly>; + /** + * Force every outstanding handle to finish. Returns the number of + * handles that were drained (zero when no work was in flight). Emits + * a single busy→idle transition if at least one handle was open. + * + * The trace host calls this from its `close()` path after the proxy + * and hook server have shut down, as a final safety net for handles + * a sub-system's own cleanup missed. Tests can also use it to + * scrub state between cases. + */ + forceFinishAll(): number; } -export function createBusySignal(): BusySignal { - let count = 0; +interface InternalHandle { + source: BusySource; + finish: (reason: 'normal' | 'timeout' | 'force') => void; +} + +export function createBusySignal(options: BusySignalOptions = {}): BusySignal { + const log = options.log ?? 
(() => {});
+  const counts = new Map<BusySource, number>();
+  for (const source of ALL_SOURCES) counts.set(source, 0);
   const listeners = new Set<(busy: boolean) => void>();
+  const liveHandles = new Set<InternalHandle>();
+
+  const totalCount = (): number => {
+    let total = 0;
+    for (const v of counts.values()) total += v;
+    return total;
+  };

   const emit = (busy: boolean): void => {
     for (const listener of listeners) {
@@ -45,36 +165,98 @@ export function createBusySignal(): BusySignal {
     }
   };

-  const start = (): { finish: () => void } => {
-    const wasBusy = count > 0;
-    count += 1;
+  const resolveMaxAge = (source: BusySource, requested: number | undefined): number => {
+    if (requested === undefined) return DEFAULT_MAX_AGE_MS[source];
+    if (typeof requested !== 'number') return DEFAULT_MAX_AGE_MS[source];
+    if (Number.isNaN(requested)) return DEFAULT_MAX_AGE_MS[source];
+    // Positive Infinity is the documented opt-out signal — let it
+    // through so the timer setup below sees a non-finite value and
+    // skips scheduling.
+    if (requested === Number.POSITIVE_INFINITY) return requested;
+    if (requested <= 0) return DEFAULT_MAX_AGE_MS[source];
+    return requested;
+  };
+
+  const start = (
+    source: BusySource = 'llm_inflight',
+    startOpts: BusyStartOptions = {},
+  ): BusyHandle => {
+    const wasBusy = totalCount() > 0;
+    counts.set(source, (counts.get(source) ?? 0) + 1);
     if (!wasBusy) emit(true);

     let finished = false;
+    let timer: ReturnType<typeof setTimeout> | null = null;
+    const startedAt = Date.now();
+
+    const finish = (reason: 'normal' | 'timeout' | 'force'): void => {
+      // Idempotent — a callback wired to two completion paths
+      // (e.g., onExchange + closeSession) shouldn't double-decrement.
+      if (finished) return;
+      finished = true;
+      if (timer !== null) {
+        clearTimeout(timer);
+        timer = null;
+      }
+      liveHandles.delete(handleEntry);
+      const next = Math.max(0, (counts.get(source) ??
0) - 1); + counts.set(source, next); + if (totalCount() === 0) emit(false); + if (reason !== 'normal') { + log('busy: handle auto-finished', { + source, + reason, + ageMs: Date.now() - startedAt, + }); + } + }; + + const handleEntry: InternalHandle = { + source, + finish, + }; + liveHandles.add(handleEntry); + + const maxAgeMs = resolveMaxAge(source, startOpts.maxAgeMs); + if (Number.isFinite(maxAgeMs) && maxAgeMs > 0) { + timer = setTimeout(() => finish('timeout'), maxAgeMs); + // Don't keep the runner process alive just to fire this watchdog — + // if the runner is exiting and this is the only live timer, we + // want the loop to drain so close() can proceed. + if (typeof timer === 'object' && 'unref' in timer) { + (timer as { unref: () => void }).unref(); + } + } + return { - finish: () => { - // Idempotent — a callback wired to two completion paths - // (e.g., onExchange + closeSession) shouldn't double-decrement. - if (finished) return; - finished = true; - count = Math.max(0, count - 1); - if (count === 0) emit(false); - }, + finish: () => finish('normal'), }; }; + const forceFinishAll = (): number => { + if (liveHandles.size === 0) return 0; + const drained = liveHandles.size; + // Snapshot before iterating since each finish() mutates the set. + for (const entry of [...liveHandles]) { + entry.finish('force'); + } + log('busy: force-finished outstanding handles', { drained }); + return drained; + }; + return { get count() { - return count; + return totalCount(); }, get busy() { - return count > 0; + return totalCount() > 0; }, start, subscribe(listener) { listeners.add(listener); // Late subscribers see the current state. 
try {
-      listener(count > 0);
+      listener(totalCount() > 0);
     } catch {
       /* ignore */
     }
@@ -82,5 +264,11 @@ export function createBusySignal(): BusySignal {
       listeners.delete(listener);
     };
   },
+    getSourceCounts() {
+      const out = { llm_inflight: 0, tool_inflight: 0 } as Record<BusySource, number>;
+      for (const [k, v] of counts) out[k] = v;
+      return out;
+    },
+    forceFinishAll,
   };
 }
diff --git a/packages/cli/src/runtime/trace/hook-server.ts b/packages/cli/src/runtime/trace/hook-server.ts
new file mode 100644
index 0000000..92e2edd
--- /dev/null
+++ b/packages/cli/src/runtime/trace/hook-server.ts
@@ -0,0 +1,192 @@
+/**
+ * Loopback HTTP endpoint for Claude Code hook events.
+ *
+ * Claude Code's hook system fires lifecycle callbacks at points in the
+ * agent loop the MITM proxy can't see (tool execution windows that
+ * don't generate LLM calls). We bind a small HTTP server here, write
+ * its URL into `.claude/settings.json` as a `type: "http"` hook target,
+ * and let Claude Code POST to us on PreToolUse / PostToolUse /
+ * PostToolUseFailure.
+ *
+ * Why HTTP and not `type: "command"`:
+ * - Each `type: "command"` hook forks a process per event. With ~50
+ *   tool calls per turn over a session, that's hundreds of Node
+ *   startups for what should be a counter bump.
+ * - HTTP hooks are single localhost round-trips — sub-millisecond on
+ *   loopback.
+ * - We already bind two listeners (the MITM proxy, the runner IPC
+ *   socket); a third is cheap.
+ *
+ * The server is single-purpose: bumps `busy('tool_inflight')` on
+ * PreToolUse, decrements on PostToolUse / PostToolUseFailure. It
+ * keeps a per-`tool_use_id` map of busy handles so out-of-order
+ * matching (e.g., a hook event arrives twice or out of sequence)
+ * stays correct: PreToolUse for an id we already have is a no-op;
+ * PostToolUse for an id we don't have is a no-op; double Post
+ * decrements at most once.
+ *
+ * On close, all outstanding handles are drained so a torn-down runner
+ * can't leave the indicator wedged at "busy".
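+ *
+ * A request this server accepts might look like the following. The
+ * payload shape is hypothetical — only the fields this handler
+ * actually reads are shown, and real Claude Code hook payloads carry
+ * more:
+ *
+ *   POST /hook/tool-event HTTP/1.1
+ *   Content-Type: application/json
+ *
+ *   {"hook_event_name":"PreToolUse","tool_use_id":"toolu_abc","tool_name":"Bash"}
+ *
+ * The matching PostToolUse carries the same `tool_use_id`, which is
+ * how its handle is found and finished.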
+ */
+
+import { createServer, type Server } from 'node:http';
+import type { BusySignal } from './busy.js';
+
+export type ClaudeHookEventName =
+  | 'PreToolUse'
+  | 'PostToolUse'
+  | 'PostToolUseFailure'
+  | 'PostToolBatch';
+
+interface HookRequestBody {
+  hook_event_name?: string;
+  tool_use_id?: string;
+  tool_name?: string;
+}
+
+export interface HookServer {
+  /** The full URL that goes into the `type: "http"` hook config. */
+  readonly url: string;
+  /** Live count of outstanding tool handles. Useful for diagnostics. */
+  readonly inFlight: number;
+  /** Tear down: drain any remaining handles, close the listener. */
+  close(): Promise<void>;
+}
+
+export interface HookServerOptions {
+  busy: BusySignal;
+  log?: (msg: string, ctx?: Record<string, unknown>) => void;
+}
+
+export async function startHookServer(options: HookServerOptions): Promise<HookServer> {
+  const log =
+    options.log ??
+    ((msg: string, ctx: Record<string, unknown> = {}): void => {
+      const record = { ts: new Date().toISOString(), component: 'hook-server', msg, ...ctx };
+      process.stderr.write(`${JSON.stringify(record)}\n`);
+    });
+
+  // Per-tool-use-id handles. The same id appears in PreToolUse and
+  // PostToolUse, so the matching is exact when Claude Code is
+  // well-behaved. If we get an unexpected duplicate or out-of-order
+  // event we err on the side of "do nothing surprising" rather than
+  // double-bump or under-decrement.
+  const handles = new Map<string, { finish: () => void }>();
+
+  const readBody = (req: NodeJS.ReadableStream): Promise<string> =>
+    new Promise((resolve, reject) => {
+      const parts: Buffer[] = [];
+      let total = 0;
+      // 64 KB is far more than any hook payload should be; cap to
+      // defang slow-loris / oversized-body adversaries even on
+      // loopback. A real claude payload is ~1-2 KB.
+ const cap = 64 * 1024; + req.on('data', (chunk: Buffer) => { + total += chunk.length; + if (total > cap) { + req.removeAllListeners('data'); + req.removeAllListeners('end'); + reject(new Error('hook payload exceeded 64 KB cap')); + return; + } + parts.push(chunk); + }); + req.on('end', () => resolve(Buffer.concat(parts).toString('utf8'))); + req.on('error', reject); + }); + + const server: Server = createServer(async (req, res) => { + // Liveness only — anything except POST /hook/tool-event gets a 404 + // so misconfiguration is loud. + if (req.method !== 'POST' || req.url !== '/hook/tool-event') { + res.writeHead(404, { 'Content-Type': 'text/plain' }); + res.end('not found'); + return; + } + let body: HookRequestBody; + try { + const raw = await readBody(req); + const parsed = raw.length === 0 ? {} : JSON.parse(raw); + if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) { + throw new Error('hook body is not a JSON object'); + } + body = parsed as HookRequestBody; + } catch (err) { + log('hook-server: bad request', { + error: err instanceof Error ? err.message : String(err), + }); + res.writeHead(400, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: 'bad request' })); + return; + } + + const event = body.hook_event_name; + const toolUseId = body.tool_use_id; + if (typeof event !== 'string' || typeof toolUseId !== 'string' || toolUseId.length === 0) { + // Missing essentials. Don't 4xx — Claude Code might keep + // retrying. 2xx with a no-op semantics is safer. + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ accepted: false, reason: 'missing fields' })); + return; + } + + if (event === 'PreToolUse') { + // Duplicate PreToolUse for the same id is a no-op — keep the + // first handle so the matching Post still finds something. 
+      if (!handles.has(toolUseId)) {
+        handles.set(toolUseId, options.busy.start('tool_inflight'));
+      }
+    } else if (
+      event === 'PostToolUse' ||
+      event === 'PostToolUseFailure' ||
+      event === 'PostToolBatch'
+    ) {
+      const handle = handles.get(toolUseId);
+      if (handle) {
+        handle.finish();
+        handles.delete(toolUseId);
+      }
+      // Note: PostToolBatch may carry a synthetic batch id rather than
+      // a real tool_use_id; we still try to drain the matching handle
+      // in case Claude Code uses the same id space. Missing matches
+      // are silent (no-op).
+    }
+    // Any other event (SessionStart, Stop, etc.) is accepted but
+    // doesn't drive busy. We ignore them politely so the user can
+    // share one hook config block across multiple events without
+    // worrying about which ones we care about.
+    res.writeHead(200, { 'Content-Type': 'application/json' });
+    res.end(JSON.stringify({ accepted: true }));
+  });
+
+  await new Promise<void>((resolve, reject) => {
+    server.once('listening', () => resolve());
+    server.once('error', (err) => reject(err));
+    server.listen(0, '127.0.0.1');
+  });
+
+  const address = server.address();
+  if (!address || typeof address === 'string') {
+    server.close();
+    throw new Error('hook-server: server.address() returned non-TCP binding');
+  }
+  const url = `http://127.0.0.1:${address.port}/hook/tool-event`;
+  log('hook-server: listening', { url });
+
+  return {
+    url,
+    get inFlight() {
+      return handles.size;
+    },
+    async close(): Promise<void> {
+      if (handles.size > 0) {
+        log('hook-server: draining handles at close', { count: handles.size });
+        for (const handle of handles.values()) handle.finish();
+        handles.clear();
+      }
+      await new Promise<void>((resolve) => {
+        server.close(() => resolve());
+      });
+    },
+  };
+}
diff --git a/packages/cli/src/runtime/trace/host.ts b/packages/cli/src/runtime/trace/host.ts
index af69de4..f265ab6 100644
--- a/packages/cli/src/runtime/trace/host.ts
+++ b/packages/cli/src/runtime/trace/host.ts
@@ -5,10 +5,13 @@
  * The runner constructs a
`TraceHost` at startup when tracing is * enabled, bakes its `envVars()` into the agent child's environment, * and calls `noteObjective{Open,Close}()` from the objectives - * tracker when SSE objective events arrive. Every HTTPS flow the - * agent makes is decrypted transparently via the MITM proxy; - * completed HTTP/1.1 exchanges stream up to the broker via the - * activity uploader in real time, not at span close. + * tracker when SSE objective events arrive. HTTPS flows to known LLM- + * provider hosts (see `known-hosts.ts`) are MITM-decrypted and the + * resulting HTTP/1.1 exchanges stream up to the broker via the + * activity uploader in real time. Traffic to non-allowlisted hosts + * passes through the proxy as a raw TCP tunnel — the agent's TLS + * client talks to the real upstream cert end-to-end, system trust + * applies, no plaintext is observed. * * There's no TraceBuffer, no span boundary, no per-objective * copying. The agent's activity log is the source of truth; per- @@ -28,7 +31,9 @@ import type { ActivityEvent, TraceEntry } from '@agentc7/sdk/types'; import { ActivityUploader } from './activity-uploader.js'; import { extractEntries, type HttpExchange } from './anthropic.js'; import { type BusySignal, createBusySignal } from './busy.js'; +import { type HookServer, startHookServer } from './hook-server.js'; import { type Http1Exchange, Http1Reassembler } from './http1-reassembler.js'; +import { isKnownLlmHost } from './known-hosts.js'; import { type CertPool, createCertPool, createTraceCa, type TraceCa } from './mitm/ca.js'; import { type ProxyRelay, startProxyRelay } from './proxy.js'; import { looksLikeSseStream, reassembleAnthropicSse } from './sse.js'; @@ -74,6 +79,19 @@ export interface TraceHost { * can render a spinner next to the agent's name. */ readonly busy: BusySignal; + /** + * Loopback HTTP endpoint URL that Claude Code hooks should POST to. 
+   * Used by the `claude-code` adapter to write a `type: "http"` hook
+   * config into `.claude/settings.json` so PreToolUse / PostToolUse
+   * events drive `busy('tool_inflight')`.
+   *
+   * Co-located with the trace host because the lifecycles are identical
+   * (started together, torn down together, shares the busy signal).
+   * Always set when tracing is enabled — hooks are available whenever
+   * the host is — so it's typed `string` rather than `string | null`,
+   * and callers can plumb it without a separate option.
+   */
+  readonly hookEndpointUrl: string;
   /**
    * Env vars to merge into the agent child's environment (see the
    * comment on the implementation for the full list). Returns a
@@ -122,7 +140,10 @@ export async function startTraceHost(options: TraceHostOptions): Promise<TraceHost> {
   const pendingHandles = new Map<string, { finish: () => void }>();
   const handleKey = (sessionId: number, startedAt: number): string => `${sessionId}:${startedAt}`;
@@ -133,7 +154,7 @@ export async function startTraceHost(options: TraceHostOptions): Promise<TraceHost> {
-      pendingHandles.set(handleKey(sessionId, startedAt), busy.start());
+      pendingHandles.set(handleKey(sessionId, startedAt), busy.start('llm_inflight'));
     },
     onExchange: (exchange) => {
       const handle = pendingHandles.get(handleKey(exchange.sessionId, exchange.startedAt));
@@ -149,12 +170,27 @@ export async function startTraceHost(options: TraceHostOptions): Promise<TraceHost> {
     onChunk: (chunk) => reassembler.ingest(chunk),
     onSessionEnd: (session) => reassembler.closeSession(session.id),
   });
+  // Loopback HTTP endpoint for Claude Code hook events. The runner
+  // writes its URL into `.claude/settings.json` so PreToolUse /
+  // PostToolUse callbacks bump the same `busy` signal the MITM uses.
+  // For codex this is unused — codex feeds busy via JSON-RPC
+  // notifications on the app-server stream.
+  const hookServer: HookServer = await startHookServer({ busy, log });
+
   log('trace-host: started', {
     proxyUrl: proxy.proxyUrl,
+    hookUrl: hookServer.url,
     caCertPath,
     name: options.name,
   });
@@ -167,6 +203,24 @@ export async function startTraceHost(options: TraceHostOptions): Promise<TraceHost> {
     const existingNoProxy = existingEnv.NO_PROXY ?? existingEnv.no_proxy ?? '';
     const noProxyHosts = ['localhost', '127.0.0.1', '::1'];
@@ -218,6 +272,31 @@ export async function startTraceHost(options: TraceHostOptions): Promise<TraceHost> {
+      await hookServer.close().catch((err) => {
+        log('trace-host: hook server close failed', {
+          error: err instanceof Error ? err.message : String(err),
+        });
+      });
+      // Final safety net for the busy signal. Sub-system closes above
+      // (reassembler flush, hook server drain, codex sniff drain at
+      // its own teardown) should have drained every handle they own.
+      // If anything slipped through — a keep-alive socket that never
+      // emitted onSessionEnd, a dropped item/completed notification,
+      // a hook event that never fired — this guarantees the indicator
+      // goes idle before the runner exits rather than waiting on the
+      // 30s server-side TTL.
+      //
+      // Snapshot per-source counts BEFORE the drain so the diagnostic
+      // log tells us which source leaked (the counts are all zero
+      // after forceFinishAll, which would be useless on its own).
+      const leakedCounts = busy.getSourceCounts();
+      const drained = busy.forceFinishAll();
+      if (drained > 0) {
+        log('trace-host: force-drained leaked busy handles at teardown', {
+          drained,
+          sourceCounts: leakedCounts,
+        });
+      }
       try {
         await fs.unlink(caCertPath);
       } catch (err) {
diff --git a/packages/cli/src/runtime/trace/known-hosts.ts b/packages/cli/src/runtime/trace/known-hosts.ts
new file mode 100644
index 0000000..bc4b835
--- /dev/null
+++ b/packages/cli/src/runtime/trace/known-hosts.ts
@@ -0,0 +1,59 @@
+/**
+ * Allowlist of hosts whose HTTPS traffic the trace proxy decrypts.
+ * + * The proxy itself is host-agnostic: it accepts `CONNECT host:port`, + * dials upstream, and pipes bytes. The decision of "decrypt this + * session" vs "pass it through unmodified" is gated by the predicate + * exported here. Hosts that match get MITM'd — the proxy terminates + * TLS on both sides, captures plaintext, and runs the request/response + * pair through the reassembler + decoders. Hosts that don't match get + * a raw TCP tunnel: the agent's TLS client talks to the real upstream + * cert end-to-end, system trust applies, no plaintext is observed. + * + * Why an allowlist instead of decrypting everything: + * - The honest privacy claim is "ac7 decrypts traffic to known LLM + * providers." That's only defensible if non-LLM hosts genuinely + * bypass our TLS termination. + * - Non-LLM HTTPS calls (git fetch, package installs, telemetry, + * arbitrary curl/wget from agents) "just work" with system trust + * and don't require us to ship CA-bundle env vars per-tool. + * - The activity feed only shows traffic we actually want to inspect. + * + * The bar for adding a host: + * The agent (or its bundled tools) makes inference-related calls to + * it — model invocations, token refresh against the same provider, + * provider-specific telemetry that the trace pipeline knows how to + * parse. Hosts we just "happen to see" because the agent shells out + * to them do NOT belong here. + * + * Patterns match the CONNECT target hostname (no port, no scheme). + * Use `(?:^|\.)domain$` to match the apex and any subdomain. + */ + +export const KNOWN_LLM_HOST_PATTERNS: readonly RegExp[] = [ + // Anthropic — `api.anthropic.com` for /v1/messages, plus auth/console + // subdomains used during token refresh. + /(?:^|\.)anthropic\.com$/i, + // OpenAI — `api.openai.com` for chat/completions, `auth.openai.com` + // for codex token refresh. The wildcard covers both without listing + // each subdomain. 
+  /(?:^|\.)openai\.com$/i,
+  // Azure OpenAI — customer-specific subdomains under
+  // `*.openai.azure.com`.
+  /(?:^|\.)openai\.azure\.com$/i,
+];
+
+/**
+ * True if `host` is on the LLM allowlist and should be MITM-decrypted.
+ *
+ * Accepts the raw hostname from a CONNECT line (no port, no scheme).
+ * `host` is only ever the subject of the pattern tests, never compiled
+ * as a regex itself, so callers don't have to worry about regex
+ * specials in it.
+ */
+export function isKnownLlmHost(host: string): boolean {
+  if (host.length === 0) return false;
+  for (const pattern of KNOWN_LLM_HOST_PATTERNS) {
+    if (pattern.test(host)) return true;
+  }
+  return false;
+}
diff --git a/packages/cli/src/runtime/trace/proxy.ts b/packages/cli/src/runtime/trace/proxy.ts
index a79005a..a4fc895 100644
--- a/packages/cli/src/runtime/trace/proxy.ts
+++ b/packages/cli/src/runtime/trace/proxy.ts
@@ -95,10 +95,23 @@ export interface ProxyRelayOptions {
   log?: (msg: string, ctx?: Record<string, unknown>) => void;
   /**
    * Per-hostname leaf cert issuer. When provided, CONNECT targets
-   * get the MITM treatment; captured chunks are plaintext. Omit
-   * for pure TCP relay behavior.
+   * eligible per `shouldMitm` get the MITM treatment; captured
+   * chunks are plaintext. Omit for pure TCP relay behavior on every
+   * session.
    */
  certPool?: CertPool;
+  /**
+   * Predicate consulted on each CONNECT to decide MITM vs raw tunnel.
+   * Only consulted when `certPool` is also set. When omitted, every
+   * session with a `certPool` is MITM'd — the legacy "decrypt
+   * everything" behavior, useful for tests and one-off debugging.
+   *
+   * Production callers pass `isKnownLlmHost` here so only allowlisted
+   * LLM-provider traffic gets decrypted; all other HTTPS passes
+   * through unmodified end-to-end (agent's system trust applies, we
+   * never see plaintext).
+   */
+  shouldMitm?: (host: string) => boolean;
   /**
    * Extra options merged into the upstream `tls.connect()` call
    * during MITM.
Production uses this for nothing (the defaults @@ -136,6 +149,7 @@ export async function startProxyRelay(options: ProxyRelayOptions = {}): Promise< client, sessionId, certPool: options.certPool, + shouldMitm: options.shouldMitm, upstreamTlsOptions: options.upstreamTlsOptions, onChunk: options.onChunk, onSessionEnd: options.onSessionEnd, @@ -175,6 +189,7 @@ interface SessionContext { client: Socket; sessionId: number; certPool: CertPool | undefined; + shouldMitm: ((host: string) => boolean) | undefined; upstreamTlsOptions: ConnectionOptions | undefined; onChunk: ((chunk: ProxyChunk) => void) | undefined; onSessionEnd: ((session: ProxySession) => void) | undefined; @@ -259,14 +274,25 @@ function handleSession(ctx: SessionContext): void { upstreamPort = parsed.port; phase = 'connecting'; - // MITM whenever a CertPool is configured, regardless of port. - // In practice agents only CONNECT to HTTPS targets (443 or - // vanity ports like 8443); the TLS handshake would fail - // cleanly on any non-TLS traffic. Keeping the code path - // uniform makes testing easier and avoids silent misbehavior. - if (ctx.certPool) { + // MITM only when (a) we have a CertPool, and (b) the host is + // on the allowlist (or no allowlist is configured — legacy + // mode used by tests). Everything else gets a raw TCP tunnel: + // the agent's TLS client talks straight to the real upstream + // cert, system trust applies, we never see plaintext. We still + // produce a ProxySession with byte counts + host:port so the + // activity layer retains "agent talked to X" metadata. + const wantMitm = + ctx.certPool !== undefined && + (ctx.shouldMitm === undefined || ctx.shouldMitm(upstreamHost)); + if (wantMitm) { startMitmBridge(); } else { + log('proxy: passthrough (no MITM)', { + sessionId, + host: upstreamHost, + port: upstreamPort, + reason: ctx.certPool === undefined ? 
'no-cert-pool' : 'host-not-allowlisted', + }); startRawBridge(); } return; diff --git a/packages/cli/test/runtime/busy.test.ts b/packages/cli/test/runtime/busy.test.ts index bfce033..69c0019 100644 --- a/packages/cli/test/runtime/busy.test.ts +++ b/packages/cli/test/runtime/busy.test.ts @@ -7,8 +7,8 @@ * /presence/busy traffic to thrash on parallel tool fan-outs. */ -import { describe, expect, it, vi } from 'vitest'; -import { createBusySignal } from '../../src/runtime/trace/busy.js'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { createBusySignal, DEFAULT_MAX_AGE_MS } from '../../src/runtime/trace/busy.js'; describe('createBusySignal', () => { it('starts idle (count=0, busy=false)', () => { @@ -89,4 +89,225 @@ describe('createBusySignal', () => { b.start(); expect(listener).not.toHaveBeenCalled(); }); + + it('tracks per-source counts independently', () => { + const b = createBusySignal(); + const llm = b.start('llm_inflight'); + const tool1 = b.start('tool_inflight'); + const tool2 = b.start('tool_inflight'); + expect(b.getSourceCounts()).toEqual({ llm_inflight: 1, tool_inflight: 2 }); + expect(b.count).toBe(3); + expect(b.busy).toBe(true); + + llm.finish(); + expect(b.getSourceCounts()).toEqual({ llm_inflight: 0, tool_inflight: 2 }); + // Still busy — tool_inflight is non-zero. + expect(b.busy).toBe(true); + + tool1.finish(); + tool2.finish(); + expect(b.getSourceCounts()).toEqual({ llm_inflight: 0, tool_inflight: 0 }); + expect(b.busy).toBe(false); + }); + + it('emits only one 0→busy transition across mixed sources', () => { + const b = createBusySignal(); + const listener = vi.fn(); + b.subscribe(listener); + listener.mockClear(); + + const llm = b.start('llm_inflight'); + const tool = b.start('tool_inflight'); + expect(listener).toHaveBeenCalledTimes(1); + expect(listener).toHaveBeenLastCalledWith(true); + + llm.finish(); + // tool_inflight still in flight — no transition. 
+ expect(listener).toHaveBeenCalledTimes(1); + + tool.finish(); + // Now all sources drained — transition fires. + expect(listener).toHaveBeenCalledTimes(2); + expect(listener).toHaveBeenLastCalledWith(false); + }); + + it("defaults to 'llm_inflight' when start() is called without a source", () => { + // Backwards compatibility — existing MITM call sites that don't + // pass a source must keep behaving as they did. + const b = createBusySignal(); + const h = b.start(); + expect(b.getSourceCounts()).toEqual({ llm_inflight: 1, tool_inflight: 0 }); + h.finish(); + expect(b.getSourceCounts()).toEqual({ llm_inflight: 0, tool_inflight: 0 }); + }); + + it('a wedged source does not poison drain of the other', () => { + // Regression: if one feeder forgets to decrement, the other + // should still be able to report idle on its own track. The + // overall `busy` stays true (correct — there IS in-flight work + // by the contract) but `getSourceCounts()` makes the culprit + // diagnosable instead of leaving us guessing. 
+ const b = createBusySignal(); + b.start('llm_inflight', { maxAgeMs: Infinity }); // intentionally not finished, safety disabled + const tool = b.start('tool_inflight'); + tool.finish(); + expect(b.busy).toBe(true); + expect(b.getSourceCounts()).toEqual({ llm_inflight: 1, tool_inflight: 0 }); + }); +}); + +describe('createBusySignal — handle max-age safety net', () => { + beforeEach(() => { + vi.useFakeTimers(); + }); + afterEach(() => { + vi.useRealTimers(); + }); + + it('auto-finishes a handle that exceeds its max-age and emits the idle transition', () => { + const log = vi.fn(); + const b = createBusySignal({ log }); + const listener = vi.fn(); + b.subscribe(listener); + listener.mockClear(); + log.mockClear(); + + b.start('llm_inflight', { maxAgeMs: 1000 }); + expect(b.busy).toBe(true); + expect(listener).toHaveBeenCalledWith(true); + + vi.advanceTimersByTime(999); + expect(b.busy).toBe(true); // not yet + + vi.advanceTimersByTime(2); + expect(b.busy).toBe(false); + expect(listener).toHaveBeenLastCalledWith(false); + // Log carries the diagnostic context. + expect(log).toHaveBeenCalledWith( + 'busy: handle auto-finished', + expect.objectContaining({ source: 'llm_inflight', reason: 'timeout' }), + ); + }); + + it('respects a custom maxAgeMs even when the default is much larger', () => { + const b = createBusySignal(); + b.start('tool_inflight', { maxAgeMs: 50 }); + expect(b.busy).toBe(true); + vi.advanceTimersByTime(60); + expect(b.busy).toBe(false); + }); + + it('Infinity maxAgeMs disables the safety net entirely', () => { + const b = createBusySignal(); + b.start('llm_inflight', { maxAgeMs: Number.POSITIVE_INFINITY }); + expect(b.busy).toBe(true); + // Crank well past any default — handle should still be live. 
+ vi.advanceTimersByTime(DEFAULT_MAX_AGE_MS.llm_inflight * 10); + expect(b.busy).toBe(true); + }); + + it('finish() before the deadline cancels the timer (no double-finish)', () => { + const log = vi.fn(); + const b = createBusySignal({ log }); + const h = b.start('llm_inflight', { maxAgeMs: 1000 }); + expect(b.getSourceCounts().llm_inflight).toBe(1); + + h.finish(); + expect(b.getSourceCounts().llm_inflight).toBe(0); + + // Advance well past the original deadline — the timer should + // have been cleared, so no auto-finish fires (which would + // otherwise underflow the count or fire a spurious idle + // transition). + vi.advanceTimersByTime(5000); + expect(b.getSourceCounts().llm_inflight).toBe(0); + expect(log).not.toHaveBeenCalledWith('busy: handle auto-finished', expect.anything()); + }); + + it('applies the per-source default when maxAgeMs is omitted', () => { + const b = createBusySignal(); + b.start('tool_inflight'); + // 14:59 — not yet. + vi.advanceTimersByTime(DEFAULT_MAX_AGE_MS.tool_inflight - 1000); + expect(b.busy).toBe(true); + // Past 15min. + vi.advanceTimersByTime(2000); + expect(b.busy).toBe(false); + }); + + it('falls back to the default when maxAgeMs is non-positive or NaN', () => { + const b = createBusySignal(); + b.start('llm_inflight', { maxAgeMs: 0 }); + b.start('llm_inflight', { maxAgeMs: -100 }); + b.start('llm_inflight', { maxAgeMs: Number.NaN }); + expect(b.getSourceCounts().llm_inflight).toBe(3); + + // None of those should auto-finish before the source default. + vi.advanceTimersByTime(DEFAULT_MAX_AGE_MS.llm_inflight - 1); + expect(b.getSourceCounts().llm_inflight).toBe(3); + // Past the default. 
+ vi.advanceTimersByTime(2); + expect(b.getSourceCounts().llm_inflight).toBe(0); + }); +}); + +describe('createBusySignal — forceFinishAll', () => { + it('returns 0 and emits nothing when no work is in flight', () => { + const b = createBusySignal(); + const listener = vi.fn(); + b.subscribe(listener); + listener.mockClear(); + const drained = b.forceFinishAll(); + expect(drained).toBe(0); + expect(listener).not.toHaveBeenCalled(); + }); + + it('drains every live handle across both sources with a single idle transition', () => { + const b = createBusySignal(); + const listener = vi.fn(); + b.subscribe(listener); + listener.mockClear(); + + b.start('llm_inflight'); + b.start('llm_inflight'); + b.start('tool_inflight'); + expect(b.getSourceCounts()).toEqual({ llm_inflight: 2, tool_inflight: 1 }); + expect(listener).toHaveBeenCalledTimes(1); // 0→busy + + const drained = b.forceFinishAll(); + expect(drained).toBe(3); + expect(b.getSourceCounts()).toEqual({ llm_inflight: 0, tool_inflight: 0 }); + expect(b.busy).toBe(false); + // One additional listener fire — the busy→idle transition. No + // intermediate per-handle transitions because the counters stayed + // > 0 until the last one drained. + expect(listener).toHaveBeenCalledTimes(2); + expect(listener).toHaveBeenLastCalledWith(false); + }); + + it('subsequent finish() on a force-finished handle is a no-op', () => { + const b = createBusySignal(); + const h = b.start('llm_inflight'); + expect(b.getSourceCounts().llm_inflight).toBe(1); + + b.forceFinishAll(); + expect(b.getSourceCounts().llm_inflight).toBe(0); + + // The caller's handle.finish() shouldn't underflow. 
+ h.finish(); + expect(b.getSourceCounts().llm_inflight).toBe(0); + expect(b.busy).toBe(false); + }); + + it('logs the drain count for diagnostics', () => { + const log = vi.fn(); + const b = createBusySignal({ log }); + b.start('llm_inflight'); + b.start('tool_inflight'); + b.forceFinishAll(); + expect(log).toHaveBeenCalledWith( + 'busy: force-finished outstanding handles', + expect.objectContaining({ drained: 2 }), + ); + }); }); diff --git a/packages/cli/test/runtime/claude-code-adapter.test.ts b/packages/cli/test/runtime/claude-code-adapter.test.ts index 0b1bf98..e49b26d 100644 --- a/packages/cli/test/runtime/claude-code-adapter.test.ts +++ b/packages/cli/test/runtime/claude-code-adapter.test.ts @@ -21,7 +21,11 @@ import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'no import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { afterEach, beforeEach, describe, expect, it } from 'vitest'; -import { ClaudeCodeAdapterError, prepareMcpConfig } from '../../src/runtime/agents/claude-code.js'; +import { + ClaudeCodeAdapterError, + prepareClaudeSettings, + prepareMcpConfig, +} from '../../src/runtime/agents/claude-code.js'; describe('prepareMcpConfig', () => { let cwd: string; @@ -182,3 +186,155 @@ describe('prepareMcpConfig', () => { handle.restore(); }); }); + +describe('prepareClaudeSettings', () => { + let cwd: string; + const hookUrl = 'http://127.0.0.1:55555/hook/tool-event'; + + beforeEach(() => { + cwd = mkdtempSync(join(tmpdir(), 'ac7-claude-settings-test-')); + }); + + afterEach(() => { + rmSync(cwd, { recursive: true, force: true }); + }); + + it('creates .claude/settings.json when neither dir nor file existed, restore removes both', () => { + const dirPath = join(cwd, '.claude'); + const settingsPath = join(dirPath, 'settings.json'); + expect(existsSync(dirPath)).toBe(false); + + const handle = prepareClaudeSettings({ cwd, hookUrl }); + + expect(handle.path).toBe(settingsPath); + 
expect(existsSync(settingsPath)).toBe(true);
+    const written = JSON.parse(readFileSync(settingsPath, 'utf8'));
+    for (const event of ['PreToolUse', 'PostToolUse', 'PostToolUseFailure']) {
+      const matchers = written.hooks[event];
+      expect(Array.isArray(matchers)).toBe(true);
+      const ac7 = matchers
+        .flatMap((m: { hooks: unknown[] }) => m.hooks)
+        .find(
+          (h: Record<string, unknown>) =>
+            h.type === 'http' && h.url === hookUrl && h.x_ac7_busy_feeder === true,
+        );
+      expect(ac7).toBeTruthy();
+    }
+
+    handle.restore();
+    expect(existsSync(settingsPath)).toBe(false);
+    expect(existsSync(dirPath)).toBe(false);
+  });
+
+  it('merges into existing settings.json while preserving other keys + other hooks', () => {
+    const dirPath = join(cwd, '.claude');
+    const settingsPath = join(dirPath, 'settings.json');
+    // Pre-existing user config: a Stop hook AND an unrelated top-level key.
+    require('node:fs').mkdirSync(dirPath, { recursive: true });
+    writeFileSync(
+      settingsPath,
+      JSON.stringify({
+        permissions: { allow: ['Bash'] },
+        hooks: {
+          Stop: [{ matcher: '*', hooks: [{ type: 'command', command: 'notify-send done' }] }],
+        },
+      }),
+    );
+
+    const handle = prepareClaudeSettings({ cwd, hookUrl });
+    const merged = JSON.parse(readFileSync(settingsPath, 'utf8'));
+
+    // Unrelated top-level key preserved.
+    expect(merged.permissions).toEqual({ allow: ['Bash'] });
+    // User's Stop hook preserved verbatim.
+    expect(merged.hooks.Stop).toEqual([
+      { matcher: '*', hooks: [{ type: 'command', command: 'notify-send done' }] },
+    ]);
+    // Our PreToolUse hook injected.
+    expect(merged.hooks.PreToolUse).toBeTruthy();
+    const preToolEntries = merged.hooks.PreToolUse.flatMap((m: { hooks: unknown[] }) => m.hooks);
+    expect(preToolEntries).toContainEqual({
+      type: 'http',
+      url: hookUrl,
+      x_ac7_busy_feeder: true,
+    });
+
+    handle.restore();
+    // Restore writes the original bytes back — Stop hook still there,
+    // our PreToolUse gone.
+    const restored = JSON.parse(readFileSync(settingsPath, 'utf8'));
+    expect(restored.hooks.PreToolUse).toBeUndefined();
+    expect(restored.hooks.Stop).toEqual([
+      { matcher: '*', hooks: [{ type: 'command', command: 'notify-send done' }] },
+    ]);
+    expect(restored.permissions).toEqual({ allow: ['Bash'] });
+  });
+
+  it('drops a stale ac7 hook entry from a previous crash before injecting fresh ones', () => {
+    const dirPath = join(cwd, '.claude');
+    const settingsPath = join(dirPath, 'settings.json');
+    require('node:fs').mkdirSync(dirPath, { recursive: true });
+    // Simulate a previous run that crashed mid-restore, leaving a
+    // stale ac7 entry behind. Our prepare should NOT duplicate it.
+    writeFileSync(
+      settingsPath,
+      JSON.stringify({
+        hooks: {
+          PreToolUse: [
+            {
+              matcher: '*',
+              hooks: [
+                { type: 'http', url: 'http://stale.local/hook', x_ac7_busy_feeder: true },
+                { type: 'command', command: 'audit-log' },
+              ],
+            },
+          ],
+        },
+      }),
+    );
+
+    const handle = prepareClaudeSettings({ cwd, hookUrl });
+    const merged = JSON.parse(readFileSync(settingsPath, 'utf8'));
+    const entries = merged.hooks.PreToolUse.flatMap(
+      (m: { hooks: Record<string, unknown>[] }) => m.hooks,
+    );
+    // Stale ac7 entry gone.
+    expect(
+      entries.filter((e: Record<string, unknown>) => e.url === 'http://stale.local/hook'),
+    ).toHaveLength(0);
+    // User's unrelated audit-log hook preserved.
+    expect(entries.find((e: Record<string, unknown>) => e.command === 'audit-log')).toBeTruthy();
+    // Fresh ac7 entry pointing at the current hook URL.
+    expect(entries.find((e: Record<string, unknown>) => e.url === hookUrl)).toBeTruthy();
+    handle.restore();
+  });
+
+  it('refuses to modify when existing settings.json is not valid JSON', () => {
+    const dirPath = join(cwd, '.claude');
+    const settingsPath = join(dirPath, 'settings.json');
+    require('node:fs').mkdirSync(dirPath, { recursive: true });
+    writeFileSync(settingsPath, 'not-json');
+    expect(() => prepareClaudeSettings({ cwd, hookUrl })).toThrow(ClaudeCodeAdapterError);
+    // Original file untouched.
+ expect(readFileSync(settingsPath, 'utf8')).toBe('not-json'); + }); + + it('restore() is idempotent — second call is a no-op', () => { + const handle = prepareClaudeSettings({ cwd, hookUrl }); + handle.restore(); + expect(() => handle.restore()).not.toThrow(); + }); + + it('preserves the .claude/ dir on restore if other files live there', () => { + const dirPath = join(cwd, '.claude'); + require('node:fs').mkdirSync(dirPath, { recursive: true }); + // User had something else in .claude/ but no settings.json yet. + writeFileSync(join(dirPath, 'agents.md'), '# my agents'); + const handle = prepareClaudeSettings({ cwd, hookUrl }); + handle.restore(); + // Settings file gone, but the .claude/ dir + agents.md stay because + // the dir existed before our prepare touched it. + expect(existsSync(join(dirPath, 'settings.json'))).toBe(false); + expect(existsSync(join(dirPath, 'agents.md'))).toBe(true); + }); +}); diff --git a/packages/cli/test/runtime/codex/busy-sniff.test.ts b/packages/cli/test/runtime/codex/busy-sniff.test.ts new file mode 100644 index 0000000..7705ac4 --- /dev/null +++ b/packages/cli/test/runtime/codex/busy-sniff.test.ts @@ -0,0 +1,215 @@ +/** + * Tests for the codex tool-busy sniff layer. + * + * Drives the real `attachCodexBusySniff` helper against the real + * JSON-RPC client over a pair of in-memory streams — same wiring the + * adapter uses in production, no subprocess required. 
+ *
+ * Pins the contract:
+ * - `item/started` with a TOOL_ITEM_TYPES type bumps tool_inflight
+ * - `item/started` with a non-tool type (agentMessage, reasoning,
+ *   userMessage) is a no-op
+ * - matching `item/completed` decrements
+ * - missing `item/completed` is swept by `turn/completed`
+ * - explicit `drain()` finishes anything left over
+ */
+
+import { PassThrough } from 'node:stream';
+import { afterEach, describe, expect, it } from 'vitest';
+import { attachCodexBusySniff } from '../../../src/runtime/agents/codex/busy-sniff.js';
+import { createJsonRpcClient } from '../../../src/runtime/agents/codex/json-rpc.js';
+import { createBusySignal } from '../../../src/runtime/trace/busy.js';
+
+function pair(): {
+  client: ReturnType<typeof createJsonRpcClient>;
+  send: (notification: unknown) => void;
+  cleanup: () => void;
+} {
+  const serverOut = new PassThrough();
+  const serverIn = new PassThrough();
+  const client = createJsonRpcClient(serverOut, serverIn);
+  // The notification stream is server→client; we WRITE to serverOut
+  // and the client reads it. Each message is one JSON line.
+  const send = (notification: unknown): void => {
+    serverOut.write(`${JSON.stringify(notification)}\n`);
+  };
+  return {
+    client,
+    send,
+    cleanup: () => {
+      client.close('test-end');
+      serverOut.destroy();
+      serverIn.destroy();
+    },
+  };
+}
+
+// Notifications are observed asynchronously off the stream; in
+// practice they post within microtask order, but yielding a full
+// event-loop tick makes the tests resilient to scheduler quirks.
+const tick = (): Promise<void> => new Promise((r) => setImmediate(r));
+
+describe('attachCodexBusySniff', () => {
+  const teardowns: Array<() => void> = [];
+
+  afterEach(() => {
+    while (teardowns.length > 0) teardowns.pop()?.();
+  });
+
+  it('bumps tool_inflight on commandExecution start and drains on completion', async () => {
+    const { client, send, cleanup } = pair();
+    teardowns.push(cleanup);
+    const busy = createBusySignal();
+    attachCodexBusySniff({ rpc: client, busy });
+
+    send({
+      method: 'item/started',
+      params: {
+        threadId: 't1',
+        turnId: 'turn-1',
+        item: { type: 'commandExecution', id: 'item-1', command: 'ls' },
+      },
+    });
+    await tick();
+    expect(busy.busy).toBe(true);
+    expect(busy.getSourceCounts().tool_inflight).toBe(1);
+
+    send({
+      method: 'item/completed',
+      params: {
+        threadId: 't1',
+        turnId: 'turn-1',
+        item: { type: 'commandExecution', id: 'item-1' },
+      },
+    });
+    await tick();
+    expect(busy.busy).toBe(false);
+    expect(busy.getSourceCounts().tool_inflight).toBe(0);
+  });
+
+  it('bumps for fileChange and mcpToolCall types too', async () => {
+    const { client, send, cleanup } = pair();
+    teardowns.push(cleanup);
+    const busy = createBusySignal();
+    attachCodexBusySniff({ rpc: client, busy });
+
+    for (const [type, id] of [
+      ['fileChange', 'fc-1'],
+      ['mcpToolCall', 'mcp-1'],
+    ]) {
+      send({
+        method: 'item/started',
+        params: { threadId: 't1', turnId: 'turn-1', item: { type, id } },
+      });
+    }
+    await tick();
+    expect(busy.getSourceCounts().tool_inflight).toBe(2);
+  });
+
+  it('does NOT bump for non-tool item types (agentMessage, reasoning, userMessage, turnPlan)', async () => {
+    const { client, send, cleanup } = pair();
+    teardowns.push(cleanup);
+    const busy = createBusySignal();
+    attachCodexBusySniff({ rpc: client, busy });
+
+    for (const type of ['agentMessage', 'reasoning', 'userMessage', 'turnPlan']) {
+      send({
+        method: 'item/started',
+        params: { threadId: 't1', turnId: 'turn-1', item: { type, id: `${type}-1` } },
+      });
+    }
+    await tick();
+ expect(busy.busy).toBe(false); + expect(busy.getSourceCounts().tool_inflight).toBe(0); + }); + + it('ignores duplicate item/started for the same id', async () => { + const { client, send, cleanup } = pair(); + teardowns.push(cleanup); + const busy = createBusySignal(); + attachCodexBusySniff({ rpc: client, busy }); + + for (let i = 0; i < 3; i++) { + send({ + method: 'item/started', + params: { + threadId: 't1', + turnId: 'turn-1', + item: { type: 'commandExecution', id: 'dup' }, + }, + }); + } + await tick(); + expect(busy.getSourceCounts().tool_inflight).toBe(1); + + send({ + method: 'item/completed', + params: { threadId: 't1', turnId: 'turn-1', item: { type: 'commandExecution', id: 'dup' } }, + }); + await tick(); + expect(busy.getSourceCounts().tool_inflight).toBe(0); + }); + + it('turn/completed sweeps tool handles that never got matching item/completed', async () => { + const { client, send, cleanup } = pair(); + teardowns.push(cleanup); + const busy = createBusySignal(); + attachCodexBusySniff({ rpc: client, busy }); + + // Two tool starts, no completions. 
+ send({ + method: 'item/started', + params: { threadId: 't1', turnId: 'turn-1', item: { type: 'commandExecution', id: 'a' } }, + }); + send({ + method: 'item/started', + params: { threadId: 't1', turnId: 'turn-1', item: { type: 'commandExecution', id: 'b' } }, + }); + await tick(); + expect(busy.getSourceCounts().tool_inflight).toBe(2); + + send({ + method: 'turn/completed', + params: { threadId: 't1', turn: { id: 'turn-1', status: 'completed' } }, + }); + await tick(); + expect(busy.busy).toBe(false); + expect(busy.getSourceCounts().tool_inflight).toBe(0); + }); + + it('explicit drain() finishes anything still in flight', async () => { + const { client, send, cleanup } = pair(); + teardowns.push(cleanup); + const busy = createBusySignal(); + const sniff = attachCodexBusySniff({ rpc: client, busy }); + + send({ + method: 'item/started', + params: { + threadId: 't1', + turnId: 'turn-1', + item: { type: 'commandExecution', id: 'orphan' }, + }, + }); + await tick(); + expect(sniff.inFlight).toBe(1); + + sniff.drain(); + expect(sniff.inFlight).toBe(0); + expect(busy.busy).toBe(false); + }); + + it('items lacking an id are skipped (cannot key the handle)', async () => { + const { client, send, cleanup } = pair(); + teardowns.push(cleanup); + const busy = createBusySignal(); + attachCodexBusySniff({ rpc: client, busy }); + + send({ + method: 'item/started', + params: { threadId: 't1', turnId: 'turn-1', item: { type: 'commandExecution' } }, + }); + await tick(); + expect(busy.busy).toBe(false); + }); +}); diff --git a/packages/cli/test/runtime/trace-hook-server.test.ts b/packages/cli/test/runtime/trace-hook-server.test.ts new file mode 100644 index 0000000..d3e9735 --- /dev/null +++ b/packages/cli/test/runtime/trace-hook-server.test.ts @@ -0,0 +1,164 @@ +/** + * Hook server tests. 
+ * + * Pins the busy-signal contract the Claude Code hook endpoint + * implements: + * + * - PreToolUse bumps tool_inflight by tool_use_id + * - PostToolUse / PostToolUseFailure decrement the matching handle + * - Mismatched events (Post without a prior Pre, duplicate Pre, etc.) + * do not corrupt the count + * - Bad bodies are rejected with 4xx + * - close() drains every outstanding handle so a torn-down runner + * can't leave the indicator wedged + * + * The HTTP server binds on 127.0.0.1:0 (random ephemeral port) so the + * tests are hermetic and don't collide with anything else listening. + */ + +import { afterEach, describe, expect, it } from 'vitest'; +import { createBusySignal } from '../../src/runtime/trace/busy.js'; +import { type HookServer, startHookServer } from '../../src/runtime/trace/hook-server.js'; + +async function postJson(url: string, body: unknown): Promise<{ status: number; text: string }> { + const res = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + }); + return { status: res.status, text: await res.text() }; +} + +describe('hook server', () => { + let server: HookServer | null = null; + + afterEach(async () => { + if (server) { + await server.close().catch(() => {}); + server = null; + } + }); + + it('PreToolUse bumps tool_inflight; PostToolUse drains it', async () => { + const busy = createBusySignal(); + server = await startHookServer({ busy, log: () => {} }); + + expect(busy.getSourceCounts().tool_inflight).toBe(0); + + const r1 = await postJson(server.url, { + hook_event_name: 'PreToolUse', + tool_use_id: 'tool-1', + tool_name: 'Bash', + }); + expect(r1.status).toBe(200); + expect(busy.busy).toBe(true); + expect(busy.getSourceCounts().tool_inflight).toBe(1); + + const r2 = await postJson(server.url, { + hook_event_name: 'PostToolUse', + tool_use_id: 'tool-1', + tool_name: 'Bash', + }); + expect(r2.status).toBe(200); + expect(busy.busy).toBe(false); + 
expect(busy.getSourceCounts().tool_inflight).toBe(0); + }); + + it('counts overlapping tool calls correctly', async () => { + const busy = createBusySignal(); + server = await startHookServer({ busy, log: () => {} }); + + await postJson(server.url, { hook_event_name: 'PreToolUse', tool_use_id: 'a' }); + await postJson(server.url, { hook_event_name: 'PreToolUse', tool_use_id: 'b' }); + await postJson(server.url, { hook_event_name: 'PreToolUse', tool_use_id: 'c' }); + expect(busy.getSourceCounts().tool_inflight).toBe(3); + + await postJson(server.url, { hook_event_name: 'PostToolUse', tool_use_id: 'b' }); + expect(busy.getSourceCounts().tool_inflight).toBe(2); + expect(busy.busy).toBe(true); + + await postJson(server.url, { hook_event_name: 'PostToolUse', tool_use_id: 'a' }); + await postJson(server.url, { hook_event_name: 'PostToolUseFailure', tool_use_id: 'c' }); + expect(busy.getSourceCounts().tool_inflight).toBe(0); + expect(busy.busy).toBe(false); + }); + + it('duplicate PreToolUse for the same id is a no-op', async () => { + const busy = createBusySignal(); + server = await startHookServer({ busy, log: () => {} }); + + await postJson(server.url, { hook_event_name: 'PreToolUse', tool_use_id: 'dup' }); + await postJson(server.url, { hook_event_name: 'PreToolUse', tool_use_id: 'dup' }); + expect(busy.getSourceCounts().tool_inflight).toBe(1); + + await postJson(server.url, { hook_event_name: 'PostToolUse', tool_use_id: 'dup' }); + expect(busy.getSourceCounts().tool_inflight).toBe(0); + }); + + it('PostToolUse for an unknown id is silently ignored (no underflow)', async () => { + const busy = createBusySignal(); + server = await startHookServer({ busy, log: () => {} }); + + const res = await postJson(server.url, { + hook_event_name: 'PostToolUse', + tool_use_id: 'never-saw-this', + }); + expect(res.status).toBe(200); + expect(busy.getSourceCounts().tool_inflight).toBe(0); + }); + + it('non-tool events (SessionStart, Stop, etc.) 
are accepted without bumping', async () => { + const busy = createBusySignal(); + server = await startHookServer({ busy, log: () => {} }); + + await postJson(server.url, { hook_event_name: 'SessionStart' }); + await postJson(server.url, { hook_event_name: 'Stop' }); + await postJson(server.url, { hook_event_name: 'UserPromptSubmit' }); + expect(busy.busy).toBe(false); + expect(busy.getSourceCounts().tool_inflight).toBe(0); + }); + + it('rejects malformed bodies with 400', async () => { + const busy = createBusySignal(); + server = await startHookServer({ busy, log: () => {} }); + + const res = await fetch(server.url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: 'not-json', + }); + expect(res.status).toBe(400); + expect(busy.getSourceCounts().tool_inflight).toBe(0); + }); + + it('returns 200 with accepted=false when fields are missing (avoid retry storms)', async () => { + const busy = createBusySignal(); + server = await startHookServer({ busy, log: () => {} }); + + const res = await postJson(server.url, { hook_event_name: 'PreToolUse' }); + expect(res.status).toBe(200); + const body = JSON.parse(res.text); + expect(body.accepted).toBe(false); + }); + + it('non-matching routes return 404', async () => { + const busy = createBusySignal(); + server = await startHookServer({ busy, log: () => {} }); + + const res = await fetch(server.url.replace('/hook/tool-event', '/something-else')); + expect(res.status).toBe(404); + }); + + it('close() drains outstanding handles so busy unwedges', async () => { + const busy = createBusySignal(); + server = await startHookServer({ busy, log: () => {} }); + + await postJson(server.url, { hook_event_name: 'PreToolUse', tool_use_id: 'left-dangling' }); + expect(busy.busy).toBe(true); + + await server.close(); + server = null; + expect(busy.busy).toBe(false); + expect(busy.getSourceCounts().tool_inflight).toBe(0); + }); +}); diff --git a/packages/cli/test/runtime/trace-host.test.ts 
b/packages/cli/test/runtime/trace-host.test.ts
index bf0189e..f01204c 100644
--- a/packages/cli/test/runtime/trace-host.test.ts
+++ b/packages/cli/test/runtime/trace-host.test.ts
@@ -160,4 +160,65 @@ describe('TraceHost', () => {
       await host.close();
     }
   });
+
+  it('close() force-drains leaked busy handles as the final teardown safety net', async () => {
+    // Simulates the production failure mode: a sub-system (proxy
+    // reassembler, hook server, codex sniff) failed to drain a
+    // handle before its own close. Without this safety net, the
+    // runner's busy reporter would keep heartbeating `busy: true`
+    // until its AbortController fires, then post a final
+    // `busy: false`. The forceFinishAll() inside TraceHost.close()
+    // makes the indicator drop earlier and protects against bugs
+    // where the reassembler's flush path silently misses a handle.
+    const logged: Array<{ msg: string; ctx?: Record<string, unknown> }> = [];
+    const host = await startTraceHost({
+      log: (msg, ctx) => logged.push({ msg, ctx }),
+      caCertPath: join(tmpDir, 'leak.pem'),
+      brokerClient: stubBrokerClient(),
+      name: 'TEST',
+    });
+
+    // Leak a handle as if a reassembler bug forgot to call finish().
+    // Disable the max-age timer so this test isn't racing the 5min
+    // default — we want close() to do the draining, not the timer.
+    host.busy.start('llm_inflight', { maxAgeMs: Number.POSITIVE_INFINITY });
+    host.busy.start('tool_inflight', { maxAgeMs: Number.POSITIVE_INFINITY });
+    expect(host.busy.busy).toBe(true);
+    expect(host.busy.getSourceCounts()).toEqual({ llm_inflight: 1, tool_inflight: 1 });
+
+    await host.close();
+
+    expect(host.busy.busy).toBe(false);
+    expect(host.busy.getSourceCounts()).toEqual({ llm_inflight: 0, tool_inflight: 0 });
+    // The diagnostic log fires with the pre-drain counts so
+    // operators can tell which source leaked.
+ const drainLog = logged.find( + (l) => l.msg === 'trace-host: force-drained leaked busy handles at teardown', + ); + expect(drainLog).toBeTruthy(); + expect(drainLog?.ctx?.drained).toBe(2); + expect(drainLog?.ctx?.sourceCounts).toEqual({ + llm_inflight: 1, + tool_inflight: 1, + }); + }); + + it('close() does not log the drain message when no handles leaked', async () => { + // Happy path: every sub-system drained its handles correctly, + // so forceFinishAll() returns 0 and stays quiet. We don't want + // the diagnostic log to fire on normal shutdowns since it would + // be noise. + const logged: Array<{ msg: string }> = []; + const host = await startTraceHost({ + log: (msg) => logged.push({ msg }), + caCertPath: join(tmpDir, 'clean.pem'), + brokerClient: stubBrokerClient(), + name: 'TEST', + }); + await host.close(); + const drainLog = logged.find( + (l) => l.msg === 'trace-host: force-drained leaked busy handles at teardown', + ); + expect(drainLog).toBeUndefined(); + }); }); diff --git a/packages/cli/test/runtime/trace-known-hosts.test.ts b/packages/cli/test/runtime/trace-known-hosts.test.ts new file mode 100644 index 0000000..a2d9994 --- /dev/null +++ b/packages/cli/test/runtime/trace-known-hosts.test.ts @@ -0,0 +1,66 @@ +/** + * Allowlist predicate tests — pin the hostnames the trace proxy is + * willing to MITM-decrypt. The proxy itself takes the predicate by + * injection, but the production wiring (`host.ts`) uses this exact + * function; if the predicate's behavior changes silently, agents stop + * generating structured LLM traces or, worse, the proxy starts + * decrypting hosts we promised it wouldn't. 
+ */ + +import { describe, expect, it } from 'vitest'; +import { isKnownLlmHost, KNOWN_LLM_HOST_PATTERNS } from '../../src/runtime/trace/known-hosts.js'; + +describe('isKnownLlmHost', () => { + it('matches the Anthropic apex and its subdomains', () => { + expect(isKnownLlmHost('anthropic.com')).toBe(true); + expect(isKnownLlmHost('api.anthropic.com')).toBe(true); + expect(isKnownLlmHost('console.anthropic.com')).toBe(true); + expect(isKnownLlmHost('auth.anthropic.com')).toBe(true); + }); + + it('matches the OpenAI apex and its subdomains', () => { + expect(isKnownLlmHost('openai.com')).toBe(true); + expect(isKnownLlmHost('api.openai.com')).toBe(true); + expect(isKnownLlmHost('auth.openai.com')).toBe(true); + }); + + it('matches Azure OpenAI customer subdomains', () => { + expect(isKnownLlmHost('example.openai.azure.com')).toBe(true); + expect(isKnownLlmHost('my-deployment.openai.azure.com')).toBe(true); + }); + + it('is case-insensitive', () => { + expect(isKnownLlmHost('API.ANTHROPIC.COM')).toBe(true); + expect(isKnownLlmHost('Api.Openai.Com')).toBe(true); + }); + + it('rejects hosts not on the allowlist', () => { + expect(isKnownLlmHost('github.com')).toBe(false); + expect(isKnownLlmHost('api.github.com')).toBe(false); + expect(isKnownLlmHost('example.com')).toBe(false); + expect(isKnownLlmHost('telemetry.example.com')).toBe(false); + expect(isKnownLlmHost('registry.npmjs.org')).toBe(false); + }); + + it('does NOT match suffix tricks (anthropic.com-as-prefix attacks)', () => { + // `anthropic.com.evil.com` ends with `.com`, not `anthropic.com`. + // The regex anchors the apex on the right and requires either + // line-start or a `.` separator on the left. 
+    expect(isKnownLlmHost('anthropic.com.evil.com')).toBe(false);
+    expect(isKnownLlmHost('evil-anthropic.com')).toBe(false);
+    expect(isKnownLlmHost('xanthropic.com')).toBe(false);
+    expect(isKnownLlmHost('openai.com.attacker.net')).toBe(false);
+  });
+
+  it('rejects empty / malformed input safely', () => {
+    expect(isKnownLlmHost('')).toBe(false);
+  });
+
+  it('exports the underlying patterns so callers can introspect', () => {
+    expect(KNOWN_LLM_HOST_PATTERNS.length).toBeGreaterThan(0);
+    // Sanity: each pattern is a RegExp.
+    for (const p of KNOWN_LLM_HOST_PATTERNS) {
+      expect(p).toBeInstanceOf(RegExp);
+    }
+  });
+});
diff --git a/packages/cli/test/runtime/trace-proxy.test.ts b/packages/cli/test/runtime/trace-proxy.test.ts
index 3831422..2891899 100644
--- a/packages/cli/test/runtime/trace-proxy.test.ts
+++ b/packages/cli/test/runtime/trace-proxy.test.ts
@@ -334,3 +334,220 @@ describe('proxy raw TCP fallback path', () => {
     expect(proxy.proxyUrl.startsWith('http://')).toBe(true);
   });
 });
+
+describe('proxy shouldMitm gating', () => {
+  const cleanups: Array<() => Promise<void>> = [];
+
+  afterEach(async () => {
+    while (cleanups.length > 0) {
+      const c = cleanups.pop();
+      if (c) await c().catch(() => {});
+    }
+  });
+
+  it('MITMs hosts the predicate accepts (mitm: true, plaintext captured)', async () => {
+    const upstream = await startHttpsEchoServer();
+    cleanups.push(() => new Promise<void>((r) => upstream.server.close(() => r())));
+
+    const chunks: ProxyChunk[] = [];
+    const sessions: ProxySession[] = [];
+    const ca = createTraceCa();
+    const certPool = createCertPool(ca);
+    const proxy = await startProxyRelay({
+      log: () => {},
+      certPool,
+      // Predicate always accepts — equivalent to legacy behavior.
+      shouldMitm: () => true,
+      upstreamTlsOptions: { rejectUnauthorized: false },
+      onChunk: (c) => chunks.push({ ...c, bytes: Buffer.from(c.bytes) }),
+      onSessionEnd: (s) => sessions.push(s),
+    });
+    cleanups.push(() => proxy.close());
+
+    // CONNECT + TLS-trust-our-CA handshake (mirrors what a real Node
+    // agent does under HTTPS_PROXY + NODE_EXTRA_CA_CERTS).
+    const tcp = tcpConnect({ host: proxy.host, port: proxy.port });
+    await new Promise<void>((r, j) => {
+      tcp.once('connect', () => r());
+      tcp.once('error', j);
+    });
+    tcp.write(
+      `CONNECT localhost:${upstream.port} HTTP/1.1\r\nHost: localhost:${upstream.port}\r\n\r\n`,
+    );
+    await new Promise<void>((resolve) => {
+      let buf = '';
+      const onData = (d: Buffer): void => {
+        buf += d.toString('utf8');
+        if (buf.includes('\r\n\r\n')) {
+          tcp.off('data', onData);
+          resolve();
+        }
+      };
+      tcp.on('data', onData);
+    });
+    const client = tlsConnect({
+      socket: tcp,
+      servername: 'localhost',
+      ca: [ca.caCertPem],
+      minVersion: 'TLSv1.2',
+    });
+    await new Promise<void>((r, j) => {
+      client.once('secureConnect', () => r());
+      client.once('error', j);
+    });
+    client.write(`POST /v1/messages HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5\r\n\r\nhello`);
+    await new Promise((resolve) => {
+      const parts: Buffer[] = [];
+      client.on('data', (d: Buffer) => parts.push(d));
+      client.on('end', () => resolve(Buffer.concat(parts)));
+    });
+    await new Promise((r) => setTimeout(r, 50));
+
+    const clientText = Buffer.concat(
+      chunks.filter((c) => c.direction === 'client_to_upstream').map((c) => c.bytes),
+    ).toString('utf8');
+    expect(clientText).toContain('POST /v1/messages');
+    expect(sessions).toHaveLength(1);
+    expect(sessions[0]?.mitm).toBe(true);
+  }, 15_000);
+
+  it('raw-tunnels hosts the predicate rejects (mitm: false, ciphertext only)', async () => {
+    const upstream = await startHttpsEchoServer();
+    cleanups.push(() => new Promise<void>((r) => upstream.server.close(() => r())));
+
+    const chunks: ProxyChunk[] = [];
+    const sessions: ProxySession[] = [];
+    const ca = createTraceCa();
+    const certPool = createCertPool(ca);
+    const proxy = await startProxyRelay({
+      log: () => {},
+      certPool,
+      // Predicate rejects — proxy must fall back to raw TCP tunnel
+      // even though a CertPool is configured. The client's TLS
+      // handshake completes against the upstream's real cert.
+      shouldMitm: () => false,
+      onChunk: (c) => chunks.push({ ...c, bytes: Buffer.from(c.bytes) }),
+      onSessionEnd: (s) => sessions.push(s),
+    });
+    cleanups.push(() => proxy.close());
+
+    const tcp = tcpConnect({ host: proxy.host, port: proxy.port });
+    await new Promise<void>((r, j) => {
+      tcp.once('connect', () => r());
+      tcp.once('error', j);
+    });
+    tcp.write(
+      `CONNECT localhost:${upstream.port} HTTP/1.1\r\nHost: localhost:${upstream.port}\r\n\r\n`,
+    );
+    await new Promise<void>((resolve) => {
+      let buf = '';
+      const onData = (d: Buffer): void => {
+        buf += d.toString('utf8');
+        if (buf.includes('\r\n\r\n')) {
+          tcp.off('data', onData);
+          resolve();
+        }
+      };
+      tcp.on('data', onData);
+    });
+    // Important: trust the upstream cert directly (no MITM cert
+    // involved), exactly as a real agent would when system trust
+    // applies end-to-end.
+    const client = tlsConnect({
+      socket: tcp,
+      servername: 'localhost',
+      ca: [upstream.cert],
+      // The test upstream cert is self-signed for 'localhost'; we
+      // still want a real handshake to validate the raw-tunnel
+      // works at the application layer.
+      rejectUnauthorized: true,
+      minVersion: 'TLSv1.2',
+    });
+    await new Promise<void>((r, j) => {
+      client.once('secureConnect', () => r());
+      client.once('error', j);
+    });
+    client.write(`POST /v1/messages HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5\r\n\r\nhello`);
+    await new Promise((resolve) => {
+      const parts: Buffer[] = [];
+      client.on('data', (d: Buffer) => parts.push(d));
+      client.on('end', () => resolve(Buffer.concat(parts)));
+    });
+    await new Promise((r) => setTimeout(r, 50));
+
+    // Captured bytes are encrypted TLS records — the plaintext HTTP
+    // request never appears in the chunks. This is the privacy
+    // promise of scoped MITM: hosts off the allowlist are not
+    // decrypted, even though we routed their traffic.
+    const everything = Buffer.concat(chunks.map((c) => c.bytes)).toString('utf8');
+    expect(everything).not.toContain('POST /v1/messages');
+    expect(everything).not.toContain('hello');
+    // Session metadata is still produced — we know the agent talked
+    // to this host, just not what was said.
+    expect(sessions).toHaveLength(1);
+    expect(sessions[0]?.mitm).toBe(false);
+    expect(sessions[0]?.upstream.host).toBe('localhost');
+    expect(sessions[0]?.upstream.port).toBe(upstream.port);
+    expect(sessions[0]?.bytesOut).toBeGreaterThan(0);
+    expect(sessions[0]?.bytesIn).toBeGreaterThan(0);
+  }, 15_000);
+
+  it('omitted shouldMitm preserves legacy "MITM whenever certPool is set" behavior', async () => {
+    // This guards the migration path: existing callers that didn't
+    // pass shouldMitm should still get MITM behavior.
+    const upstream = await startHttpsEchoServer();
+    cleanups.push(() => new Promise<void>((r) => upstream.server.close(() => r())));
+
+    const sessions: ProxySession[] = [];
+    const ca = createTraceCa();
+    const certPool = createCertPool(ca);
+    const proxy = await startProxyRelay({
+      log: () => {},
+      certPool,
+      // shouldMitm intentionally omitted.
+      upstreamTlsOptions: { rejectUnauthorized: false },
+      onSessionEnd: (s) => sessions.push(s),
+    });
+    cleanups.push(() => proxy.close());
+
+    const tcp = tcpConnect({ host: proxy.host, port: proxy.port });
+    await new Promise<void>((r, j) => {
+      tcp.once('connect', () => r());
+      tcp.once('error', j);
+    });
+    tcp.write(
+      `CONNECT localhost:${upstream.port} HTTP/1.1\r\nHost: localhost:${upstream.port}\r\n\r\n`,
+    );
+    await new Promise<void>((resolve) => {
+      let buf = '';
+      const onData = (d: Buffer): void => {
+        buf += d.toString('utf8');
+        if (buf.includes('\r\n\r\n')) {
+          tcp.off('data', onData);
+          resolve();
+        }
+      };
+      tcp.on('data', onData);
+    });
+    const client = tlsConnect({
+      socket: tcp,
+      servername: 'localhost',
+      ca: [ca.caCertPem],
+      minVersion: 'TLSv1.2',
+    });
+    await new Promise<void>((r, j) => {
+      client.once('secureConnect', () => r());
+      client.once('error', j);
+    });
+    client.write(`GET / HTTP/1.1\r\nHost: localhost\r\n\r\n`);
+    await new Promise((resolve) => {
+      const parts: Buffer[] = [];
+      client.on('data', (d: Buffer) => parts.push(d));
+      client.on('end', () => resolve(Buffer.concat(parts)));
+    });
+    await new Promise((r) => setTimeout(r, 50));
+
+    expect(sessions).toHaveLength(1);
+    expect(sessions[0]?.mitm).toBe(true);
+  }, 15_000);
+});
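For orientation, the predicate shape the `trace-known-hosts` tests above pin down can be sketched as follows. This is an illustrative reconstruction from the test expectations only — the shipped `runtime/trace/known-hosts.ts` is the source of truth, and the exact pattern list and regex spelling below are assumptions:

```typescript
// Sketch: suffix-anchored, case-insensitive host matching.
// Pattern list is assumed from the hosts the tests exercise.
const KNOWN_LLM_HOST_PATTERNS: RegExp[] = [
  /(^|\.)anthropic\.com$/i, // apex + any subdomain
  /(^|\.)openai\.com$/i,
  /(^|\.)openai\.azure\.com$/i, // Azure OpenAI customer subdomains
];

function isKnownLlmHost(host: string): boolean {
  // The `$` anchor defeats `anthropic.com.evil.com`; the `(^|\.)`
  // group defeats `xanthropic.com` and `evil-anthropic.com`.
  return KNOWN_LLM_HOST_PATTERNS.some((p) => p.test(host));
}
```

The key design point the tests encode is that matching must be anchored on the right (apex suffix) and separated on the left (start-of-string or a literal dot), so neither prefix nor suffix tricks can smuggle an attacker-controlled host onto the decrypt path.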