diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml
index 105e86692a..3af408ef91 100644
--- a/.github/workflows/e2e-vitest-scenarios.yaml
+++ b/.github/workflows/e2e-vitest-scenarios.yaml
@@ -40,7 +40,7 @@ jobs:
           SCENARIOS: ${{ inputs.scenarios }}
         run: |
           set -euo pipefail
-          allowed_jobs="openshell-version-pin-vitest,onboard-negative-paths-vitest,credential-migration-vitest,runtime-overrides-vitest,hermes-e2e-vitest,network-policy-vitest,rebuild-openclaw-vitest,token-rotation-vitest,launchable-smoke-vitest,openclaw-tui-chat-correlation-vitest,gateway-guard-recovery,double-onboard-vitest,issue-4434-tui-unreachable-inference-vitest"
+          allowed_jobs="openshell-version-pin-vitest,onboard-negative-paths-vitest,inference-routing-vitest,credential-migration-vitest,runtime-overrides-vitest,hermes-e2e-vitest,network-policy-vitest,rebuild-openclaw-vitest,token-rotation-vitest,launchable-smoke-vitest,openclaw-tui-chat-correlation-vitest,gateway-guard-recovery,double-onboard-vitest,issue-4434-tui-unreachable-inference-vitest"
           if [ -n "${JOBS}" ] && [ -n "${SCENARIOS}" ]; then
             echo "::error::Use either scenarios or jobs, not both." >&2
             exit 1
@@ -93,12 +93,12 @@ jobs:
           SCENARIOS: ${{ inputs.scenarios }}
         run: |
           set -euo pipefail
-          allowed_jobs="openshell-version-pin-vitest,onboard-negative-paths-vitest,credential-migration-vitest,runtime-overrides-vitest,hermes-e2e-vitest,network-policy-vitest,rebuild-openclaw-vitest,token-rotation-vitest,launchable-smoke-vitest,openclaw-tui-chat-correlation-vitest,gateway-guard-recovery,double-onboard-vitest,issue-4434-tui-unreachable-inference-vitest"
+          allowed_jobs="openshell-version-pin-vitest,onboard-negative-paths-vitest,inference-routing-vitest,credential-migration-vitest,runtime-overrides-vitest,hermes-e2e-vitest,network-policy-vitest,rebuild-openclaw-vitest,token-rotation-vitest,launchable-smoke-vitest,openclaw-tui-chat-correlation-vitest,gateway-guard-recovery,double-onboard-vitest,issue-4434-tui-unreachable-inference-vitest"
           args=(--emit-live-matrix)
           matrix=""
           hermes_selected=false
           registry_scenarios=()
-          free_standing_scenarios=(openshell-version-pin onboard-negative-paths runtime-overrides hermes-e2e network-policy rebuild-openclaw token-rotation openclaw-tui-chat-correlation double-onboard issue-4434-tui-unreachable-inference)
+          free_standing_scenarios=(openshell-version-pin onboard-negative-paths inference-routing runtime-overrides hermes-e2e network-policy rebuild-openclaw token-rotation openclaw-tui-chat-correlation double-onboard issue-4434-tui-unreachable-inference)
           is_free_standing_scenario() {
             local id="$1"
             local known
@@ -162,7 +162,7 @@ jobs:
           fi
           echo "matrix=${matrix}" >> "$GITHUB_OUTPUT"
           echo "hermes_selected=${hermes_selected}" >> "$GITHUB_OUTPUT"
-          MATRIX_JSON="${matrix}" python - <<'PY' >> "$GITHUB_STEP_SUMMARY"
+          MATRIX_JSON="${matrix}" python3 - <<'PY' >> "$GITHUB_STEP_SUMMARY"
           import json
           import os
 
@@ -219,7 +219,7 @@ jobs:
           SCENARIO_ID: ${{ matrix.id }}
           SCENARIO_LABEL: ${{ matrix.label }}
         run: |
-          python - <<'PY' >> "$GITHUB_STEP_SUMMARY"
+          python3 - <<'PY' >> "$GITHUB_STEP_SUMMARY"
           import json
           import os
           from pathlib import Path
@@ -363,6 +363,54 @@ jobs:
           if-no-files-found: ignore
           retention-days: 14
 
+  # Live Vitest lane for the inference-routing scenarios. It runs when neither
+  # the `jobs` nor the `scenarios` input is set, or when either names this lane.
+  inference-routing-vitest:
+    needs: [validate-jobs, generate-matrix]
+    if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',inference-routing-vitest,') || contains(format(',{0},', inputs.scenarios), ',inference-routing,') }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+    env:
+      E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/inference-routing
+      NEMOCLAW_RUN_E2E_SCENARIOS: "1"
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+        with:
+          persist-credentials: false
+
+      - name: Set up Node
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0
+        with:
+          node-version: 22
+          cache: npm
+
+      - name: Install root dependencies
+        run: npm ci --ignore-scripts
+
+      - name: Build CLI
+        run: npm run build:cli
+
+      - name: Run inference routing live test
+        # Vitest port of test/e2e/test-inference-routing.sh. The always-on,
+        # PR-safe cases (invalid API key, unreachable endpoint) check failure
+        # classification and cleanup without spending live provider quota.
+        # The real-NVIDIA-key isolation case skips itself because this job
+        # supplies no NVIDIA_API_KEY; third-party provider smokes additionally
+        # require the NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE opt-in, which
+        # only a future secret-backed workflow should set.
+        run: |
+          set -euo pipefail
+          npx vitest run --project e2e-scenarios-live \
+            test/e2e-scenario/live/inference-routing.test.ts \
+            --silent=false --reporter=default
+
+      - name: Upload inference routing artifacts
+        # always(): keep the redacted artifacts even when the test step fails.
+        if: always()
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: e2e-vitest-scenarios-inference-routing
+          path: e2e-artifacts/vitest/inference-routing/
+          include-hidden-files: false
+          if-no-files-found: ignore
+          retention-days: 14
+
   issue-4434-tui-unreachable-inference-vitest:
     needs: [validate-jobs, generate-matrix]
     if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',issue-4434-tui-unreachable-inference-vitest,') || contains(format(',{0},', inputs.scenarios), ',issue-4434-tui-unreachable-inference,') }}
@@ -1210,6 +1258,7 @@ jobs:
         live-scenarios,
         openshell-version-pin-vitest,
         onboard-negative-paths-vitest,
+        inference-routing-vitest,
         credential-migration-vitest,
         runtime-overrides-vitest,
         hermes-e2e-vitest,
diff --git a/test/e2e-scenario/live/inference-routing.test.ts b/test/e2e-scenario/live/inference-routing.test.ts
new file mode 100644
index 0000000000..ff958867fa
--- /dev/null
+++ b/test/e2e-scenario/live/inference-routing.test.ts
@@ -0,0 +1,967 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { spawn } from "node:child_process";
+import crypto from "node:crypto";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+
+import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
+import type { ArtifactSink } from "../fixtures/artifacts.ts";
+import type { HostCliClient } from "../fixtures/clients/host.ts";
+import type { SandboxClient } from "../fixtures/clients/sandbox.ts";
+import { trustedSandboxShellScript, validateSandboxName } from "../fixtures/clients/sandbox.ts";
+import { expect, test } from "../fixtures/e2e-test.ts";
+import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
+import { redactString } from "../fixtures/redaction.ts";
+
+// Migrated from test/e2e/test-inference-routing.sh. This stays a simple live
+// Vitest conversion: direct CLI/onboard subprocesses plus OpenShell sandbox
+// probes, with local helpers only where raw in-memory output is required to
+// prove credential non-exposure before redacted artifacts are written.
+
+// Repository root: three directories above this file (test/e2e-scenario/live).
+const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
+// Script that every CLI invocation in this file runs through process.execPath.
+const CLI_ENTRYPOINT = path.join(REPO_ROOT, "bin", "nemoclaw.js");
+// Build output whose presence is asserted before any live scenario runs.
+const DIST_ENTRYPOINT = path.join(REPO_ROOT, "dist", "nemoclaw.js");
+// Per-user state files under ~/.nemoclaw that clearOnboardState() deletes so a
+// previous onboard attempt cannot leak a session or lock into the next one.
+const NEMOCLAW_STATE_DIR = path.join(os.homedir(), ".nemoclaw");
+const ONBOARD_SESSION_FILE = path.join(NEMOCLAW_STATE_DIR, "onboard-session.json");
+const ONBOARD_LOCK_FILE = path.join(NEMOCLAW_STATE_DIR, "onboard.lock");
+// Argument vector shared by every onboard subprocess started from this file.
+const ONBOARD_ARGS = [
+  "onboard",
+  "--non-interactive",
+  "--yes",
+  "--yes-i-accept-third-party-software",
+];
+// Shapes of a raw Node.js stack trace; failure output must match none of them.
+const STACK_TRACE_PATTERNS = [
+  /^\s+at (Object\.|Module\.|node:internal|process\.)/m,
+  /\bat node:internal/m,
+];
+// Words accepted as evidence that a failure was classified as a credential
+// problem (TC-INF-06).
+const CREDENTIAL_CLASSIFICATION_PATTERN =
+  /authorization|credential|invalid|401|unauthorized|api[._-]?key/i;
+// Words accepted as evidence that a failure was classified as a transport or
+// reachability problem (TC-INF-07).
+const TRANSPORT_CLASSIFICATION_PATTERN =
+  /unreachable|timeout|connect|ECONNREFUSED|ETIMEDOUT|ENETUNREACH|EHOSTUNREACH|ENOTFOUND|EAI_AGAIN|No route to host|transport|network|endpoint|dns/i;
+// Registers tests normally when the live gate is open; otherwise as skipped.
+const liveTest = shouldRunLiveE2EScenarios() ? test : test.skip;
+
+/**
+ * Report whether the opt-in smoke for `provider` has been requested.
+ *
+ * The legacy shell script ran these smokes whenever provider secrets were
+ * present. This migration demands an explicit opt-in so PR-safe jobs cannot
+ * spend third-party quota by accident. The opt-in is read from
+ * NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE (trimmed, case-insensitive):
+ * "1", "true" or "all" enable every provider, a provider name enables only
+ * that provider, and anything else (including unset) disables the smoke.
+ */
+function shouldRunProviderSmoke(provider: "openai" | "anthropic" | "compatible"): boolean {
+  const optIn = process.env.NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE?.trim().toLowerCase();
+  if (optIn === undefined) return false;
+  const accepted: string[] = ["1", "true", "all", provider];
+  return accepted.includes(optIn);
+}
+
+/** Signature of the skip callback handed to each test by the fixture context. */
+type SkipFn = (note?: string) => void;
+
+/**
+ * Skip the running test with `note` and never return to the caller.
+ *
+ * The trailing throw gives the function a `never` type so it can be used in
+ * expression position (for example after `??`); it only fires if `skip`
+ * returns normally. NOTE(review): presumably the fixture's `skip` aborts the
+ * test by itself — confirm against the fixture implementation.
+ */
+function skipLive(skip: SkipFn, note: string): never {
+  skip(note);
+  const fallback = new Error(note);
+  throw fallback;
+}
+
+/**
+ * Outcome of one `runRawCommand` call. The raw fields stay in memory only so
+ * tests can assert on unredacted output; the redacted twins are what gets
+ * written to artifacts and used in assertion messages.
+ */
+interface RawRunResult {
+  /** Executable followed by its arguments, exactly as spawned (unredacted). */
+  readonly command: readonly string[];
+  /** Captured standard output, unredacted. */
+  readonly stdout: string;
+  /** Captured standard error, unredacted. */
+  readonly stderr: string;
+  /** `stdout` after the caller-supplied redaction values were applied. */
+  readonly redactedStdout: string;
+  /** `stderr` after the caller-supplied redaction values were applied. */
+  readonly redactedStderr: string;
+  /** Exit code reported on close; null when none was reported. */
+  readonly exitCode: number | null;
+  /** Signal reported on close; null when none was reported. */
+  readonly signal: NodeJS.Signals | null;
+  /** True when the timeout fired and the process group was signalled. */
+  readonly timedOut: boolean;
+}
+
+/** Settings accepted by `runRawCommand` and the wrappers built on top of it. */
+interface RawRunOptions {
+  /** Sink that receives the redacted stdout, stderr and result files. */
+  readonly artifacts: ArtifactSink;
+  /** Base name of the files written under `raw-shell/`. */
+  readonly artifactName: string;
+  /** Values replaced by the redactor before anything is persisted. */
+  readonly redactionValues?: readonly string[];
+  /** Environment handed to the child process. */
+  readonly env?: NodeJS.ProcessEnv;
+  /** Working directory; the repository root is used when omitted. */
+  readonly cwd?: string;
+  /** Milliseconds before the process group is terminated (default 60 000). */
+  readonly timeoutMs?: number;
+}
+
+/** Join the non-empty streams of a command result, stdout first, with a newline. */
+function resultText(result: { stdout: string; stderr: string }): string {
+  const { stdout, stderr } = result;
+  const streams: string[] = [];
+  if (stdout) streams.push(stdout);
+  if (stderr) streams.push(stderr);
+  return streams.join("\n");
+}
+
+/**
+ * Redacted counterpart of `resultText`: joins the non-empty redacted streams,
+ * stdout first, so the text is safe to embed in assertion messages.
+ */
+function redactedResultText(
+  result: Pick<RawRunResult, "redactedStdout" | "redactedStderr">,
+): string {
+  const streams: string[] = [];
+  if (result.redactedStdout) streams.push(result.redactedStdout);
+  if (result.redactedStderr) streams.push(result.redactedStderr);
+  return streams.join("\n");
+}
+
+/** True when `text` matches any of the known raw Node.js stack-trace shapes. */
+function hasRawNodeStackTrace(text: string): boolean {
+  for (const shape of STACK_TRACE_PATTERNS) {
+    if (shape.test(text)) return true;
+  }
+  return false;
+}
+
+/**
+ * Build a sandbox name of the form `<prefix>-<pid>` and validate it.
+ * The name is only returned once `validateSandboxName` has accepted it.
+ */
+function inferenceSandboxName(prefix: string): string {
+  const candidate = [prefix, String(process.pid)].join("-");
+  validateSandboxName(candidate);
+  return candidate;
+}
+
+/**
+ * Environment for an onboard subprocess: the availability-probe base, then
+ * the caller's `extra` entries, then three switches that always win
+ * (third-party acceptance, non-interactive mode, sandbox recreation).
+ */
+function onboardEnv(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv {
+  const env: NodeJS.ProcessEnv = { ...buildAvailabilityProbeEnv(), ...extra };
+  // Assigned last so that `extra` cannot switch them off.
+  env.NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE = "1";
+  env.NEMOCLAW_NON_INTERACTIVE = "1";
+  env.NEMOCLAW_RECREATE_SANDBOX = "1";
+  return env;
+}
+
+/** Delete the onboard lock and session files; a missing file is not an error. */
+function clearOnboardState(): void {
+  for (const stateFile of [ONBOARD_LOCK_FILE, ONBOARD_SESSION_FILE]) {
+    fs.rmSync(stateFile, { force: true });
+  }
+}
+
+/** Return a copy of `command` with every element passed through the redactor. */
+function redactedCommand(command: readonly string[], values: readonly string[]): string[] {
+  const safe: string[] = [];
+  for (const part of command) {
+    safe.push(redactString(part, values));
+  }
+  return safe;
+}
+
+/**
+ * Spawn `command` in its own process group, capture both output streams, and
+ * persist redacted copies as artifacts.
+ *
+ * The returned result still carries the unredacted output so callers can
+ * assert that a secret never appeared; only the redacted text is written to
+ * `raw-shell/<artifactName>.{stdout.txt,stderr.txt,result.json}`.
+ *
+ * @param command Executable to run.
+ * @param args Arguments passed to the executable.
+ * @param options Artifact sink and name, plus optional cwd, env, redaction
+ *   values and timeout (default 60 000 ms).
+ * @returns Exit status, timeout flag, and raw plus redacted output.
+ * @throws Error when the child process reports an `error` event; the message
+ *   is redacted with the same values as the output.
+ */
+async function runRawCommand(
+  command: string,
+  args: readonly string[],
+  options: RawRunOptions,
+): Promise<RawRunResult> {
+  const timeoutMs = options.timeoutMs ?? 60_000;
+  const redactionValues = [...(options.redactionValues ?? [])];
+  const child = spawn(command, [...args], {
+    cwd: options.cwd ?? REPO_ROOT,
+    // Own process group, so the timeout can signal the whole tree via -pid.
+    detached: true,
+    env: options.env,
+    stdio: ["ignore", "pipe", "pipe"],
+  });
+  const fullCommand = [command, ...args];
+  // Keep the raw bytes and decode once at the end: decoding each chunk on its
+  // own corrupts multi-byte UTF-8 sequences that straddle a chunk boundary.
+  const stdoutChunks: Buffer[] = [];
+  const stderrChunks: Buffer[] = [];
+  let timedOut = false;
+  let spawnError: Error | undefined;
+
+  const killProcessGroup = (signal: NodeJS.Signals): void => {
+    if (child.pid === undefined) return;
+    try {
+      process.kill(-child.pid, signal);
+    } catch {
+      // Group signalling failed; fall back to the direct child.
+      child.kill(signal);
+    }
+  };
+
+  const timeout = setTimeout(() => {
+    timedOut = true;
+    killProcessGroup("SIGTERM");
+    // Escalate one second later in case SIGTERM was ignored.
+    setTimeout(() => killProcessGroup("SIGKILL"), 1_000).unref();
+  }, timeoutMs);
+  timeout.unref();
+
+  child.stdout?.on("data", (chunk: Buffer) => {
+    stdoutChunks.push(chunk);
+  });
+  child.stderr?.on("data", (chunk: Buffer) => {
+    stderrChunks.push(chunk);
+  });
+
+  const { exitCode, signal } = await new Promise<{
+    exitCode: number | null;
+    signal: NodeJS.Signals | null;
+  }>((resolve) => {
+    child.on("error", (error) => {
+      spawnError = error;
+      // Node does not guarantee that 'exit'/'close' follow a failed spawn, so
+      // settle here when no process was ever created instead of waiting for a
+      // 'close' that may never arrive.
+      if (child.pid === undefined) resolve({ exitCode: null, signal: null });
+    });
+    child.on("close", (code, closeSignal) => resolve({ exitCode: code, signal: closeSignal }));
+  });
+  clearTimeout(timeout);
+
+  if (spawnError) {
+    const message = redactString(spawnError.message, redactionValues);
+    throw new Error(`failed to spawn ${redactString(command, redactionValues)}: ${message}`);
+  }
+
+  const stdout = Buffer.concat(stdoutChunks).toString("utf8");
+  const stderr = Buffer.concat(stderrChunks).toString("utf8");
+  const redactedStdout = redactString(stdout, redactionValues);
+  const redactedStderr = redactString(stderr, redactionValues);
+  await options.artifacts.writeText(`raw-shell/${options.artifactName}.stdout.txt`, redactedStdout);
+  await options.artifacts.writeText(`raw-shell/${options.artifactName}.stderr.txt`, redactedStderr);
+  await options.artifacts.writeJson(`raw-shell/${options.artifactName}.result.json`, {
+    command: redactedCommand(fullCommand, redactionValues),
+    exitCode,
+    signal,
+    timedOut,
+    stdout: redactedStdout,
+    stderr: redactedStderr,
+  });
+
+  return {
+    command: fullCommand,
+    exitCode,
+    signal,
+    timedOut,
+    stdout,
+    stderr,
+    redactedStdout,
+    redactedStderr,
+  };
+}
+
+/** Run the NemoClaw CLI entrypoint under the current Node binary via `runRawCommand`. */
+async function runNemoclawCli(
+  args: readonly string[],
+  options: RawRunOptions,
+): Promise<RawRunResult> {
+  const nodeArgs = [CLI_ENTRYPOINT, ...args];
+  return runRawCommand(process.execPath, nodeArgs, options);
+}
+
+/**
+ * Environment for a raw `openshell` subprocess: the availability-probe base,
+ * then OPENSHELL_GATEWAY (inherited from this process, else "nemoclaw"),
+ * then the caller's `extra` entries, which may override either.
+ */
+function rawOpenShellEnv(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv {
+  const env: NodeJS.ProcessEnv = { ...buildAvailabilityProbeEnv() };
+  env.OPENSHELL_GATEWAY = process.env.OPENSHELL_GATEWAY ?? "nemoclaw";
+  Object.assign(env, extra);
+  return env;
+}
+
+/**
+ * Run `openshell` through `runRawCommand`. Whatever `options.env` the caller
+ * supplies is layered on top of the default OpenShell environment.
+ */
+async function runOpenShell(
+  args: readonly string[],
+  options: RawRunOptions,
+): Promise<RawRunResult> {
+  const env = rawOpenShellEnv(options.env);
+  return runRawCommand("openshell", args, { ...options, env });
+}
+
+/**
+ * Check what a live inference-routing scenario needs before it starts.
+ *
+ * - The built CLI must exist, otherwise the assertion fails.
+ * - `docker info` must succeed. On GitHub Actions a failure is an error; in
+ *   any other environment the test is skipped.
+ * - `openshell --version` is probed for the record only; its outcome never
+ *   blocks the test.
+ */
+async function requireLivePrerequisites(host: HostCliClient, skip: SkipFn): Promise<void> {
+  const cliIsBuilt = fs.existsSync(DIST_ENTRYPOINT);
+  expect(cliIsBuilt, "run `npm run build:cli` before live inference-routing scenarios").toBe(
+    true,
+  );
+
+  const dockerProbe = await host.command("docker", ["info"], {
+    artifactName: "prereq-docker-info-inference-routing",
+    env: buildAvailabilityProbeEnv(),
+    timeoutMs: 30_000,
+  });
+  if (dockerProbe.exitCode !== 0) {
+    const message = `Docker is required for live inference-routing coverage: ${resultText(dockerProbe)}`;
+    if (process.env.GITHUB_ACTIONS === "true") throw new Error(message);
+    skipLive(skip, message);
+  }
+
+  try {
+    // A fresh GitHub runner may not have OpenShell before the first onboard;
+    // `nemoclaw onboard` installs it. A non-zero exit is therefore recorded
+    // by the probe but deliberately not acted upon.
+    await host.command("openshell", ["--version"], {
+      artifactName: "prereq-openshell-version-inference-routing",
+      env: buildAvailabilityProbeEnv(),
+      timeoutMs: 30_000,
+    });
+  } catch {
+    // Same as non-zero: fresh runner may not have openshell until onboard.
+  }
+}
+
+/** Switches for `cleanupSandbox`. */
+interface CleanupSandboxOptions {
+  /**
+   * When true, individual step failures are only recorded as evidence and the
+   * cleanup throws if the sandbox status probe still exits 0 afterwards.
+   * When false or omitted, each step tolerates only the known "nothing to
+   * clean up" outcomes.
+   */
+  readonly strict?: boolean;
+}
+
+/**
+ * True when `text` reads like a cleanup step that found nothing to remove
+ * (missing sandbox, no gateway, missing binary or file), case-insensitively.
+ */
+function isExpectedPreOnboardCleanupMiss(text: string): boolean {
+  const nothingToCleanUp =
+    /does not exist|run 'nemoclaw onboard'|no active gateway|not found|no such file|enoent/i;
+  return nothingToCleanUp.test(text);
+}
+
+/**
+ * Run one best-effort cleanup step before onboarding.
+ *
+ * A zero exit code, or output or an error message that looks like "nothing to
+ * clean up", is accepted silently. Any other outcome is rethrown: either the
+ * error raised by `run`, or a new error that names `label` and carries the
+ * step's output.
+ */
+async function optionalCleanupStep(
+  label: string,
+  run: () => Promise<{ exitCode: number | null; stdout: string; stderr: string }>,
+): Promise<void> {
+  let failure: unknown;
+  try {
+    const outcome = await run();
+    if (outcome.exitCode === 0) return;
+    const output = resultText(outcome);
+    if (isExpectedPreOnboardCleanupMiss(output)) return;
+    failure = new Error(`${label} failed unexpectedly during pre-onboard cleanup: ${output}`);
+  } catch (error) {
+    failure = error;
+  }
+  // Both kinds of failure get one more chance to count as an expected miss,
+  // judged by their message.
+  const message = failure instanceof Error ? failure.message : String(failure);
+  if (isExpectedPreOnboardCleanupMiss(message)) return;
+  throw failure;
+}
+
+/**
+ * One-line summary of a probe: `<label> exit=<code>`, followed by `: ` and
+ * at most the first 500 characters of the trimmed output when there is any.
+ */
+function probeSummary(
+  label: string,
+  result: { exitCode: number | null; stdout: string; stderr: string },
+): string {
+  const output = resultText(result).trim();
+  const detail = output ? `: ${output.slice(0, 500)}` : "";
+  return `${label} exit=${result.exitCode}${detail}`;
+}
+
+/**
+ * Remove `sandboxName` and the local onboard state.
+ *
+ * Both modes run `nemoclaw <name> destroy --yes`, then
+ * `openshell sandbox delete <name>`, then clear the onboard state files.
+ *
+ * - Default mode: each step goes through `optionalCleanupStep`, so only
+ *   unexpected failures throw.
+ * - Strict mode: step failures are collected as evidence instead of thrown.
+ *   The sandbox status is probed afterwards, and an exit code of 0 (sandbox
+ *   still present) raises an error that carries all collected evidence.
+ */
+async function cleanupSandbox(
+  host: HostCliClient,
+  sandbox: SandboxClient,
+  sandboxName: string,
+  options: CleanupSandboxOptions = {},
+): Promise<void> {
+  const destroyViaCli = () =>
+    host.command(process.execPath, [CLI_ENTRYPOINT, sandboxName, "destroy", "--yes"], {
+      artifactName: `cleanup-nemoclaw-destroy-${sandboxName}`,
+      env: buildAvailabilityProbeEnv(),
+      timeoutMs: 120_000,
+    });
+  const deleteViaOpenShell = () =>
+    sandbox.openshell(["sandbox", "delete", sandboxName], {
+      artifactName: `cleanup-openshell-sandbox-delete-${sandboxName}`,
+      env: buildAvailabilityProbeEnv(),
+      timeoutMs: 60_000,
+    });
+  const describe = (error: unknown): string =>
+    error instanceof Error ? error.message : String(error);
+
+  if (options.strict) {
+    const evidence: string[] = [];
+
+    try {
+      evidence.push(probeSummary("nemoclaw destroy", await destroyViaCli()));
+    } catch (error) {
+      evidence.push(`nemoclaw destroy threw: ${describe(error)}`);
+    }
+
+    try {
+      evidence.push(probeSummary("openshell sandbox delete", await deleteViaOpenShell()));
+    } catch (error) {
+      evidence.push(`openshell sandbox delete threw: ${describe(error)}`);
+    }
+
+    clearOnboardState();
+
+    const remaining = await sandbox.status(sandboxName, {
+      artifactName: `cleanup-openshell-sandbox-status-${sandboxName}`,
+      env: buildAvailabilityProbeEnv(),
+      timeoutMs: 30_000,
+    });
+    evidence.push(probeSummary("openshell sandbox status", remaining));
+    // A successful status probe means the sandbox is still there.
+    if (remaining.exitCode === 0) {
+      throw new Error(
+        `sandbox '${sandboxName}' still exists after strict cleanup\n${evidence.join("\n")}`,
+      );
+    }
+    return;
+  }
+
+  await optionalCleanupStep("nemoclaw destroy", destroyViaCli);
+  await optionalCleanupStep("openshell sandbox delete", deleteViaOpenShell);
+  clearOnboardState();
+}
+
+/**
+ * Assert that `nemoclaw <name> status` does not describe the sandbox as
+ * running or ready (case-insensitive match on the combined output).
+ */
+async function expectNoActiveSandbox(host: HostCliClient, sandboxName: string): Promise<void> {
+  const probe = await host.command(process.execPath, [CLI_ENTRYPOINT, sandboxName, "status"], {
+    artifactName: `post-failure-status-${sandboxName}`,
+    env: buildAvailabilityProbeEnv(),
+    timeoutMs: 30_000,
+  });
+  const output = resultText(probe);
+  const looksActive = /running|ready/i.test(output);
+  expect(
+    looksActive,
+    `sandbox '${sandboxName}' is still active after failed onboard: ${output}`,
+  ).toBe(false);
+}
+
+/**
+ * Clear stale onboard state, then run a non-interactive onboard for
+ * `sandboxName`.
+ *
+ * The policy tier defaults to "open" and the sandbox name to `sandboxName`;
+ * `extraEnv` is applied after both and may override them. The subprocess is
+ * given `timeoutMs` (default ten minutes) and its output is redacted with
+ * `redactionValues` before being stored under `artifactName`.
+ */
+async function onboardSandbox(
+  artifacts: ArtifactSink,
+  sandboxName: string,
+  extraEnv: NodeJS.ProcessEnv,
+  redactionValues: readonly string[],
+  artifactName: string,
+  timeoutMs = 10 * 60_000,
+): Promise<RawRunResult> {
+  clearOnboardState();
+  const env = onboardEnv({
+    NEMOCLAW_POLICY_TIER: "open",
+    NEMOCLAW_SANDBOX_NAME: sandboxName,
+    ...extraEnv,
+  });
+  return runNemoclawCli(ONBOARD_ARGS, { artifactName, artifacts, env, redactionValues, timeoutMs });
+}
+
+/** Assert that an onboard run neither timed out nor exited non-zero. */
+function expectOnboardSuccess(result: RawRunResult, label: string): void {
+  const { exitCode, timedOut } = result;
+  // Only redacted output may appear in assertion messages.
+  const output = redactedResultText(result);
+  expect(timedOut, `${label} timed out\n${output}`).toBe(false);
+  expect(exitCode, `${label} failed\n${output}`).toBe(0);
+}
+
+/**
+ * Assert that an onboard run failed by itself: it must not have timed out,
+ * and its exit code must be something other than 0.
+ */
+function expectOnboardFailure(result: RawRunResult, label: string): void {
+  const { exitCode, timedOut } = result;
+  // Only redacted output may appear in assertion messages.
+  const output = redactedResultText(result);
+  expect(timedOut, `${label} timed out\n${output}`).toBe(false);
+  expect(exitCode, `${label} unexpectedly succeeded\n${output}`).not.toBe(0);
+}
+
+/**
+ * Parse `body` as JSON.
+ *
+ * @throws Error prefixed with `<label> response was not JSON:` and followed
+ *   by the parser's own message when `body` is not valid JSON.
+ */
+function parseJsonBody(body: string, label: string): unknown {
+  try {
+    return JSON.parse(body);
+  } catch (error) {
+    const reason = error instanceof Error ? error.message : String(error);
+    throw new Error(`${label} response was not JSON: ${reason}`);
+  }
+}
+
+/**
+ * Extract the first non-blank completion text from an OpenAI-style response.
+ *
+ * Choices are visited in order. For each one, `message.content` is preferred
+ * and `text` is the fallback. The chosen string is returned untrimmed; the
+ * result is "" when the payload has no usable choice.
+ */
+function openAiContent(json: unknown): string {
+  const choices =
+    json && typeof json === "object" ? (json as { choices?: unknown }).choices : undefined;
+  if (!Array.isArray(choices)) return "";
+
+  const isNonBlank = (value: unknown): value is string =>
+    typeof value === "string" && value.trim() !== "";
+
+  for (const entry of choices) {
+    if (!entry || typeof entry !== "object") continue;
+
+    const message = (entry as { message?: unknown }).message;
+    if (message && typeof message === "object") {
+      const chatContent = (message as { content?: unknown }).content;
+      if (isNonBlank(chatContent)) return chatContent;
+    }
+
+    const completionText = (entry as { text?: unknown }).text;
+    if (isNonBlank(completionText)) return completionText;
+  }
+  return "";
+}
+
+/**
+ * Extract the reply text from an Anthropic-style response.
+ *
+ * When `content` is an array, the string `text` of every part is
+ * concatenated in order and the trimmed result is returned (possibly "").
+ * Any other object payload is read as an OpenAI-style response instead.
+ */
+function anthropicContent(json: unknown): string {
+  if (!json || typeof json !== "object") return "";
+
+  const parts = (json as { content?: unknown }).content;
+  if (!Array.isArray(parts)) return openAiContent(json);
+
+  let combined = "";
+  for (const part of parts) {
+    if (!part || typeof part !== "object") continue;
+    const text = (part as { text?: unknown }).text;
+    if (typeof text === "string") combined += text;
+  }
+  return combined.trim();
+}
+
+/**
+ * From inside the sandbox, POST a one-message chat completion for `model` to
+ * https://inference.local with curl. Assert that curl exits 0 and that the
+ * JSON response contains non-empty completion text.
+ */
+async function expectOpenAiChatThroughSandbox(
+  sandbox: SandboxClient,
+  sandboxName: string,
+  model: string,
+  redactionValues: readonly string[],
+  artifactName: string,
+): Promise<void> {
+  const requestBody = JSON.stringify({
+    model,
+    messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
+    max_tokens: 50,
+  });
+  const curlCommand = [
+    "curl",
+    "-sS",
+    "--max-time",
+    "60",
+    "https://inference.local/v1/chat/completions",
+    "-H",
+    "Content-Type: application/json",
+    "--data-raw",
+    requestBody,
+  ];
+  const reply = await sandbox.exec(sandboxName, curlCommand, {
+    artifactName,
+    env: buildAvailabilityProbeEnv(),
+    redactionValues: [...redactionValues],
+    timeoutMs: 90_000,
+  });
+  expect(reply.exitCode, resultText(reply)).toBe(0);
+
+  const parsed = parseJsonBody(reply.stdout, artifactName);
+  const answer = openAiContent(parsed);
+  expect(answer, `no chat content in response: ${reply.stdout.slice(0, 500)}`).not.toBe("");
+}
+
+/**
+ * From inside the sandbox, POST a one-message request for `model` to
+ * https://inference.local/v1/messages with curl. Assert that curl exits 0 and
+ * that the JSON response contains non-empty reply text.
+ *
+ * @param sandbox Client used to execute the probe inside the sandbox.
+ * @param sandboxName Sandbox to execute in.
+ * @param model Model identifier placed in the request body.
+ * @param redactionValues Values redacted from the recorded artifacts.
+ * @param artifactName Name under which the probe is recorded. It defaults to
+ *   the previously hard-coded name, so existing callers are unaffected,
+ *   while repeated probes in one test can now choose distinct names, as the
+ *   OpenAI helper already allows.
+ */
+async function expectAnthropicMessageThroughSandbox(
+  sandbox: SandboxClient,
+  sandboxName: string,
+  model: string,
+  redactionValues: readonly string[],
+  artifactName = "anthropic-inference-local-message",
+): Promise<void> {
+  const payload = JSON.stringify({
+    model,
+    messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
+    max_tokens: 50,
+  });
+  const response = await sandbox.exec(
+    sandboxName,
+    [
+      "curl",
+      "-sS",
+      "--max-time",
+      "60",
+      "https://inference.local/v1/messages",
+      "-H",
+      "Content-Type: application/json",
+      "--data-raw",
+      payload,
+    ],
+    {
+      artifactName,
+      env: buildAvailabilityProbeEnv(),
+      redactionValues: [...redactionValues],
+      timeoutMs: 90_000,
+    },
+  );
+  expect(response.exitCode, resultText(response)).toBe(0);
+  const content = anthropicContent(parseJsonBody(response.stdout, "anthropic inference.local"));
+  expect(content, `no Anthropic content in response: ${response.stdout.slice(0, 500)}`).not.toBe(
+    "",
+  );
+}
+
+// TC-INF-06: onboarding with a syntactically plausible but invalid NVIDIA key
+// must fail on its own (no timeout), classify the failure as a credential
+// problem, print neither a raw stack trace nor the submitted key, and leave
+// no active sandbox behind.
+liveTest(
+  "TC-INF-06 invalid API key fails with credential classification and cleanup",
+  { timeout: 5 * 60_000 },
+  async ({ artifacts, cleanup, host, sandbox, skip }) => {
+    await requireLivePrerequisites(host, skip);
+    const sandboxName = inferenceSandboxName("e2e-invalid-key");
+    const removeResidue = () => cleanupSandbox(host, sandbox, sandboxName);
+    // Register the teardown first, then start from a clean slate.
+    cleanup.add(`remove inference-routing invalid-key residue for ${sandboxName}`, removeResidue);
+    await removeResidue();
+
+    const scenario = {
+      id: "inference-routing-invalid-api-key",
+      runner: "vitest",
+      migratedFrom: "test/e2e/test-inference-routing.sh",
+      contract: [
+        "invalid NVIDIA key exits non-zero",
+        "output contains credential classification",
+        "output does not expose raw stack trace or submitted key",
+        "failed onboard leaves no active sandbox",
+      ],
+    };
+    await artifacts.writeJson("scenario.json", scenario);
+
+    // Assembled from parts, so the full key never appears verbatim in source.
+    const invalidKey = ["nvapi", "INTENTIONALLY", "INVALID", "KEY", "FOR", "E2E", "TEST"].join("-");
+    const onboard = await onboardSandbox(
+      artifacts,
+      sandboxName,
+      { NVIDIA_API_KEY: invalidKey },
+      [invalidKey],
+      "tc-inf-06-onboard-invalid-api-key",
+      120_000,
+    );
+    // Assert on the raw output, but report only the redacted text.
+    const rawOutput = resultText(onboard);
+    const safeOutput = redactedResultText(onboard);
+
+    expectOnboardFailure(onboard, "TC-INF-06 invalid-key onboard");
+    expect(CREDENTIAL_CLASSIFICATION_PATTERN.test(rawOutput), safeOutput).toBe(true);
+    expect(hasRawNodeStackTrace(rawOutput), safeOutput).toBe(false);
+    expect(rawOutput.includes("INTENTIONALLY-INVALID-KEY-FOR-E2E-TEST"), safeOutput).toBe(false);
+    await expectNoActiveSandbox(host, sandboxName);
+  },
+);
+
+// TC-INF-07: onboarding against a custom endpoint on the reserved `.invalid`
+// TLD must fail on its own (no timeout), classify the failure as a transport
+// problem, print no raw stack trace, and leave no active sandbox behind.
+liveTest(
+  "TC-INF-07 unreachable endpoint fails with transport classification and cleanup",
+  { timeout: 5 * 60_000 },
+  async ({ artifacts, cleanup, host, sandbox, skip }) => {
+    await requireLivePrerequisites(host, skip);
+    const sandboxName = inferenceSandboxName("e2e-unreachable");
+    const removeResidue = () => cleanupSandbox(host, sandbox, sandboxName);
+    // Register the teardown first, then start from a clean slate.
+    cleanup.add(`remove inference-routing unreachable residue for ${sandboxName}`, removeResidue);
+    await removeResidue();
+
+    const scenario = {
+      id: "inference-routing-unreachable-endpoint",
+      runner: "vitest",
+      migratedFrom: "test/e2e/test-inference-routing.sh",
+      contract: [
+        "unreachable custom endpoint exits non-zero",
+        "output contains transport classification",
+        "output does not expose raw stack trace",
+        "failed onboard leaves no active sandbox",
+      ],
+    };
+    await artifacts.writeJson("scenario.json", scenario);
+
+    // Both keys are fake; they are still redacted from the stored artifacts.
+    const nvidiaKey = ["nvapi", "valid", "format", "but", "fake", "key", "1234567890"].join("-");
+    const compatibleKey = "fake-key-for-unreachable-test";
+    const unreachableEnv: NodeJS.ProcessEnv = {
+      COMPATIBLE_API_KEY: compatibleKey,
+      NEMOCLAW_ENDPOINT_URL: "https://nemoclaw-e2e.invalid/v1",
+      NEMOCLAW_MODEL: "test-model",
+      NEMOCLAW_PROVIDER: "custom",
+      NVIDIA_API_KEY: nvidiaKey,
+    };
+    const onboard = await onboardSandbox(
+      artifacts,
+      sandboxName,
+      unreachableEnv,
+      [nvidiaKey, compatibleKey],
+      "tc-inf-07-onboard-unreachable-endpoint",
+      120_000,
+    );
+    // Assert on the raw output, but report only the redacted text.
+    const rawOutput = resultText(onboard);
+    const safeOutput = redactedResultText(onboard);
+
+    expectOnboardFailure(onboard, "TC-INF-07 unreachable-endpoint onboard");
+    expect(TRANSPORT_CLASSIFICATION_PATTERN.test(rawOutput), safeOutput).toBe(true);
+    expect(hasRawNodeStackTrace(rawOutput), safeOutput).toBe(false);
+    await expectNoActiveSandbox(host, sandboxName);
+  },
+);
+
+liveTest(
+  "TC-INF-05 real NVIDIA key is isolated from sandbox env, process list, and filesystem",
+  { timeout: 15 * 60_000 },
+  async ({ artifacts, cleanup, host, sandbox, secrets, skip }) => {
+    const apiKey =
+      secrets.optional("NVIDIA_API_KEY") ??
+      skipLive(skip, "NVIDIA_API_KEY not set — cannot test credential isolation");
+    await requireLivePrerequisites(host, skip);
+    const sandboxName = inferenceSandboxName("e2e-inf-cred");
+    cleanup.add(
+      `best-effort inference-routing credential-isolation cleanup for ${sandboxName}`,
+      () => cleanupSandbox(host, sandbox, sandboxName),
+    );
+    await cleanupSandbox(host, sandbox, sandboxName);
+
+    await artifacts.writeJson("scenario.json", {
+      id: "inference-routing-credential-isolation",
+      runner: "vitest",
+      migratedFrom: "test/e2e/test-inference-routing.sh",
+      contract: [
+        "real NVIDIA_API_KEY does not appear in sandbox environment",
+        "real NVIDIA_API_KEY does not appear in sandbox process list when ps is available",
+        "real NVIDIA_API_KEY does not appear in sampled sandbox filesystem",
+        "sandbox NVIDIA_API_KEY, when present, is a placeholder rather than the real key",
+      ],
+    });
+
+    const onboard = await onboardSandbox(
+      artifacts,
+      sandboxName,
+      { NVIDIA_API_KEY: apiKey },
+      [apiKey],
+      "tc-inf-05-onboard-credential-isolation",
+    );
+    expectOnboardSuccess(onboard, "TC-INF-05 credential-isolation onboard");
+    cleanup.add(`strict inference-routing credential-isolation cleanup for ${sandboxName}`, () =>
+      cleanupSandbox(host, sandbox, sandboxName, { strict: true }),
+    );
+
+    const sandboxEnv = await runOpenShell(["sandbox", "exec", "-n", sandboxName, "--", "env"], {
+      artifactName: "tc-inf-05-sandbox-env",
+      artifacts,
+      env: buildAvailabilityProbeEnv(),
+      redactionValues: [apiKey],
+      timeoutMs: 60_000,
+    });
+    expect(sandboxEnv.exitCode, redactedResultText(sandboxEnv)).toBe(0);
+    expect(sandboxEnv.stdout.includes(apiKey), redactedResultText(sandboxEnv)).toBe(false);
+
+    const processList = await runOpenShell(
+      [
+        "sandbox",
+        "exec",
+        "-n",
+        sandboxName,
+        "--",
+        "sh",
+        "-lc",
+        "ps aux 2>/dev/null || ps -ef 2>/dev/null",
+      ],
+      {
+        artifactName: "tc-inf-05-sandbox-process-list",
+        artifacts,
+        env: buildAvailabilityProbeEnv(),
+        redactionValues: [apiKey],
+        timeoutMs: 60_000,
+      },
+    );
+    if (processList.exitCode === 0 && processList.stdout.trim()) {
+      expect(processList.stdout.includes(apiKey), redactedResultText(processList)).toBe(false);
+    } else {
+      await artifacts.writeJson("tc-inf-05-process-list-skipped.json", {
+        reason: "ps not available in hardened sandbox",
+        exitCode: processList.exitCode,
+      });
+    }
+
+    const scanScript = [
+      "const crypto=require('crypto')",
+      "const fs=require('fs')",
+      "const {execFileSync}=require('child_process')",
+      "const len=Number(process.env.KEY_LEN||'0')",
+      "const salt=process.env.SCAN_SALT||''",
+      "const target=process.env.TARGET_HASH||''",
+      "const digest=(value)=>crypto.createHash('sha256').update(salt).update(value).digest('hex')",
+      "if(!len||!salt||!target){console.log('SCAN_CONFIG_MISSING');process.exit(0)}",
+      "let out=''",
+      "try{out=execFileSync('sh',['-lc','find /sandbox /home /tmp -type f -size -1M 2>/dev/null | head -200'],{encoding:'utf8'})}catch{console.log('SCAN_ERROR');process.exit(0)}",
+      "for(const file of out.trim().split(/\\n/).filter(Boolean)){try{const content=fs.readFileSync(file,'utf8');for(let i=0;i<=content.length-len;i++){if(digest(content.slice(i,i+len))===target){console.log('FOUND:'+file);break}}}catch{}}",
+      "console.log('SCAN_DONE')",
+    ].join(";");
+    const leakCanary = `nemoclaw-fs-scan-canary-${crypto.randomUUID()}`;
+    const canaryPath = "/tmp/nemoclaw-fs-scan-canary.txt";
+    const plantCanary = await sandbox.execShell(
+      sandboxName,
+      trustedSandboxShellScript(`printf '%s' '${leakCanary}' > ${canaryPath}`),
+      {
+        artifactName: "tc-inf-05-sandbox-filesystem-canary-plant",
+        env: buildAvailabilityProbeEnv(),
+        timeoutMs: 30_000,
+      },
+    );
+    expect(plantCanary.exitCode, resultText(plantCanary)).toBe(0);
+    const canarySalt = crypto.randomUUID();
+    const canaryScan = await runOpenShell(
+      ["sandbox", "exec", "-n", sandboxName, "--", "node", "-e", scanScript],
+      {
+        artifactName: "tc-inf-05-sandbox-filesystem-canary-scan",
+        artifacts,
+        env: rawOpenShellEnv({
+          KEY_LEN: String(leakCanary.length),
+          SCAN_SALT: canarySalt,
+          TARGET_HASH: crypto
+            .createHash("sha256")
+            .update(canarySalt)
+            .update(leakCanary)
+            .digest("hex"),
+        }),
+        timeoutMs: 90_000,
+      },
+    );
+    expect(canaryScan.stdout, redactedResultText(canaryScan)).toContain(`FOUND:${canaryPath}`);
+
+    const removeCanary = await sandbox.execShell(
+      sandboxName,
+      trustedSandboxShellScript(`rm -f ${canaryPath}`),
+      {
+        artifactName: "tc-inf-05-sandbox-filesystem-canary-remove",
+        env: buildAvailabilityProbeEnv(),
+        timeoutMs: 30_000,
+      },
+    );
+    expect(removeCanary.exitCode, resultText(removeCanary)).toBe(0);
+
+    const secretScanSalt = crypto.randomUUID();
+    const filesystemScan = await runOpenShell(
+      ["sandbox", "exec", "-n", sandboxName, "--", "node", "-e", scanScript],
+      {
+        artifactName: "tc-inf-05-sandbox-filesystem-scan",
+        artifacts,
+        env: rawOpenShellEnv({
+          KEY_LEN: String(apiKey.length),
+          SCAN_SALT: secretScanSalt,
+          TARGET_HASH: crypto
+            .createHash("sha256")
+            .update(secretScanSalt)
+            .update(apiKey)
+            .digest("hex"),
+        }),
+        redactionValues: [apiKey],
+        timeoutMs: 90_000,
+      },
+    );
+    expect(filesystemScan.stdout).not.toContain("SCAN_CONFIG_MISSING");
+    expect(filesystemScan.stdout).not.toContain("FOUND:");
+    expect(filesystemScan.stdout, redactedResultText(filesystemScan)).toContain("SCAN_DONE");
+
+    const placeholder = await sandbox.execShell(
+      sandboxName,
+      trustedSandboxShellScript("printenv NVIDIA_API_KEY 2>/dev/null || true"),
+      {
+        artifactName: "tc-inf-05-sandbox-placeholder",
+        env: buildAvailabilityProbeEnv(),
+        redactionValues: [apiKey],
+        timeoutMs: 30_000,
+      },
+    );
+    const placeholderValue = placeholder.stdout.trim();
+    if (!placeholderValue) {
+      await artifacts.writeJson("tc-inf-05-placeholder-skipped.json", {
+        reason: "NVIDIA_API_KEY not set in sandbox; placeholder injection may not be active",
+      });
+    } else {
+      expect(placeholderValue, "sandbox has the real key, not a placeholder").not.toBe(apiKey);
+    }
+  },
+);
+
+liveTest( // Live smoke: onboard a sandbox with the OpenAI provider, then send a chat through it.
+  "TC-INF-02 OpenAI provider responds through inference.local",
+  { timeout: 15 * 60_000 }, // presumably milliseconds, i.e. 15 minutes — TODO confirm
+  async ({ artifacts, cleanup, host, sandbox, secrets, skip }) => {
+    if (!shouldRunProviderSmoke("openai")) { // opt-in gate; the skip message names the env var
+      skipLive(
+        skip,
+        "set NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE=openai or all to run OpenAI smoke",
+      );
+    }
+    const apiKey = secrets.optional("OPENAI_API_KEY") ?? skipLive(skip, "OPENAI_API_KEY not set"); // NOTE(review): assumes skipLive never returns — confirm
+    await requireLivePrerequisites(host, skip);
+    const sandboxName = inferenceSandboxName("e2e-openai");
+    const model = process.env.NEMOCLAW_OPENAI_MODEL || "gpt-4o-mini"; // "||" also falls back on an empty value
+    cleanup.add(`best-effort inference-routing OpenAI cleanup for ${sandboxName}`, () => // registered before onboarding
+      cleanupSandbox(host, sandbox, sandboxName),
+    );
+    await cleanupSandbox(host, sandbox, sandboxName); // also clean up right away, before onboarding under this name
+
+    await artifacts.writeJson("scenario.json", { // scenario metadata artifact for this run
+      id: "inference-routing-openai",
+      runner: "vitest",
+      migratedFrom: "test/e2e/test-inference-routing.sh",
+      contract: ["OpenAI provider onboards", "sandbox inference.local routes chat to OpenAI"],
+      model,
+    });
+
+    const onboard = await onboardSandbox(
+      artifacts,
+      sandboxName,
+      { NEMOCLAW_MODEL: model, NEMOCLAW_PROVIDER: "openai", OPENAI_API_KEY: apiKey },
+      [apiKey], // presumably values to redact from captured output — verify against onboardSandbox
+      "tc-inf-02-onboard-openai",
+    );
+    expectOnboardSuccess(onboard, "TC-INF-02 OpenAI onboard");
+    cleanup.add(`strict inference-routing OpenAI cleanup for ${sandboxName}`, () => // added only after onboard succeeded
+      cleanupSandbox(host, sandbox, sandboxName, { strict: true }),
+    );
+    await expectOpenAiChatThroughSandbox(
+      sandbox,
+      sandboxName,
+      model,
+      [apiKey],
+      "openai-inference-local-chat",
+    );
+  },
+);
+
+liveTest( // Live smoke: onboard a sandbox with the Anthropic provider, then send a message through it.
+  "TC-INF-03 Anthropic provider responds through inference.local",
+  { timeout: 15 * 60_000 }, // presumably milliseconds, i.e. 15 minutes — TODO confirm
+  async ({ artifacts, cleanup, host, sandbox, secrets, skip }) => {
+    if (!shouldRunProviderSmoke("anthropic")) { // opt-in gate; the skip message names the env var
+      skipLive(
+        skip,
+        "set NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE=anthropic or all to run Anthropic smoke",
+      );
+    }
+    const apiKey =
+      secrets.optional("ANTHROPIC_API_KEY") ?? skipLive(skip, "ANTHROPIC_API_KEY not set"); // NOTE(review): assumes skipLive never returns — confirm
+    await requireLivePrerequisites(host, skip);
+    const sandboxName = inferenceSandboxName("e2e-anthropic");
+    const model = process.env.NEMOCLAW_ANTHROPIC_MODEL || "claude-sonnet-4-6"; // "||" also falls back on an empty value
+    cleanup.add(`best-effort inference-routing Anthropic cleanup for ${sandboxName}`, () => // registered before onboarding
+      cleanupSandbox(host, sandbox, sandboxName),
+    );
+    await cleanupSandbox(host, sandbox, sandboxName); // also clean up right away, before onboarding under this name
+
+    await artifacts.writeJson("scenario.json", { // scenario metadata artifact for this run
+      id: "inference-routing-anthropic",
+      runner: "vitest",
+      migratedFrom: "test/e2e/test-inference-routing.sh",
+      contract: [
+        "Anthropic provider onboards",
+        "sandbox inference.local routes Messages API to Anthropic",
+      ],
+      model,
+    });
+
+    const onboard = await onboardSandbox(
+      artifacts,
+      sandboxName,
+      { ANTHROPIC_API_KEY: apiKey, NEMOCLAW_MODEL: model, NEMOCLAW_PROVIDER: "anthropic" },
+      [apiKey], // presumably values to redact from captured output — verify against onboardSandbox
+      "tc-inf-03-onboard-anthropic",
+    );
+    expectOnboardSuccess(onboard, "TC-INF-03 Anthropic onboard");
+    cleanup.add(`strict inference-routing Anthropic cleanup for ${sandboxName}`, () => // added only after onboard succeeded
+      cleanupSandbox(host, sandbox, sandboxName, { strict: true }),
+    );
+    await expectAnthropicMessageThroughSandbox(sandbox, sandboxName, model, [apiKey]);
+  },
+);
+
+liveTest( // Live smoke: onboard a sandbox against a custom OpenAI-compatible endpoint, then chat through it.
+  "TC-INF-09 custom OpenAI-compatible endpoint responds through inference.local",
+  { timeout: 15 * 60_000 }, // presumably milliseconds, i.e. 15 minutes — TODO confirm
+  async ({ artifacts, cleanup, host, sandbox, secrets, skip }) => {
+    if (!shouldRunProviderSmoke("compatible")) { // opt-in gate; the skip message names the env var
+      skipLive(
+        skip,
+        "set NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE=compatible or all to run compatible endpoint smoke",
+      );
+    }
+    const endpointUrl =
+      process.env.NEMOCLAW_ENDPOINT_URL || // "||", not "??": an empty value (e.g. unset CI variable) must skip too
+      skipLive(skip, "Missing NEMOCLAW_ENDPOINT_URL, NEMOCLAW_COMPAT_MODEL, or COMPATIBLE_API_KEY");
+    const model =
+      process.env.NEMOCLAW_COMPAT_MODEL ||
+      process.env.NEMOCLAW_MODEL || // generic model variable is the fallback
+      skipLive(skip, "Missing NEMOCLAW_ENDPOINT_URL, NEMOCLAW_COMPAT_MODEL, or COMPATIBLE_API_KEY");
+    const apiKey =
+      secrets.optional("COMPATIBLE_API_KEY") ?? // NOTE(review): relies on secrets.optional returning null/undefined when unset — confirm for empty values
+      skipLive(skip, "Missing NEMOCLAW_ENDPOINT_URL, NEMOCLAW_COMPAT_MODEL, or COMPATIBLE_API_KEY");
+    await requireLivePrerequisites(host, skip);
+    const sandboxName = inferenceSandboxName("e2e-compat-ep");
+    cleanup.add(
+      `best-effort inference-routing compatible-endpoint cleanup for ${sandboxName}`, // registered before onboarding
+      () => cleanupSandbox(host, sandbox, sandboxName),
+    );
+    await cleanupSandbox(host, sandbox, sandboxName); // also clean up right away, before onboarding under this name
+
+    await artifacts.writeJson("scenario.json", { // scenario metadata artifact for this run
+      id: "inference-routing-compatible-endpoint",
+      runner: "vitest",
+      migratedFrom: "test/e2e/test-inference-routing.sh",
+      contract: [
+        "custom OpenAI-compatible endpoint onboards",
+        "sandbox inference.local routes chat to compatible endpoint",
+      ],
+      endpointUrl: redactString(endpointUrl, [apiKey]), // redact in case the URL embeds the key
+      model,
+    });
+
+    const onboard = await onboardSandbox(
+      artifacts,
+      sandboxName,
+      {
+        COMPATIBLE_API_KEY: apiKey,
+        NEMOCLAW_ENDPOINT_URL: endpointUrl,
+        NEMOCLAW_MODEL: model,
+        NEMOCLAW_PROVIDER: "custom",
+      },
+      [apiKey], // presumably values to redact from captured output — verify against onboardSandbox
+      "tc-inf-09-onboard-compatible-endpoint",
+    );
+    expectOnboardSuccess(onboard, "TC-INF-09 compatible-endpoint onboard");
+    cleanup.add(`strict inference-routing compatible-endpoint cleanup for ${sandboxName}`, () => // added only after onboard succeeded
+      cleanupSandbox(host, sandbox, sandboxName, { strict: true }),
+    );
+    await expectOpenAiChatThroughSandbox(
+      sandbox,
+      sandboxName,
+      model,
+      [apiKey],
+      "compatible-endpoint-inference-local-chat",
+    );
+  },
+);
diff --git a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
index 183375e4e9..8481599173 100644
--- a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
+++ b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
@@ -130,6 +130,22 @@ describe("e2e-vitest-scenarios workflow boundary", () => {
       selectedFreeStandingJobs: ["runtime-overrides-vitest"],
       registryScenarios: [],
     });
+    expect(
+      evaluateE2eVitestWorkflowDispatchSelectors({ scenarios: "inference-routing" }),
+    ).toMatchObject({
+      valid: true,
+      liveScenariosRuns: false,
+      selectedFreeStandingJobs: ["inference-routing-vitest"],
+      registryScenarios: [],
+    });
+    expect(
+      evaluateE2eVitestWorkflowDispatchSelectors({ jobs: "inference-routing-vitest" }),
+    ).toMatchObject({
+      valid: true,
+      liveScenariosRuns: false,
+      selectedFreeStandingJobs: ["inference-routing-vitest"],
+      registryScenarios: [],
+    });
     expect(evaluateE2eVitestWorkflowDispatchSelectors({ scenarios: "hermes-e2e" })).toMatchObject({
       valid: true,
       liveScenariosRuns: false,
@@ -181,6 +197,16 @@ describe("e2e-vitest-scenarios workflow boundary", () => {
       hermes_selected: "false",
       matrix: "[]",
     });
+    expect(
+      generateMatrixForDispatch({ JOBS: "inference-routing-vitest", SCENARIOS: "" }),
+    ).toMatchObject({
+      hermes_selected: "false",
+      matrix: "[]",
+    });
+    expect(generateMatrixForDispatch({ JOBS: "", SCENARIOS: "inference-routing" })).toMatchObject({
+      hermes_selected: "false",
+      matrix: "[]",
+    });
     expect(
       generateMatrixForDispatch({ JOBS: "rebuild-openclaw-vitest", SCENARIOS: "" }),
     ).toMatchObject({
diff --git a/tools/e2e-scenarios/workflow-boundary.mts b/tools/e2e-scenarios/workflow-boundary.mts
index 9851792dbe..5384b25214 100644
--- a/tools/e2e-scenarios/workflow-boundary.mts
+++ b/tools/e2e-scenarios/workflow-boundary.mts
@@ -21,6 +21,7 @@ const SELECTOR_PATTERN = /^[A-Za-z0-9_-]+(,[A-Za-z0-9_-]+)*$/;
 const FREE_STANDING_SCENARIO_JOBS = new Map([
   ["openshell-version-pin", "openshell-version-pin-vitest"],
   ["onboard-negative-paths", "onboard-negative-paths-vitest"],
+  ["inference-routing", "inference-routing-vitest"],
   ["runtime-overrides", "runtime-overrides-vitest"],
   ["hermes-e2e", "hermes-e2e-vitest"],
   ["network-policy", "network-policy-vitest"],
@@ -279,6 +280,7 @@ function validateJobsSelector(errors: string[], jobs: WorkflowRecord): void {
   requireRunContains(errors, validate, "allowed_jobs=");
   requireRunContains(errors, validate, "openshell-version-pin-vitest");
   requireRunContains(errors, validate, "onboard-negative-paths-vitest");
+  requireRunContains(errors, validate, "inference-routing-vitest");
   requireRunContains(errors, validate, "credential-migration-vitest");
   requireRunContains(errors, validate, "runtime-overrides-vitest");
   requireRunContains(errors, validate, "double-onboard-vitest");
@@ -1166,6 +1168,8 @@ export function validateE2eVitestScenariosWorkflowBoundary(
   requireRunContains(errors, generate, "allowed_jobs=");
   requireRunContains(errors, generate, "Use either scenarios or jobs, not both");
   requireRunContains(errors, generate, "Unknown free-standing Vitest job");
+  requireRunContains(errors, generate, "inference-routing-vitest");
+  requireRunContains(errors, generate, "inference-routing");
   requireRunContains(errors, generate, "runtime-overrides-vitest");
   requireRunContains(errors, generate, "runtime-overrides");
   requireRunContains(errors, generate, "double-onboard-vitest");
@@ -1326,6 +1330,7 @@ export function validateE2eVitestScenariosWorkflowBoundary(
   validateOpenShellVersionPinVitestJob(errors, jobs);
   validateOnboardNegativePathsVitestJob(errors, jobs);
   validateFreeStandingJobSelector(errors, jobs, "credential-migration-vitest");
+  validateFreeStandingJobSelector(errors, jobs, "inference-routing-vitest", "inference-routing");
   validateRuntimeOverridesVitestJob(errors, jobs);
   validateDoubleOnboardVitestJob(errors, jobs);
   validateHermesE2EVitestJob(errors, jobs);
@@ -1357,6 +1362,7 @@ export function validateE2eVitestScenariosWorkflowBoundary(
       "live-scenarios",
       "openshell-version-pin-vitest",
       "onboard-negative-paths-vitest",
+      "inference-routing-vitest",
       "credential-migration-vitest",
       "runtime-overrides-vitest",
       "hermes-e2e-vitest",