From cece29fb53b259feb8e62298a1611b9326d6e227 Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Thu, 11 Jun 2026 09:24:30 -0400
Subject: [PATCH 01/11] test(e2e): add inference routing Vitest coverage

---
 .github/workflows/e2e-vitest-scenarios.yaml   |  47 +
 .../live/inference-routing.test.ts            | 823 ++++++++++++++++++
 2 files changed, 870 insertions(+)
 create mode 100644 test/e2e-scenario/live/inference-routing.test.ts

diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml
index 07bb8c54f5..af85fa8793 100644
--- a/.github/workflows/e2e-vitest-scenarios.yaml
+++ b/.github/workflows/e2e-vitest-scenarios.yaml
@@ -250,6 +250,53 @@ jobs:
           if-no-files-found: ignore
           retention-days: 14
 
+  inference-routing-vitest:
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+    env:
+      E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/inference-routing
+      NEMOCLAW_RUN_E2E_SCENARIOS: "1"
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+        with:
+          persist-credentials: false
+
+      - name: Set up Node
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0
+        with:
+          node-version: 22
+          cache: npm
+
+      - name: Install root dependencies
+        run: npm ci --ignore-scripts
+
+      - name: Build CLI
+        run: npm run build:cli
+
+      - name: Run inference routing live test
+        env:
+          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+        # Direct Vitest coverage for test/e2e/test-inference-routing.sh. The
+        # always-on slices prove invalid-key classification, unreachable
+        # endpoint classification, and NVIDIA credential isolation; optional
+        # third-party provider smokes stay skipped unless their secrets are
+        # explicitly enabled by a future workflow.
+        run: |
+          set -euo pipefail
+          npx vitest run --project e2e-scenarios-live \
+            test/e2e-scenario/live/inference-routing.test.ts \
+            --silent=false --reporter=default
+
+      - name: Upload inference routing artifacts
+        if: always()
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: e2e-vitest-scenarios-inference-routing
+          path: e2e-artifacts/vitest/inference-routing/
+          include-hidden-files: false
+          if-no-files-found: ignore
+          retention-days: 14
+
   # Focused coverage slice for the #2603/#3145 OpenClaw websocket
   # protocol/history contract. The retained legacy bash lane remains the
   # source for full closeout until a later PR proves replacement and deletes it.
diff --git a/test/e2e-scenario/live/inference-routing.test.ts b/test/e2e-scenario/live/inference-routing.test.ts
new file mode 100644
index 0000000000..d9cf31514d
--- /dev/null
+++ b/test/e2e-scenario/live/inference-routing.test.ts
@@ -0,0 +1,823 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { spawn } from "node:child_process";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+
+import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
+import type { ArtifactSink } from "../fixtures/artifacts.ts";
+import type { HostCliClient } from "../fixtures/clients/host.ts";
+import type { SandboxClient } from "../fixtures/clients/sandbox.ts";
+import { trustedSandboxShellScript, validateSandboxName } from "../fixtures/clients/sandbox.ts";
+import { expect, test } from "../fixtures/e2e-test.ts";
+import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
+import { redactString } from "../fixtures/redaction.ts";
+
+// Migrated from test/e2e/test-inference-routing.sh. This stays a simple live
+// Vitest conversion: direct CLI/onboard subprocesses plus OpenShell sandbox
+// probes, with local helpers only where raw in-memory output is required to
+// prove credential non-exposure before redacted artifacts are written.
+
+const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
+const CLI_ENTRYPOINT = path.join(REPO_ROOT, "bin", "nemoclaw.js");
+const DIST_ENTRYPOINT = path.join(REPO_ROOT, "dist", "nemoclaw.js");
+const NEMOCLAW_STATE_DIR = path.join(os.homedir(), ".nemoclaw");
+const ONBOARD_SESSION_FILE = path.join(NEMOCLAW_STATE_DIR, "onboard-session.json");
+const ONBOARD_LOCK_FILE = path.join(NEMOCLAW_STATE_DIR, "onboard.lock");
+const ONBOARD_ARGS = [
+  "onboard",
+  "--non-interactive",
+  "--yes",
+  "--yes-i-accept-third-party-software",
+];
+const STACK_TRACE_PATTERNS = [
+  /^\s+at (Object\.|Module\.|node:internal|process\.)/m,
+  /\bat node:internal/m,
+];
+const CREDENTIAL_CLASSIFICATION_PATTERN =
+  /authorization|credential|invalid|401|unauthorized|api[._-]?key/i;
+const TRANSPORT_CLASSIFICATION_PATTERN =
+  /unreachable|timeout|connect|ECONNREFUSED|ETIMEDOUT|ENETUNREACH|EHOSTUNREACH|ENOTFOUND|EAI_AGAIN|No route to host|transport|network|endpoint|dns/i;
+const liveTest = shouldRunLiveE2EScenarios() ? test : test.skip;
+
+function shouldRunProviderSmoke(provider: "openai" | "anthropic" | "compatible"): boolean {
+  const requested = process.env.NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE?.trim().toLowerCase();
+  return requested === "1" || requested === "true" || requested === "all" || requested === provider;
+}
+
+type SkipFn = (note?: string) => void;
+
+function skipLive(skip: SkipFn, note: string): never {
+  skip(note);
+  throw new Error(note);
+}
+
+interface RawRunResult {
+  readonly command: readonly string[];
+  readonly exitCode: number | null;
+  readonly signal: NodeJS.Signals | null;
+  readonly timedOut: boolean;
+  readonly stdout: string;
+  readonly stderr: string;
+  readonly redactedStdout: string;
+  readonly redactedStderr: string;
+}
+
+interface RawRunOptions {
+  readonly artifactName: string;
+  readonly artifacts: ArtifactSink;
+  readonly cwd?: string;
+  readonly env?: NodeJS.ProcessEnv;
+  readonly redactionValues?: readonly string[];
+  readonly timeoutMs?: number;
+}
+
+function resultText(result: { stdout: string; stderr: string }): string {
+  return [result.stdout, result.stderr].filter(Boolean).join("\n");
+}
+
+function redactedResultText(
+  result: Pick<RawRunResult, "redactedStdout" | "redactedStderr">,
+): string {
+  return [result.redactedStdout, result.redactedStderr].filter(Boolean).join("\n");
+}
+
+function hasRawNodeStackTrace(text: string): boolean {
+  return STACK_TRACE_PATTERNS.some((pattern) => pattern.test(text));
+}
+
+function inferenceSandboxName(prefix: string): string {
+  const name = `${prefix}-${process.pid}`;
+  validateSandboxName(name);
+  return name;
+}
+
+function onboardEnv(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv {
+  return {
+    ...buildAvailabilityProbeEnv(),
+    ...extra,
+    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
+    NEMOCLAW_NON_INTERACTIVE: "1",
+    NEMOCLAW_RECREATE_SANDBOX: "1",
+  };
+}
+
+function clearOnboardState(): void {
+  fs.rmSync(ONBOARD_LOCK_FILE, { force: true });
+  fs.rmSync(ONBOARD_SESSION_FILE, { force: true });
+}
+
+function redactedCommand(command: readonly string[], values: readonly string[]): string[] {
+  return command.map((part) => redactString(part, values));
+}
+
+async function runRawCommand(
+  command: string,
+  args: readonly string[],
+  options: RawRunOptions,
+): Promise<RawRunResult> {
+  const timeoutMs = options.timeoutMs ?? 60_000;
+  const redactionValues = [...(options.redactionValues ?? [])];
+  const child = spawn(command, [...args], {
+    cwd: options.cwd ?? REPO_ROOT,
+    detached: true, // own process group, so the timeout below can signal the whole tree
+    env: options.env,
+    stdio: ["ignore", "pipe", "pipe"],
+  });
+  const fullCommand = [command, ...args];
+  let stdout = "";
+  let stderr = "";
+  let timedOut = false;
+  let spawnError: Error | undefined;
+
+  const killProcessGroup = (signal: NodeJS.Signals): void => {
+    if (child.pid === undefined) return;
+    try {
+      process.kill(-child.pid, signal); // negative pid targets the process group
+    } catch {
+      child.kill(signal); // group signal failed; fall back to the direct child
+    }
+  };
+
+  const timeout = setTimeout(() => {
+    timedOut = true;
+    killProcessGroup("SIGTERM");
+    setTimeout(() => killProcessGroup("SIGKILL"), 1_000).unref(); // escalate after 1s grace
+  }, timeoutMs);
+  timeout.unref();
+
+  child.stdout?.setEncoding("utf8").on("data", (chunk: string) => {
+    stdout += chunk; // stream-level decoding keeps multi-byte characters split across chunks intact
+  });
+  child.stderr?.setEncoding("utf8").on("data", (chunk: string) => {
+    stderr += chunk;
+  });
+  child.on("error", (error) => {
+    spawnError = error;
+  });
+
+  const { exitCode, signal } = await new Promise<{
+    exitCode: number | null;
+    signal: NodeJS.Signals | null;
+  }>((resolve) => {
+    child.on("close", (code, closeSignal) => resolve({ exitCode: code, signal: closeSignal }));
+  });
+  clearTimeout(timeout);
+
+  if (spawnError) {
+    const message = redactString(spawnError.message, redactionValues);
+    throw new Error(`failed to spawn ${redactString(command, redactionValues)}: ${message}`);
+  }
+
+  const redactedStdout = redactString(stdout, redactionValues); // only redacted text reaches artifacts
+  const redactedStderr = redactString(stderr, redactionValues);
+  await options.artifacts.writeText(`raw-shell/${options.artifactName}.stdout.txt`, redactedStdout);
+  await options.artifacts.writeText(`raw-shell/${options.artifactName}.stderr.txt`, redactedStderr);
+  await options.artifacts.writeJson(`raw-shell/${options.artifactName}.result.json`, {
+    command: redactedCommand(fullCommand, redactionValues),
+    exitCode,
+    signal,
+    timedOut,
+    stdout: redactedStdout,
+    stderr: redactedStderr,
+  });
+
+  return {
+    command: fullCommand,
+    exitCode,
+    signal,
+    timedOut,
+    stdout, // raw output stays in memory for credential non-exposure assertions
+    stderr,
+    redactedStdout,
+    redactedStderr,
+  };
+}
+
+async function runNemoclawCli(
+  args: readonly string[],
+  options: RawRunOptions,
+): Promise<RawRunResult> {
+  return runRawCommand(process.execPath, [CLI_ENTRYPOINT, ...args], options);
+}
+
+async function runOpenShell(
+  args: readonly string[],
+  options: RawRunOptions,
+): Promise<RawRunResult> {
+  return runRawCommand("openshell", args, options);
+}
+
+async function requireLivePrerequisites(host: HostCliClient, skip: SkipFn): Promise<void> {
+  expect(
+    fs.existsSync(DIST_ENTRYPOINT),
+    "run `npm run build:cli` before live inference-routing scenarios",
+  ).toBe(true);
+
+  const docker = await host.command("docker", ["info"], {
+    artifactName: "prereq-docker-info-inference-routing",
+    env: buildAvailabilityProbeEnv(),
+    timeoutMs: 30_000,
+  });
+  if (docker.exitCode !== 0) {
+    const message = `Docker is required for live inference-routing coverage: ${resultText(docker)}`;
+    if (process.env.GITHUB_ACTIONS === "true") throw new Error(message);
+    skipLive(skip, message);
+  }
+
+  const openshell = await host.command("openshell", ["--version"], {
+    artifactName: "prereq-openshell-version-inference-routing",
+    env: buildAvailabilityProbeEnv(),
+    timeoutMs: 30_000,
+  });
+  if (openshell.exitCode !== 0) {
+    // A fresh GitHub runner may not have OpenShell before the first onboard;
+    // `nemoclaw onboard` installs it. Record the prereq probe without blocking.
+    return;
+  }
+}
+
+async function ignoreCleanupError(run: () => Promise<unknown>): Promise<void> {
+  try {
+    await run();
+  } catch {
+    // Cleanup is best-effort before the first onboard because a fresh runner may
+    // not have OpenShell installed until `nemoclaw onboard` reaches that phase.
+  }
+}
+
+async function cleanupSandbox(
+  host: HostCliClient,
+  sandbox: SandboxClient,
+  sandboxName: string,
+): Promise<void> {
+  await ignoreCleanupError(() =>
+    host.command(process.execPath, [CLI_ENTRYPOINT, sandboxName, "destroy", "--yes"], {
+      artifactName: `cleanup-nemoclaw-destroy-${sandboxName}`,
+      env: buildAvailabilityProbeEnv(),
+      timeoutMs: 120_000,
+    }),
+  );
+  await ignoreCleanupError(() =>
+    sandbox.openshell(["sandbox", "delete", sandboxName], {
+      artifactName: `cleanup-openshell-sandbox-delete-${sandboxName}`,
+      env: buildAvailabilityProbeEnv(),
+      timeoutMs: 60_000,
+    }),
+  );
+  clearOnboardState();
+}
+
+async function expectNoActiveSandbox(host: HostCliClient, sandboxName: string): Promise<void> {
+  const status = await host.command(process.execPath, [CLI_ENTRYPOINT, sandboxName, "status"], {
+    artifactName: `post-failure-status-${sandboxName}`,
+    env: buildAvailabilityProbeEnv(),
+    timeoutMs: 30_000,
+  });
+  const text = resultText(status);
+  expect(
+    /running|ready/i.test(text),
+    `sandbox '${sandboxName}' is still active after failed onboard: ${text}`,
+  ).toBe(false);
+}
+
+async function onboardSandbox(
+  artifacts: ArtifactSink,
+  sandboxName: string,
+  extraEnv: NodeJS.ProcessEnv,
+  redactionValues: readonly string[],
+  artifactName: string,
+  timeoutMs = 10 * 60_000,
+): Promise<RawRunResult> {
+  clearOnboardState();
+  return runNemoclawCli(ONBOARD_ARGS, {
+    artifactName,
+    artifacts,
+    env: onboardEnv({
+      NEMOCLAW_POLICY_TIER: "open",
+      NEMOCLAW_SANDBOX_NAME: sandboxName,
+      ...extraEnv,
+    }),
+    redactionValues,
+    timeoutMs,
+  });
+}
+
+function expectOnboardSuccess(result: RawRunResult, label: string): void {
+  const redacted = redactedResultText(result);
+  expect(result.timedOut, `${label} timed out\n${redacted}`).toBe(false);
+  expect(result.exitCode, `${label} failed\n${redacted}`).toBe(0);
+}
+
+function expectOnboardFailure(result: RawRunResult, label: string): void {
+  const redacted = redactedResultText(result);
+  expect(result.timedOut, `${label} timed out\n${redacted}`).toBe(false);
+  expect(result.exitCode, `${label} unexpectedly succeeded\n${redacted}`).not.toBe(0);
+}
+
+function parseJsonBody(body: string, label: string): unknown {
+  try {
+    return JSON.parse(body);
+  } catch (error) {
+    throw new Error(
+      `${label} response was not JSON: ${error instanceof Error ? error.message : String(error)}`,
+    );
+  }
+}
+
+function openAiContent(json: unknown): string {
+  if (!json || typeof json !== "object") return "";
+  const choices = (json as { choices?: unknown }).choices;
+  if (!Array.isArray(choices)) return "";
+  for (const choice of choices) {
+    if (!choice || typeof choice !== "object") continue;
+    const message = (choice as { message?: unknown }).message;
+    if (message && typeof message === "object") {
+      const content = (message as { content?: unknown }).content;
+      if (typeof content === "string" && content.trim()) return content;
+    }
+    const text = (choice as { text?: unknown }).text;
+    if (typeof text === "string" && text.trim()) return text;
+  }
+  return "";
+}
+
+function anthropicContent(json: unknown): string {
+  if (!json || typeof json !== "object") return "";
+  const content = (json as { content?: unknown }).content;
+  if (Array.isArray(content)) {
+    return content
+      .map((part) => {
+        if (
+          part &&
+          typeof part === "object" &&
+          typeof (part as { text?: unknown }).text === "string"
+        ) {
+          return (part as { text: string }).text;
+        }
+        return "";
+      })
+      .join("")
+      .trim();
+  }
+  return openAiContent(json);
+}
+
+async function expectOpenAiChatThroughSandbox(
+  sandbox: SandboxClient,
+  sandboxName: string,
+  model: string,
+  redactionValues: readonly string[],
+  artifactName: string,
+): Promise<void> {
+  const payload = JSON.stringify({
+    model,
+    messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
+    max_tokens: 50,
+  });
+  const response = await sandbox.exec(
+    sandboxName,
+    [
+      "curl",
+      "-sS",
+      "--max-time",
+      "60",
+      "https://inference.local/v1/chat/completions",
+      "-H",
+      "Content-Type: application/json",
+      "--data-raw",
+      payload,
+    ],
+    {
+      artifactName,
+      env: buildAvailabilityProbeEnv(),
+      redactionValues: [...redactionValues],
+      timeoutMs: 90_000,
+    },
+  );
+  expect(response.exitCode, resultText(response)).toBe(0);
+  const content = openAiContent(parseJsonBody(response.stdout, artifactName));
+  expect(content, `no chat content in response: ${response.stdout.slice(0, 500)}`).not.toBe("");
+}
+
+async function expectAnthropicMessageThroughSandbox(
+  sandbox: SandboxClient,
+  sandboxName: string,
+  model: string,
+  redactionValues: readonly string[],
+): Promise<void> {
+  const payload = JSON.stringify({
+    model,
+    messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
+    max_tokens: 50,
+  });
+  const response = await sandbox.exec(
+    sandboxName,
+    [
+      "curl",
+      "-sS",
+      "--max-time",
+      "60",
+      "https://inference.local/v1/messages",
+      "-H",
+      "Content-Type: application/json",
+      "--data-raw",
+      payload,
+    ],
+    {
+      artifactName: "anthropic-inference-local-message",
+      env: buildAvailabilityProbeEnv(),
+      redactionValues: [...redactionValues],
+      timeoutMs: 90_000,
+    },
+  );
+  expect(response.exitCode, resultText(response)).toBe(0);
+  const content = anthropicContent(parseJsonBody(response.stdout, "anthropic inference.local"));
+  expect(content, `no Anthropic content in response: ${response.stdout.slice(0, 500)}`).not.toBe(
+    "",
+  );
+}
+
+liveTest(
+  "TC-INF-06 invalid API key fails with credential classification and cleanup",
+  { timeout: 5 * 60_000 },
+  async ({ artifacts, cleanup, host, sandbox, skip }) => {
+    await requireLivePrerequisites(host, skip);
+    const sandboxName = inferenceSandboxName("e2e-invalid-key");
+    cleanup.add(`remove inference-routing invalid-key residue for ${sandboxName}`, () =>
+      cleanupSandbox(host, sandbox, sandboxName),
+    );
+    await cleanupSandbox(host, sandbox, sandboxName);
+
+    await artifacts.writeJson("scenario.json", {
+      id: "inference-routing-invalid-api-key",
+      runner: "vitest",
+      migratedFrom: "test/e2e/test-inference-routing.sh",
+      contract: [
+        "invalid NVIDIA key exits non-zero",
+        "output contains credential classification",
+        "output does not expose raw stack trace or submitted key",
+        "failed onboard leaves no active sandbox",
+      ],
+    });
+
+    const invalidKey = ["nvapi", "INTENTIONALLY", "INVALID", "KEY", "FOR", "E2E", "TEST"].join("-");
+    const result = await onboardSandbox(
+      artifacts,
+      sandboxName,
+      { NVIDIA_API_KEY: invalidKey },
+      [invalidKey],
+      "tc-inf-06-onboard-invalid-api-key",
+      120_000,
+    );
+    const raw = resultText(result);
+    const redacted = redactedResultText(result);
+
+    expectOnboardFailure(result, "TC-INF-06 invalid-key onboard");
+    expect(CREDENTIAL_CLASSIFICATION_PATTERN.test(raw), redacted).toBe(true);
+    expect(hasRawNodeStackTrace(raw), redacted).toBe(false);
+    expect(raw.includes("INTENTIONALLY-INVALID-KEY-FOR-E2E-TEST"), redacted).toBe(false);
+    await expectNoActiveSandbox(host, sandboxName);
+  },
+);
+
+liveTest(
+  "TC-INF-07 unreachable endpoint fails with transport classification and cleanup",
+  { timeout: 5 * 60_000 },
+  async ({ artifacts, cleanup, host, sandbox, skip }) => {
+    await requireLivePrerequisites(host, skip);
+    const sandboxName = inferenceSandboxName("e2e-unreachable");
+    cleanup.add(`remove inference-routing unreachable residue for ${sandboxName}`, () =>
+      cleanupSandbox(host, sandbox, sandboxName),
+    );
+    await cleanupSandbox(host, sandbox, sandboxName);
+
+    await artifacts.writeJson("scenario.json", {
+      id: "inference-routing-unreachable-endpoint",
+      runner: "vitest",
+      migratedFrom: "test/e2e/test-inference-routing.sh",
+      contract: [
+        "unreachable custom endpoint exits non-zero",
+        "output contains transport classification",
+        "output does not expose raw stack trace",
+        "failed onboard leaves no active sandbox",
+      ],
+    });
+
+    const nvidiaKey = ["nvapi", "valid", "format", "but", "fake", "key", "1234567890"].join("-");
+    const compatibleKey = "fake-key-for-unreachable-test";
+    const result = await onboardSandbox(
+      artifacts,
+      sandboxName,
+      {
+        COMPATIBLE_API_KEY: compatibleKey,
+        NEMOCLAW_ENDPOINT_URL: "https://nemoclaw-e2e.invalid/v1",
+        NEMOCLAW_MODEL: "test-model",
+        NEMOCLAW_PROVIDER: "custom",
+        NVIDIA_API_KEY: nvidiaKey,
+      },
+      [nvidiaKey, compatibleKey],
+      "tc-inf-07-onboard-unreachable-endpoint",
+      120_000,
+    );
+    const raw = resultText(result);
+    const redacted = redactedResultText(result);
+
+    expectOnboardFailure(result, "TC-INF-07 unreachable-endpoint onboard");
+    expect(TRANSPORT_CLASSIFICATION_PATTERN.test(raw), redacted).toBe(true);
+    expect(hasRawNodeStackTrace(raw), redacted).toBe(false);
+    await expectNoActiveSandbox(host, sandboxName);
+  },
+);
+
+liveTest(
+  "TC-INF-05 real NVIDIA key is isolated from sandbox env, process list, and filesystem",
+  { timeout: 15 * 60_000 },
+  async ({ artifacts, cleanup, host, sandbox, secrets, skip }) => {
+    const apiKey =
+      secrets.optional("NVIDIA_API_KEY") ??
+      skipLive(skip, "NVIDIA_API_KEY not set — cannot test credential isolation");
+    await requireLivePrerequisites(host, skip);
+    const sandboxName = inferenceSandboxName("e2e-inf-cred");
+    cleanup.add(`remove inference-routing credential-isolation residue for ${sandboxName}`, () =>
+      cleanupSandbox(host, sandbox, sandboxName),
+    );
+    await cleanupSandbox(host, sandbox, sandboxName);
+
+    await artifacts.writeJson("scenario.json", {
+      id: "inference-routing-credential-isolation",
+      runner: "vitest",
+      migratedFrom: "test/e2e/test-inference-routing.sh",
+      contract: [
+        "real NVIDIA_API_KEY does not appear in sandbox environment",
+        "real NVIDIA_API_KEY does not appear in sandbox process list when ps is available",
+        "real NVIDIA_API_KEY does not appear in sampled sandbox filesystem",
+        "sandbox NVIDIA_API_KEY, when present, is a placeholder rather than the real key",
+      ],
+    });
+
+    const onboard = await onboardSandbox(
+      artifacts,
+      sandboxName,
+      { NVIDIA_API_KEY: apiKey },
+      [apiKey],
+      "tc-inf-05-onboard-credential-isolation",
+    );
+    expectOnboardSuccess(onboard, "TC-INF-05 credential-isolation onboard");
+
+    const sandboxEnv = await runOpenShell(["sandbox", "exec", "-n", sandboxName, "--", "env"], {
+      artifactName: "tc-inf-05-sandbox-env",
+      artifacts,
+      env: buildAvailabilityProbeEnv(),
+      redactionValues: [apiKey],
+      timeoutMs: 60_000,
+    });
+    expect(sandboxEnv.exitCode, redactedResultText(sandboxEnv)).toBe(0);
+    expect(sandboxEnv.stdout.includes(apiKey), redactedResultText(sandboxEnv)).toBe(false);
+
+    const processList = await runOpenShell(
+      [
+        "sandbox",
+        "exec",
+        "-n",
+        sandboxName,
+        "--",
+        "sh",
+        "-lc",
+        "ps aux 2>/dev/null || ps -ef 2>/dev/null",
+      ],
+      {
+        artifactName: "tc-inf-05-sandbox-process-list",
+        artifacts,
+        env: buildAvailabilityProbeEnv(),
+        redactionValues: [apiKey],
+        timeoutMs: 60_000,
+      },
+    );
+    if (processList.exitCode === 0 && processList.stdout.trim()) {
+      expect(processList.stdout.includes(apiKey), redactedResultText(processList)).toBe(false);
+    } else {
+      await artifacts.writeJson("tc-inf-05-process-list-skipped.json", {
+        reason: "ps not available in hardened sandbox",
+        exitCode: processList.exitCode,
+      });
+    }
+
+    const keyB64 = Buffer.from(apiKey, "utf8").toString("base64");
+    const scanScript = `
+const fs = require('fs');
+const { execFileSync } = require('child_process');
+const key = Buffer.from(process.env.KEY_B64 || '', 'base64').toString('utf8');
+if (!key) { console.log('NO_KEY_PROVIDED'); process.exit(0); }
+let out = '';
+try {
+  out = execFileSync('sh', ['-lc', 'find /sandbox /home /tmp -type f -size -1M 2>/dev/null | head -200'], { encoding: 'utf8' });
+} catch {
+  console.log('SCAN_ERROR');
+  process.exit(0);
+}
+for (const file of out.trim().split(/\\n/).filter(Boolean)) {
+  try {
+    const content = fs.readFileSync(file, 'utf8');
+    if (content.includes(key)) console.log('FOUND:' + file);
+  } catch {}
+}
+console.log('SCAN_DONE');
+`;
+    const filesystemScan = await runOpenShell(
+      [
+        "sandbox",
+        "exec",
+        "-n",
+        sandboxName,
+        "--",
+        "env",
+        `KEY_B64=${keyB64}`,
+        "node",
+        "-e",
+        scanScript,
+      ],
+      {
+        artifactName: "tc-inf-05-sandbox-filesystem-scan",
+        artifacts,
+        env: buildAvailabilityProbeEnv(),
+        redactionValues: [apiKey, keyB64],
+        timeoutMs: 90_000,
+      },
+    );
+    expect(filesystemScan.stdout).not.toContain("NO_KEY_PROVIDED");
+    expect(filesystemScan.stdout).not.toContain("FOUND:");
+    expect(filesystemScan.stdout, redactedResultText(filesystemScan)).toContain("SCAN_DONE");
+
+    const placeholder = await sandbox.execShell(
+      sandboxName,
+      trustedSandboxShellScript("printenv NVIDIA_API_KEY 2>/dev/null || true"),
+      {
+        artifactName: "tc-inf-05-sandbox-placeholder",
+        env: buildAvailabilityProbeEnv(),
+        redactionValues: [apiKey],
+        timeoutMs: 30_000,
+      },
+    );
+    const placeholderValue = placeholder.stdout.trim();
+    if (!placeholderValue) {
+      await artifacts.writeJson("tc-inf-05-placeholder-skipped.json", {
+        reason: "NVIDIA_API_KEY not set in sandbox; placeholder injection may not be active",
+      });
+    } else {
+      expect(placeholderValue, "sandbox has the real key, not a placeholder").not.toBe(apiKey);
+    }
+  },
+);
+
+liveTest(
+  "TC-INF-02 OpenAI provider responds through inference.local",
+  { timeout: 15 * 60_000 },
+  async ({ artifacts, cleanup, host, sandbox, secrets, skip }) => {
+    if (!shouldRunProviderSmoke("openai")) {
+      skipLive(
+        skip,
+        "set NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE=openai or all to run OpenAI smoke",
+      );
+    }
+    const apiKey = secrets.optional("OPENAI_API_KEY") ?? skipLive(skip, "OPENAI_API_KEY not set");
+    await requireLivePrerequisites(host, skip);
+    const sandboxName = inferenceSandboxName("e2e-openai");
+    const model = process.env.NEMOCLAW_OPENAI_MODEL || "gpt-4o-mini";
+    cleanup.add(`remove inference-routing OpenAI residue for ${sandboxName}`, () =>
+      cleanupSandbox(host, sandbox, sandboxName),
+    );
+    await cleanupSandbox(host, sandbox, sandboxName);
+
+    await artifacts.writeJson("scenario.json", {
+      id: "inference-routing-openai",
+      runner: "vitest",
+      migratedFrom: "test/e2e/test-inference-routing.sh",
+      contract: ["OpenAI provider onboards", "sandbox inference.local routes chat to OpenAI"],
+      model,
+    });
+
+    const onboard = await onboardSandbox(
+      artifacts,
+      sandboxName,
+      { NEMOCLAW_MODEL: model, NEMOCLAW_PROVIDER: "openai", OPENAI_API_KEY: apiKey },
+      [apiKey],
+      "tc-inf-02-onboard-openai",
+    );
+    expectOnboardSuccess(onboard, "TC-INF-02 OpenAI onboard");
+    await expectOpenAiChatThroughSandbox(
+      sandbox,
+      sandboxName,
+      model,
+      [apiKey],
+      "openai-inference-local-chat",
+    );
+  },
+);
+
+liveTest(
+  "TC-INF-03 Anthropic provider responds through inference.local",
+  { timeout: 15 * 60_000 },
+  async ({ artifacts, cleanup, host, sandbox, secrets, skip }) => {
+    if (!shouldRunProviderSmoke("anthropic")) {
+      skipLive(
+        skip,
+        "set NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE=anthropic or all to run Anthropic smoke",
+      );
+    }
+    const apiKey =
+      secrets.optional("ANTHROPIC_API_KEY") ?? skipLive(skip, "ANTHROPIC_API_KEY not set");
+    await requireLivePrerequisites(host, skip);
+    const sandboxName = inferenceSandboxName("e2e-anthropic");
+    const model = process.env.NEMOCLAW_ANTHROPIC_MODEL || "claude-sonnet-4-6";
+    cleanup.add(`remove inference-routing Anthropic residue for ${sandboxName}`, () =>
+      cleanupSandbox(host, sandbox, sandboxName),
+    );
+    await cleanupSandbox(host, sandbox, sandboxName);
+
+    await artifacts.writeJson("scenario.json", {
+      id: "inference-routing-anthropic",
+      runner: "vitest",
+      migratedFrom: "test/e2e/test-inference-routing.sh",
+      contract: [
+        "Anthropic provider onboards",
+        "sandbox inference.local routes Messages API to Anthropic",
+      ],
+      model,
+    });
+
+    const onboard = await onboardSandbox(
+      artifacts,
+      sandboxName,
+      { ANTHROPIC_API_KEY: apiKey, NEMOCLAW_MODEL: model, NEMOCLAW_PROVIDER: "anthropic" },
+      [apiKey],
+      "tc-inf-03-onboard-anthropic",
+    );
+    expectOnboardSuccess(onboard, "TC-INF-03 Anthropic onboard");
+    await expectAnthropicMessageThroughSandbox(sandbox, sandboxName, model, [apiKey]);
+  },
+);
+
+liveTest(
+  "TC-INF-09 custom OpenAI-compatible endpoint responds through inference.local",
+  { timeout: 15 * 60_000 },
+  async ({ artifacts, cleanup, host, sandbox, secrets, skip }) => {
+    if (!shouldRunProviderSmoke("compatible")) {
+      skipLive(
+        skip,
+        "set NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE=compatible or all to run compatible endpoint smoke",
+      );
+    }
+    const endpointUrl =
+      process.env.NEMOCLAW_ENDPOINT_URL || // "" (unset CI variable) must skip like a missing value
+      skipLive(skip, "Missing NEMOCLAW_ENDPOINT_URL, NEMOCLAW_COMPAT_MODEL, or COMPATIBLE_API_KEY");
+    const model =
+      process.env.NEMOCLAW_COMPAT_MODEL ||
+      process.env.NEMOCLAW_MODEL ||
+      skipLive(skip, "Missing NEMOCLAW_ENDPOINT_URL, NEMOCLAW_COMPAT_MODEL, or COMPATIBLE_API_KEY");
+    const apiKey =
+      secrets.optional("COMPATIBLE_API_KEY") ??
+      skipLive(skip, "Missing NEMOCLAW_ENDPOINT_URL, NEMOCLAW_COMPAT_MODEL, or COMPATIBLE_API_KEY");
+    await requireLivePrerequisites(host, skip);
+    const sandboxName = inferenceSandboxName("e2e-compat-ep");
+    cleanup.add(`remove inference-routing compatible-endpoint residue for ${sandboxName}`, () =>
+      cleanupSandbox(host, sandbox, sandboxName),
+    );
+    await cleanupSandbox(host, sandbox, sandboxName);
+
+    await artifacts.writeJson("scenario.json", {
+      id: "inference-routing-compatible-endpoint",
+      runner: "vitest",
+      migratedFrom: "test/e2e/test-inference-routing.sh",
+      contract: [
+        "custom OpenAI-compatible endpoint onboards",
+        "sandbox inference.local routes chat to compatible endpoint",
+      ],
+      endpointUrl: redactString(endpointUrl, [apiKey]),
+      model,
+    });
+
+    const onboard = await onboardSandbox(
+      artifacts,
+      sandboxName,
+      {
+        COMPATIBLE_API_KEY: apiKey,
+        NEMOCLAW_ENDPOINT_URL: endpointUrl,
+        NEMOCLAW_MODEL: model,
+        NEMOCLAW_PROVIDER: "custom",
+      },
+      [apiKey],
+      "tc-inf-09-onboard-compatible-endpoint",
+    );
+    expectOnboardSuccess(onboard, "TC-INF-09 compatible-endpoint onboard");
+    await expectOpenAiChatThroughSandbox(
+      sandbox,
+      sandboxName,
+      model,
+      [apiKey],
+      "compatible-endpoint-inference-local-chat",
+    );
+  },
+);

From bc9d871f7f37f3a8f6107ca7e1099654c4cb324a Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Thu, 11 Jun 2026 09:28:41 -0400
Subject: [PATCH 02/11] test(e2e): tolerate fresh runners before openshell
 install

---
 .../live/inference-routing.test.ts            | 21 ++++++++++++-------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/test/e2e-scenario/live/inference-routing.test.ts b/test/e2e-scenario/live/inference-routing.test.ts
index d9cf31514d..cd944993d6 100644
--- a/test/e2e-scenario/live/inference-routing.test.ts
+++ b/test/e2e-scenario/live/inference-routing.test.ts
@@ -227,14 +227,19 @@ async function requireLivePrerequisites(host: HostCliClient, skip: SkipFn): Prom
     skipLive(skip, message);
   }
 
-  const openshell = await host.command("openshell", ["--version"], {
-    artifactName: "prereq-openshell-version-inference-routing",
-    env: buildAvailabilityProbeEnv(),
-    timeoutMs: 30_000,
-  });
-  if (openshell.exitCode !== 0) {
-    // A fresh GitHub runner may not have OpenShell before the first onboard;
-    // `nemoclaw onboard` installs it. Record the prereq probe without blocking.
+  try {
+    const openshell = await host.command("openshell", ["--version"], {
+      artifactName: "prereq-openshell-version-inference-routing",
+      env: buildAvailabilityProbeEnv(),
+      timeoutMs: 30_000,
+    });
+    if (openshell.exitCode !== 0) {
+      // A fresh GitHub runner may not have OpenShell before the first onboard;
+      // `nemoclaw onboard` installs it. Record the prereq probe without blocking.
+      return;
+    }
+  } catch {
+    // Same as non-zero: fresh runner may not have openshell until onboard.
     return;
   }
 }

From caf56dc368b1655007e6b6f63f6522cceeb941a9 Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Thu, 11 Jun 2026 09:34:35 -0400
Subject: [PATCH 03/11] test(e2e): keep sandbox scan command single-line

---
 .../live/inference-routing.test.ts            | 30 +++++++------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/test/e2e-scenario/live/inference-routing.test.ts b/test/e2e-scenario/live/inference-routing.test.ts
index cd944993d6..e0c5f01fb8 100644
--- a/test/e2e-scenario/live/inference-routing.test.ts
+++ b/test/e2e-scenario/live/inference-routing.test.ts
@@ -611,26 +611,16 @@ liveTest(
     }
 
     const keyB64 = Buffer.from(apiKey, "utf8").toString("base64");
-    const scanScript = `
-const fs = require('fs');
-const { execFileSync } = require('child_process');
-const key = Buffer.from(process.env.KEY_B64 || '', 'base64').toString('utf8');
-if (!key) { console.log('NO_KEY_PROVIDED'); process.exit(0); }
-let out = '';
-try {
-  out = execFileSync('sh', ['-lc', 'find /sandbox /home /tmp -type f -size -1M 2>/dev/null | head -200'], { encoding: 'utf8' });
-} catch {
-  console.log('SCAN_ERROR');
-  process.exit(0);
-}
-for (const file of out.trim().split(/\\n/).filter(Boolean)) {
-  try {
-    const content = fs.readFileSync(file, 'utf8');
-    if (content.includes(key)) console.log('FOUND:' + file);
-  } catch {}
-}
-console.log('SCAN_DONE');
-`;
+    const scanScript = [
+      "const fs=require('fs')",
+      "const {execFileSync}=require('child_process')",
+      "const key=Buffer.from(process.env.KEY_B64||'','base64').toString('utf8')",
+      "if(!key){console.log('NO_KEY_PROVIDED');process.exit(0)}",
+      "let out=''",
+      "try{out=execFileSync('sh',['-lc','find /sandbox /home /tmp -type f -size -1M 2>/dev/null | head -200'],{encoding:'utf8'})}catch{console.log('SCAN_ERROR');process.exit(0)}",
+      "for(const file of out.trim().split(/\\n/).filter(Boolean)){try{const content=fs.readFileSync(file,'utf8');if(content.includes(key))console.log('FOUND:'+file)}catch{}}",
+      "console.log('SCAN_DONE')",
+    ].join(";");
     const filesystemScan = await runOpenShell(
       [
         "sandbox",

From 6aa71fe330687d7ee75cd73adff30582b6a023ac Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Thu, 11 Jun 2026 09:39:14 -0400
Subject: [PATCH 04/11] ci(e2e): keep inference routing job PR-safe

---
 .github/workflows/e2e-vitest-scenarios.yaml | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml
index af85fa8793..55377cc258 100644
--- a/.github/workflows/e2e-vitest-scenarios.yaml
+++ b/.github/workflows/e2e-vitest-scenarios.yaml
@@ -274,13 +274,12 @@ jobs:
         run: npm run build:cli
 
       - name: Run inference routing live test
-        env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
         # Direct Vitest coverage for test/e2e/test-inference-routing.sh. The
-        # always-on slices prove invalid-key classification, unreachable
-        # endpoint classification, and NVIDIA credential isolation; optional
-        # third-party provider smokes stay skipped unless their secrets are
-        # explicitly enabled by a future workflow.
+        # always-on PR-safe slices prove invalid-key and unreachable-endpoint
+        # classification/cleanup without spending live provider quota; real
+        # NVIDIA credential isolation and third-party provider smokes stay
+        # skipped unless their secrets are explicitly supplied by a future
+        # workflow.
         run: |
           set -euo pipefail
           npx vitest run --project e2e-scenarios-live \

From c63d3112be568af812d1c780d761a6ef0c5a2db4 Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Thu, 11 Jun 2026 10:32:55 -0400
Subject: [PATCH 05/11] test(e2e): harden inference routing probes

---
 .../live/inference-routing.test.ts            | 177 ++++++++++++++----
 1 file changed, 141 insertions(+), 36 deletions(-)

diff --git a/test/e2e-scenario/live/inference-routing.test.ts b/test/e2e-scenario/live/inference-routing.test.ts
index e0c5f01fb8..0a3daca40f 100644
--- a/test/e2e-scenario/live/inference-routing.test.ts
+++ b/test/e2e-scenario/live/inference-routing.test.ts
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 
 import { spawn } from "node:child_process";
+import crypto from "node:crypto";
 import fs from "node:fs";
 import os from "node:os";
 import path from "node:path";
@@ -71,6 +72,7 @@ interface RawRunOptions {
   readonly cwd?: string;
   readonly env?: NodeJS.ProcessEnv;
   readonly redactionValues?: readonly string[];
+  readonly stdin?: string;
   readonly timeoutMs?: number;
 }
 
@@ -124,7 +126,7 @@ async function runRawCommand(
     cwd: options.cwd ?? REPO_ROOT,
     detached: true,
     env: options.env,
-    stdio: ["ignore", "pipe", "pipe"],
+    stdio: [options.stdin === undefined ? "ignore" : "pipe", "pipe", "pipe"],
   });
   const fullCommand = [command, ...args];
   let stdout = "";
@@ -148,6 +150,10 @@ async function runRawCommand(
   }, timeoutMs);
   timeout.unref();
 
+  if (options.stdin !== undefined) {
+    child.stdin?.end(options.stdin);
+  }
+
   child.stdout?.on("data", (chunk: Buffer) => {
     stdout += chunk.toString("utf8");
   });
@@ -244,35 +250,96 @@ async function requireLivePrerequisites(host: HostCliClient, skip: SkipFn): Prom
   }
 }
 
-async function ignoreCleanupError(run: () => Promise<unknown>): Promise<void> {
+interface CleanupSandboxOptions {
+  readonly strict?: boolean;
+}
+
+async function optionalCleanupStep(run: () => Promise<unknown>): Promise<void> {
   try {
     await run();
   } catch {
-    // Cleanup is best-effort before the first onboard because a fresh runner may
-    // not have OpenShell installed until `nemoclaw onboard` reaches that phase.
+    // Pre-onboard cleanup is best-effort because a fresh runner may not have
+    // OpenShell installed until `nemoclaw onboard` reaches that phase.
   }
 }
 
+function probeSummary(
+  label: string,
+  result: { exitCode: number | null; stdout: string; stderr: string },
+): string {
+  const text = resultText(result).trim();
+  return `${label} exit=${result.exitCode}${text ? `: ${text.slice(0, 500)}` : ""}`;
+}
+
 async function cleanupSandbox(
   host: HostCliClient,
   sandbox: SandboxClient,
   sandboxName: string,
+  options: CleanupSandboxOptions = {},
 ): Promise<void> {
-  await ignoreCleanupError(() =>
-    host.command(process.execPath, [CLI_ENTRYPOINT, sandboxName, "destroy", "--yes"], {
-      artifactName: `cleanup-nemoclaw-destroy-${sandboxName}`,
-      env: buildAvailabilityProbeEnv(),
-      timeoutMs: 120_000,
-    }),
-  );
-  await ignoreCleanupError(() =>
-    sandbox.openshell(["sandbox", "delete", sandboxName], {
+  if (!options.strict) {
+    await optionalCleanupStep(() =>
+      host.command(process.execPath, [CLI_ENTRYPOINT, sandboxName, "destroy", "--yes"], {
+        artifactName: `cleanup-nemoclaw-destroy-${sandboxName}`,
+        env: buildAvailabilityProbeEnv(),
+        timeoutMs: 120_000,
+      }),
+    );
+    await optionalCleanupStep(() =>
+      sandbox.openshell(["sandbox", "delete", sandboxName], {
+        artifactName: `cleanup-openshell-sandbox-delete-${sandboxName}`,
+        env: buildAvailabilityProbeEnv(),
+        timeoutMs: 60_000,
+      }),
+    );
+    clearOnboardState();
+    return;
+  }
+
+  const cleanupEvidence: string[] = [];
+  try {
+    const destroy = await host.command(
+      process.execPath,
+      [CLI_ENTRYPOINT, sandboxName, "destroy", "--yes"],
+      {
+        artifactName: `cleanup-nemoclaw-destroy-${sandboxName}`,
+        env: buildAvailabilityProbeEnv(),
+        timeoutMs: 120_000,
+      },
+    );
+    cleanupEvidence.push(probeSummary("nemoclaw destroy", destroy));
+  } catch (error) {
+    cleanupEvidence.push(
+      `nemoclaw destroy threw: ${error instanceof Error ? error.message : String(error)}`,
+    );
+  }
+
+  try {
+    const deletion = await sandbox.openshell(["sandbox", "delete", sandboxName], {
       artifactName: `cleanup-openshell-sandbox-delete-${sandboxName}`,
       env: buildAvailabilityProbeEnv(),
       timeoutMs: 60_000,
-    }),
-  );
+    });
+    cleanupEvidence.push(probeSummary("openshell sandbox delete", deletion));
+  } catch (error) {
+    cleanupEvidence.push(
+      `openshell sandbox delete threw: ${error instanceof Error ? error.message : String(error)}`,
+    );
+  }
+
   clearOnboardState();
+
+  const status = await sandbox.status(sandboxName, {
+    artifactName: `cleanup-openshell-sandbox-status-${sandboxName}`,
+    env: buildAvailabilityProbeEnv(),
+    timeoutMs: 30_000,
+  });
+  cleanupEvidence.push(probeSummary("openshell sandbox status", status));
+  if (status.exitCode === 0) {
+    throw new Error(
+      `sandbox '${sandboxName}' still exists after strict cleanup\n${cleanupEvidence.join("\n")}`,
+    );
+  }
 }
 
 async function expectNoActiveSandbox(host: HostCliClient, sandboxName: string): Promise<void> {
@@ -546,8 +613,9 @@ liveTest(
       skipLive(skip, "NVIDIA_API_KEY not set — cannot test credential isolation");
     await requireLivePrerequisites(host, skip);
     const sandboxName = inferenceSandboxName("e2e-inf-cred");
-    cleanup.add(`remove inference-routing credential-isolation residue for ${sandboxName}`, () =>
-      cleanupSandbox(host, sandbox, sandboxName),
+    cleanup.add(
+      `best-effort inference-routing credential-isolation cleanup for ${sandboxName}`,
+      () => cleanupSandbox(host, sandbox, sandboxName),
     );
     await cleanupSandbox(host, sandbox, sandboxName);
 
@@ -571,6 +639,9 @@ liveTest(
       "tc-inf-05-onboard-credential-isolation",
     );
     expectOnboardSuccess(onboard, "TC-INF-05 credential-isolation onboard");
+    cleanup.add(`strict inference-routing credential-isolation cleanup for ${sandboxName}`, () =>
+      cleanupSandbox(host, sandbox, sandboxName, { strict: true }),
+    );
 
     const sandboxEnv = await runOpenShell(["sandbox", "exec", "-n", sandboxName, "--", "env"], {
       artifactName: "tc-inf-05-sandbox-env",
@@ -610,35 +681,59 @@ liveTest(
       });
     }
 
-    const keyB64 = Buffer.from(apiKey, "utf8").toString("base64");
     const scanScript = [
       "const fs=require('fs')",
       "const {execFileSync}=require('child_process')",
-      "const key=Buffer.from(process.env.KEY_B64||'','base64').toString('utf8')",
+      "const key=fs.readFileSync(0,'utf8')",
       "if(!key){console.log('NO_KEY_PROVIDED');process.exit(0)}",
       "let out=''",
       "try{out=execFileSync('sh',['-lc','find /sandbox /home /tmp -type f -size -1M 2>/dev/null | head -200'],{encoding:'utf8'})}catch{console.log('SCAN_ERROR');process.exit(0)}",
       "for(const file of out.trim().split(/\\n/).filter(Boolean)){try{const content=fs.readFileSync(file,'utf8');if(content.includes(key))console.log('FOUND:'+file)}catch{}}",
       "console.log('SCAN_DONE')",
     ].join(";");
+    const leakCanary = `nemoclaw-fs-scan-canary-${crypto.randomUUID()}`;
+    const canaryPath = "/tmp/nemoclaw-fs-scan-canary.txt";
+    const plantCanary = await sandbox.execShell(
+      sandboxName,
+      trustedSandboxShellScript(`printf '%s' '${leakCanary}' > ${canaryPath}`),
+      {
+        artifactName: "tc-inf-05-sandbox-filesystem-canary-plant",
+        env: buildAvailabilityProbeEnv(),
+        timeoutMs: 30_000,
+      },
+    );
+    expect(plantCanary.exitCode, resultText(plantCanary)).toBe(0);
+    const canaryScan = await runOpenShell(
+      ["sandbox", "exec", "-n", sandboxName, "--", "node", "-e", scanScript],
+      {
+        artifactName: "tc-inf-05-sandbox-filesystem-canary-scan",
+        artifacts,
+        env: buildAvailabilityProbeEnv(),
+        stdin: leakCanary,
+        timeoutMs: 90_000,
+      },
+    );
+    expect(canaryScan.stdout, redactedResultText(canaryScan)).toContain(`FOUND:${canaryPath}`);
+
+    const removeCanary = await sandbox.execShell(
+      sandboxName,
+      trustedSandboxShellScript(`rm -f ${canaryPath}`),
+      {
+        artifactName: "tc-inf-05-sandbox-filesystem-canary-remove",
+        env: buildAvailabilityProbeEnv(),
+        timeoutMs: 30_000,
+      },
+    );
+    expect(removeCanary.exitCode, resultText(removeCanary)).toBe(0);
+
     const filesystemScan = await runOpenShell(
-      [
-        "sandbox",
-        "exec",
-        "-n",
-        sandboxName,
-        "--",
-        "env",
-        `KEY_B64=${keyB64}`,
-        "node",
-        "-e",
-        scanScript,
-      ],
+      ["sandbox", "exec", "-n", sandboxName, "--", "node", "-e", scanScript],
       {
         artifactName: "tc-inf-05-sandbox-filesystem-scan",
         artifacts,
         env: buildAvailabilityProbeEnv(),
-        redactionValues: [apiKey, keyB64],
+        redactionValues: [apiKey],
+        stdin: apiKey,
         timeoutMs: 90_000,
       },
     );
@@ -681,7 +776,7 @@ liveTest(
     await requireLivePrerequisites(host, skip);
     const sandboxName = inferenceSandboxName("e2e-openai");
     const model = process.env.NEMOCLAW_OPENAI_MODEL || "gpt-4o-mini";
-    cleanup.add(`remove inference-routing OpenAI residue for ${sandboxName}`, () =>
+    cleanup.add(`best-effort inference-routing OpenAI cleanup for ${sandboxName}`, () =>
       cleanupSandbox(host, sandbox, sandboxName),
     );
     await cleanupSandbox(host, sandbox, sandboxName);
@@ -702,6 +797,9 @@ liveTest(
       "tc-inf-02-onboard-openai",
     );
     expectOnboardSuccess(onboard, "TC-INF-02 OpenAI onboard");
+    cleanup.add(`strict inference-routing OpenAI cleanup for ${sandboxName}`, () =>
+      cleanupSandbox(host, sandbox, sandboxName, { strict: true }),
+    );
     await expectOpenAiChatThroughSandbox(
       sandbox,
       sandboxName,
@@ -727,7 +825,7 @@ liveTest(
     await requireLivePrerequisites(host, skip);
     const sandboxName = inferenceSandboxName("e2e-anthropic");
     const model = process.env.NEMOCLAW_ANTHROPIC_MODEL || "claude-sonnet-4-6";
-    cleanup.add(`remove inference-routing Anthropic residue for ${sandboxName}`, () =>
+    cleanup.add(`best-effort inference-routing Anthropic cleanup for ${sandboxName}`, () =>
       cleanupSandbox(host, sandbox, sandboxName),
     );
     await cleanupSandbox(host, sandbox, sandboxName);
@@ -751,6 +849,9 @@ liveTest(
       "tc-inf-03-onboard-anthropic",
     );
     expectOnboardSuccess(onboard, "TC-INF-03 Anthropic onboard");
+    cleanup.add(`strict inference-routing Anthropic cleanup for ${sandboxName}`, () =>
+      cleanupSandbox(host, sandbox, sandboxName, { strict: true }),
+    );
     await expectAnthropicMessageThroughSandbox(sandbox, sandboxName, model, [apiKey]);
   },
 );
@@ -777,8 +878,9 @@ liveTest(
       skipLive(skip, "Missing NEMOCLAW_ENDPOINT_URL, NEMOCLAW_COMPAT_MODEL, or COMPATIBLE_API_KEY");
     await requireLivePrerequisites(host, skip);
     const sandboxName = inferenceSandboxName("e2e-compat-ep");
-    cleanup.add(`remove inference-routing compatible-endpoint residue for ${sandboxName}`, () =>
-      cleanupSandbox(host, sandbox, sandboxName),
+    cleanup.add(
+      `best-effort inference-routing compatible-endpoint cleanup for ${sandboxName}`,
+      () => cleanupSandbox(host, sandbox, sandboxName),
     );
     await cleanupSandbox(host, sandbox, sandboxName);
 
@@ -807,6 +909,9 @@ liveTest(
       "tc-inf-09-onboard-compatible-endpoint",
     );
     expectOnboardSuccess(onboard, "TC-INF-09 compatible-endpoint onboard");
+    cleanup.add(`strict inference-routing compatible-endpoint cleanup for ${sandboxName}`, () =>
+      cleanupSandbox(host, sandbox, sandboxName, { strict: true }),
+    );
     await expectOpenAiChatThroughSandbox(
       sandbox,
       sandboxName,

From e407091aabf31165b967e0ef7cf0b6aff6d1444c Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Thu, 11 Jun 2026 10:41:45 -0400
Subject: [PATCH 06/11] ci(e2e): allow selective Vitest job dispatch

---
 .github/workflows/e2e-vitest-scenarios.yaml | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml
index 92b06c7117..66b948e69d 100644
--- a/.github/workflows/e2e-vitest-scenarios.yaml
+++ b/.github/workflows/e2e-vitest-scenarios.yaml
@@ -11,6 +11,11 @@ on:
         required: false
         default: ""
         type: string
+      jobs:
+        description: "Optional comma-separated free-standing live Vitest job ids. Empty runs all enabled jobs."
+        required: false
+        default: ""
+        type: string
       pr_number:
         description: Optional PR number for selective-dispatch result comments.
         required: false
@@ -21,7 +26,7 @@ permissions:
   contents: read
 
 concurrency:
-  group: e2e-vitest-scenarios-${{ github.ref }}-${{ inputs.scenarios || 'supported' }}
+  group: e2e-vitest-scenarios-${{ github.ref }}-${{ inputs.scenarios || 'supported' }}-${{ inputs.jobs || 'all-jobs' }}
   cancel-in-progress: false
 
 jobs:
@@ -47,6 +52,7 @@ jobs:
         name: Generate Vitest scenario matrix
         env:
           SCENARIOS: ${{ inputs.scenarios }}
+          JOBS: ${{ inputs.jobs }}
         run: |
           set -euo pipefail
           args=(--emit-live-matrix)
@@ -57,6 +63,10 @@ jobs:
             fi
             args+=(--scenarios "${SCENARIOS}")
           fi
+          if [ -n "${JOBS}" ] && [[ ! "${JOBS}" =~ ^[A-Za-z0-9_-]+(,[A-Za-z0-9_-]+)*$ ]]; then
+            echo "::error::Invalid jobs input: ${JOBS}" >&2
+            exit 1
+          fi
           matrix="$(npx tsx test/e2e-scenario/scenarios/run.ts "${args[@]}")"
           echo "matrix=${matrix}" >> "$GITHUB_OUTPUT"
           MATRIX_JSON="${matrix}" python - <<'PY' >> "$GITHUB_STEP_SUMMARY"
@@ -74,6 +84,7 @@ jobs:
 
   live-scenarios:
     needs: generate-matrix
+    if: ${{ inputs.jobs == '' }}
     runs-on: ${{ matrix.runner }}
     timeout-minutes: 45
     strategy:
@@ -174,6 +185,7 @@ jobs:
   # because the matrix above only runs registry-scenarios.test.ts. Modeled on
   # #5049's free-standing pattern.
   openshell-version-pin-vitest:
+    if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',openshell-version-pin-vitest,') }}
     runs-on: ubuntu-latest
     timeout-minutes: 15
     env:
@@ -213,6 +225,7 @@ jobs:
           retention-days: 14
 
   onboard-negative-paths-vitest:
+    if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',onboard-negative-paths-vitest,') }}
     runs-on: ubuntu-latest
     timeout-minutes: 15
     env:
@@ -256,6 +269,7 @@ jobs:
           retention-days: 14
 
   inference-routing-vitest:
+    if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',inference-routing-vitest,') }}
     runs-on: ubuntu-latest
     timeout-minutes: 45
     env:
@@ -305,7 +319,7 @@ jobs:
   # protocol/history contract. The retained legacy bash lane remains the
   # source for full closeout until a later PR proves replacement and deletes it.
   openclaw-tui-chat-correlation-vitest:
-    if: ${{ inputs.scenarios == '' }}
+    if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',openclaw-tui-chat-correlation-vitest,') }}
     runs-on: ubuntu-latest
     timeout-minutes: 75
     env:

From fac49253e0a58cf1ae45f073c4ada7d977952a37 Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Thu, 11 Jun 2026 10:45:20 -0400
Subject: [PATCH 07/11] test(e2e): tighten inference routing boundaries

---
 .../live/inference-routing.test.ts            |  60 +++++++---
 .../e2e-scenarios-workflow.test.ts            |  50 +++++++++
 tools/e2e-scenarios/workflow-boundary.mts     | 106 ++++++++++++++++++
 3 files changed, 201 insertions(+), 15 deletions(-)

diff --git a/test/e2e-scenario/live/inference-routing.test.ts b/test/e2e-scenario/live/inference-routing.test.ts
index 0a3daca40f..a3fb4ef7a6 100644
--- a/test/e2e-scenario/live/inference-routing.test.ts
+++ b/test/e2e-scenario/live/inference-routing.test.ts
@@ -44,6 +44,10 @@ const TRANSPORT_CLASSIFICATION_PATTERN =
 const liveTest = shouldRunLiveE2EScenarios() ? test : test.skip;
 
 function shouldRunProviderSmoke(provider: "openai" | "anthropic" | "compatible"): boolean {
+  // The legacy shell script auto-ran these smokes when provider secrets were
+  // present. This Vitest migration requires an explicit opt-in so PR-safe jobs
+  // cannot spend third-party quota accidentally; a future secret-backed lane must
+  // set NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE to 1, true, all, or a provider.
   const requested = process.env.NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE?.trim().toLowerCase();
   return requested === "1" || requested === "true" || requested === "all" || requested === provider;
 }
@@ -72,7 +76,6 @@ interface RawRunOptions {
   readonly cwd?: string;
   readonly env?: NodeJS.ProcessEnv;
   readonly redactionValues?: readonly string[];
-  readonly stdin?: string;
   readonly timeoutMs?: number;
 }
 
@@ -126,7 +129,7 @@ async function runRawCommand(
     cwd: options.cwd ?? REPO_ROOT,
     detached: true,
     env: options.env,
-    stdio: [options.stdin === undefined ? "ignore" : "pipe", "pipe", "pipe"],
+    stdio: ["ignore", "pipe", "pipe"],
   });
   const fullCommand = [command, ...args];
   let stdout = "";
@@ -150,10 +153,6 @@ async function runRawCommand(
   }, timeoutMs);
   timeout.unref();
 
-  if (options.stdin !== undefined) {
-    child.stdin?.end(options.stdin);
-  }
-
   child.stdout?.on("data", (chunk: Buffer) => {
     stdout += chunk.toString("utf8");
   });
@@ -209,11 +208,22 @@ async function runNemoclawCli(
   return runRawCommand(process.execPath, [CLI_ENTRYPOINT, ...args], options);
 }
 
+function rawOpenShellEnv(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv {
+  return {
+    ...buildAvailabilityProbeEnv(),
+    OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw",
+    ...extra,
+  };
+}
+
 async function runOpenShell(
   args: readonly string[],
   options: RawRunOptions,
 ): Promise<RawRunResult> {
-  return runRawCommand("openshell", args, options);
+  return runRawCommand("openshell", args, {
+    ...options,
+    env: rawOpenShellEnv(options.env),
+  });
 }
 
 async function requireLivePrerequisites(host: HostCliClient, skip: SkipFn): Promise<void> {
@@ -682,13 +692,19 @@ liveTest(
     }
 
     const scanScript = [
+      "const crypto=require('crypto')",
       "const fs=require('fs')",
       "const {execFileSync}=require('child_process')",
-      "const key=fs.readFileSync(0,'utf8')",
-      "if(!key){console.log('NO_KEY_PROVIDED');process.exit(0)}",
+      "const len=Number(process.env.KEY_LEN||'0')",
+      "const salt=process.env.SCAN_SALT||''",
+      "const target=process.env.TARGET_HASH||''",
+      "const digest=(value)=>crypto.createHash('sha256').update(salt).update(value).digest('hex')",
+      "if(!len||!salt||!target){console.log('SCAN_CONFIG_MISSING');process.exit(0)}",
       "let out=''",
-      "try{out=execFileSync('sh',['-lc','find /sandbox /home /tmp -type f -size -1M 2>/dev/null | head -200'],{encoding:'utf8'})}catch{console.log('SCAN_ERROR');process.exit(0)}",
+      // GNU find rounds sizes up to whole units, so `-size -1M` matches only
+      // empty files; a byte count keeps every file under 1 MiB in the scan.
+      "try{out=execFileSync('sh',['-lc','find /sandbox /home /tmp -type f -size -1048576c 2>/dev/null | head -200'],{encoding:'utf8'})}catch{console.log('SCAN_ERROR');process.exit(0)}",
-      "for(const file of out.trim().split(/\\n/).filter(Boolean)){try{const content=fs.readFileSync(file,'utf8');if(content.includes(key))console.log('FOUND:'+file)}catch{}}",
+      "for(const file of out.trim().split(/\\n/).filter(Boolean)){try{const content=fs.readFileSync(file,'utf8');for(let i=0;i<=content.length-len;i++){if(digest(content.slice(i,i+len))===target){console.log('FOUND:'+file);break}}}catch{}}",
       "console.log('SCAN_DONE')",
     ].join(";");
     const leakCanary = `nemoclaw-fs-scan-canary-${crypto.randomUUID()}`;
@@ -703,13 +717,21 @@ liveTest(
       },
     );
     expect(plantCanary.exitCode, resultText(plantCanary)).toBe(0);
+    const canarySalt = crypto.randomUUID();
     const canaryScan = await runOpenShell(
       ["sandbox", "exec", "-n", sandboxName, "--", "node", "-e", scanScript],
       {
         artifactName: "tc-inf-05-sandbox-filesystem-canary-scan",
         artifacts,
-        env: buildAvailabilityProbeEnv(),
-        stdin: leakCanary,
+        env: rawOpenShellEnv({
+          KEY_LEN: String(leakCanary.length),
+          SCAN_SALT: canarySalt,
+          TARGET_HASH: crypto
+            .createHash("sha256")
+            .update(canarySalt)
+            .update(leakCanary)
+            .digest("hex"),
+        }),
         timeoutMs: 90_000,
       },
     );
@@ -726,18 +748,26 @@ liveTest(
     );
     expect(removeCanary.exitCode, resultText(removeCanary)).toBe(0);
 
+    const secretScanSalt = crypto.randomUUID();
     const filesystemScan = await runOpenShell(
       ["sandbox", "exec", "-n", sandboxName, "--", "node", "-e", scanScript],
       {
         artifactName: "tc-inf-05-sandbox-filesystem-scan",
         artifacts,
-        env: buildAvailabilityProbeEnv(),
+        env: rawOpenShellEnv({
+          KEY_LEN: String(apiKey.length),
+          SCAN_SALT: secretScanSalt,
+          TARGET_HASH: crypto
+            .createHash("sha256")
+            .update(secretScanSalt)
+            .update(apiKey)
+            .digest("hex"),
+        }),
         redactionValues: [apiKey],
-        stdin: apiKey,
         timeoutMs: 90_000,
       },
     );
-    expect(filesystemScan.stdout).not.toContain("NO_KEY_PROVIDED");
+    expect(filesystemScan.stdout).not.toContain("SCAN_CONFIG_MISSING");
     expect(filesystemScan.stdout).not.toContain("FOUND:");
     expect(filesystemScan.stdout, redactedResultText(filesystemScan)).toContain("SCAN_DONE");
 
diff --git a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
index bacbbfd369..8d6e9cea6c 100644
--- a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
+++ b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
@@ -112,6 +112,36 @@ jobs:
           path: .e2e/onboard-negative-paths/
           include-hidden-files: true
           if-no-files-found: error
+  inference-routing-vitest:
+    runs-on: ubuntu-latest
+    needs: generate-matrix
+    if: \${{ inputs.scenarios != '' }}
+    env:
+      E2E_ARTIFACT_DIR: \${{ github.workspace }}/.e2e/inference-routing
+      NEMOCLAW_RUN_E2E_SCENARIOS: "0"
+      NVIDIA_API_KEY: \${{ secrets.NVIDIA_API_KEY }}
+      NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE: all
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          persist-credentials: true
+      - name: Set up Node
+        uses: actions/setup-node@v4
+        env:
+          OPENAI_API_KEY: \${{ secrets.OPENAI_API_KEY }}
+      - name: Install root dependencies
+        run: npm install
+      - name: Run inference routing live test
+        env:
+          COMPATIBLE_API_KEY: \${{ secrets.COMPATIBLE_API_KEY }}
+        run: npx vitest run --project e2e-scenarios-live "\${{ inputs.test_filter }}"
+      - name: Upload inference routing artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: inference-routing
+          path: e2e-artifacts/vitest/
+          include-hidden-files: true
+          if-no-files-found: error
 `,
     );
 
@@ -191,6 +221,26 @@ jobs:
           "onboard-negative-paths-vitest artifact upload must set include-hidden-files: false",
           "onboard-negative-paths-vitest artifact upload must ignore missing fixture artifacts",
           "onboard-negative-paths-vitest artifact upload retention-days must be 14",
+          "inference-routing-vitest job must run independently of generate-matrix",
+          "inference-routing-vitest job must run independently of workflow dispatch scenario filters",
+          "inference-routing-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1",
+          "inference-routing-vitest job must write artifacts under e2e-artifacts/vitest/inference-routing",
+          "inference-routing-vitest job env must not include NVIDIA_API_KEY",
+          "inference-routing-vitest job env must not include NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE",
+          "inference-routing-vitest checkout action must be pinned to a full commit SHA",
+          "inference-routing-vitest checkout step must set persist-credentials=false",
+          "inference-routing-vitest step 'Set up Node' env must not include OPENAI_API_KEY",
+          "inference-routing-vitest setup-node action must be pinned to a full commit SHA",
+          "inference-routing-vitest job missing step: Build CLI",
+          "inference-routing-vitest step 'Run inference routing live test' env must not include COMPATIBLE_API_KEY",
+          "step 'Run inference routing live test' run script must not interpolate dispatch inputs directly",
+          "step 'Run inference routing live test' run script must include test/e2e-scenario/live/inference-routing.test.ts",
+          "inference-routing-vitest upload-artifact action must be pinned to a full commit SHA",
+          "inference-routing-vitest artifact upload name must be stable",
+          "artifact upload path must include e2e-artifacts/vitest/inference-routing/",
+          "inference-routing-vitest artifact upload must set include-hidden-files: false",
+          "inference-routing-vitest artifact upload must ignore missing fixture artifacts",
+          "inference-routing-vitest artifact upload retention-days must be 14",
         ]),
       );
     } finally {
diff --git a/tools/e2e-scenarios/workflow-boundary.mts b/tools/e2e-scenarios/workflow-boundary.mts
index 9b06e6d219..a2408eeffe 100644
--- a/tools/e2e-scenarios/workflow-boundary.mts
+++ b/tools/e2e-scenarios/workflow-boundary.mts
@@ -210,6 +210,111 @@ function validateOpenShellVersionPinVitestJob(errors: string[], jobs: WorkflowRe
 }
 
 
+function validateInferenceRoutingVitestJob(errors: string[], jobs: WorkflowRecord): void {
+  const jobName = "inference-routing-vitest";
+  const job = asRecord(jobs[jobName]);
+  if (Object.keys(job).length === 0) {
+    errors.push("workflow missing inference-routing-vitest job");
+    return;
+  }
+  // Job-level checks first: runner label, then the job's `needs` and `if` keys.
+  if (job["runs-on"] !== "ubuntu-latest") {
+    errors.push("inference-routing-vitest job must run on ubuntu-latest");
+  }
+  if (Object.hasOwn(job, "needs")) {
+    errors.push("inference-routing-vitest job must run independently of generate-matrix");
+  }
+  if (Object.hasOwn(job, "if")) {
+    errors.push(
+      "inference-routing-vitest job must run independently of workflow dispatch scenario filters",
+    );
+  }
+
+  const providerEnvNames = [
+    "NVIDIA_API_KEY",
+    "OPENAI_API_KEY",
+    "ANTHROPIC_API_KEY",
+    "COMPATIBLE_API_KEY",
+    "NEMOCLAW_ENDPOINT_URL",
+    "NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE",
+  ];
+  const jobEnv = asRecord(job.env);
+  if (jobEnv.NEMOCLAW_RUN_E2E_SCENARIOS !== "1") {
+    errors.push("inference-routing-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1");
+  }
+  if (
+    jobEnv.E2E_ARTIFACT_DIR !== "${{ github.workspace }}/e2e-artifacts/vitest/inference-routing"
+  ) {
+    errors.push(
+      "inference-routing-vitest job must write artifacts under e2e-artifacts/vitest/inference-routing",
+    );
+  }
+  for (const envName of providerEnvNames) {
+    requireEnvDoesNotExposeSecret(errors, "inference-routing-vitest job", jobEnv, envName);
+  }
+  // Every step: check for direct dispatch-input interpolation and for provider env names.
+  const steps = asSteps(job.steps);
+  requireNoDispatchInputInterpolation(errors, steps);
+  for (const step of steps) {
+    const stepEnv = asRecord(step.env);
+    for (const envName of providerEnvNames) {
+      requireEnvDoesNotExposeSecret(
+        errors,
+        `inference-routing-vitest step '${step.name ?? step.uses ?? "<unnamed>"}'`,
+        stepEnv,
+        envName,
+      );
+    }
+  }
+  // Checkout step: must be present, pinned to a full SHA, with persist-credentials false.
+  const checkout = steps.find((step) => stringValue(step.uses).startsWith("actions/checkout@"));
+  if (!checkout) errors.push("inference-routing-vitest job missing checkout step");
+  requireFullShaAction(errors, checkout, "inference-routing-vitest checkout");
+  if (asRecord(checkout?.with)["persist-credentials"] !== false) {
+    errors.push("inference-routing-vitest checkout step must set persist-credentials=false");
+  }
+  // Set up Node step: must be present and pinned to a full SHA.
+  const setupNode = namedStep(steps, "Set up Node");
+  if (!setupNode) errors.push("inference-routing-vitest job missing step: Set up Node");
+  requireFullShaAction(errors, setupNode, "inference-routing-vitest setup-node");
+  // Install step: run script must contain the exact npm ci invocation below.
+  const installRootDependencies = requireJobStep(
+    errors,
+    jobName,
+    steps,
+    "Install root dependencies",
+  );
+  requireRunContains(errors, installRootDependencies, "npm ci --ignore-scripts");
+  // Build step: run script must contain the CLI build command.
+  const buildCli = requireJobStep(errors, jobName, steps, "Build CLI");
+  requireRunContains(errors, buildCli, "npm run build:cli");
+  // Test step: must target the e2e-scenarios-live project and this job's spec file.
+  const runVitest = requireJobStep(errors, jobName, steps, "Run inference routing live test");
+  requireRunContains(errors, runVitest, "npx vitest run --project e2e-scenarios-live");
+  requireRunContains(errors, runVitest, "test/e2e-scenario/live/inference-routing.test.ts");
+  // Upload step: pinned action, fixed artifact name, job-scoped path, and upload options.
+  const upload = requireJobStep(errors, jobName, steps, "Upload inference routing artifacts");
+  requireFullShaAction(errors, upload, "inference-routing-vitest upload-artifact");
+  const uploadWith = asRecord(upload?.with);
+  if (uploadWith.name !== "e2e-vitest-scenarios-inference-routing") {
+    errors.push("inference-routing-vitest artifact upload name must be stable");
+  }
+  const uploadPath = stringValue(uploadWith.path);
+  requireUploadPathContains(errors, uploadPath, "e2e-artifacts/vitest/inference-routing/");
+  if (uploadPath.trim() === "e2e-artifacts/vitest/") {
+    errors.push("inference-routing-vitest artifact upload path must not list all Vitest artifacts");
+  }
+  if (uploadWith["include-hidden-files"] !== false) {
+    errors.push("inference-routing-vitest artifact upload must set include-hidden-files: false");
+  }
+  if (uploadWith["if-no-files-found"] !== "ignore") {
+    errors.push("inference-routing-vitest artifact upload must ignore missing fixture artifacts");
+  }
+  if (uploadWith["retention-days"] !== 14) {
+    errors.push("inference-routing-vitest artifact upload retention-days must be 14");
+  }
+}
+
 function validateOnboardNegativePathsVitestJob(errors: string[], jobs: WorkflowRecord): void {
   const jobName = "onboard-negative-paths-vitest";
   const job = asRecord(jobs[jobName]);
@@ -482,6 +587,7 @@ export function validateE2eVitestScenariosWorkflowBoundary(
 
   validateOpenShellVersionPinVitestJob(errors, jobs);
   validateOnboardNegativePathsVitestJob(errors, jobs);
+  validateInferenceRoutingVitestJob(errors, jobs);
 
   return errors;
 }

From f89ad51dd8295d4fac39f9de29956e9ea2116ede Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Thu, 11 Jun 2026 10:46:48 -0400
Subject: [PATCH 08/11] test(e2e): align workflow selector guard

---
 .../e2e-scenarios-workflow.test.ts            |  6 ++--
 tools/e2e-scenarios/workflow-boundary.mts     | 29 +++++++++----------
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
index 8d6e9cea6c..767fe2b8d4 100644
--- a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
+++ b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
@@ -184,7 +184,7 @@ jobs:
           "artifact upload retention-days must be 14",
           "upload-artifact action must be pinned to a full commit SHA",
           "openshell-version-pin-vitest job must run independently of generate-matrix",
-          "openshell-version-pin-vitest job must run independently of workflow dispatch scenario filters",
+          "openshell-version-pin-vitest job must use the trusted jobs selector",
           "openshell-version-pin-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1",
           "openshell-version-pin-vitest job must write artifacts under e2e-artifacts/vitest/openshell-version-pin",
           "openshell-version-pin-vitest job env must not include NVIDIA_API_KEY",
@@ -203,7 +203,7 @@ jobs:
           "openshell-version-pin-vitest artifact upload must ignore missing fixture artifacts",
           "openshell-version-pin-vitest artifact upload retention-days must be 14",
           "onboard-negative-paths-vitest job must run independently of generate-matrix",
-          "onboard-negative-paths-vitest job must run independently of workflow dispatch scenario filters",
+          "onboard-negative-paths-vitest job must use the trusted jobs selector",
           "onboard-negative-paths-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1",
           "onboard-negative-paths-vitest job must write artifacts under e2e-artifacts/vitest/onboard-negative-paths",
           "onboard-negative-paths-vitest job env must not include NVIDIA_API_KEY",
@@ -222,7 +222,7 @@ jobs:
           "onboard-negative-paths-vitest artifact upload must ignore missing fixture artifacts",
           "onboard-negative-paths-vitest artifact upload retention-days must be 14",
           "inference-routing-vitest job must run independently of generate-matrix",
-          "inference-routing-vitest job must run independently of workflow dispatch scenario filters",
+          "inference-routing-vitest job must use the trusted jobs selector",
           "inference-routing-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1",
           "inference-routing-vitest job must write artifacts under e2e-artifacts/vitest/inference-routing",
           "inference-routing-vitest job env must not include NVIDIA_API_KEY",
diff --git a/tools/e2e-scenarios/workflow-boundary.mts b/tools/e2e-scenarios/workflow-boundary.mts
index a2408eeffe..133a65581d 100644
--- a/tools/e2e-scenarios/workflow-boundary.mts
+++ b/tools/e2e-scenarios/workflow-boundary.mts
@@ -122,6 +122,17 @@ function requireNoDispatchInputInterpolation(
   }
 }
 
+function requireFreeStandingJobSelector(
+  errors: string[],
+  jobName: string,
+  job: WorkflowRecord,
+): void {
+  const expected = `${"${{"} inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',${jobName},') ${"}}"}`;
+  if (job.if !== expected) {
+    errors.push(`${jobName} job must use the trusted jobs selector`);
+  }
+}
+
 function validateOpenShellVersionPinVitestJob(errors: string[], jobs: WorkflowRecord): void {
   const jobName = "openshell-version-pin-vitest";
   const job = asRecord(jobs[jobName]);
@@ -136,11 +147,7 @@ function validateOpenShellVersionPinVitestJob(errors: string[], jobs: WorkflowRe
   if (Object.hasOwn(job, "needs")) {
     errors.push("openshell-version-pin-vitest job must run independently of generate-matrix");
   }
-  if (Object.hasOwn(job, "if")) {
-    errors.push(
-      "openshell-version-pin-vitest job must run independently of workflow dispatch scenario filters",
-    );
-  }
+  requireFreeStandingJobSelector(errors, jobName, job);
 
   const jobEnv = asRecord(job.env);
   if (jobEnv.NEMOCLAW_RUN_E2E_SCENARIOS !== "1") {
@@ -224,11 +231,7 @@ function validateInferenceRoutingVitestJob(errors: string[], jobs: WorkflowRecor
   if (Object.hasOwn(job, "needs")) {
     errors.push("inference-routing-vitest job must run independently of generate-matrix");
   }
-  if (Object.hasOwn(job, "if")) {
-    errors.push(
-      "inference-routing-vitest job must run independently of workflow dispatch scenario filters",
-    );
-  }
+  requireFreeStandingJobSelector(errors, jobName, job);
 
   const providerEnvNames = [
     "NVIDIA_API_KEY",
@@ -329,11 +332,7 @@ function validateOnboardNegativePathsVitestJob(errors: string[], jobs: WorkflowR
   if (Object.hasOwn(job, "needs")) {
     errors.push("onboard-negative-paths-vitest job must run independently of generate-matrix");
   }
-  if (Object.hasOwn(job, "if")) {
-    errors.push(
-      "onboard-negative-paths-vitest job must run independently of workflow dispatch scenario filters",
-    );
-  }
+  requireFreeStandingJobSelector(errors, jobName, job);
 
   const jobEnv = asRecord(job.env);
   if (jobEnv.NEMOCLAW_RUN_E2E_SCENARIOS !== "1") {

From c3b9a688cf7dc8bedf5dc50be026668c3026d03a Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Thu, 11 Jun 2026 10:52:26 -0400
Subject: [PATCH 09/11] ci(e2e): gate recovery job by selector

---
 .github/workflows/e2e-vitest-scenarios.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml
index 66b948e69d..2e63f463bf 100644
--- a/.github/workflows/e2e-vitest-scenarios.yaml
+++ b/.github/workflows/e2e-vitest-scenarios.yaml
@@ -398,6 +398,7 @@ jobs:
   # restore the /tmp guard chain after pod recreate). Will fail on `main`
   # until the #2701 fix lands; flips green afterwards.
   gateway-guard-recovery:
+    if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',gateway-guard-recovery,') }}
     runs-on: ubuntu-latest
     timeout-minutes: 45
     env:
@@ -495,6 +496,7 @@ jobs:
         openshell-version-pin-vitest,
         onboard-negative-paths-vitest,
         openclaw-tui-chat-correlation-vitest,
+        inference-routing-vitest,
         gateway-guard-recovery,
       ]
     if: ${{ always() && github.event_name == 'workflow_dispatch' }}

From ef988df2d2b97a991e459ff7782053ac67f6fdfc Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Thu, 11 Jun 2026 11:01:32 -0400
Subject: [PATCH 10/11] ci(e2e): validate selective job input

---
 .github/workflows/e2e-vitest-scenarios.yaml   | 58 ++++++++++++++--
 .../live/inference-routing.test.ts            | 28 ++++++--
 .../e2e-scenarios-workflow.test.ts            |  8 ++-
 tools/e2e-scenarios/workflow-boundary.mts     | 67 ++++++++++++++++---
 4 files changed, 135 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml
index 2e63f463bf..7b98c5686a 100644
--- a/.github/workflows/e2e-vitest-scenarios.yaml
+++ b/.github/workflows/e2e-vitest-scenarios.yaml
@@ -30,6 +30,48 @@ concurrency:
   cancel-in-progress: false
 
 jobs:
+  validate-jobs:
+    runs-on: ubuntu-latest
+    outputs:
+      openshell_version_pin_vitest: ${{ steps.validate.outputs.openshell_version_pin_vitest }}
+      onboard_negative_paths_vitest: ${{ steps.validate.outputs.onboard_negative_paths_vitest }}
+      inference_routing_vitest: ${{ steps.validate.outputs.inference_routing_vitest }}
+      openclaw_tui_chat_correlation_vitest: ${{ steps.validate.outputs.openclaw_tui_chat_correlation_vitest }}
+      gateway_guard_recovery: ${{ steps.validate.outputs.gateway_guard_recovery }}
+    steps:
+      - id: validate
+        name: Validate free-standing job selector
+        env:
+          JOBS: ${{ inputs.jobs }}
+        run: |
+          set -euo pipefail
+          declare -A allowed=(
+            [openshell-version-pin-vitest]=openshell_version_pin_vitest
+            [onboard-negative-paths-vitest]=onboard_negative_paths_vitest
+            [inference-routing-vitest]=inference_routing_vitest
+            [openclaw-tui-chat-correlation-vitest]=openclaw_tui_chat_correlation_vitest
+            [gateway-guard-recovery]=gateway_guard_recovery
+          )
+          declare -A selected=()
+          if [ -n "${JOBS}" ]; then
+            IFS=',' read -ra requested <<< "${JOBS}"
+            for job in "${requested[@]}"; do
+              if [[ ! "${job}" =~ ^[A-Za-z0-9_-]+$ ]] || [[ -z "${allowed[$job]:-}" ]]; then
+                echo "::error::Invalid jobs input: ${job}" >&2
+                exit 1
+              fi
+              selected[$job]=1
+            done
+          fi
+          for job in "${!allowed[@]}"; do
+            output="${allowed[$job]}"
+            if [ -z "${JOBS}" ] || [ -n "${selected[$job]:-}" ]; then
+              echo "${output}=true" >> "$GITHUB_OUTPUT"
+            else
+              echo "${output}=false" >> "$GITHUB_OUTPUT"
+            fi
+          done
+
   generate-matrix:
     runs-on: ubuntu-latest
     outputs:
@@ -185,7 +227,8 @@ jobs:
   # because the matrix above only runs registry-scenarios.test.ts. Modeled on
   # #5049's free-standing pattern.
   openshell-version-pin-vitest:
-    if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',openshell-version-pin-vitest,') }}
+    needs: validate-jobs
+    if: ${{ needs.validate-jobs.outputs.openshell_version_pin_vitest == 'true' }}
     runs-on: ubuntu-latest
     timeout-minutes: 15
     env:
@@ -225,7 +268,8 @@ jobs:
           retention-days: 14
 
   onboard-negative-paths-vitest:
-    if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',onboard-negative-paths-vitest,') }}
+    needs: validate-jobs
+    if: ${{ needs.validate-jobs.outputs.onboard_negative_paths_vitest == 'true' }}
     runs-on: ubuntu-latest
     timeout-minutes: 15
     env:
@@ -269,7 +313,8 @@ jobs:
           retention-days: 14
 
   inference-routing-vitest:
-    if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',inference-routing-vitest,') }}
+    needs: validate-jobs
+    if: ${{ needs.validate-jobs.outputs.inference_routing_vitest == 'true' }}
     runs-on: ubuntu-latest
     timeout-minutes: 45
     env:
@@ -319,7 +364,8 @@ jobs:
   # protocol/history contract. The retained legacy bash lane remains the
   # source for full closeout until a later PR proves replacement and deletes it.
   openclaw-tui-chat-correlation-vitest:
-    if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',openclaw-tui-chat-correlation-vitest,') }}
+    needs: validate-jobs
+    if: ${{ needs.validate-jobs.outputs.openclaw_tui_chat_correlation_vitest == 'true' }}
     runs-on: ubuntu-latest
     timeout-minutes: 75
     env:
@@ -398,7 +444,8 @@ jobs:
   # restore the /tmp guard chain after pod recreate). Will fail on `main`
   # until the #2701 fix lands; flips green afterwards.
   gateway-guard-recovery:
-    if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',gateway-guard-recovery,') }}
+    needs: validate-jobs
+    if: ${{ needs.validate-jobs.outputs.gateway_guard_recovery == 'true' }}
     runs-on: ubuntu-latest
     timeout-minutes: 45
     env:
@@ -491,6 +538,7 @@ jobs:
     runs-on: ubuntu-latest
     needs:
       [
+        validate-jobs,
         generate-matrix,
         live-scenarios,
         openshell-version-pin-vitest,
diff --git a/test/e2e-scenario/live/inference-routing.test.ts b/test/e2e-scenario/live/inference-routing.test.ts
index a3fb4ef7a6..ff958867fa 100644
--- a/test/e2e-scenario/live/inference-routing.test.ts
+++ b/test/e2e-scenario/live/inference-routing.test.ts
@@ -264,12 +264,26 @@ interface CleanupSandboxOptions {
   readonly strict?: boolean;
 }
 
-async function optionalCleanupStep(run: () => Promise<unknown>): Promise<void> {
+function isExpectedPreOnboardCleanupMiss(text: string): boolean {
+  const missPattern =
+    /does not exist|run 'nemoclaw onboard'|no active gateway|not found|no such file|enoent/i;
+  return missPattern.test(text); // true => caller treats the output as an expected miss
+}
+
+async function optionalCleanupStep(
+  label: string,
+  run: () => Promise<{ exitCode: number | null; stdout: string; stderr: string }>,
+): Promise<void> {
   try {
-    await run();
-  } catch {
-    // Pre-onboard cleanup is best-effort because a fresh runner may not have
-    // OpenShell installed until `nemoclaw onboard` reaches that phase.
+    const result = await run();
+    if (result.exitCode === 0) return;
+    const text = resultText(result);
+    if (isExpectedPreOnboardCleanupMiss(text)) return;
+    throw new Error(`${label} failed unexpectedly during pre-onboard cleanup: ${text}`);
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+    if (isExpectedPreOnboardCleanupMiss(message)) return;
+    throw error;
   }
 }
 
@@ -288,14 +302,14 @@ async function cleanupSandbox(
   options: CleanupSandboxOptions = {},
 ): Promise<void> {
   if (!options.strict) {
-    await optionalCleanupStep(() =>
+    await optionalCleanupStep("nemoclaw destroy", () =>
       host.command(process.execPath, [CLI_ENTRYPOINT, sandboxName, "destroy", "--yes"], {
         artifactName: `cleanup-nemoclaw-destroy-${sandboxName}`,
         env: buildAvailabilityProbeEnv(),
         timeoutMs: 120_000,
       }),
     );
-    await optionalCleanupStep(() =>
+    await optionalCleanupStep("openshell sandbox delete", () =>
       sandbox.openshell(["sandbox", "delete", sandboxName], {
         artifactName: `cleanup-openshell-sandbox-delete-${sandboxName}`,
         env: buildAvailabilityProbeEnv(),
diff --git a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
index 767fe2b8d4..1380fd3b63 100644
--- a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
+++ b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
@@ -150,6 +150,7 @@ jobs:
       expect(errors).toEqual(
         expect.arrayContaining([
           "workflow_dispatch missing input: scenarios",
+          "workflow_dispatch missing input: jobs",
           "workflow_dispatch must not expose legacy test_filter input",
           "workflow missing generate-matrix job",
           "generate-matrix job must run on ubuntu-latest",
@@ -183,7 +184,8 @@ jobs:
           "artifact upload path must include e2e-artifacts/vitest/${{ matrix.id }}/shell/",
           "artifact upload retention-days must be 14",
           "upload-artifact action must be pinned to a full commit SHA",
-          "openshell-version-pin-vitest job must run independently of generate-matrix",
+          "workflow missing validate-jobs job",
+          "openshell-version-pin-vitest job must depend on validate-jobs",
           "openshell-version-pin-vitest job must use the trusted jobs selector",
           "openshell-version-pin-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1",
           "openshell-version-pin-vitest job must write artifacts under e2e-artifacts/vitest/openshell-version-pin",
@@ -202,7 +204,7 @@ jobs:
           "openshell-version-pin-vitest artifact upload must set include-hidden-files: false",
           "openshell-version-pin-vitest artifact upload must ignore missing fixture artifacts",
           "openshell-version-pin-vitest artifact upload retention-days must be 14",
-          "onboard-negative-paths-vitest job must run independently of generate-matrix",
+          "onboard-negative-paths-vitest job must depend on validate-jobs",
           "onboard-negative-paths-vitest job must use the trusted jobs selector",
           "onboard-negative-paths-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1",
           "onboard-negative-paths-vitest job must write artifacts under e2e-artifacts/vitest/onboard-negative-paths",
@@ -221,7 +223,7 @@ jobs:
           "onboard-negative-paths-vitest artifact upload must set include-hidden-files: false",
           "onboard-negative-paths-vitest artifact upload must ignore missing fixture artifacts",
           "onboard-negative-paths-vitest artifact upload retention-days must be 14",
-          "inference-routing-vitest job must run independently of generate-matrix",
+          "inference-routing-vitest job must depend on validate-jobs",
           "inference-routing-vitest job must use the trusted jobs selector",
           "inference-routing-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1",
           "inference-routing-vitest job must write artifacts under e2e-artifacts/vitest/inference-routing",
diff --git a/tools/e2e-scenarios/workflow-boundary.mts b/tools/e2e-scenarios/workflow-boundary.mts
index 133a65581d..ef925dc1c4 100644
--- a/tools/e2e-scenarios/workflow-boundary.mts
+++ b/tools/e2e-scenarios/workflow-boundary.mts
@@ -122,12 +122,22 @@ function requireNoDispatchInputInterpolation(
   }
 }
 
+const SELECTABLE_FREE_STANDING_JOBS = [
+  "openshell-version-pin-vitest",
+  "onboard-negative-paths-vitest",
+  "inference-routing-vitest",
+  "openclaw-tui-chat-correlation-vitest",
+  "gateway-guard-recovery",
+] as const;
+
+type SelectableFreeStandingJob = (typeof SELECTABLE_FREE_STANDING_JOBS)[number];
+
 function requireFreeStandingJobSelector(
   errors: string[],
-  jobName: string,
+  jobName: SelectableFreeStandingJob,
   job: WorkflowRecord,
 ): void {
-  const expected = `${"${{"} inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',${jobName},') ${"}}"}`;
+  const expected = `${"${{"} needs.validate-jobs.outputs.${jobName.replaceAll("-", "_")} == 'true' ${"}}"}`;
   if (job.if !== expected) {
     errors.push(`${jobName} job must use the trusted jobs selector`);
   }
@@ -144,9 +154,7 @@ function validateOpenShellVersionPinVitestJob(errors: string[], jobs: WorkflowRe
   if (job["runs-on"] !== "ubuntu-latest") {
     errors.push("openshell-version-pin-vitest job must run on ubuntu-latest");
   }
-  if (Object.hasOwn(job, "needs")) {
-    errors.push("openshell-version-pin-vitest job must run independently of generate-matrix");
-  }
+  requireNeedsValidateJobs(errors, jobName, job);
   requireFreeStandingJobSelector(errors, jobName, job);
 
   const jobEnv = asRecord(job.env);
@@ -228,9 +236,7 @@ function validateInferenceRoutingVitestJob(errors: string[], jobs: WorkflowRecor
   if (job["runs-on"] !== "ubuntu-latest") {
     errors.push("inference-routing-vitest job must run on ubuntu-latest");
   }
-  if (Object.hasOwn(job, "needs")) {
-    errors.push("inference-routing-vitest job must run independently of generate-matrix");
-  }
+  requireNeedsValidateJobs(errors, jobName, job);
   requireFreeStandingJobSelector(errors, jobName, job);
 
   const providerEnvNames = [
@@ -329,9 +335,7 @@ function validateOnboardNegativePathsVitestJob(errors: string[], jobs: WorkflowR
   if (job["runs-on"] !== "ubuntu-latest") {
     errors.push("onboard-negative-paths-vitest job must run on ubuntu-latest");
   }
-  if (Object.hasOwn(job, "needs")) {
-    errors.push("onboard-negative-paths-vitest job must run independently of generate-matrix");
-  }
+  requireNeedsValidateJobs(errors, jobName, job);
   requireFreeStandingJobSelector(errors, jobName, job);
 
   const jobEnv = asRecord(job.env);
@@ -404,6 +408,45 @@ function validateOnboardNegativePathsVitestJob(errors: string[], jobs: WorkflowR
   }
 }
 
+function validateJobsGuard(errors: string[], jobs: WorkflowRecord): void {
+  const jobName = "validate-jobs";
+  const job = asRecord(jobs[jobName]);
+  if (Object.keys(job).length === 0) {
+    errors.push("workflow missing validate-jobs job");
+    return;
+  }
+  // Guard job shape: runner label and one underscore-keyed output per selectable job.
+  if (job["runs-on"] !== "ubuntu-latest") {
+    errors.push("validate-jobs job must run on ubuntu-latest");
+  }
+  const outputs = asRecord(job.outputs);
+  for (const selectable of SELECTABLE_FREE_STANDING_JOBS) {
+    const key = selectable.replaceAll("-", "_");
+    if (outputs[key] !== `${"${{"} steps.validate.outputs.${key} ${"}}"}`) {
+      errors.push(`validate-jobs must expose ${key} output`);
+    }
+  }
+  // Validate step: jobs arrive via the JOBS env; the script names every job and output key.
+  const steps = asSteps(job.steps);
+  requireNoDispatchInputInterpolation(errors, steps);
+  const validate = requireJobStep(errors, jobName, steps, "Validate free-standing job selector");
+  const validateEnv = asRecord(validate?.env);
+  if (validateEnv.JOBS !== "${{ inputs.jobs }}") {
+    errors.push("validate-jobs step must pass jobs through JOBS env");
+  }
+  for (const selectable of SELECTABLE_FREE_STANDING_JOBS) {
+    requireRunContains(errors, validate, selectable);
+    requireRunContains(errors, validate, selectable.replaceAll("-", "_"));
+  }
+  requireRunContains(errors, validate, "Invalid jobs input");
+}
+
+function requireNeedsValidateJobs(errors: string[], jobName: string, job: WorkflowRecord): void {
+  // Workflow YAML may spell a single dependency as a scalar or a one-item list; accept both.
+  const needs = Array.isArray(job.needs) && job.needs.length === 1 ? job.needs[0] : job.needs;
+  if (needs !== "validate-jobs") errors.push(`${jobName} job must depend on validate-jobs`);
+}
+
 export function validateE2eVitestScenariosWorkflowBoundary(
   workflowPath = DEFAULT_VITEST_WORKFLOW_PATH,
 ): string[] {
@@ -416,6 +459,7 @@ export function validateE2eVitestScenariosWorkflowBoundary(
 
   const dispatchInputs = asRecord(workflowDispatch.inputs);
   requireInput(errors, dispatchInputs, "scenarios");
+  requireInput(errors, dispatchInputs, "jobs");
   if (Object.hasOwn(dispatchInputs, "test_filter")) {
     errors.push("workflow_dispatch must not expose legacy test_filter input");
   }
@@ -424,6 +468,7 @@ export function validateE2eVitestScenariosWorkflowBoundary(
   if (permissions.contents !== "read") errors.push("workflow permissions.contents must be read");
 
   const jobs = asRecord(workflow.jobs);
+  validateJobsGuard(errors, jobs);
   const generateMatrix = asRecord(jobs["generate-matrix"]);
   if (Object.keys(generateMatrix).length === 0) errors.push("workflow missing generate-matrix job");
   if (generateMatrix["runs-on"] !== "ubuntu-latest") {

From 3d32327471232bc3e254215bfb36393ba1957c3c Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Thu, 11 Jun 2026 12:34:50 -0400
Subject: [PATCH 11/11] ci(e2e): align inference-routing-vitest dispatch

---
 .github/workflows/e2e-vitest-scenarios.yaml | 51 ++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml
index 65e06c7eda..e51b444f2e 100644
--- a/.github/workflows/e2e-vitest-scenarios.yaml
+++ b/.github/workflows/e2e-vitest-scenarios.yaml
@@ -40,7 +40,7 @@ jobs:
           SCENARIOS: ${{ inputs.scenarios }}
         run: |
           set -euo pipefail
-          allowed_jobs="openshell-version-pin-vitest,onboard-negative-paths-vitest,openclaw-tui-chat-correlation-vitest,gateway-guard-recovery"
+          allowed_jobs="openshell-version-pin-vitest,onboard-negative-paths-vitest,openclaw-tui-chat-correlation-vitest,gateway-guard-recovery,inference-routing-vitest"
           if [ -n "${JOBS}" ] && [ -n "${SCENARIOS}" ]; then
             echo "::error::Use either scenarios or jobs, not both." >&2
             exit 1
@@ -298,6 +298,54 @@ jobs:
           if-no-files-found: ignore
           retention-days: 14
 
+  inference-routing-vitest:
+    needs: validate-jobs
+    if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',inference-routing-vitest,') }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+    env:
+      E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/inference-routing
+      NEMOCLAW_RUN_E2E_SCENARIOS: "1"
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+        with:
+          persist-credentials: false
+
+      - name: Set up Node
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0
+        with:
+          node-version: 22
+          cache: npm
+
+      - name: Install root dependencies
+        run: npm ci --ignore-scripts
+
+      - name: Build CLI
+        run: npm run build:cli
+
+      - name: Run inference routing live test
+        # Direct Vitest coverage for test/e2e/test-inference-routing.sh. The
+        # always-on PR-safe slices prove invalid-key and unreachable-endpoint
+        # classification/cleanup without spending live provider quota; real
+        # NVIDIA credential isolation and third-party provider smokes stay
+        # skipped unless their secrets are explicitly supplied by a future
+        # workflow.
+        run: |
+          set -euo pipefail
+          npx vitest run --project e2e-scenarios-live \
+            test/e2e-scenario/live/inference-routing.test.ts \
+            --silent=false --reporter=default
+
+      - name: Upload inference routing artifacts
+        if: always()
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: e2e-vitest-scenarios-inference-routing
+          path: e2e-artifacts/vitest/inference-routing/
+          include-hidden-files: false
+          if-no-files-found: ignore
+          retention-days: 14
+
   # Focused coverage slice for the #2603/#3145 OpenClaw websocket
   # protocol/history contract. The retained legacy bash lane remains the
   # source for full closeout until a later PR proves replacement and deletes it.
@@ -481,6 +529,7 @@ jobs:
         live-scenarios,
         openshell-version-pin-vitest,
         onboard-negative-paths-vitest,
+        inference-routing-vitest,
         openclaw-tui-chat-correlation-vitest,
         gateway-guard-recovery,
       ]