diff --git a/.github/workflows/regression-e2e.yaml b/.github/workflows/regression-e2e.yaml
index 1749ceb3d0..cd1dcf1062 100644
--- a/.github/workflows/regression-e2e.yaml
+++ b/.github/workflows/regression-e2e.yaml
@@ -21,7 +21,7 @@ on:
       jobs:
         description: >-
           Comma-separated regression job names to run (empty = all).
-          Valid: dashboard-remote-bind-e2e,gateway-health-honest-e2e,gateway-drift-preflight-e2e,openshell-version-pin-e2e,onboard-inference-smoke-e2e,model-router-provider-routed-inference-e2e,openclaw-plugin-runtime-exdev-e2e,whatsapp-qr-compact-e2e
+          Valid: dashboard-remote-bind-e2e,gateway-health-honest-e2e,gateway-drift-preflight-e2e,openshell-version-pin-e2e,model-router-provider-routed-inference-e2e,openclaw-plugin-runtime-exdev-e2e,whatsapp-qr-compact-e2e
         required: false
         type: string
         default: ""
@@ -48,7 +48,6 @@ jobs:
       gateway: ${{ steps.select.outputs.gateway }}
       gateway_drift_preflight: ${{ steps.select.outputs.gateway_drift_preflight }}
       openshell_version_pin: ${{ steps.select.outputs.openshell_version_pin }}
-      onboard_inference_smoke: ${{ steps.select.outputs.onboard_inference_smoke }}
       model_router_provider_routed_inference: ${{ steps.select.outputs.model_router_provider_routed_inference }}
       openclaw_plugin_runtime_exdev: ${{ steps.select.outputs.openclaw_plugin_runtime_exdev }}
       whatsapp_qr_compact: ${{ steps.select.outputs.whatsapp_qr_compact }}
@@ -91,11 +91,6 @@ jobs:
             echo "openshell_version_pin=false" >> "$GITHUB_OUTPUT"
           fi
 
-          if [ -z "$normalized" ] || includes_job "onboard-inference-smoke-e2e"; then
-            echo "onboard_inference_smoke=true" >> "$GITHUB_OUTPUT"
-          else
-            echo "onboard_inference_smoke=false" >> "$GITHUB_OUTPUT"
-          fi
 
           if [ -z "$normalized" ] || includes_job "model-router-provider-routed-inference-e2e"; then
             echo "model_router_provider_routed_inference=true" >> "$GITHUB_OUTPUT"
@@ -209,50 +204,6 @@ jobs:
             /tmp/nemoclaw-e2e-openshell-version-pin-downloads.log
           if-no-files-found: ignore
 
-  # ── Onboard inference smoke E2E ─────────────────────────────
-  # Coverage guard for #3253. Onboard must not report installation success
-  # until the configured provider/model route has served a real chat completion.
-  # This simulates a route that is configured but returns HTTP 503 at runtime.
-  onboard-inference-smoke-e2e:
-    needs: select_regression_jobs
-    if: >-
-      github.repository == 'NVIDIA/NemoClaw' &&
-      needs.select_regression_jobs.outputs.onboard_inference_smoke == 'true'
-    runs-on: ubuntu-latest
-    timeout-minutes: 15
-    steps:
-      - name: Checkout
-        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
-
-      - name: Setup Node
-        uses: actions/setup-node@v6
-        with:
-          node-version: "22"
-
-      - name: Run onboard inference smoke E2E test
-        env:
-          NEMOCLAW_NON_INTERACTIVE: "1"
-          NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
-          NEMOCLAW_TRACE_DIR: /tmp/nemoclaw-traces
-        run: bash test/e2e/test-onboard-inference-smoke.sh
-
-      - name: Upload onboard inference smoke logs on failure
-        if: failure()
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
-        with:
-          name: onboard-inference-smoke-logs
-          path: |
-            /tmp/nemoclaw-e2e-onboard-inference-smoke.log
-            /tmp/nemoclaw-e2e-onboard-inference-smoke-node.log
-          if-no-files-found: ignore
-
-      - name: Upload onboard profiling traces
-        if: always()
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
-        with:
-          name: onboard-inference-smoke-traces
-          path: /tmp/nemoclaw-traces/
-          if-no-files-found: ignore
 
   # ── Gateway drift preflight E2E ─────────────────────────────
   # Coverage guard for #3399 / #3423. A stale OpenShell gateway image can
diff --git a/test/e2e-script-workflow.test.ts b/test/e2e-script-workflow.test.ts
index db4e8c65a7..da3de1c226 100644
--- a/test/e2e-script-workflow.test.ts
+++ b/test/e2e-script-workflow.test.ts
@@ -51,7 +51,6 @@ const LEGACY_E2E_SHELL_ALLOWLIST = [
   "test/e2e/test-model-router-provider-routed-inference.sh",
   "test/e2e/test-network-policy.sh",
   "test/e2e/test-ollama-auth-proxy-e2e.sh",
-  "test/e2e/test-onboard-inference-smoke.sh",
   "test/e2e/test-onboard-negative-paths.sh",
   "test/e2e/test-onboard-repair.sh",
   "test/e2e/test-onboard-resume.sh",
diff --git a/test/e2e/test-onboard-inference-smoke.sh b/test/e2e/test-onboard-inference-smoke.sh
deleted file mode 100755
index b63919a5ed..0000000000
--- a/test/e2e/test-onboard-inference-smoke.sh
+++ /dev/null
@@ -1,163 +0,0 @@
-#!/usr/bin/env bash
-# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Coverage guard for issue #3253 — onboard must not report installation
-# success until the configured inference route has served a real request.
-#
-# Expected RED on main-equivalent code: PASSING inference configuration is
-# treated as enough. setupInference() accepts a provider/model whose route is
-# configured but whose chat/completions endpoint returns HTTP 503, so this test
-# fails because setupInference() resolves successfully and prints only the route
-# success line.
-#
-# Expected GREEN after fix: setupInference() performs a one-shot inference smoke
-# probe, exits non-zero on the upstream 503, and surfaces provider/model/api
-# base/credential-env diagnostics before any "Installation complete" summary.
-
-set -euo pipefail
-
-LOG_FILE="/tmp/nemoclaw-e2e-onboard-inference-smoke.log"
-exec > >(tee "$LOG_FILE") 2>&1
-
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-NC='\033[0m'
-
-pass() { echo -e "${GREEN}[PASS]${NC} $1"; }
-info() { echo -e "${YELLOW}[INFO]${NC} $1"; }
-diag() { echo -e "${YELLOW}[DIAG]${NC} $1"; }
-fail() {
-  echo -e "${RED}[FAIL]${NC} $1" >&2
-  diag "onboard inference smoke log tail:"
-  tail -120 "$LOG_FILE" 2>/dev/null || true
-  exit 1
-}
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
-REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
-cd "$REPO_ROOT"
-
-info "Preparing CLI build"
-if [ ! -d node_modules ]; then
-  npm ci --ignore-scripts
-fi
-npm run build:cli
-
-info "Invoking setupInference() with a gateway route that is configured but runtime-broken"
-set +e
-NEMOCLAW_NON_INTERACTIVE=1 \
-  NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-  NEMOCLAW_ONBOARD_INFERENCE_SMOKE_E2E=1 \
-  node <<'NODE' 2>&1 | tee /tmp/nemoclaw-e2e-onboard-inference-smoke-node.log
-const Module = require("module");
-const originalLoad = Module._load;
-const calls = [];
-
-Module._load = function patchedLoad(request, parent, isMain) {
-  if (request === "./adapters/openshell/resolve" || request.endsWith("/adapters/openshell/resolve")) {
-    return { resolveOpenshell: () => "/usr/bin/openshell" };
-  }
-  if (request === "./runner" || request.endsWith("/runner")) {
-    const actualRunner = originalLoad.apply(this, arguments);
-    return {
-      ...actualRunner,
-      run: (cmd, opts = {}) => {
-        calls.push(["run", cmd]);
-        if (Array.isArray(cmd) && cmd.includes("provider") && cmd.includes("upsert")) {
-          return { status: 0, stdout: "Created provider compatible-endpoint\n", stderr: "" };
-        }
-        if (Array.isArray(cmd) && cmd.includes("inference") && cmd.includes("set")) {
-          return { status: 0, stdout: "Inference configured\n", stderr: "" };
-        }
-        if (Array.isArray(cmd) && cmd.some((part) => String(part).includes("/chat/completions"))) {
-          return {
-            status: 22,
-            stdout: JSON.stringify({ error: { message: "upstream returned HTTP 503 from compatible-endpoint" } }),
-            stderr: "curl: (22) The requested URL returned error: 503",
-          };
-        }
-        return { status: 0, stdout: "", stderr: "" };
-      },
-      runCapture: (cmd) => {
-        calls.push(["runCapture", cmd]);
-        if (Array.isArray(cmd) && cmd.includes("inference") && cmd.includes("get")) {
-          return JSON.stringify({ provider: "compatible-endpoint", model: "broken-model" });
-        }
-        return "";
-      },
-    };
-  }
-  if (request === "./onboard/providers" || request.endsWith("/onboard/providers")) {
-    return {
-      REMOTE_PROVIDER_CONFIG: {
-        custom: {
-          label: "Other OpenAI-compatible endpoint",
-          providerName: "compatible-endpoint",
-          providerType: "openai",
-          credentialEnv: "COMPATIBLE_API_KEY",
-          endpointUrl: "",
-          helpUrl: null,
-          modelMode: "input",
-          defaultModel: "",
-          skipVerify: true,
-        },
-      },
-      LOCAL_INFERENCE_PROVIDERS: [],
-      providerExistsInGateway: () => true,
-      getProviderLabel: (provider) => provider,
-      upsertProvider: (...args) => {
-        calls.push(["upsertProvider", args]);
-        return { ok: true, status: 0, message: "Created provider compatible-endpoint" };
-      },
-    };
-  }
-  if (request === "./registry" || request.endsWith("/registry")) {
-    return {
-      updateSandbox: (_name, patch) => calls.push(["registry.updateSandbox", patch]),
-      getSandbox: () => null,
-      getDisabledChannels: () => [],
-    };
-  }
-  return originalLoad.apply(this, arguments);
-};
-
-const onboard = require("./dist/lib/onboard");
-const result = onboard.setupInference(
-  "test-sandbox",
-  "broken-model",
-  "compatible-endpoint",
-  "https://broken.example.invalid/v1",
-  "BROKEN_API_KEY",
-);
-
-Promise.resolve(result)
-  .then((value) => {
-    console.log("__SETUP_INFERENCE_RESOLVED__");
-    console.log(JSON.stringify(value));
-    console.log("__CALLS__" + JSON.stringify(calls));
-    process.exit(0);
-  })
-  .catch((error) => {
-    console.error("__SETUP_INFERENCE_REJECTED__");
-    console.error(error && error.stack ? error.stack : error);
-    console.log("__CALLS__" + JSON.stringify(calls));
-    process.exit(3);
-  });
-NODE
-NODE_EXIT=$?
-set -e
-cat /tmp/nemoclaw-e2e-onboard-inference-smoke-node.log
-
-info "node exit code: ${NODE_EXIT}"
-
-if grep -q "__SETUP_INFERENCE_RESOLVED__" /tmp/nemoclaw-e2e-onboard-inference-smoke-node.log || [ "$NODE_EXIT" -eq 0 ]; then
-  fail "setupInference() accepted a configured route without proving the chat/completions path; onboard would later print Installation complete while the first real request returns HTTP 503 (#3253)"
-fi
-pass "setupInference() did not accept a runtime-broken inference route"
-
-if ! grep -qiE "503|upstream|compatible-endpoint|broken-model|BROKEN_API_KEY|broken.example.invalid" /tmp/nemoclaw-e2e-onboard-inference-smoke-node.log; then
-  fail "onboard did not surface actionable inference smoke diagnostics (expected provider/model/api_base/credential env/upstream 503)"
-fi
-pass "onboard surfaced actionable inference smoke diagnostics for the broken route"
diff --git a/test/onboard-inference-smoke.test.ts b/test/onboard-inference-smoke.test.ts
new file mode 100644
index 0000000000..4cb549b35d
--- /dev/null
+++ b/test/onboard-inference-smoke.test.ts
@@ -0,0 +1,200 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import assert from "node:assert/strict";
+import { spawnSync } from "node:child_process";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { describe, it } from "vitest";
+
+import { testTimeoutOptions } from "./helpers/timeouts";
+
+// Coverage guard for #3253. Onboard must not report installation success until
+// the configured provider/model route has served a real chat completion. This
+// caller-level, mock-driven Vitest test replaces test/e2e/test-onboard-inference-smoke.sh
+// per #5119: direct setupInference() probes belong in test/, not in regression-e2e
+// bash or the scenario framework. Refs #5098, #4349.
+//
+// Mechanics: a throwaway CommonJS driver runs in a child Node process. It swaps
+// runner.run, runner.runCapture and registry.updateSandbox on the built modules
+// under dist/lib for recording stubs, then awaits setupInference(). Fake
+// `openshell` and `curl` executables sit first on PATH; the fake curl logs its
+// arguments and reports HTTP 503 for every call.
+// NOTE(review): assumes dist/lib has been built before this suite runs — confirm.
+const REPO_ROOT = path.join(import.meta.dirname, "..");
+
+describe("onboard inference smoke guard (#3253)", () => {
+  it(
+    "rejects a configured OpenAI-compatible route when chat/completions returns 503",
+    testTimeoutOptions(90_000),
+    () => {
+      const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-inference-smoke-"));
+      const fakeBin = path.join(tmpDir, "bin");
+      const scriptPath = path.join(tmpDir, "setup-inference-smoke-check.cjs");
+      const curlLogPath = path.join(tmpDir, "curl-probes.log");
+      // JSON.stringify turns each absolute path into a quoted, escaped string
+      // literal that can be spliced straight into the generated driver source.
+      const onboardPath = JSON.stringify(path.join(REPO_ROOT, "dist", "lib", "onboard.js"));
+      const runnerPath = JSON.stringify(path.join(REPO_ROOT, "dist", "lib", "runner.js"));
+      const registryPath = JSON.stringify(
+        path.join(REPO_ROOT, "dist", "lib", "state", "registry.js"),
+      );
+
+      // Fixture creation lives inside try/finally so the temp dir is removed
+      // even when writing one of the fixtures throws.
+      try {
+        fs.mkdirSync(fakeBin, { recursive: true });
+        // Fake openshell: accepts any invocation and exits successfully.
+        fs.writeFileSync(path.join(fakeBin, "openshell"), "#!/usr/bin/env bash\nexit 0\n", {
+          mode: 0o755,
+        });
+        // Fake curl: appends its argv to $NEMOCLAW_FAKE_CURL_LOG, writes a JSON
+        // error body to the -o target when one is given, and prints "503".
+        fs.writeFileSync(
+          path.join(fakeBin, "curl"),
+          String.raw`#!/usr/bin/env bash
+set -euo pipefail
+printf '%s\n' "$*" >> "$NEMOCLAW_FAKE_CURL_LOG"
+out=""
+prev=""
+for arg in "$@"; do
+  if [ "$prev" = "-o" ]; then
+    out="$arg"
+    break
+  fi
+  prev="$arg"
+done
+if [ -n "$out" ]; then
+  printf '%s\n' '{"error":{"message":"upstream returned HTTP 503 from compatible-endpoint"}}' > "$out"
+fi
+printf '503'
+`,
+          { mode: 0o755 },
+        );
+        // Driver: the stubs are installed before onboard.js is required.
+        fs.writeFileSync(
+          scriptPath,
+          String.raw`
+const runner = require(${runnerPath});
+const registry = require(${registryPath});
+const calls = [];
+const normalize = (command) => (Array.isArray(command) ? command.join(" ") : String(command));
+
+runner.run = (command) => {
+  const text = normalize(command);
+  calls.push(["run", text]);
+  if (text.includes("provider") && text.includes("upsert")) {
+    return { status: 0, stdout: "Created provider compatible-endpoint\n", stderr: "" };
+  }
+  if (text.includes("inference") && text.includes("set")) {
+    return { status: 0, stdout: "Inference configured\n", stderr: "" };
+  }
+  if (text.includes("/chat/completions")) {
+    return {
+      status: 22,
+      stdout: JSON.stringify({ error: { message: "upstream returned HTTP 503 from compatible-endpoint" } }),
+      stderr: "curl: (22) The requested URL returned error: 503",
+    };
+  }
+  return { status: 0, stdout: "", stderr: "" };
+};
+runner.runCapture = (command) => {
+  const text = normalize(command);
+  calls.push(["runCapture", text]);
+  if (text.includes("inference") && text.includes("get")) {
+    return [
+      "Gateway inference:",
+      "",
+      "  Route: inference.local",
+      "  Provider: compatible-endpoint",
+      "  Model: broken-model",
+      "  Version: 1",
+    ].join("\n");
+  }
+  return "";
+};
+registry.updateSandbox = (_name, patch) => calls.push(["registry.updateSandbox", JSON.stringify(patch)]);
+
+process.env.NEMOCLAW_NON_INTERACTIVE = "1";
+process.env.NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE = "1";
+process.env.NEMOCLAW_ONBOARD_INFERENCE_SMOKE_E2E = "1";
+process.env.NEMOCLAW_TEST_NO_SLEEP = "1";
+process.env.BROKEN_API_KEY = "test-key";
+
+const { setupInference } = require(${onboardPath});
+
+(async () => {
+  await setupInference(
+    "test-sandbox",
+    "broken-model",
+    "compatible-endpoint",
+    "https://broken.example.invalid/v1",
+    "BROKEN_API_KEY",
+  );
+  console.log(JSON.stringify({ outcome: "resolved", calls }));
+})().catch((error) => {
+  console.error(error && error.stack ? error.stack : error);
+  console.log(JSON.stringify({ outcome: "rejected", calls }));
+  process.exitCode = 3;
+});
+`,
+        );
+
+        const result = spawnSync(process.execPath, [scriptPath], {
+          cwd: REPO_ROOT,
+          encoding: "utf8",
+          env: {
+            ...process.env,
+            // NOTE(review): HOME presumably isolates on-disk state from the real home — confirm.
+            HOME: tmpDir,
+            PATH: `${fakeBin}${path.delimiter}${process.env.PATH ?? ""}`,
+            VITEST: "false",
+            NEMOCLAW_TEST_NO_SLEEP: "1",
+            NEMOCLAW_FAKE_CURL_LOG: curlLogPath,
+            BROKEN_API_KEY: "test-key",
+          },
+          timeout: 80_000,
+        });
+
+        const output = `${result.stdout || ""}\n${result.stderr || ""}`;
+        // A launch failure, timeout or signal kill leaves error set and/or
+        // status === null, which would slip past the non-zero check below.
+        assert.ok(
+          result.error === undefined && result.status !== null,
+          `driver process did not exit normally (error: ${String(result.error)}, signal: ${String(result.signal)}); output:\n${output}`,
+        );
+        assert.notEqual(
+          result.status,
+          0,
+          `setupInference accepted a configured route without proving chat/completions; output:\n${output}`,
+        );
+        for (const expectedDiagnostic of [
+          /compatible-endpoint/i,
+          /broken-model/i,
+          /broken\.example\.invalid/i,
+          /Credential env: configured/i,
+          /503|upstream/i,
+        ]) {
+          assert.match(
+            output,
+            expectedDiagnostic,
+            `onboard did not surface actionable inference smoke diagnostics; output:\n${output}`,
+          );
+        }
+
+        const curlLog = fs.existsSync(curlLogPath) ? fs.readFileSync(curlLogPath, "utf8") : "";
+        assert.ok(
+          curlLog.includes("/chat/completions"),
+          `setupInference did not probe chat/completions before failing; curl log:\n${curlLog}`,
+        );
+        assert.ok(
+          !output.includes("Inference route set: compatible-endpoint / broken-model"),
+          `setupInference printed route success after the smoke probe failed; output:\n${output}`,
+        );
+      } finally {
+        fs.rmSync(tmpDir, { recursive: true, force: true });
+      }
+    },
+  );
+});
diff --git a/test/regression-e2e-workflow.test.ts b/test/regression-e2e-workflow.test.ts
index 3179b2bc1e..fa3da7fdea 100644
--- a/test/regression-e2e-workflow.test.ts
+++ b/test/regression-e2e-workflow.test.ts
@@ -26,15 +26,18 @@ type RegressionWorkflow = {
 describe("Regression E2E workflow contract", () => {
   const workflow = readYaml<RegressionWorkflow>(".github/workflows/regression-e2e.yaml");
 
-  it("does not advertise or select the retired docker-unreachable gateway-start lane", () => {
+  it.each([
+    ["docker-unreachable-gateway-start-e2e", "docker_unreachable_gateway_start"],
+    ["onboard-inference-smoke-e2e", "onboard_inference_smoke"],
+  ])("does not advertise or select retired lane %s", (jobName, selectorOutput) => {
     const jobsDescription = workflow.on?.workflow_dispatch?.inputs?.jobs?.description ?? "";
     const selectorScript =
       workflow.jobs?.select_regression_jobs?.steps?.find((step) => step.id === "select")?.run ?? "";
 
-    expect(jobsDescription).not.toContain("docker-unreachable-gateway-start-e2e");
-    expect(Object.keys(workflow.jobs ?? {})).not.toContain("docker-unreachable-gateway-start-e2e");
-    expect(selectorScript).not.toContain("docker-unreachable-gateway-start-e2e");
-    expect(selectorScript).not.toContain("docker_unreachable_gateway_start");
+    expect(jobsDescription).not.toContain(jobName);
+    expect(Object.keys(workflow.jobs ?? {})).not.toContain(jobName);
+    expect(selectorScript).not.toContain(jobName);
+    expect(selectorScript).not.toContain(selectorOutput);
   });
 
   it("does not advertise or select the retired strict-tool-call-probe lane", () => {