diff --git a/.github/workflows/regression-e2e.yaml b/.github/workflows/regression-e2e.yaml index 1749ceb3d0..cd1dcf1062 100644 --- a/.github/workflows/regression-e2e.yaml +++ b/.github/workflows/regression-e2e.yaml @@ -21,7 +21,7 @@ on: jobs: description: >- Comma-separated regression job names to run (empty = all). - Valid: dashboard-remote-bind-e2e,gateway-health-honest-e2e,gateway-drift-preflight-e2e,openshell-version-pin-e2e,onboard-inference-smoke-e2e,model-router-provider-routed-inference-e2e,openclaw-plugin-runtime-exdev-e2e,whatsapp-qr-compact-e2e + Valid: dashboard-remote-bind-e2e,gateway-health-honest-e2e,gateway-drift-preflight-e2e,openshell-version-pin-e2e,model-router-provider-routed-inference-e2e,openclaw-plugin-runtime-exdev-e2e,whatsapp-qr-compact-e2e required: false type: string default: "" @@ -48,7 +48,7 @@ jobs: gateway: ${{ steps.select.outputs.gateway }} gateway_drift_preflight: ${{ steps.select.outputs.gateway_drift_preflight }} openshell_version_pin: ${{ steps.select.outputs.openshell_version_pin }} - onboard_inference_smoke: ${{ steps.select.outputs.onboard_inference_smoke }} + model_router_provider_routed_inference: ${{ steps.select.outputs.model_router_provider_routed_inference }} openclaw_plugin_runtime_exdev: ${{ steps.select.outputs.openclaw_plugin_runtime_exdev }} whatsapp_qr_compact: ${{ steps.select.outputs.whatsapp_qr_compact }} @@ -91,11 +91,6 @@ jobs: echo "openshell_version_pin=false" >> "$GITHUB_OUTPUT" fi - if [ -z "$normalized" ] || includes_job "onboard-inference-smoke-e2e"; then - echo "onboard_inference_smoke=true" >> "$GITHUB_OUTPUT" - else - echo "onboard_inference_smoke=false" >> "$GITHUB_OUTPUT" - fi if [ -z "$normalized" ] || includes_job "model-router-provider-routed-inference-e2e"; then echo "model_router_provider_routed_inference=true" >> "$GITHUB_OUTPUT" @@ -209,50 +204,6 @@ jobs: /tmp/nemoclaw-e2e-openshell-version-pin-downloads.log if-no-files-found: ignore - # ── Onboard inference smoke E2E ───────────────────────────── - # Coverage guard for #3253. Onboard must not report installation success - # until the configured provider/model route has served a real chat completion. - # This simulates a route that is configured but returns HTTP 503 at runtime. - onboard-inference-smoke-e2e: - needs: select_regression_jobs - if: >- - github.repository == 'NVIDIA/NemoClaw' && - needs.select_regression_jobs.outputs.onboard_inference_smoke == 'true' - runs-on: ubuntu-latest - timeout-minutes: 15 - steps: - - name: Checkout - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 - - - name: Setup Node - uses: actions/setup-node@v6 - with: - node-version: "22" - - - name: Run onboard inference smoke E2E test - env: - NEMOCLAW_NON_INTERACTIVE: "1" - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" - NEMOCLAW_TRACE_DIR: /tmp/nemoclaw-traces - run: bash test/e2e/test-onboard-inference-smoke.sh - - - name: Upload onboard inference smoke logs on failure - if: failure() - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 - with: - name: onboard-inference-smoke-logs - path: | - /tmp/nemoclaw-e2e-onboard-inference-smoke.log - /tmp/nemoclaw-e2e-onboard-inference-smoke-node.log - if-no-files-found: ignore - - - name: Upload onboard profiling traces - if: always() - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 - with: - name: onboard-inference-smoke-traces - path: /tmp/nemoclaw-traces/ - if-no-files-found: ignore # ── Gateway drift preflight E2E ───────────────────────────── # Coverage guard for #3399 / #3423. A stale OpenShell gateway image can diff --git a/test/e2e-script-workflow.test.ts b/test/e2e-script-workflow.test.ts index db4e8c65a7..da3de1c226 100644 --- a/test/e2e-script-workflow.test.ts +++ b/test/e2e-script-workflow.test.ts @@ -51,7 +51,6 @@ const LEGACY_E2E_SHELL_ALLOWLIST = [ "test/e2e/test-model-router-provider-routed-inference.sh", "test/e2e/test-network-policy.sh", "test/e2e/test-ollama-auth-proxy-e2e.sh", - "test/e2e/test-onboard-inference-smoke.sh", "test/e2e/test-onboard-negative-paths.sh", "test/e2e/test-onboard-repair.sh", "test/e2e/test-onboard-resume.sh", diff --git a/test/e2e/test-onboard-inference-smoke.sh b/test/e2e/test-onboard-inference-smoke.sh deleted file mode 100755 index b63919a5ed..0000000000 --- a/test/e2e/test-onboard-inference-smoke.sh +++ /dev/null @@ -1,163 +0,0 @@ -#!/usr/bin/env bash -# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Coverage guard for issue #3253 — onboard must not report installation -# success until the configured inference route has served a real request. -# -# Expected RED on main-equivalent code: PASSING inference configuration is -# treated as enough. setupInference() accepts a provider/model whose route is -# configured but whose chat/completions endpoint returns HTTP 503, so this test -# fails because setupInference() resolves successfully and prints only the route -# success line. -# -# Expected GREEN after fix: setupInference() performs a one-shot inference smoke -# probe, exits non-zero on the upstream 503, and surfaces provider/model/api -# base/credential-env diagnostics before any "Installation complete" summary. - -set -euo pipefail - -LOG_FILE="/tmp/nemoclaw-e2e-onboard-inference-smoke.log" -exec > >(tee "$LOG_FILE") 2>&1 - -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' - -pass() { echo -e "${GREEN}[PASS]${NC} $1"; } -info() { echo -e "${YELLOW}[INFO]${NC} $1"; } -diag() { echo -e "${YELLOW}[DIAG]${NC} $1"; } -fail() { - echo -e "${RED}[FAIL]${NC} $1" >&2 - diag "onboard inference smoke log tail:" - tail -120 "$LOG_FILE" 2>/dev/null || true - exit 1 -} - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" -cd "$REPO_ROOT" - -info "Preparing CLI build" -if [ ! -d node_modules ]; then - npm ci --ignore-scripts -fi -npm run build:cli - -info "Invoking setupInference() with a gateway route that is configured but runtime-broken" -set +e -NEMOCLAW_NON_INTERACTIVE=1 \ - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ - NEMOCLAW_ONBOARD_INFERENCE_SMOKE_E2E=1 \ - node <<'NODE' 2>&1 | tee /tmp/nemoclaw-e2e-onboard-inference-smoke-node.log -const Module = require("module"); -const originalLoad = Module._load; -const calls = []; - -Module._load = function patchedLoad(request, parent, isMain) { - if (request === "./adapters/openshell/resolve" || request.endsWith("/adapters/openshell/resolve")) { - return { resolveOpenshell: () => "/usr/bin/openshell" }; - } - if (request === "./runner" || request.endsWith("/runner")) { - const actualRunner = originalLoad.apply(this, arguments); - return { - ...actualRunner, - run: (cmd, opts = {}) => { - calls.push(["run", cmd]); - if (Array.isArray(cmd) && cmd.includes("provider") && cmd.includes("upsert")) { - return { status: 0, stdout: "Created provider compatible-endpoint\n", stderr: "" }; - } - if (Array.isArray(cmd) && cmd.includes("inference") && cmd.includes("set")) { - return { status: 0, stdout: "Inference configured\n", stderr: "" }; - } - if (Array.isArray(cmd) && cmd.some((part) => String(part).includes("/chat/completions"))) { - return { - status: 22, - stdout: JSON.stringify({ error: { message: "upstream returned HTTP 503 from compatible-endpoint" } }), - stderr: "curl: (22) The requested URL returned error: 503", - }; - } - return { status: 0, stdout: "", stderr: "" }; - }, - runCapture: (cmd) => { - calls.push(["runCapture", cmd]); - if (Array.isArray(cmd) && cmd.includes("inference") && cmd.includes("get")) { - return JSON.stringify({ provider: "compatible-endpoint", model: "broken-model" }); - } - return ""; - }, - }; - } - if (request === "./onboard/providers" || request.endsWith("/onboard/providers")) { - return { - REMOTE_PROVIDER_CONFIG: { - custom: { - label: "Other OpenAI-compatible endpoint", - providerName: "compatible-endpoint", - providerType: "openai", - credentialEnv: "COMPATIBLE_API_KEY", - endpointUrl: "", - helpUrl: null, - modelMode: "input", - defaultModel: "", - skipVerify: true, - }, - }, - LOCAL_INFERENCE_PROVIDERS: [], - providerExistsInGateway: () => true, - getProviderLabel: (provider) => provider, - upsertProvider: (...args) => { - calls.push(["upsertProvider", args]); - return { ok: true, status: 0, message: "Created provider compatible-endpoint" }; - }, - }; - } - if (request === "./registry" || request.endsWith("/registry")) { - return { - updateSandbox: (_name, patch) => calls.push(["registry.updateSandbox", patch]), - getSandbox: () => null, - getDisabledChannels: () => [], - }; - } - return originalLoad.apply(this, arguments); -}; - -const onboard = require("./dist/lib/onboard"); -const result = onboard.setupInference( - "test-sandbox", - "broken-model", - "compatible-endpoint", - "https://broken.example.invalid/v1", - "BROKEN_API_KEY", -); - -Promise.resolve(result) - .then((value) => { - console.log("__SETUP_INFERENCE_RESOLVED__"); - console.log(JSON.stringify(value)); - console.log("__CALLS__" + JSON.stringify(calls)); - process.exit(0); - }) - .catch((error) => { - console.error("__SETUP_INFERENCE_REJECTED__"); - console.error(error && error.stack ? error.stack : error); - console.log("__CALLS__" + JSON.stringify(calls)); - process.exit(3); - }); -NODE -NODE_EXIT=$? -set -e -cat /tmp/nemoclaw-e2e-onboard-inference-smoke-node.log - -info "node exit code: ${NODE_EXIT}" - -if grep -q "__SETUP_INFERENCE_RESOLVED__" /tmp/nemoclaw-e2e-onboard-inference-smoke-node.log || [ "$NODE_EXIT" -eq 0 ]; then - fail "setupInference() accepted a configured route without proving the chat/completions path; onboard would later print Installation complete while the first real request returns HTTP 503 (#3253)" -fi -pass "setupInference() did not accept a runtime-broken inference route" - -if ! grep -qiE "503|upstream|compatible-endpoint|broken-model|BROKEN_API_KEY|broken.example.invalid" /tmp/nemoclaw-e2e-onboard-inference-smoke-node.log; then - fail "onboard did not surface actionable inference smoke diagnostics (expected provider/model/api_base/credential env/upstream 503)" -fi -pass "onboard surfaced actionable inference smoke diagnostics for the broken route" diff --git a/test/onboard-inference-smoke.test.ts b/test/onboard-inference-smoke.test.ts new file mode 100644 index 0000000000..4cb549b35d --- /dev/null +++ b/test/onboard-inference-smoke.test.ts @@ -0,0 +1,178 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import assert from "node:assert/strict"; +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { describe, it } from "vitest"; + +import { testTimeoutOptions } from "./helpers/timeouts"; + +// Coverage guard for #3253. Onboard must not report installation success until +// the configured provider/model route has served a real chat completion. This +// caller-level, mock-driven Vitest test replaces test/e2e/test-onboard-inference-smoke.sh +// per #5119: direct setupInference() probes belong in test/, not in regression-e2e +// bash or the scenario framework. Refs #5098, #4349. +const REPO_ROOT = path.join(import.meta.dirname, ".."); + +describe("onboard inference smoke guard (#3253)", () => { + it( + "rejects a configured OpenAI-compatible route when chat/completions returns 503", + testTimeoutOptions(90_000), + () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-inference-smoke-")); + const fakeBin = path.join(tmpDir, "bin"); + const scriptPath = path.join(tmpDir, "setup-inference-smoke-check.cjs"); + const curlLogPath = path.join(tmpDir, "curl-probes.log"); + const onboardPath = JSON.stringify(path.join(REPO_ROOT, "dist", "lib", "onboard.js")); + const runnerPath = JSON.stringify(path.join(REPO_ROOT, "dist", "lib", "runner.js")); + const registryPath = JSON.stringify( + path.join(REPO_ROOT, "dist", "lib", "state", "registry.js"), + ); + + fs.mkdirSync(fakeBin, { recursive: true }); + fs.writeFileSync(path.join(fakeBin, "openshell"), "#!/usr/bin/env bash\nexit 0\n", { + mode: 0o755, + }); + fs.writeFileSync( + path.join(fakeBin, "curl"), + String.raw`#!/usr/bin/env bash +set -euo pipefail +printf '%s\n' "$*" >> "$NEMOCLAW_FAKE_CURL_LOG" +out="" +prev="" +for arg in "$@"; do + if [ "$prev" = "-o" ]; then + out="$arg" + break + fi + prev="$arg" +done +if [ -n "$out" ]; then + printf '%s\n' '{"error":{"message":"upstream returned HTTP 503 from compatible-endpoint"}}' > "$out" +fi +printf '503' +`, + { mode: 0o755 }, + ); + fs.writeFileSync( + scriptPath, + String.raw` +const runner = require(${runnerPath}); +const registry = require(${registryPath}); +const calls = []; +const normalize = (command) => (Array.isArray(command) ? command.join(" ") : String(command)); + +runner.run = (command) => { + const text = normalize(command); + calls.push(["run", text]); + if (text.includes("provider") && text.includes("upsert")) { + return { status: 0, stdout: "Created provider compatible-endpoint\n", stderr: "" }; + } + if (text.includes("inference") && text.includes("set")) { + return { status: 0, stdout: "Inference configured\n", stderr: "" }; + } + if (text.includes("/chat/completions")) { + return { + status: 22, + stdout: JSON.stringify({ error: { message: "upstream returned HTTP 503 from compatible-endpoint" } }), + stderr: "curl: (22) The requested URL returned error: 503", + }; + } + return { status: 0, stdout: "", stderr: "" }; +}; +runner.runCapture = (command) => { + const text = normalize(command); + calls.push(["runCapture", text]); + if (text.includes("inference") && text.includes("get")) { + return [ + "Gateway inference:", + "", + " Route: inference.local", + " Provider: compatible-endpoint", + " Model: broken-model", + " Version: 1", + ].join("\n"); + } + return ""; +}; +registry.updateSandbox = (_name, patch) => calls.push(["registry.updateSandbox", JSON.stringify(patch)]); + +process.env.NEMOCLAW_NON_INTERACTIVE = "1"; +process.env.NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE = "1"; +process.env.NEMOCLAW_ONBOARD_INFERENCE_SMOKE_E2E = "1"; +process.env.NEMOCLAW_TEST_NO_SLEEP = "1"; +process.env.BROKEN_API_KEY = "test-key"; + +const { setupInference } = require(${onboardPath}); + +(async () => { + await setupInference( + "test-sandbox", + "broken-model", + "compatible-endpoint", + "https://broken.example.invalid/v1", + "BROKEN_API_KEY", + ); + console.log(JSON.stringify({ outcome: "resolved", calls })); +})().catch((error) => { + console.error(error && error.stack ? error.stack : error); + console.log(JSON.stringify({ outcome: "rejected", calls })); + process.exitCode = 3; +}); +`, + ); + + try { + const result = spawnSync(process.execPath, [scriptPath], { + cwd: REPO_ROOT, + encoding: "utf8", + env: { + ...process.env, + HOME: tmpDir, + PATH: `${fakeBin}:${process.env.PATH || ""}`, + VITEST: "false", + NEMOCLAW_TEST_NO_SLEEP: "1", + NEMOCLAW_FAKE_CURL_LOG: curlLogPath, + BROKEN_API_KEY: "test-key", + }, + timeout: 80_000, + }); + + const output = `${result.stdout || ""}\n${result.stderr || ""}`; + assert.notEqual( + result.status, + 0, + `setupInference accepted a configured route without proving chat/completions; output:\n${output}`, + ); + for (const expectedDiagnostic of [ + /compatible-endpoint/i, + /broken-model/i, + /broken\.example\.invalid/i, + /Credential env: configured/i, + /503|upstream/i, + ]) { + assert.match( + output, + expectedDiagnostic, + `onboard did not surface actionable inference smoke diagnostics; output:\n${output}`, + ); + } + + const curlLog = fs.existsSync(curlLogPath) ? fs.readFileSync(curlLogPath, "utf8") : ""; + assert.ok( + curlLog.includes("/chat/completions"), + `setupInference did not probe chat/completions before failing; curl log:\n${curlLog}`, + ); + assert.ok( + !output.includes("Inference route set: compatible-endpoint / broken-model"), + `setupInference printed route success after the smoke probe failed; output:\n${output}`, + ); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }, + ); +}); diff --git a/test/regression-e2e-workflow.test.ts b/test/regression-e2e-workflow.test.ts index 3179b2bc1e..fa3da7fdea 100644 --- a/test/regression-e2e-workflow.test.ts +++ b/test/regression-e2e-workflow.test.ts @@ -26,15 +26,18 @@ type RegressionWorkflow = { describe("Regression E2E workflow contract", () => { const workflow = readYaml(".github/workflows/regression-e2e.yaml"); - it("does not advertise or select the retired docker-unreachable gateway-start lane", () => { + it.each([ + ["docker-unreachable-gateway-start-e2e", "docker_unreachable_gateway_start"], + ["onboard-inference-smoke-e2e", "onboard_inference_smoke"], + ])("does not advertise or select retired lane %s", (jobName, selectorOutput) => { const jobsDescription = workflow.on?.workflow_dispatch?.inputs?.jobs?.description ?? ""; const selectorScript = workflow.jobs?.select_regression_jobs?.steps?.find((step) => step.id === "select")?.run ?? ""; - expect(jobsDescription).not.toContain("docker-unreachable-gateway-start-e2e"); - expect(Object.keys(workflow.jobs ?? {})).not.toContain("docker-unreachable-gateway-start-e2e"); - expect(selectorScript).not.toContain("docker-unreachable-gateway-start-e2e"); - expect(selectorScript).not.toContain("docker_unreachable_gateway_start"); + expect(jobsDescription).not.toContain(jobName); + expect(Object.keys(workflow.jobs ?? {})).not.toContain(jobName); + expect(selectorScript).not.toContain(jobName); + expect(selectorScript).not.toContain(selectorOutput); }); it("does not advertise or select the retired strict-tool-call-probe lane", () => {