From cece29fb53b259feb8e62298a1611b9326d6e227 Mon Sep 17 00:00:00 2001 From: Julie Yaunches Date: Thu, 11 Jun 2026 09:24:30 -0400 Subject: [PATCH 01/11] test(e2e): add inference routing Vitest coverage --- .github/workflows/e2e-vitest-scenarios.yaml | 47 + .../live/inference-routing.test.ts | 823 ++++++++++++++++++ 2 files changed, 870 insertions(+) create mode 100644 test/e2e-scenario/live/inference-routing.test.ts diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml index 07bb8c54f5..af85fa8793 100644 --- a/.github/workflows/e2e-vitest-scenarios.yaml +++ b/.github/workflows/e2e-vitest-scenarios.yaml @@ -250,6 +250,53 @@ jobs: if-no-files-found: ignore retention-days: 14 + inference-routing-vitest: + runs-on: ubuntu-latest + timeout-minutes: 45 + env: + E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/inference-routing + NEMOCLAW_RUN_E2E_SCENARIOS: "1" + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + + - name: Set up Node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0 + with: + node-version: 22 + cache: npm + + - name: Install root dependencies + run: npm ci --ignore-scripts + + - name: Build CLI + run: npm run build:cli + + - name: Run inference routing live test + env: + NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} + # Direct Vitest coverage for test/e2e/test-inference-routing.sh. The + # always-on slices prove invalid-key classification, unreachable + # endpoint classification, and NVIDIA credential isolation; optional + # third-party provider smokes stay skipped unless their secrets are + # explicitly enabled by a future workflow. 
+ run: | + set -euo pipefail + npx vitest run --project e2e-scenarios-live \ + test/e2e-scenario/live/inference-routing.test.ts \ + --silent=false --reporter=default + + - name: Upload inference routing artifacts + if: always() + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: e2e-vitest-scenarios-inference-routing + path: e2e-artifacts/vitest/inference-routing/ + include-hidden-files: false + if-no-files-found: ignore + retention-days: 14 + # Focused coverage slice for the #2603/#3145 OpenClaw websocket # protocol/history contract. The retained legacy bash lane remains the # source for full closeout until a later PR proves replacement and deletes it. diff --git a/test/e2e-scenario/live/inference-routing.test.ts b/test/e2e-scenario/live/inference-routing.test.ts new file mode 100644 index 0000000000..d9cf31514d --- /dev/null +++ b/test/e2e-scenario/live/inference-routing.test.ts @@ -0,0 +1,823 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { spawn } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts"; +import type { ArtifactSink } from "../fixtures/artifacts.ts"; +import type { HostCliClient } from "../fixtures/clients/host.ts"; +import type { SandboxClient } from "../fixtures/clients/sandbox.ts"; +import { trustedSandboxShellScript, validateSandboxName } from "../fixtures/clients/sandbox.ts"; +import { expect, test } from "../fixtures/e2e-test.ts"; +import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts"; +import { redactString } from "../fixtures/redaction.ts"; + +// Migrated from test/e2e/test-inference-routing.sh. 
This stays a simple live +// Vitest conversion: direct CLI/onboard subprocesses plus OpenShell sandbox +// probes, with local helpers only where raw in-memory output is required to +// prove credential non-exposure before redacted artifacts are written. + +const REPO_ROOT = path.resolve(import.meta.dirname, "../../.."); +const CLI_ENTRYPOINT = path.join(REPO_ROOT, "bin", "nemoclaw.js"); +const DIST_ENTRYPOINT = path.join(REPO_ROOT, "dist", "nemoclaw.js"); +const NEMOCLAW_STATE_DIR = path.join(os.homedir(), ".nemoclaw"); +const ONBOARD_SESSION_FILE = path.join(NEMOCLAW_STATE_DIR, "onboard-session.json"); +const ONBOARD_LOCK_FILE = path.join(NEMOCLAW_STATE_DIR, "onboard.lock"); +const ONBOARD_ARGS = [ + "onboard", + "--non-interactive", + "--yes", + "--yes-i-accept-third-party-software", +]; +const STACK_TRACE_PATTERNS = [ + /^\s+at (Object\.|Module\.|node:internal|process\.)/m, + /\bat node:internal/m, +]; +const CREDENTIAL_CLASSIFICATION_PATTERN = + /authorization|credential|invalid|401|unauthorized|api[._-]?key/i; +const TRANSPORT_CLASSIFICATION_PATTERN = + /unreachable|timeout|connect|ECONNREFUSED|ETIMEDOUT|ENETUNREACH|EHOSTUNREACH|ENOTFOUND|EAI_AGAIN|No route to host|transport|network|endpoint|dns/i; +const liveTest = shouldRunLiveE2EScenarios() ? 
test : test.skip; + +function shouldRunProviderSmoke(provider: "openai" | "anthropic" | "compatible"): boolean { + const requested = process.env.NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE?.trim().toLowerCase(); + return requested === "1" || requested === "true" || requested === "all" || requested === provider; +} + +type SkipFn = (note?: string) => void; + +function skipLive(skip: SkipFn, note: string): never { + skip(note); + throw new Error(note); +} + +interface RawRunResult { + readonly command: readonly string[]; + readonly exitCode: number | null; + readonly signal: NodeJS.Signals | null; + readonly timedOut: boolean; + readonly stdout: string; + readonly stderr: string; + readonly redactedStdout: string; + readonly redactedStderr: string; +} + +interface RawRunOptions { + readonly artifactName: string; + readonly artifacts: ArtifactSink; + readonly cwd?: string; + readonly env?: NodeJS.ProcessEnv; + readonly redactionValues?: readonly string[]; + readonly timeoutMs?: number; +} + +function resultText(result: { stdout: string; stderr: string }): string { + return [result.stdout, result.stderr].filter(Boolean).join("\n"); +} + +function redactedResultText( + result: Pick, +): string { + return [result.redactedStdout, result.redactedStderr].filter(Boolean).join("\n"); +} + +function hasRawNodeStackTrace(text: string): boolean { + return STACK_TRACE_PATTERNS.some((pattern) => pattern.test(text)); +} + +function inferenceSandboxName(prefix: string): string { + const name = `${prefix}-${process.pid}`; + validateSandboxName(name); + return name; +} + +function onboardEnv(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv { + return { + ...buildAvailabilityProbeEnv(), + ...extra, + NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1", + NEMOCLAW_NON_INTERACTIVE: "1", + NEMOCLAW_RECREATE_SANDBOX: "1", + }; +} + +function clearOnboardState(): void { + fs.rmSync(ONBOARD_LOCK_FILE, { force: true }); + fs.rmSync(ONBOARD_SESSION_FILE, { force: true }); +} + +function 
redactedCommand(command: readonly string[], values: readonly string[]): string[] { + return command.map((part) => redactString(part, values)); +} + +async function runRawCommand( + command: string, + args: readonly string[], + options: RawRunOptions, +): Promise { + const timeoutMs = options.timeoutMs ?? 60_000; + const redactionValues = [...(options.redactionValues ?? [])]; + const child = spawn(command, [...args], { + cwd: options.cwd ?? REPO_ROOT, + detached: true, + env: options.env, + stdio: ["ignore", "pipe", "pipe"], + }); + const fullCommand = [command, ...args]; + let stdout = ""; + let stderr = ""; + let timedOut = false; + let spawnError: Error | undefined; + + const killProcessGroup = (signal: NodeJS.Signals): void => { + if (child.pid === undefined) return; + try { + process.kill(-child.pid, signal); + } catch { + child.kill(signal); + } + }; + + const timeout = setTimeout(() => { + timedOut = true; + killProcessGroup("SIGTERM"); + setTimeout(() => killProcessGroup("SIGKILL"), 1_000).unref(); + }, timeoutMs); + timeout.unref(); + + child.stdout?.on("data", (chunk: Buffer) => { + stdout += chunk.toString("utf8"); + }); + child.stderr?.on("data", (chunk: Buffer) => { + stderr += chunk.toString("utf8"); + }); + child.on("error", (error) => { + spawnError = error; + }); + + const { exitCode, signal } = await new Promise<{ + exitCode: number | null; + signal: NodeJS.Signals | null; + }>((resolve) => { + child.on("close", (code, closeSignal) => resolve({ exitCode: code, signal: closeSignal })); + }); + clearTimeout(timeout); + + if (spawnError) { + const message = redactString(spawnError.message, redactionValues); + throw new Error(`failed to spawn ${redactString(command, redactionValues)}: ${message}`); + } + + const redactedStdout = redactString(stdout, redactionValues); + const redactedStderr = redactString(stderr, redactionValues); + await options.artifacts.writeText(`raw-shell/${options.artifactName}.stdout.txt`, redactedStdout); + await 
options.artifacts.writeText(`raw-shell/${options.artifactName}.stderr.txt`, redactedStderr); + await options.artifacts.writeJson(`raw-shell/${options.artifactName}.result.json`, { + command: redactedCommand(fullCommand, redactionValues), + exitCode, + signal, + timedOut, + stdout: redactedStdout, + stderr: redactedStderr, + }); + + return { + command: fullCommand, + exitCode, + signal, + timedOut, + stdout, + stderr, + redactedStdout, + redactedStderr, + }; +} + +async function runNemoclawCli( + args: readonly string[], + options: RawRunOptions, +): Promise { + return runRawCommand(process.execPath, [CLI_ENTRYPOINT, ...args], options); +} + +async function runOpenShell( + args: readonly string[], + options: RawRunOptions, +): Promise { + return runRawCommand("openshell", args, options); +} + +async function requireLivePrerequisites(host: HostCliClient, skip: SkipFn): Promise { + expect( + fs.existsSync(DIST_ENTRYPOINT), + "run `npm run build:cli` before live inference-routing scenarios", + ).toBe(true); + + const docker = await host.command("docker", ["info"], { + artifactName: "prereq-docker-info-inference-routing", + env: buildAvailabilityProbeEnv(), + timeoutMs: 30_000, + }); + if (docker.exitCode !== 0) { + const message = `Docker is required for live inference-routing coverage: ${resultText(docker)}`; + if (process.env.GITHUB_ACTIONS === "true") throw new Error(message); + skipLive(skip, message); + } + + const openshell = await host.command("openshell", ["--version"], { + artifactName: "prereq-openshell-version-inference-routing", + env: buildAvailabilityProbeEnv(), + timeoutMs: 30_000, + }); + if (openshell.exitCode !== 0) { + // A fresh GitHub runner may not have OpenShell before the first onboard; + // `nemoclaw onboard` installs it. Record the prereq probe without blocking. 
+ return; + } +} + +async function ignoreCleanupError(run: () => Promise): Promise { + try { + await run(); + } catch { + // Cleanup is best-effort before the first onboard because a fresh runner may + // not have OpenShell installed until `nemoclaw onboard` reaches that phase. + } +} + +async function cleanupSandbox( + host: HostCliClient, + sandbox: SandboxClient, + sandboxName: string, +): Promise { + await ignoreCleanupError(() => + host.command(process.execPath, [CLI_ENTRYPOINT, sandboxName, "destroy", "--yes"], { + artifactName: `cleanup-nemoclaw-destroy-${sandboxName}`, + env: buildAvailabilityProbeEnv(), + timeoutMs: 120_000, + }), + ); + await ignoreCleanupError(() => + sandbox.openshell(["sandbox", "delete", sandboxName], { + artifactName: `cleanup-openshell-sandbox-delete-${sandboxName}`, + env: buildAvailabilityProbeEnv(), + timeoutMs: 60_000, + }), + ); + clearOnboardState(); +} + +async function expectNoActiveSandbox(host: HostCliClient, sandboxName: string): Promise { + const status = await host.command(process.execPath, [CLI_ENTRYPOINT, sandboxName, "status"], { + artifactName: `post-failure-status-${sandboxName}`, + env: buildAvailabilityProbeEnv(), + timeoutMs: 30_000, + }); + const text = resultText(status); + expect( + /running|ready/i.test(text), + `sandbox '${sandboxName}' is still active after failed onboard: ${text}`, + ).toBe(false); +} + +async function onboardSandbox( + artifacts: ArtifactSink, + sandboxName: string, + extraEnv: NodeJS.ProcessEnv, + redactionValues: readonly string[], + artifactName: string, + timeoutMs = 10 * 60_000, +): Promise { + clearOnboardState(); + return runNemoclawCli(ONBOARD_ARGS, { + artifactName, + artifacts, + env: onboardEnv({ + NEMOCLAW_POLICY_TIER: "open", + NEMOCLAW_SANDBOX_NAME: sandboxName, + ...extraEnv, + }), + redactionValues, + timeoutMs, + }); +} + +function expectOnboardSuccess(result: RawRunResult, label: string): void { + const redacted = redactedResultText(result); + expect(result.timedOut, 
`${label} timed out\n${redacted}`).toBe(false); + expect(result.exitCode, `${label} failed\n${redacted}`).toBe(0); +} + +function expectOnboardFailure(result: RawRunResult, label: string): void { + const redacted = redactedResultText(result); + expect(result.timedOut, `${label} timed out\n${redacted}`).toBe(false); + expect(result.exitCode, `${label} unexpectedly succeeded\n${redacted}`).not.toBe(0); +} + +function parseJsonBody(body: string, label: string): unknown { + try { + return JSON.parse(body); + } catch (error) { + throw new Error( + `${label} response was not JSON: ${error instanceof Error ? error.message : String(error)}`, + ); + } +} + +function openAiContent(json: unknown): string { + if (!json || typeof json !== "object") return ""; + const choices = (json as { choices?: unknown }).choices; + if (!Array.isArray(choices)) return ""; + for (const choice of choices) { + if (!choice || typeof choice !== "object") continue; + const message = (choice as { message?: unknown }).message; + if (message && typeof message === "object") { + const content = (message as { content?: unknown }).content; + if (typeof content === "string" && content.trim()) return content; + } + const text = (choice as { text?: unknown }).text; + if (typeof text === "string" && text.trim()) return text; + } + return ""; +} + +function anthropicContent(json: unknown): string { + if (!json || typeof json !== "object") return ""; + const content = (json as { content?: unknown }).content; + if (Array.isArray(content)) { + return content + .map((part) => { + if ( + part && + typeof part === "object" && + typeof (part as { text?: unknown }).text === "string" + ) { + return (part as { text: string }).text; + } + return ""; + }) + .join("") + .trim(); + } + return openAiContent(json); +} + +async function expectOpenAiChatThroughSandbox( + sandbox: SandboxClient, + sandboxName: string, + model: string, + redactionValues: readonly string[], + artifactName: string, +): Promise { + const payload = 
JSON.stringify({ + model, + messages: [{ role: "user", content: "Reply with exactly one word: PONG" }], + max_tokens: 50, + }); + const response = await sandbox.exec( + sandboxName, + [ + "curl", + "-sS", + "--max-time", + "60", + "https://inference.local/v1/chat/completions", + "-H", + "Content-Type: application/json", + "--data-raw", + payload, + ], + { + artifactName, + env: buildAvailabilityProbeEnv(), + redactionValues: [...redactionValues], + timeoutMs: 90_000, + }, + ); + expect(response.exitCode, resultText(response)).toBe(0); + const content = openAiContent(parseJsonBody(response.stdout, artifactName)); + expect(content, `no chat content in response: ${response.stdout.slice(0, 500)}`).not.toBe(""); +} + +async function expectAnthropicMessageThroughSandbox( + sandbox: SandboxClient, + sandboxName: string, + model: string, + redactionValues: readonly string[], +): Promise { + const payload = JSON.stringify({ + model, + messages: [{ role: "user", content: "Reply with exactly one word: PONG" }], + max_tokens: 50, + }); + const response = await sandbox.exec( + sandboxName, + [ + "curl", + "-sS", + "--max-time", + "60", + "https://inference.local/v1/messages", + "-H", + "Content-Type: application/json", + "--data-raw", + payload, + ], + { + artifactName: "anthropic-inference-local-message", + env: buildAvailabilityProbeEnv(), + redactionValues: [...redactionValues], + timeoutMs: 90_000, + }, + ); + expect(response.exitCode, resultText(response)).toBe(0); + const content = anthropicContent(parseJsonBody(response.stdout, "anthropic inference.local")); + expect(content, `no Anthropic content in response: ${response.stdout.slice(0, 500)}`).not.toBe( + "", + ); +} + +liveTest( + "TC-INF-06 invalid API key fails with credential classification and cleanup", + { timeout: 5 * 60_000 }, + async ({ artifacts, cleanup, host, sandbox, skip }) => { + await requireLivePrerequisites(host, skip); + const sandboxName = inferenceSandboxName("e2e-invalid-key"); + 
cleanup.add(`remove inference-routing invalid-key residue for ${sandboxName}`, () => + cleanupSandbox(host, sandbox, sandboxName), + ); + await cleanupSandbox(host, sandbox, sandboxName); + + await artifacts.writeJson("scenario.json", { + id: "inference-routing-invalid-api-key", + runner: "vitest", + migratedFrom: "test/e2e/test-inference-routing.sh", + contract: [ + "invalid NVIDIA key exits non-zero", + "output contains credential classification", + "output does not expose raw stack trace or submitted key", + "failed onboard leaves no active sandbox", + ], + }); + + const invalidKey = ["nvapi", "INTENTIONALLY", "INVALID", "KEY", "FOR", "E2E", "TEST"].join("-"); + const result = await onboardSandbox( + artifacts, + sandboxName, + { NVIDIA_API_KEY: invalidKey }, + [invalidKey], + "tc-inf-06-onboard-invalid-api-key", + 120_000, + ); + const raw = resultText(result); + const redacted = redactedResultText(result); + + expectOnboardFailure(result, "TC-INF-06 invalid-key onboard"); + expect(CREDENTIAL_CLASSIFICATION_PATTERN.test(raw), redacted).toBe(true); + expect(hasRawNodeStackTrace(raw), redacted).toBe(false); + expect(raw.includes("INTENTIONALLY-INVALID-KEY-FOR-E2E-TEST"), redacted).toBe(false); + await expectNoActiveSandbox(host, sandboxName); + }, +); + +liveTest( + "TC-INF-07 unreachable endpoint fails with transport classification and cleanup", + { timeout: 5 * 60_000 }, + async ({ artifacts, cleanup, host, sandbox, skip }) => { + await requireLivePrerequisites(host, skip); + const sandboxName = inferenceSandboxName("e2e-unreachable"); + cleanup.add(`remove inference-routing unreachable residue for ${sandboxName}`, () => + cleanupSandbox(host, sandbox, sandboxName), + ); + await cleanupSandbox(host, sandbox, sandboxName); + + await artifacts.writeJson("scenario.json", { + id: "inference-routing-unreachable-endpoint", + runner: "vitest", + migratedFrom: "test/e2e/test-inference-routing.sh", + contract: [ + "unreachable custom endpoint exits non-zero", + "output 
contains transport classification", + "output does not expose raw stack trace", + "failed onboard leaves no active sandbox", + ], + }); + + const nvidiaKey = ["nvapi", "valid", "format", "but", "fake", "key", "1234567890"].join("-"); + const compatibleKey = "fake-key-for-unreachable-test"; + const result = await onboardSandbox( + artifacts, + sandboxName, + { + COMPATIBLE_API_KEY: compatibleKey, + NEMOCLAW_ENDPOINT_URL: "https://nemoclaw-e2e.invalid/v1", + NEMOCLAW_MODEL: "test-model", + NEMOCLAW_PROVIDER: "custom", + NVIDIA_API_KEY: nvidiaKey, + }, + [nvidiaKey, compatibleKey], + "tc-inf-07-onboard-unreachable-endpoint", + 120_000, + ); + const raw = resultText(result); + const redacted = redactedResultText(result); + + expectOnboardFailure(result, "TC-INF-07 unreachable-endpoint onboard"); + expect(TRANSPORT_CLASSIFICATION_PATTERN.test(raw), redacted).toBe(true); + expect(hasRawNodeStackTrace(raw), redacted).toBe(false); + await expectNoActiveSandbox(host, sandboxName); + }, +); + +liveTest( + "TC-INF-05 real NVIDIA key is isolated from sandbox env, process list, and filesystem", + { timeout: 15 * 60_000 }, + async ({ artifacts, cleanup, host, sandbox, secrets, skip }) => { + const apiKey = + secrets.optional("NVIDIA_API_KEY") ?? 
+ skipLive(skip, "NVIDIA_API_KEY not set — cannot test credential isolation"); + await requireLivePrerequisites(host, skip); + const sandboxName = inferenceSandboxName("e2e-inf-cred"); + cleanup.add(`remove inference-routing credential-isolation residue for ${sandboxName}`, () => + cleanupSandbox(host, sandbox, sandboxName), + ); + await cleanupSandbox(host, sandbox, sandboxName); + + await artifacts.writeJson("scenario.json", { + id: "inference-routing-credential-isolation", + runner: "vitest", + migratedFrom: "test/e2e/test-inference-routing.sh", + contract: [ + "real NVIDIA_API_KEY does not appear in sandbox environment", + "real NVIDIA_API_KEY does not appear in sandbox process list when ps is available", + "real NVIDIA_API_KEY does not appear in sampled sandbox filesystem", + "sandbox NVIDIA_API_KEY, when present, is a placeholder rather than the real key", + ], + }); + + const onboard = await onboardSandbox( + artifacts, + sandboxName, + { NVIDIA_API_KEY: apiKey }, + [apiKey], + "tc-inf-05-onboard-credential-isolation", + ); + expectOnboardSuccess(onboard, "TC-INF-05 credential-isolation onboard"); + + const sandboxEnv = await runOpenShell(["sandbox", "exec", "-n", sandboxName, "--", "env"], { + artifactName: "tc-inf-05-sandbox-env", + artifacts, + env: buildAvailabilityProbeEnv(), + redactionValues: [apiKey], + timeoutMs: 60_000, + }); + expect(sandboxEnv.exitCode, redactedResultText(sandboxEnv)).toBe(0); + expect(sandboxEnv.stdout.includes(apiKey), redactedResultText(sandboxEnv)).toBe(false); + + const processList = await runOpenShell( + [ + "sandbox", + "exec", + "-n", + sandboxName, + "--", + "sh", + "-lc", + "ps aux 2>/dev/null || ps -ef 2>/dev/null", + ], + { + artifactName: "tc-inf-05-sandbox-process-list", + artifacts, + env: buildAvailabilityProbeEnv(), + redactionValues: [apiKey], + timeoutMs: 60_000, + }, + ); + if (processList.exitCode === 0 && processList.stdout.trim()) { + expect(processList.stdout.includes(apiKey), 
redactedResultText(processList)).toBe(false); + } else { + await artifacts.writeJson("tc-inf-05-process-list-skipped.json", { + reason: "ps not available in hardened sandbox", + exitCode: processList.exitCode, + }); + } + + const keyB64 = Buffer.from(apiKey, "utf8").toString("base64"); + const scanScript = ` +const fs = require('fs'); +const { execFileSync } = require('child_process'); +const key = Buffer.from(process.env.KEY_B64 || '', 'base64').toString('utf8'); +if (!key) { console.log('NO_KEY_PROVIDED'); process.exit(0); } +let out = ''; +try { + out = execFileSync('sh', ['-lc', 'find /sandbox /home /tmp -type f -size -1M 2>/dev/null | head -200'], { encoding: 'utf8' }); +} catch { + console.log('SCAN_ERROR'); + process.exit(0); +} +for (const file of out.trim().split(/\\n/).filter(Boolean)) { + try { + const content = fs.readFileSync(file, 'utf8'); + if (content.includes(key)) console.log('FOUND:' + file); + } catch {} +} +console.log('SCAN_DONE'); +`; + const filesystemScan = await runOpenShell( + [ + "sandbox", + "exec", + "-n", + sandboxName, + "--", + "env", + `KEY_B64=${keyB64}`, + "node", + "-e", + scanScript, + ], + { + artifactName: "tc-inf-05-sandbox-filesystem-scan", + artifacts, + env: buildAvailabilityProbeEnv(), + redactionValues: [apiKey, keyB64], + timeoutMs: 90_000, + }, + ); + expect(filesystemScan.stdout).not.toContain("NO_KEY_PROVIDED"); + expect(filesystemScan.stdout).not.toContain("FOUND:"); + expect(filesystemScan.stdout, redactedResultText(filesystemScan)).toContain("SCAN_DONE"); + + const placeholder = await sandbox.execShell( + sandboxName, + trustedSandboxShellScript("printenv NVIDIA_API_KEY 2>/dev/null || true"), + { + artifactName: "tc-inf-05-sandbox-placeholder", + env: buildAvailabilityProbeEnv(), + redactionValues: [apiKey], + timeoutMs: 30_000, + }, + ); + const placeholderValue = placeholder.stdout.trim(); + if (!placeholderValue) { + await artifacts.writeJson("tc-inf-05-placeholder-skipped.json", { + reason: "NVIDIA_API_KEY 
not set in sandbox; placeholder injection may not be active", + }); + } else { + expect(placeholderValue, "sandbox has the real key, not a placeholder").not.toBe(apiKey); + } + }, +); + +liveTest( + "TC-INF-02 OpenAI provider responds through inference.local", + { timeout: 15 * 60_000 }, + async ({ artifacts, cleanup, host, sandbox, secrets, skip }) => { + if (!shouldRunProviderSmoke("openai")) { + skipLive( + skip, + "set NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE=openai or all to run OpenAI smoke", + ); + } + const apiKey = secrets.optional("OPENAI_API_KEY") ?? skipLive(skip, "OPENAI_API_KEY not set"); + await requireLivePrerequisites(host, skip); + const sandboxName = inferenceSandboxName("e2e-openai"); + const model = process.env.NEMOCLAW_OPENAI_MODEL || "gpt-4o-mini"; + cleanup.add(`remove inference-routing OpenAI residue for ${sandboxName}`, () => + cleanupSandbox(host, sandbox, sandboxName), + ); + await cleanupSandbox(host, sandbox, sandboxName); + + await artifacts.writeJson("scenario.json", { + id: "inference-routing-openai", + runner: "vitest", + migratedFrom: "test/e2e/test-inference-routing.sh", + contract: ["OpenAI provider onboards", "sandbox inference.local routes chat to OpenAI"], + model, + }); + + const onboard = await onboardSandbox( + artifacts, + sandboxName, + { NEMOCLAW_MODEL: model, NEMOCLAW_PROVIDER: "openai", OPENAI_API_KEY: apiKey }, + [apiKey], + "tc-inf-02-onboard-openai", + ); + expectOnboardSuccess(onboard, "TC-INF-02 OpenAI onboard"); + await expectOpenAiChatThroughSandbox( + sandbox, + sandboxName, + model, + [apiKey], + "openai-inference-local-chat", + ); + }, +); + +liveTest( + "TC-INF-03 Anthropic provider responds through inference.local", + { timeout: 15 * 60_000 }, + async ({ artifacts, cleanup, host, sandbox, secrets, skip }) => { + if (!shouldRunProviderSmoke("anthropic")) { + skipLive( + skip, + "set NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE=anthropic or all to run Anthropic smoke", + ); + } + const apiKey = + 
secrets.optional("ANTHROPIC_API_KEY") ?? skipLive(skip, "ANTHROPIC_API_KEY not set"); + await requireLivePrerequisites(host, skip); + const sandboxName = inferenceSandboxName("e2e-anthropic"); + const model = process.env.NEMOCLAW_ANTHROPIC_MODEL || "claude-sonnet-4-6"; + cleanup.add(`remove inference-routing Anthropic residue for ${sandboxName}`, () => + cleanupSandbox(host, sandbox, sandboxName), + ); + await cleanupSandbox(host, sandbox, sandboxName); + + await artifacts.writeJson("scenario.json", { + id: "inference-routing-anthropic", + runner: "vitest", + migratedFrom: "test/e2e/test-inference-routing.sh", + contract: [ + "Anthropic provider onboards", + "sandbox inference.local routes Messages API to Anthropic", + ], + model, + }); + + const onboard = await onboardSandbox( + artifacts, + sandboxName, + { ANTHROPIC_API_KEY: apiKey, NEMOCLAW_MODEL: model, NEMOCLAW_PROVIDER: "anthropic" }, + [apiKey], + "tc-inf-03-onboard-anthropic", + ); + expectOnboardSuccess(onboard, "TC-INF-03 Anthropic onboard"); + await expectAnthropicMessageThroughSandbox(sandbox, sandboxName, model, [apiKey]); + }, +); + +liveTest( + "TC-INF-09 custom OpenAI-compatible endpoint responds through inference.local", + { timeout: 15 * 60_000 }, + async ({ artifacts, cleanup, host, sandbox, secrets, skip }) => { + if (!shouldRunProviderSmoke("compatible")) { + skipLive( + skip, + "set NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE=compatible or all to run compatible endpoint smoke", + ); + } + const endpointUrl = + process.env.NEMOCLAW_ENDPOINT_URL ?? + skipLive(skip, "Missing NEMOCLAW_ENDPOINT_URL, NEMOCLAW_COMPAT_MODEL, or COMPATIBLE_API_KEY"); + const model = + process.env.NEMOCLAW_COMPAT_MODEL || + process.env.NEMOCLAW_MODEL || + skipLive(skip, "Missing NEMOCLAW_ENDPOINT_URL, NEMOCLAW_COMPAT_MODEL, or COMPATIBLE_API_KEY"); + const apiKey = + secrets.optional("COMPATIBLE_API_KEY") ?? 
+ skipLive(skip, "Missing NEMOCLAW_ENDPOINT_URL, NEMOCLAW_COMPAT_MODEL, or COMPATIBLE_API_KEY"); + await requireLivePrerequisites(host, skip); + const sandboxName = inferenceSandboxName("e2e-compat-ep"); + cleanup.add(`remove inference-routing compatible-endpoint residue for ${sandboxName}`, () => + cleanupSandbox(host, sandbox, sandboxName), + ); + await cleanupSandbox(host, sandbox, sandboxName); + + await artifacts.writeJson("scenario.json", { + id: "inference-routing-compatible-endpoint", + runner: "vitest", + migratedFrom: "test/e2e/test-inference-routing.sh", + contract: [ + "custom OpenAI-compatible endpoint onboards", + "sandbox inference.local routes chat to compatible endpoint", + ], + endpointUrl: redactString(endpointUrl, [apiKey]), + model, + }); + + const onboard = await onboardSandbox( + artifacts, + sandboxName, + { + COMPATIBLE_API_KEY: apiKey, + NEMOCLAW_ENDPOINT_URL: endpointUrl, + NEMOCLAW_MODEL: model, + NEMOCLAW_PROVIDER: "custom", + }, + [apiKey], + "tc-inf-09-onboard-compatible-endpoint", + ); + expectOnboardSuccess(onboard, "TC-INF-09 compatible-endpoint onboard"); + await expectOpenAiChatThroughSandbox( + sandbox, + sandboxName, + model, + [apiKey], + "compatible-endpoint-inference-local-chat", + ); + }, +); From bc9d871f7f37f3a8f6107ca7e1099654c4cb324a Mon Sep 17 00:00:00 2001 From: Julie Yaunches Date: Thu, 11 Jun 2026 09:28:41 -0400 Subject: [PATCH 02/11] test(e2e): tolerate fresh runners before openshell install --- .../live/inference-routing.test.ts | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/test/e2e-scenario/live/inference-routing.test.ts b/test/e2e-scenario/live/inference-routing.test.ts index d9cf31514d..cd944993d6 100644 --- a/test/e2e-scenario/live/inference-routing.test.ts +++ b/test/e2e-scenario/live/inference-routing.test.ts @@ -227,14 +227,19 @@ async function requireLivePrerequisites(host: HostCliClient, skip: SkipFn): Prom skipLive(skip, message); } - const openshell = await 
host.command("openshell", ["--version"], { - artifactName: "prereq-openshell-version-inference-routing", - env: buildAvailabilityProbeEnv(), - timeoutMs: 30_000, - }); - if (openshell.exitCode !== 0) { - // A fresh GitHub runner may not have OpenShell before the first onboard; - // `nemoclaw onboard` installs it. Record the prereq probe without blocking. + try { + const openshell = await host.command("openshell", ["--version"], { + artifactName: "prereq-openshell-version-inference-routing", + env: buildAvailabilityProbeEnv(), + timeoutMs: 30_000, + }); + if (openshell.exitCode !== 0) { + // A fresh GitHub runner may not have OpenShell before the first onboard; + // `nemoclaw onboard` installs it. Record the prereq probe without blocking. + return; + } + } catch { + // Same as non-zero: fresh runner may not have openshell until onboard. return; } } From caf56dc368b1655007e6b6f63f6522cceeb941a9 Mon Sep 17 00:00:00 2001 From: Julie Yaunches Date: Thu, 11 Jun 2026 09:34:35 -0400 Subject: [PATCH 03/11] test(e2e): keep sandbox scan command single-line --- .../live/inference-routing.test.ts | 30 +++++++------------ 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/test/e2e-scenario/live/inference-routing.test.ts b/test/e2e-scenario/live/inference-routing.test.ts index cd944993d6..e0c5f01fb8 100644 --- a/test/e2e-scenario/live/inference-routing.test.ts +++ b/test/e2e-scenario/live/inference-routing.test.ts @@ -611,26 +611,16 @@ liveTest( } const keyB64 = Buffer.from(apiKey, "utf8").toString("base64"); - const scanScript = ` -const fs = require('fs'); -const { execFileSync } = require('child_process'); -const key = Buffer.from(process.env.KEY_B64 || '', 'base64').toString('utf8'); -if (!key) { console.log('NO_KEY_PROVIDED'); process.exit(0); } -let out = ''; -try { - out = execFileSync('sh', ['-lc', 'find /sandbox /home /tmp -type f -size -1M 2>/dev/null | head -200'], { encoding: 'utf8' }); -} catch { - console.log('SCAN_ERROR'); - process.exit(0); -} -for 
(const file of out.trim().split(/\\n/).filter(Boolean)) { - try { - const content = fs.readFileSync(file, 'utf8'); - if (content.includes(key)) console.log('FOUND:' + file); - } catch {} -} -console.log('SCAN_DONE'); -`; + const scanScript = [ + "const fs=require('fs')", + "const {execFileSync}=require('child_process')", + "const key=Buffer.from(process.env.KEY_B64||'','base64').toString('utf8')", + "if(!key){console.log('NO_KEY_PROVIDED');process.exit(0)}", + "let out=''", + "try{out=execFileSync('sh',['-lc','find /sandbox /home /tmp -type f -size -1048576c 2>/dev/null | head -200'],{encoding:'utf8'})}catch{console.log('SCAN_ERROR');process.exit(0)}", + "for(const file of out.trim().split(/\\n/).filter(Boolean)){try{const content=fs.readFileSync(file,'utf8');if(content.includes(key))console.log('FOUND:'+file)}catch{}}", + "console.log('SCAN_DONE')", + ].join(";"); const filesystemScan = await runOpenShell( [ "sandbox", From 6aa71fe330687d7ee75cd73adff30582b6a023ac Mon Sep 17 00:00:00 2001 From: Julie Yaunches Date: Thu, 11 Jun 2026 09:39:14 -0400 Subject: [PATCH 04/11] ci(e2e): keep inference routing job PR-safe --- .github/workflows/e2e-vitest-scenarios.yaml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml index af85fa8793..55377cc258 100644 --- a/.github/workflows/e2e-vitest-scenarios.yaml +++ b/.github/workflows/e2e-vitest-scenarios.yaml @@ -274,13 +274,12 @@ jobs: run: npm run build:cli - name: Run inference routing live test - env: - NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} # Direct Vitest coverage for test/e2e/test-inference-routing.sh. The - # always-on slices prove invalid-key classification, unreachable - # endpoint classification, and NVIDIA credential isolation; optional - # third-party provider smokes stay skipped unless their secrets are - # explicitly enabled by a future workflow.
+ # always-on PR-safe slices prove invalid-key and unreachable-endpoint + # classification/cleanup without spending live provider quota; real + # NVIDIA credential isolation and third-party provider smokes stay + # skipped unless their secrets are explicitly supplied by a future + # workflow. run: | set -euo pipefail npx vitest run --project e2e-scenarios-live \ From c63d3112be568af812d1c780d761a6ef0c5a2db4 Mon Sep 17 00:00:00 2001 From: Julie Yaunches Date: Thu, 11 Jun 2026 10:32:55 -0400 Subject: [PATCH 05/11] test(e2e): harden inference routing probes --- .../live/inference-routing.test.ts | 177 ++++++++++++++---- 1 file changed, 141 insertions(+), 36 deletions(-) diff --git a/test/e2e-scenario/live/inference-routing.test.ts b/test/e2e-scenario/live/inference-routing.test.ts index e0c5f01fb8..0a3daca40f 100644 --- a/test/e2e-scenario/live/inference-routing.test.ts +++ b/test/e2e-scenario/live/inference-routing.test.ts @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 import { spawn } from "node:child_process"; +import crypto from "node:crypto"; import fs from "node:fs"; import os from "node:os"; import path from "node:path"; @@ -71,6 +72,7 @@ interface RawRunOptions { readonly cwd?: string; readonly env?: NodeJS.ProcessEnv; readonly redactionValues?: readonly string[]; + readonly stdin?: string; readonly timeoutMs?: number; } @@ -124,7 +126,7 @@ async function runRawCommand( cwd: options.cwd ?? REPO_ROOT, detached: true, env: options.env, - stdio: ["ignore", "pipe", "pipe"], + stdio: [options.stdin === undefined ? 
"ignore" : "pipe", "pipe", "pipe"], }); const fullCommand = [command, ...args]; let stdout = ""; @@ -148,6 +150,10 @@ async function runRawCommand( }, timeoutMs); timeout.unref(); + if (options.stdin !== undefined) { + child.stdin?.end(options.stdin); + } + child.stdout?.on("data", (chunk: Buffer) => { stdout += chunk.toString("utf8"); }); @@ -244,35 +250,96 @@ async function requireLivePrerequisites(host: HostCliClient, skip: SkipFn): Prom } } -async function ignoreCleanupError(run: () => Promise): Promise { +interface CleanupSandboxOptions { + readonly strict?: boolean; +} + +async function optionalCleanupStep(run: () => Promise): Promise { try { await run(); } catch { - // Cleanup is best-effort before the first onboard because a fresh runner may - // not have OpenShell installed until `nemoclaw onboard` reaches that phase. + // Pre-onboard cleanup is best-effort because a fresh runner may not have + // OpenShell installed until `nemoclaw onboard` reaches that phase. } } +function probeSummary( + label: string, + result: { exitCode: number | null; stdout: string; stderr: string }, +): string { + const text = resultText(result).trim(); + return `${label} exit=${result.exitCode}${text ? 
`: ${text.slice(0, 500)}` : ""}`; +} + async function cleanupSandbox( host: HostCliClient, sandbox: SandboxClient, sandboxName: string, + options: CleanupSandboxOptions = {}, ): Promise { - await ignoreCleanupError(() => - host.command(process.execPath, [CLI_ENTRYPOINT, sandboxName, "destroy", "--yes"], { - artifactName: `cleanup-nemoclaw-destroy-${sandboxName}`, - env: buildAvailabilityProbeEnv(), - timeoutMs: 120_000, - }), - ); - await ignoreCleanupError(() => - sandbox.openshell(["sandbox", "delete", sandboxName], { + if (!options.strict) { + await optionalCleanupStep(() => + host.command(process.execPath, [CLI_ENTRYPOINT, sandboxName, "destroy", "--yes"], { + artifactName: `cleanup-nemoclaw-destroy-${sandboxName}`, + env: buildAvailabilityProbeEnv(), + timeoutMs: 120_000, + }), + ); + await optionalCleanupStep(() => + sandbox.openshell(["sandbox", "delete", sandboxName], { + artifactName: `cleanup-openshell-sandbox-delete-${sandboxName}`, + env: buildAvailabilityProbeEnv(), + timeoutMs: 60_000, + }), + ); + clearOnboardState(); + return; + } + + const cleanupEvidence: string[] = []; + try { + const destroy = await host.command( + process.execPath, + [CLI_ENTRYPOINT, sandboxName, "destroy", "--yes"], + { + artifactName: `cleanup-nemoclaw-destroy-${sandboxName}`, + env: buildAvailabilityProbeEnv(), + timeoutMs: 120_000, + }, + ); + cleanupEvidence.push(probeSummary("nemoclaw destroy", destroy)); + } catch (error) { + cleanupEvidence.push( + `nemoclaw destroy threw: ${error instanceof Error ? error.message : String(error)}`, + ); + } + + try { + const deletion = await sandbox.openshell(["sandbox", "delete", sandboxName], { artifactName: `cleanup-openshell-sandbox-delete-${sandboxName}`, env: buildAvailabilityProbeEnv(), timeoutMs: 60_000, - }), - ); + }); + cleanupEvidence.push(probeSummary("openshell sandbox delete", deletion)); + } catch (error) { + cleanupEvidence.push( + `openshell sandbox delete threw: ${error instanceof Error ? 
error.message : String(error)}`, + ); + } + clearOnboardState(); + + const status = await sandbox.status(sandboxName, { + artifactName: `cleanup-openshell-sandbox-status-${sandboxName}`, + env: buildAvailabilityProbeEnv(), + timeoutMs: 30_000, + }); + cleanupEvidence.push(probeSummary("openshell sandbox status", status)); + if (status.exitCode === 0) { + throw new Error( + `sandbox '${sandboxName}' still exists after strict cleanup\n${cleanupEvidence.join("\n")}`, + ); + } } async function expectNoActiveSandbox(host: HostCliClient, sandboxName: string): Promise { @@ -546,8 +613,9 @@ liveTest( skipLive(skip, "NVIDIA_API_KEY not set — cannot test credential isolation"); await requireLivePrerequisites(host, skip); const sandboxName = inferenceSandboxName("e2e-inf-cred"); - cleanup.add(`remove inference-routing credential-isolation residue for ${sandboxName}`, () => - cleanupSandbox(host, sandbox, sandboxName), + cleanup.add( + `best-effort inference-routing credential-isolation cleanup for ${sandboxName}`, + () => cleanupSandbox(host, sandbox, sandboxName), ); await cleanupSandbox(host, sandbox, sandboxName); @@ -571,6 +639,9 @@ liveTest( "tc-inf-05-onboard-credential-isolation", ); expectOnboardSuccess(onboard, "TC-INF-05 credential-isolation onboard"); + cleanup.add(`strict inference-routing credential-isolation cleanup for ${sandboxName}`, () => + cleanupSandbox(host, sandbox, sandboxName, { strict: true }), + ); const sandboxEnv = await runOpenShell(["sandbox", "exec", "-n", sandboxName, "--", "env"], { artifactName: "tc-inf-05-sandbox-env", @@ -610,35 +681,59 @@ liveTest( }); } - const keyB64 = Buffer.from(apiKey, "utf8").toString("base64"); const scanScript = [ "const fs=require('fs')", "const {execFileSync}=require('child_process')", - "const key=Buffer.from(process.env.KEY_B64||'','base64').toString('utf8')", + "const key=fs.readFileSync(0,'utf8')", "if(!key){console.log('NO_KEY_PROVIDED');process.exit(0)}", "let out=''", 
"try{out=execFileSync('sh',['-lc','find /sandbox /home /tmp -type f -size -1M 2>/dev/null | head -200'],{encoding:'utf8'})}catch{console.log('SCAN_ERROR');process.exit(0)}", "for(const file of out.trim().split(/\\n/).filter(Boolean)){try{const content=fs.readFileSync(file,'utf8');if(content.includes(key))console.log('FOUND:'+file)}catch{}}", "console.log('SCAN_DONE')", ].join(";"); + const leakCanary = `nemoclaw-fs-scan-canary-${crypto.randomUUID()}`; + const canaryPath = "/tmp/nemoclaw-fs-scan-canary.txt"; + const plantCanary = await sandbox.execShell( + sandboxName, + trustedSandboxShellScript(`printf '%s' '${leakCanary}' > ${canaryPath}`), + { + artifactName: "tc-inf-05-sandbox-filesystem-canary-plant", + env: buildAvailabilityProbeEnv(), + timeoutMs: 30_000, + }, + ); + expect(plantCanary.exitCode, resultText(plantCanary)).toBe(0); + const canaryScan = await runOpenShell( + ["sandbox", "exec", "-n", sandboxName, "--", "node", "-e", scanScript], + { + artifactName: "tc-inf-05-sandbox-filesystem-canary-scan", + artifacts, + env: buildAvailabilityProbeEnv(), + stdin: leakCanary, + timeoutMs: 90_000, + }, + ); + expect(canaryScan.stdout, redactedResultText(canaryScan)).toContain(`FOUND:${canaryPath}`); + + const removeCanary = await sandbox.execShell( + sandboxName, + trustedSandboxShellScript(`rm -f ${canaryPath}`), + { + artifactName: "tc-inf-05-sandbox-filesystem-canary-remove", + env: buildAvailabilityProbeEnv(), + timeoutMs: 30_000, + }, + ); + expect(removeCanary.exitCode, resultText(removeCanary)).toBe(0); + const filesystemScan = await runOpenShell( - [ - "sandbox", - "exec", - "-n", - sandboxName, - "--", - "env", - `KEY_B64=${keyB64}`, - "node", - "-e", - scanScript, - ], + ["sandbox", "exec", "-n", sandboxName, "--", "node", "-e", scanScript], { artifactName: "tc-inf-05-sandbox-filesystem-scan", artifacts, env: buildAvailabilityProbeEnv(), - redactionValues: [apiKey, keyB64], + redactionValues: [apiKey], + stdin: apiKey, timeoutMs: 90_000, }, ); @@ 
-681,7 +776,7 @@ liveTest( await requireLivePrerequisites(host, skip); const sandboxName = inferenceSandboxName("e2e-openai"); const model = process.env.NEMOCLAW_OPENAI_MODEL || "gpt-4o-mini"; - cleanup.add(`remove inference-routing OpenAI residue for ${sandboxName}`, () => + cleanup.add(`best-effort inference-routing OpenAI cleanup for ${sandboxName}`, () => cleanupSandbox(host, sandbox, sandboxName), ); await cleanupSandbox(host, sandbox, sandboxName); @@ -702,6 +797,9 @@ liveTest( "tc-inf-02-onboard-openai", ); expectOnboardSuccess(onboard, "TC-INF-02 OpenAI onboard"); + cleanup.add(`strict inference-routing OpenAI cleanup for ${sandboxName}`, () => + cleanupSandbox(host, sandbox, sandboxName, { strict: true }), + ); await expectOpenAiChatThroughSandbox( sandbox, sandboxName, @@ -727,7 +825,7 @@ liveTest( await requireLivePrerequisites(host, skip); const sandboxName = inferenceSandboxName("e2e-anthropic"); const model = process.env.NEMOCLAW_ANTHROPIC_MODEL || "claude-sonnet-4-6"; - cleanup.add(`remove inference-routing Anthropic residue for ${sandboxName}`, () => + cleanup.add(`best-effort inference-routing Anthropic cleanup for ${sandboxName}`, () => cleanupSandbox(host, sandbox, sandboxName), ); await cleanupSandbox(host, sandbox, sandboxName); @@ -751,6 +849,9 @@ liveTest( "tc-inf-03-onboard-anthropic", ); expectOnboardSuccess(onboard, "TC-INF-03 Anthropic onboard"); + cleanup.add(`strict inference-routing Anthropic cleanup for ${sandboxName}`, () => + cleanupSandbox(host, sandbox, sandboxName, { strict: true }), + ); await expectAnthropicMessageThroughSandbox(sandbox, sandboxName, model, [apiKey]); }, ); @@ -777,8 +878,9 @@ liveTest( skipLive(skip, "Missing NEMOCLAW_ENDPOINT_URL, NEMOCLAW_COMPAT_MODEL, or COMPATIBLE_API_KEY"); await requireLivePrerequisites(host, skip); const sandboxName = inferenceSandboxName("e2e-compat-ep"); - cleanup.add(`remove inference-routing compatible-endpoint residue for ${sandboxName}`, () => - cleanupSandbox(host, sandbox, 
sandboxName), + cleanup.add( + `best-effort inference-routing compatible-endpoint cleanup for ${sandboxName}`, + () => cleanupSandbox(host, sandbox, sandboxName), ); await cleanupSandbox(host, sandbox, sandboxName); @@ -807,6 +909,9 @@ liveTest( "tc-inf-09-onboard-compatible-endpoint", ); expectOnboardSuccess(onboard, "TC-INF-09 compatible-endpoint onboard"); + cleanup.add(`strict inference-routing compatible-endpoint cleanup for ${sandboxName}`, () => + cleanupSandbox(host, sandbox, sandboxName, { strict: true }), + ); await expectOpenAiChatThroughSandbox( sandbox, sandboxName, From e407091aabf31165b967e0ef7cf0b6aff6d1444c Mon Sep 17 00:00:00 2001 From: Julie Yaunches Date: Thu, 11 Jun 2026 10:41:45 -0400 Subject: [PATCH 06/11] ci(e2e): allow selective Vitest job dispatch --- .github/workflows/e2e-vitest-scenarios.yaml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml index 92b06c7117..66b948e69d 100644 --- a/.github/workflows/e2e-vitest-scenarios.yaml +++ b/.github/workflows/e2e-vitest-scenarios.yaml @@ -11,6 +11,11 @@ on: required: false default: "" type: string + jobs: + description: "Optional comma-separated free-standing live Vitest job ids. Empty runs all enabled jobs." + required: false + default: "" + type: string pr_number: description: Optional PR number for selective-dispatch result comments. 
required: false @@ -21,7 +26,7 @@ permissions: contents: read concurrency: - group: e2e-vitest-scenarios-${{ github.ref }}-${{ inputs.scenarios || 'supported' }} + group: e2e-vitest-scenarios-${{ github.ref }}-${{ inputs.scenarios || 'supported' }}-${{ inputs.jobs || 'all-jobs' }} cancel-in-progress: false jobs: @@ -47,6 +52,7 @@ jobs: name: Generate Vitest scenario matrix env: SCENARIOS: ${{ inputs.scenarios }} + JOBS: ${{ inputs.jobs }} run: | set -euo pipefail args=(--emit-live-matrix) @@ -57,6 +63,10 @@ jobs: fi args+=(--scenarios "${SCENARIOS}") fi + if [ -n "${JOBS}" ] && [[ ! "${JOBS}" =~ ^[A-Za-z0-9_-]+(,[A-Za-z0-9_-]+)*$ ]]; then + echo "::error::Invalid jobs input: ${JOBS}" >&2 + exit 1 + fi matrix="$(npx tsx test/e2e-scenario/scenarios/run.ts "${args[@]}")" echo "matrix=${matrix}" >> "$GITHUB_OUTPUT" MATRIX_JSON="${matrix}" python - <<'PY' >> "$GITHUB_STEP_SUMMARY" @@ -74,6 +84,7 @@ jobs: live-scenarios: needs: generate-matrix + if: ${{ inputs.jobs == '' }} runs-on: ${{ matrix.runner }} timeout-minutes: 45 strategy: @@ -174,6 +185,7 @@ jobs: # because the matrix above only runs registry-scenarios.test.ts. Modeled on # #5049's free-standing pattern. openshell-version-pin-vitest: + if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',openshell-version-pin-vitest,') }} runs-on: ubuntu-latest timeout-minutes: 15 env: @@ -213,6 +225,7 @@ jobs: retention-days: 14 onboard-negative-paths-vitest: + if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',onboard-negative-paths-vitest,') }} runs-on: ubuntu-latest timeout-minutes: 15 env: @@ -256,6 +269,7 @@ jobs: retention-days: 14 inference-routing-vitest: + if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',inference-routing-vitest,') }} runs-on: ubuntu-latest timeout-minutes: 45 env: @@ -305,7 +319,7 @@ jobs: # protocol/history contract. The retained legacy bash lane remains the # source for full closeout until a later PR proves replacement and deletes it. 
openclaw-tui-chat-correlation-vitest: - if: ${{ inputs.scenarios == '' }} + if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',openclaw-tui-chat-correlation-vitest,') }} runs-on: ubuntu-latest timeout-minutes: 75 env: From fac49253e0a58cf1ae45f073c4ada7d977952a37 Mon Sep 17 00:00:00 2001 From: Julie Yaunches Date: Thu, 11 Jun 2026 10:45:20 -0400 Subject: [PATCH 07/11] test(e2e): tighten inference routing boundaries --- .../live/inference-routing.test.ts | 60 +++++++--- .../e2e-scenarios-workflow.test.ts | 50 +++++++++ tools/e2e-scenarios/workflow-boundary.mts | 106 ++++++++++++++++++ 3 files changed, 201 insertions(+), 15 deletions(-) diff --git a/test/e2e-scenario/live/inference-routing.test.ts b/test/e2e-scenario/live/inference-routing.test.ts index 0a3daca40f..a3fb4ef7a6 100644 --- a/test/e2e-scenario/live/inference-routing.test.ts +++ b/test/e2e-scenario/live/inference-routing.test.ts @@ -44,6 +44,10 @@ const TRANSPORT_CLASSIFICATION_PATTERN = const liveTest = shouldRunLiveE2EScenarios() ? test : test.skip; function shouldRunProviderSmoke(provider: "openai" | "anthropic" | "compatible"): boolean { + // The legacy shell script auto-ran these smokes when provider secrets were + // present. This Vitest migration requires an explicit opt-in so PR-safe jobs + // cannot spend third-party quota accidentally; any future secret-backed lane + // must set NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE=all or a provider name. const requested = process.env.NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE?.trim().toLowerCase(); return requested === "1" || requested === "true" || requested === "all" || requested === provider; } @@ -72,7 +76,6 @@ interface RawRunOptions { readonly cwd?: string; readonly env?: NodeJS.ProcessEnv; readonly redactionValues?: readonly string[]; - readonly stdin?: string; readonly timeoutMs?: number; } @@ -126,7 +129,7 @@ async function runRawCommand( cwd: options.cwd ?? 
REPO_ROOT, detached: true, env: options.env, - stdio: [options.stdin === undefined ? "ignore" : "pipe", "pipe", "pipe"], + stdio: ["ignore", "pipe", "pipe"], }); const fullCommand = [command, ...args]; let stdout = ""; @@ -150,10 +153,6 @@ async function runRawCommand( }, timeoutMs); timeout.unref(); - if (options.stdin !== undefined) { - child.stdin?.end(options.stdin); - } - child.stdout?.on("data", (chunk: Buffer) => { stdout += chunk.toString("utf8"); }); @@ -209,11 +208,22 @@ async function runNemoclawCli( return runRawCommand(process.execPath, [CLI_ENTRYPOINT, ...args], options); } +function rawOpenShellEnv(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv { + return { + ...buildAvailabilityProbeEnv(), + OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw", + ...extra, + }; +} + async function runOpenShell( args: readonly string[], options: RawRunOptions, ): Promise { - return runRawCommand("openshell", args, options); + return runRawCommand("openshell", args, { + ...options, + env: rawOpenShellEnv(options.env), + }); } async function requireLivePrerequisites(host: HostCliClient, skip: SkipFn): Promise { @@ -682,13 +692,17 @@ liveTest( } const scanScript = [ + "const crypto=require('crypto')", "const fs=require('fs')", "const {execFileSync}=require('child_process')", - "const key=fs.readFileSync(0,'utf8')", - "if(!key){console.log('NO_KEY_PROVIDED');process.exit(0)}", + "const len=Number(process.env.KEY_LEN||'0')", + "const salt=process.env.SCAN_SALT||''", + "const target=process.env.TARGET_HASH||''", + "const digest=(value)=>crypto.createHash('sha256').update(salt).update(value).digest('hex')", + "if(!len||!salt||!target){console.log('SCAN_CONFIG_MISSING');process.exit(0)}", "let out=''", "try{out=execFileSync('sh',['-lc','find /sandbox /home /tmp -type f -size -1M 2>/dev/null | head -200'],{encoding:'utf8'})}catch{console.log('SCAN_ERROR');process.exit(0)}", - "for(const file of out.trim().split(/\\n/).filter(Boolean)){try{const 
content=fs.readFileSync(file,'utf8');if(content.includes(key))console.log('FOUND:'+file)}catch{}}", + "for(const file of out.trim().split(/\\n/).filter(Boolean)){try{const content=fs.readFileSync(file,'utf8');for(let i=0;i<=content.length-len;i++){if(digest(content.slice(i,i+len))===target){console.log('FOUND:'+file);break}}}catch{}}", "console.log('SCAN_DONE')", ].join(";"); const leakCanary = `nemoclaw-fs-scan-canary-${crypto.randomUUID()}`; @@ -703,13 +717,21 @@ liveTest( }, ); expect(plantCanary.exitCode, resultText(plantCanary)).toBe(0); + const canarySalt = crypto.randomUUID(); const canaryScan = await runOpenShell( ["sandbox", "exec", "-n", sandboxName, "--", "node", "-e", scanScript], { artifactName: "tc-inf-05-sandbox-filesystem-canary-scan", artifacts, - env: buildAvailabilityProbeEnv(), - stdin: leakCanary, + env: rawOpenShellEnv({ + KEY_LEN: String(leakCanary.length), + SCAN_SALT: canarySalt, + TARGET_HASH: crypto + .createHash("sha256") + .update(canarySalt) + .update(leakCanary) + .digest("hex"), + }), timeoutMs: 90_000, }, ); @@ -726,18 +748,26 @@ liveTest( ); expect(removeCanary.exitCode, resultText(removeCanary)).toBe(0); + const secretScanSalt = crypto.randomUUID(); const filesystemScan = await runOpenShell( ["sandbox", "exec", "-n", sandboxName, "--", "node", "-e", scanScript], { artifactName: "tc-inf-05-sandbox-filesystem-scan", artifacts, - env: buildAvailabilityProbeEnv(), + env: rawOpenShellEnv({ + KEY_LEN: String(apiKey.length), + SCAN_SALT: secretScanSalt, + TARGET_HASH: crypto + .createHash("sha256") + .update(secretScanSalt) + .update(apiKey) + .digest("hex"), + }), redactionValues: [apiKey], - stdin: apiKey, timeoutMs: 90_000, }, ); - expect(filesystemScan.stdout).not.toContain("NO_KEY_PROVIDED"); + expect(filesystemScan.stdout).not.toContain("SCAN_CONFIG_MISSING"); expect(filesystemScan.stdout).not.toContain("FOUND:"); expect(filesystemScan.stdout, redactedResultText(filesystemScan)).toContain("SCAN_DONE"); diff --git 
a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts index bacbbfd369..8d6e9cea6c 100644 --- a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts +++ b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts @@ -112,6 +112,36 @@ jobs: path: .e2e/onboard-negative-paths/ include-hidden-files: true if-no-files-found: error + inference-routing-vitest: + runs-on: ubuntu-latest + needs: generate-matrix + if: \${{ inputs.scenarios != '' }} + env: + E2E_ARTIFACT_DIR: \${{ github.workspace }}/.e2e/inference-routing + NEMOCLAW_RUN_E2E_SCENARIOS: "0" + NVIDIA_API_KEY: \${{ secrets.NVIDIA_API_KEY }} + NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE: all + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: true + - name: Set up Node + uses: actions/setup-node@v4 + env: + OPENAI_API_KEY: \${{ secrets.OPENAI_API_KEY }} + - name: Install root dependencies + run: npm install + - name: Run inference routing live test + env: + COMPATIBLE_API_KEY: \${{ secrets.COMPATIBLE_API_KEY }} + run: npx vitest run --project e2e-scenarios-live "\${{ inputs.test_filter }}" + - name: Upload inference routing artifacts + uses: actions/upload-artifact@v4 + with: + name: inference-routing + path: e2e-artifacts/vitest/ + include-hidden-files: true + if-no-files-found: error `, ); @@ -191,6 +221,26 @@ jobs: "onboard-negative-paths-vitest artifact upload must set include-hidden-files: false", "onboard-negative-paths-vitest artifact upload must ignore missing fixture artifacts", "onboard-negative-paths-vitest artifact upload retention-days must be 14", + "inference-routing-vitest job must run independently of generate-matrix", + "inference-routing-vitest job must run independently of workflow dispatch scenario filters", + "inference-routing-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1", + "inference-routing-vitest job must write artifacts under e2e-artifacts/vitest/inference-routing", + 
"inference-routing-vitest job env must not include NVIDIA_API_KEY", + "inference-routing-vitest job env must not include NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE", + "inference-routing-vitest checkout action must be pinned to a full commit SHA", + "inference-routing-vitest checkout step must set persist-credentials=false", + "inference-routing-vitest step 'Set up Node' env must not include OPENAI_API_KEY", + "inference-routing-vitest setup-node action must be pinned to a full commit SHA", + "inference-routing-vitest job missing step: Build CLI", + "inference-routing-vitest step 'Run inference routing live test' env must not include COMPATIBLE_API_KEY", + "step 'Run inference routing live test' run script must not interpolate dispatch inputs directly", + "step 'Run inference routing live test' run script must include test/e2e-scenario/live/inference-routing.test.ts", + "inference-routing-vitest upload-artifact action must be pinned to a full commit SHA", + "inference-routing-vitest artifact upload name must be stable", + "artifact upload path must include e2e-artifacts/vitest/inference-routing/", + "inference-routing-vitest artifact upload must set include-hidden-files: false", + "inference-routing-vitest artifact upload must ignore missing fixture artifacts", + "inference-routing-vitest artifact upload retention-days must be 14", ]), ); } finally { diff --git a/tools/e2e-scenarios/workflow-boundary.mts b/tools/e2e-scenarios/workflow-boundary.mts index 9b06e6d219..a2408eeffe 100644 --- a/tools/e2e-scenarios/workflow-boundary.mts +++ b/tools/e2e-scenarios/workflow-boundary.mts @@ -210,6 +210,111 @@ function validateOpenShellVersionPinVitestJob(errors: string[], jobs: WorkflowRe } +function validateInferenceRoutingVitestJob(errors: string[], jobs: WorkflowRecord): void { + const jobName = "inference-routing-vitest"; + const job = asRecord(jobs[jobName]); + if (Object.keys(job).length === 0) { + errors.push("workflow missing inference-routing-vitest job"); + return; + 
} + + if (job["runs-on"] !== "ubuntu-latest") { + errors.push("inference-routing-vitest job must run on ubuntu-latest"); + } + if (Object.hasOwn(job, "needs")) { + errors.push("inference-routing-vitest job must run independently of generate-matrix"); + } + if (Object.hasOwn(job, "if")) { + errors.push( + "inference-routing-vitest job must run independently of workflow dispatch scenario filters", + ); + } + + const providerEnvNames = [ + "NVIDIA_API_KEY", + "OPENAI_API_KEY", + "ANTHROPIC_API_KEY", + "COMPATIBLE_API_KEY", + "NEMOCLAW_ENDPOINT_URL", + "NEMOCLAW_INFERENCE_ROUTING_PROVIDER_SMOKE", + ]; + const jobEnv = asRecord(job.env); + if (jobEnv.NEMOCLAW_RUN_E2E_SCENARIOS !== "1") { + errors.push("inference-routing-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1"); + } + if ( + jobEnv.E2E_ARTIFACT_DIR !== "${{ github.workspace }}/e2e-artifacts/vitest/inference-routing" + ) { + errors.push( + "inference-routing-vitest job must write artifacts under e2e-artifacts/vitest/inference-routing", + ); + } + for (const envName of providerEnvNames) { + requireEnvDoesNotExposeSecret(errors, "inference-routing-vitest job", jobEnv, envName); + } + + const steps = asSteps(job.steps); + requireNoDispatchInputInterpolation(errors, steps); + for (const step of steps) { + const stepEnv = asRecord(step.env); + for (const envName of providerEnvNames) { + requireEnvDoesNotExposeSecret( + errors, + `inference-routing-vitest step '${step.name ?? step.uses ?? 
""}'`, + stepEnv, + envName, + ); + } + } + + const checkout = steps.find((step) => stringValue(step.uses).startsWith("actions/checkout@")); + if (!checkout) errors.push("inference-routing-vitest job missing checkout step"); + requireFullShaAction(errors, checkout, "inference-routing-vitest checkout"); + if (asRecord(checkout?.with)["persist-credentials"] !== false) { + errors.push("inference-routing-vitest checkout step must set persist-credentials=false"); + } + + const setupNode = namedStep(steps, "Set up Node"); + if (!setupNode) errors.push("inference-routing-vitest job missing step: Set up Node"); + requireFullShaAction(errors, setupNode, "inference-routing-vitest setup-node"); + + const installRootDependencies = requireJobStep( + errors, + jobName, + steps, + "Install root dependencies", + ); + requireRunContains(errors, installRootDependencies, "npm ci --ignore-scripts"); + + const buildCli = requireJobStep(errors, jobName, steps, "Build CLI"); + requireRunContains(errors, buildCli, "npm run build:cli"); + + const runVitest = requireJobStep(errors, jobName, steps, "Run inference routing live test"); + requireRunContains(errors, runVitest, "npx vitest run --project e2e-scenarios-live"); + requireRunContains(errors, runVitest, "test/e2e-scenario/live/inference-routing.test.ts"); + + const upload = requireJobStep(errors, jobName, steps, "Upload inference routing artifacts"); + requireFullShaAction(errors, upload, "inference-routing-vitest upload-artifact"); + const uploadWith = asRecord(upload?.with); + if (uploadWith.name !== "e2e-vitest-scenarios-inference-routing") { + errors.push("inference-routing-vitest artifact upload name must be stable"); + } + const uploadPath = stringValue(uploadWith.path); + requireUploadPathContains(errors, uploadPath, "e2e-artifacts/vitest/inference-routing/"); + if (uploadPath.trim() === "e2e-artifacts/vitest/") { + errors.push("inference-routing-vitest artifact upload path must not list all Vitest artifacts"); + } + if 
(uploadWith["include-hidden-files"] !== false) { + errors.push("inference-routing-vitest artifact upload must set include-hidden-files: false"); + } + if (uploadWith["if-no-files-found"] !== "ignore") { + errors.push("inference-routing-vitest artifact upload must ignore missing fixture artifacts"); + } + if (uploadWith["retention-days"] !== 14) { + errors.push("inference-routing-vitest artifact upload retention-days must be 14"); + } +} + function validateOnboardNegativePathsVitestJob(errors: string[], jobs: WorkflowRecord): void { const jobName = "onboard-negative-paths-vitest"; const job = asRecord(jobs[jobName]); @@ -482,6 +587,7 @@ export function validateE2eVitestScenariosWorkflowBoundary( validateOpenShellVersionPinVitestJob(errors, jobs); validateOnboardNegativePathsVitestJob(errors, jobs); + validateInferenceRoutingVitestJob(errors, jobs); return errors; } From f89ad51dd8295d4fac39f9de29956e9ea2116ede Mon Sep 17 00:00:00 2001 From: Julie Yaunches Date: Thu, 11 Jun 2026 10:46:48 -0400 Subject: [PATCH 08/11] test(e2e): align workflow selector guard --- .../e2e-scenarios-workflow.test.ts | 6 ++-- tools/e2e-scenarios/workflow-boundary.mts | 29 +++++++++---------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts index 8d6e9cea6c..767fe2b8d4 100644 --- a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts +++ b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts @@ -184,7 +184,7 @@ jobs: "artifact upload retention-days must be 14", "upload-artifact action must be pinned to a full commit SHA", "openshell-version-pin-vitest job must run independently of generate-matrix", - "openshell-version-pin-vitest job must run independently of workflow dispatch scenario filters", + "openshell-version-pin-vitest job must use the trusted jobs selector", "openshell-version-pin-vitest job must set 
NEMOCLAW_RUN_E2E_SCENARIOS=1", "openshell-version-pin-vitest job must write artifacts under e2e-artifacts/vitest/openshell-version-pin", "openshell-version-pin-vitest job env must not include NVIDIA_API_KEY", @@ -203,7 +203,7 @@ jobs: "openshell-version-pin-vitest artifact upload must ignore missing fixture artifacts", "openshell-version-pin-vitest artifact upload retention-days must be 14", "onboard-negative-paths-vitest job must run independently of generate-matrix", - "onboard-negative-paths-vitest job must run independently of workflow dispatch scenario filters", + "onboard-negative-paths-vitest job must use the trusted jobs selector", "onboard-negative-paths-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1", "onboard-negative-paths-vitest job must write artifacts under e2e-artifacts/vitest/onboard-negative-paths", "onboard-negative-paths-vitest job env must not include NVIDIA_API_KEY", @@ -222,7 +222,7 @@ jobs: "onboard-negative-paths-vitest artifact upload must ignore missing fixture artifacts", "onboard-negative-paths-vitest artifact upload retention-days must be 14", "inference-routing-vitest job must run independently of generate-matrix", - "inference-routing-vitest job must run independently of workflow dispatch scenario filters", + "inference-routing-vitest job must use the trusted jobs selector", "inference-routing-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1", "inference-routing-vitest job must write artifacts under e2e-artifacts/vitest/inference-routing", "inference-routing-vitest job env must not include NVIDIA_API_KEY", diff --git a/tools/e2e-scenarios/workflow-boundary.mts b/tools/e2e-scenarios/workflow-boundary.mts index a2408eeffe..133a65581d 100644 --- a/tools/e2e-scenarios/workflow-boundary.mts +++ b/tools/e2e-scenarios/workflow-boundary.mts @@ -122,6 +122,17 @@ function requireNoDispatchInputInterpolation( } } +function requireFreeStandingJobSelector( + errors: string[], + jobName: string, + job: WorkflowRecord, +): void { + const expected 
= `${"${{"} inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',${jobName},') ${"}}"}`; + if (job.if !== expected) { + errors.push(`${jobName} job must use the trusted jobs selector`); + } +} + function validateOpenShellVersionPinVitestJob(errors: string[], jobs: WorkflowRecord): void { const jobName = "openshell-version-pin-vitest"; const job = asRecord(jobs[jobName]); @@ -136,11 +147,7 @@ function validateOpenShellVersionPinVitestJob(errors: string[], jobs: WorkflowRe if (Object.hasOwn(job, "needs")) { errors.push("openshell-version-pin-vitest job must run independently of generate-matrix"); } - if (Object.hasOwn(job, "if")) { - errors.push( - "openshell-version-pin-vitest job must run independently of workflow dispatch scenario filters", - ); - } + requireFreeStandingJobSelector(errors, jobName, job); const jobEnv = asRecord(job.env); if (jobEnv.NEMOCLAW_RUN_E2E_SCENARIOS !== "1") { @@ -224,11 +231,7 @@ function validateInferenceRoutingVitestJob(errors: string[], jobs: WorkflowRecor if (Object.hasOwn(job, "needs")) { errors.push("inference-routing-vitest job must run independently of generate-matrix"); } - if (Object.hasOwn(job, "if")) { - errors.push( - "inference-routing-vitest job must run independently of workflow dispatch scenario filters", - ); - } + requireFreeStandingJobSelector(errors, jobName, job); const providerEnvNames = [ "NVIDIA_API_KEY", @@ -329,11 +332,7 @@ function validateOnboardNegativePathsVitestJob(errors: string[], jobs: WorkflowR if (Object.hasOwn(job, "needs")) { errors.push("onboard-negative-paths-vitest job must run independently of generate-matrix"); } - if (Object.hasOwn(job, "if")) { - errors.push( - "onboard-negative-paths-vitest job must run independently of workflow dispatch scenario filters", - ); - } + requireFreeStandingJobSelector(errors, jobName, job); const jobEnv = asRecord(job.env); if (jobEnv.NEMOCLAW_RUN_E2E_SCENARIOS !== "1") { From c3b9a688cf7dc8bedf5dc50be026668c3026d03a Mon Sep 17 00:00:00 2001 From: Julie 
Yaunches Date: Thu, 11 Jun 2026 10:52:26 -0400 Subject: [PATCH 09/11] ci(e2e): gate recovery job by selector --- .github/workflows/e2e-vitest-scenarios.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml index 66b948e69d..2e63f463bf 100644 --- a/.github/workflows/e2e-vitest-scenarios.yaml +++ b/.github/workflows/e2e-vitest-scenarios.yaml @@ -398,6 +398,7 @@ jobs: # restore the /tmp guard chain after pod recreate). Will fail on `main` # until the #2701 fix lands; flips green afterwards. gateway-guard-recovery: + if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',gateway-guard-recovery,') }} runs-on: ubuntu-latest timeout-minutes: 45 env: @@ -495,6 +496,7 @@ jobs: openshell-version-pin-vitest, onboard-negative-paths-vitest, openclaw-tui-chat-correlation-vitest, + inference-routing-vitest, gateway-guard-recovery, ] if: ${{ always() && github.event_name == 'workflow_dispatch' }} From ef988df2d2b97a991e459ff7782053ac67f6fdfc Mon Sep 17 00:00:00 2001 From: Julie Yaunches Date: Thu, 11 Jun 2026 11:01:32 -0400 Subject: [PATCH 10/11] ci(e2e): validate selective job input --- .github/workflows/e2e-vitest-scenarios.yaml | 58 ++++++++++++++-- .../live/inference-routing.test.ts | 28 ++++++-- .../e2e-scenarios-workflow.test.ts | 8 ++- tools/e2e-scenarios/workflow-boundary.mts | 67 ++++++++++++++++--- 4 files changed, 135 insertions(+), 26 deletions(-) diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml index 2e63f463bf..7b98c5686a 100644 --- a/.github/workflows/e2e-vitest-scenarios.yaml +++ b/.github/workflows/e2e-vitest-scenarios.yaml @@ -30,6 +30,48 @@ concurrency: cancel-in-progress: false jobs: + validate-jobs: + runs-on: ubuntu-latest + outputs: + openshell_version_pin_vitest: ${{ steps.validate.outputs.openshell_version_pin_vitest }} + onboard_negative_paths_vitest: ${{ 
steps.validate.outputs.onboard_negative_paths_vitest }} + inference_routing_vitest: ${{ steps.validate.outputs.inference_routing_vitest }} + openclaw_tui_chat_correlation_vitest: ${{ steps.validate.outputs.openclaw_tui_chat_correlation_vitest }} + gateway_guard_recovery: ${{ steps.validate.outputs.gateway_guard_recovery }} + steps: + - id: validate + name: Validate free-standing job selector + env: + JOBS: ${{ inputs.jobs }} + run: | + set -euo pipefail + declare -A allowed=( + [openshell-version-pin-vitest]=openshell_version_pin_vitest + [onboard-negative-paths-vitest]=onboard_negative_paths_vitest + [inference-routing-vitest]=inference_routing_vitest + [openclaw-tui-chat-correlation-vitest]=openclaw_tui_chat_correlation_vitest + [gateway-guard-recovery]=gateway_guard_recovery + ) + declare -A selected=() + if [ -n "${JOBS}" ]; then + IFS=',' read -ra requested <<< "${JOBS}" + for job in "${requested[@]}"; do + if [[ ! "${job}" =~ ^[A-Za-z0-9_-]+$ ]] || [[ -z "${allowed[$job]:-}" ]]; then + echo "::error::Invalid jobs input: ${job}" >&2 + exit 1 + fi + selected[$job]=1 + done + fi + for job in "${!allowed[@]}"; do + output="${allowed[$job]}" + if [ -z "${JOBS}" ] || [ -n "${selected[$job]:-}" ]; then + echo "${output}=true" >> "$GITHUB_OUTPUT" + else + echo "${output}=false" >> "$GITHUB_OUTPUT" + fi + done + generate-matrix: runs-on: ubuntu-latest outputs: @@ -185,7 +227,8 @@ jobs: # because the matrix above only runs registry-scenarios.test.ts. Modeled on # #5049's free-standing pattern. 
openshell-version-pin-vitest: - if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',openshell-version-pin-vitest,') }} + needs: validate-jobs + if: ${{ needs.validate-jobs.outputs.openshell_version_pin_vitest == 'true' }} runs-on: ubuntu-latest timeout-minutes: 15 env: @@ -225,7 +268,8 @@ jobs: retention-days: 14 onboard-negative-paths-vitest: - if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',onboard-negative-paths-vitest,') }} + needs: validate-jobs + if: ${{ needs.validate-jobs.outputs.onboard_negative_paths_vitest == 'true' }} runs-on: ubuntu-latest timeout-minutes: 15 env: @@ -269,7 +313,8 @@ jobs: retention-days: 14 inference-routing-vitest: - if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',inference-routing-vitest,') }} + needs: validate-jobs + if: ${{ needs.validate-jobs.outputs.inference_routing_vitest == 'true' }} runs-on: ubuntu-latest timeout-minutes: 45 env: @@ -319,7 +364,8 @@ jobs: # protocol/history contract. The retained legacy bash lane remains the # source for full closeout until a later PR proves replacement and deletes it. openclaw-tui-chat-correlation-vitest: - if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',openclaw-tui-chat-correlation-vitest,') }} + needs: validate-jobs + if: ${{ needs.validate-jobs.outputs.openclaw_tui_chat_correlation_vitest == 'true' }} runs-on: ubuntu-latest timeout-minutes: 75 env: @@ -398,7 +444,8 @@ jobs: # restore the /tmp guard chain after pod recreate). Will fail on `main` # until the #2701 fix lands; flips green afterwards. 
gateway-guard-recovery: - if: ${{ inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',gateway-guard-recovery,') }} + needs: validate-jobs + if: ${{ needs.validate-jobs.outputs.gateway_guard_recovery == 'true' }} runs-on: ubuntu-latest timeout-minutes: 45 env: @@ -491,6 +538,7 @@ jobs: runs-on: ubuntu-latest needs: [ + validate-jobs, generate-matrix, live-scenarios, openshell-version-pin-vitest, diff --git a/test/e2e-scenario/live/inference-routing.test.ts b/test/e2e-scenario/live/inference-routing.test.ts index a3fb4ef7a6..ff958867fa 100644 --- a/test/e2e-scenario/live/inference-routing.test.ts +++ b/test/e2e-scenario/live/inference-routing.test.ts @@ -264,12 +264,26 @@ interface CleanupSandboxOptions { readonly strict?: boolean; } -async function optionalCleanupStep(run: () => Promise): Promise { +function isExpectedPreOnboardCleanupMiss(text: string): boolean { + return /does not exist|run 'nemoclaw onboard'|no active gateway|not found|no such file|enoent/i.test( + text, + ); +} + +async function optionalCleanupStep( + label: string, + run: () => Promise<{ exitCode: number | null; stdout: string; stderr: string }>, +): Promise { try { - await run(); - } catch { - // Pre-onboard cleanup is best-effort because a fresh runner may not have - // OpenShell installed until `nemoclaw onboard` reaches that phase. + const result = await run(); + if (result.exitCode === 0) return; + const text = resultText(result); + if (isExpectedPreOnboardCleanupMiss(text)) return; + throw new Error(`${label} failed unexpectedly during pre-onboard cleanup: ${text}`); + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + if (isExpectedPreOnboardCleanupMiss(message)) return; + throw error; } } @@ -288,14 +302,14 @@ async function cleanupSandbox( options: CleanupSandboxOptions = {}, ): Promise { if (!options.strict) { - await optionalCleanupStep(() => + await optionalCleanupStep("nemoclaw destroy", () => host.command(process.execPath, [CLI_ENTRYPOINT, sandboxName, "destroy", "--yes"], { artifactName: `cleanup-nemoclaw-destroy-${sandboxName}`, env: buildAvailabilityProbeEnv(), timeoutMs: 120_000, }), ); - await optionalCleanupStep(() => + await optionalCleanupStep("openshell sandbox delete", () => sandbox.openshell(["sandbox", "delete", sandboxName], { artifactName: `cleanup-openshell-sandbox-delete-${sandboxName}`, env: buildAvailabilityProbeEnv(), diff --git a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts index 767fe2b8d4..1380fd3b63 100644 --- a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts +++ b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts @@ -150,6 +150,7 @@ jobs: expect(errors).toEqual( expect.arrayContaining([ "workflow_dispatch missing input: scenarios", + "workflow_dispatch missing input: jobs", "workflow_dispatch must not expose legacy test_filter input", "workflow missing generate-matrix job", "generate-matrix job must run on ubuntu-latest", @@ -183,7 +184,8 @@ jobs: "artifact upload path must include e2e-artifacts/vitest/${{ matrix.id }}/shell/", "artifact upload retention-days must be 14", "upload-artifact action must be pinned to a full commit SHA", - "openshell-version-pin-vitest job must run independently of generate-matrix", + "workflow missing validate-jobs job", + "openshell-version-pin-vitest job must depend on validate-jobs", "openshell-version-pin-vitest job must use the trusted jobs selector", "openshell-version-pin-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1", "openshell-version-pin-vitest job must write 
artifacts under e2e-artifacts/vitest/openshell-version-pin", @@ -202,7 +204,7 @@ jobs: "openshell-version-pin-vitest artifact upload must set include-hidden-files: false", "openshell-version-pin-vitest artifact upload must ignore missing fixture artifacts", "openshell-version-pin-vitest artifact upload retention-days must be 14", - "onboard-negative-paths-vitest job must run independently of generate-matrix", + "onboard-negative-paths-vitest job must depend on validate-jobs", "onboard-negative-paths-vitest job must use the trusted jobs selector", "onboard-negative-paths-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1", "onboard-negative-paths-vitest job must write artifacts under e2e-artifacts/vitest/onboard-negative-paths", @@ -221,7 +223,7 @@ jobs: "onboard-negative-paths-vitest artifact upload must set include-hidden-files: false", "onboard-negative-paths-vitest artifact upload must ignore missing fixture artifacts", "onboard-negative-paths-vitest artifact upload retention-days must be 14", - "inference-routing-vitest job must run independently of generate-matrix", + "inference-routing-vitest job must depend on validate-jobs", "inference-routing-vitest job must use the trusted jobs selector", "inference-routing-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1", "inference-routing-vitest job must write artifacts under e2e-artifacts/vitest/inference-routing", diff --git a/tools/e2e-scenarios/workflow-boundary.mts b/tools/e2e-scenarios/workflow-boundary.mts index 133a65581d..ef925dc1c4 100644 --- a/tools/e2e-scenarios/workflow-boundary.mts +++ b/tools/e2e-scenarios/workflow-boundary.mts @@ -122,12 +122,22 @@ function requireNoDispatchInputInterpolation( } } +const SELECTABLE_FREE_STANDING_JOBS = [ + "openshell-version-pin-vitest", + "onboard-negative-paths-vitest", + "inference-routing-vitest", + "openclaw-tui-chat-correlation-vitest", + "gateway-guard-recovery", +] as const; + +type SelectableFreeStandingJob = (typeof SELECTABLE_FREE_STANDING_JOBS)[number]; + 
function requireFreeStandingJobSelector( errors: string[], - jobName: string, + jobName: SelectableFreeStandingJob, job: WorkflowRecord, ): void { - const expected = `${"${{"} inputs.jobs == '' || contains(format(',{0},', inputs.jobs), ',${jobName},') ${"}}"}`; + const expected = `${"${{"} needs.validate-jobs.outputs.${jobName.replaceAll("-", "_")} == 'true' ${"}}"}`; if (job.if !== expected) { errors.push(`${jobName} job must use the trusted jobs selector`); } @@ -144,9 +154,7 @@ function validateOpenShellVersionPinVitestJob(errors: string[], jobs: WorkflowRe if (job["runs-on"] !== "ubuntu-latest") { errors.push("openshell-version-pin-vitest job must run on ubuntu-latest"); } - if (Object.hasOwn(job, "needs")) { - errors.push("openshell-version-pin-vitest job must run independently of generate-matrix"); - } + requireNeedsValidateJobs(errors, jobName, job); requireFreeStandingJobSelector(errors, jobName, job); const jobEnv = asRecord(job.env); @@ -228,9 +236,7 @@ function validateInferenceRoutingVitestJob(errors: string[], jobs: WorkflowRecor if (job["runs-on"] !== "ubuntu-latest") { errors.push("inference-routing-vitest job must run on ubuntu-latest"); } - if (Object.hasOwn(job, "needs")) { - errors.push("inference-routing-vitest job must run independently of generate-matrix"); - } + requireNeedsValidateJobs(errors, jobName, job); requireFreeStandingJobSelector(errors, jobName, job); const providerEnvNames = [ @@ -329,9 +335,7 @@ function validateOnboardNegativePathsVitestJob(errors: string[], jobs: WorkflowR if (job["runs-on"] !== "ubuntu-latest") { errors.push("onboard-negative-paths-vitest job must run on ubuntu-latest"); } - if (Object.hasOwn(job, "needs")) { - errors.push("onboard-negative-paths-vitest job must run independently of generate-matrix"); - } + requireNeedsValidateJobs(errors, jobName, job); requireFreeStandingJobSelector(errors, jobName, job); const jobEnv = asRecord(job.env); @@ -404,6 +408,45 @@ function 
validateOnboardNegativePathsVitestJob(errors: string[], jobs: WorkflowR } } +function validateJobsGuard(errors: string[], jobs: WorkflowRecord): void { + const jobName = "validate-jobs"; + const job = asRecord(jobs[jobName]); + if (Object.keys(job).length === 0) { + errors.push("workflow missing validate-jobs job"); + return; + } + + if (job["runs-on"] !== "ubuntu-latest") { + errors.push("validate-jobs job must run on ubuntu-latest"); + } + const outputs = asRecord(job.outputs); + for (const selectable of SELECTABLE_FREE_STANDING_JOBS) { + const key = selectable.replaceAll("-", "_"); + if (outputs[key] !== `${"${{"} steps.validate.outputs.${key} ${"}}"}`) { + errors.push(`validate-jobs must expose ${key} output`); + } + } + + const steps = asSteps(job.steps); + requireNoDispatchInputInterpolation(errors, steps); + const validate = requireJobStep(errors, jobName, steps, "Validate free-standing job selector"); + const validateEnv = asRecord(validate?.env); + if (validateEnv.JOBS !== "${{ inputs.jobs }}") { + errors.push("validate-jobs step must pass jobs through JOBS env"); + } + for (const selectable of SELECTABLE_FREE_STANDING_JOBS) { + requireRunContains(errors, validate, selectable); + requireRunContains(errors, validate, selectable.replaceAll("-", "_")); + } + requireRunContains(errors, validate, "Invalid jobs input"); +} + +function requireNeedsValidateJobs(errors: string[], jobName: string, job: WorkflowRecord): void { + if (job.needs !== "validate-jobs") { + errors.push(`${jobName} job must depend on validate-jobs`); + } +} + export function validateE2eVitestScenariosWorkflowBoundary( workflowPath = DEFAULT_VITEST_WORKFLOW_PATH, ): string[] { @@ -416,6 +459,7 @@ export function validateE2eVitestScenariosWorkflowBoundary( const dispatchInputs = asRecord(workflowDispatch.inputs); requireInput(errors, dispatchInputs, "scenarios"); + requireInput(errors, dispatchInputs, "jobs"); if (Object.hasOwn(dispatchInputs, "test_filter")) { errors.push("workflow_dispatch 
must not expose legacy test_filter input"); } @@ -424,6 +468,7 @@ export function validateE2eVitestScenariosWorkflowBoundary( if (permissions.contents !== "read") errors.push("workflow permissions.contents must be read"); const jobs = asRecord(workflow.jobs); + validateJobsGuard(errors, jobs); const generateMatrix = asRecord(jobs["generate-matrix"]); if (Object.keys(generateMatrix).length === 0) errors.push("workflow missing generate-matrix job"); if (generateMatrix["runs-on"] !== "ubuntu-latest") { From 3d32327471232bc3e254215bfb36393ba1957c3c Mon Sep 17 00:00:00 2001 From: Julie Yaunches Date: Thu, 11 Jun 2026 12:34:50 -0400 Subject: [PATCH 11/11] ci(e2e): align inference-routing-vitest dispatch --- .github/workflows/e2e-vitest-scenarios.yaml | 51 ++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml index 65e06c7eda..e51b444f2e 100644 --- a/.github/workflows/e2e-vitest-scenarios.yaml +++ b/.github/workflows/e2e-vitest-scenarios.yaml @@ -40,7 +40,7 @@ jobs: SCENARIOS: ${{ inputs.scenarios }} run: | set -euo pipefail - allowed_jobs="openshell-version-pin-vitest,onboard-negative-paths-vitest,openclaw-tui-chat-correlation-vitest,gateway-guard-recovery" + allowed_jobs="openshell-version-pin-vitest,onboard-negative-paths-vitest,openclaw-tui-chat-correlation-vitest,gateway-guard-recovery,inference-routing-vitest" if [ -n "${JOBS}" ] && [ -n "${SCENARIOS}" ]; then echo "::error::Use either scenarios or jobs, not both." 
>&2 exit 1 @@ -298,6 +298,54 @@ jobs: if-no-files-found: ignore retention-days: 14 + inference-routing-vitest: + needs: validate-jobs + if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',inference-routing-vitest,') }} + runs-on: ubuntu-latest + timeout-minutes: 45 + env: + E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/inference-routing + NEMOCLAW_RUN_E2E_SCENARIOS: "1" + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + + - name: Set up Node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0 + with: + node-version: 22 + cache: npm + + - name: Install root dependencies + run: npm ci --ignore-scripts + + - name: Build CLI + run: npm run build:cli + + - name: Run inference routing live test + # Direct Vitest coverage for test/e2e/test-inference-routing.sh. The + # always-on PR-safe slices prove invalid-key and unreachable-endpoint + # classification/cleanup without spending live provider quota; real + # NVIDIA credential isolation and third-party provider smokes stay + # skipped unless their secrets are explicitly supplied by a future + # workflow. + run: | + set -euo pipefail + npx vitest run --project e2e-scenarios-live \ + test/e2e-scenario/live/inference-routing.test.ts \ + --silent=false --reporter=default + + - name: Upload inference routing artifacts + if: always() + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: e2e-vitest-scenarios-inference-routing + path: e2e-artifacts/vitest/inference-routing/ + include-hidden-files: false + if-no-files-found: ignore + retention-days: 14 + # Focused coverage slice for the #2603/#3145 OpenClaw websocket # protocol/history contract. The retained legacy bash lane remains the # source for full closeout until a later PR proves replacement and deletes it. 
@@ -481,6 +529,7 @@ jobs: live-scenarios, openshell-version-pin-vitest, onboard-negative-paths-vitest, + inference-routing-vitest, openclaw-tui-chat-correlation-vitest, gateway-guard-recovery, ]