From 30d7e034085dc3868d09e9cdb2ce3d276127b5b7 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 12:13:47 -0700 Subject: [PATCH 01/14] test(e2e): migrate messaging compatible endpoint Signed-off-by: Carlos Villela --- .github/workflows/e2e-vitest-scenarios.yaml | 80 ++ .../messaging-compatible-endpoint.test.ts | 968 ++++++++++++++++++ .../live/network-policy-transient-provider.ts | 2 +- .../e2e-scenarios-workflow.test.ts | 49 +- .../network-policy-transient-provider.test.ts | 5 + tools/e2e-scenarios/free-standing-jobs.env | 6 +- tools/e2e-scenarios/workflow-boundary.mts | 152 +++ 7 files changed, 1257 insertions(+), 5 deletions(-) create mode 100644 test/e2e-scenario/live/messaging-compatible-endpoint.test.ts diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml index f199050c83..b6e3d45a20 100644 --- a/.github/workflows/e2e-vitest-scenarios.yaml +++ b/.github/workflows/e2e-vitest-scenarios.yaml @@ -1441,6 +1441,85 @@ jobs: if-no-files-found: ignore retention-days: 14 + messaging-compatible-endpoint-vitest: + needs: generate-matrix + if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',messaging-compatible-endpoint-vitest,') || contains(format(',{0},', inputs.scenarios), ',messaging-compatible-endpoint,') }} + runs-on: ubuntu-latest + timeout-minutes: 45 + env: + E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/messaging-compatible-endpoint + NEMOCLAW_CLI_BIN: ${{ github.workspace }}/bin/nemoclaw.js + NEMOCLAW_RUN_E2E_SCENARIOS: "1" + NEMOCLAW_SANDBOX_NAME: "e2e-msg-compat" + OPENSHELL_GATEWAY: "nemoclaw" + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + + - name: Authenticate to Docker Hub + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + shell: bash + run: | + set -euo pipefail + if [[ -z "${DOCKERHUB_USERNAME}" || -z "${DOCKERHUB_TOKEN}" ]]; then + echo "::notice::Docker Hub credentials not configured; continuing with anonymous pulls." + exit 0 + fi + login_succeeded=0 + for attempt in 1 2 3; do + if echo "${DOCKERHUB_TOKEN}" | timeout 30s docker login docker.io --username "${DOCKERHUB_USERNAME}" --password-stdin; then + login_succeeded=1 + break + fi + if [[ "$attempt" -lt 3 ]]; then + echo "::warning::Docker Hub login attempt ${attempt} failed; retrying." + sleep 5 + fi + done + if [[ "$login_succeeded" -ne 1 ]]; then + echo "::warning::Docker Hub login failed after 3 attempts; continuing with anonymous pulls." + fi + + - name: Set up Node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0 + with: + node-version: 22 + cache: npm + + - name: Install root dependencies + run: npm ci --ignore-scripts + + - name: Build CLI + run: npm run build:cli + + - name: Run messaging compatible endpoint live test + # Migrated from test/e2e/test-messaging-compatible-endpoint.sh. + # Preserves the fake OpenAI-compatible endpoint, Telegram messaging + # config, inference.local, OpenClaw agent-turn, and proxy hop-header + # strip boundaries without relying on real messaging/provider secrets. + env: + NEMOCLAW_COMPAT_MOCK_API_KEY: "fake-compatible-key-e2e" + TELEGRAM_ALLOWED_IDS: "123456789" + TELEGRAM_BOT_TOKEN: "test-fake-telegram-token-e2e" + run: | + set -euo pipefail + npx vitest run --project e2e-scenarios-live \ + test/e2e-scenario/live/messaging-compatible-endpoint.test.ts \ + --silent=false --reporter=default + + - name: Upload messaging compatible endpoint artifacts + if: always() + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: e2e-vitest-scenarios-messaging-compatible-endpoint + path: e2e-artifacts/vitest/messaging-compatible-endpoint/ + include-hidden-files: false + if-no-files-found: ignore + retention-days: 14 + launchable-smoke-vitest: needs: generate-matrix if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',launchable-smoke-vitest,') }} @@ -1886,6 +1965,7 @@ jobs: rebuild-openclaw-vitest, sandbox-rebuild-vitest, token-rotation-vitest, + messaging-compatible-endpoint-vitest, launchable-smoke-vitest, double-onboard-vitest, model-router-provider-routed-inference-vitest, diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts new file mode 100644 index 0000000000..9249849dfc --- /dev/null +++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts @@ -0,0 +1,968 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Live Vitest migration for test/e2e/test-messaging-compatible-endpoint.sh. + * + * This stays intentionally direct: the legacy contract is the real + * Docker/OpenShell/nemoclaw boundary with a local OpenAI-compatible endpoint + * mock, Telegram messaging config, sandbox inference.local routing, and an + * OpenClaw agent turn through the compatible endpoint proxy path. + */ + +import { randomUUID } from "node:crypto"; +import fs from "node:fs"; +import http from "node:http"; +import type { AddressInfo } from "node:net"; +import path from "node:path"; + +import { describe, it } from "vitest"; + +import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts"; +import type { HostCliClient } from "../fixtures/clients/host.ts"; +import { type SandboxClient, validateSandboxName } from "../fixtures/clients/sandbox.ts"; +import { expect, test } from "../fixtures/e2e-test.ts"; +import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts"; +import type { ShellProbeResult } from "../fixtures/shell-probe.ts"; +import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts"; + +const REPO_ROOT = path.resolve(import.meta.dirname, "../../.."); +const CLI_ENTRYPOINT = path.join(REPO_ROOT, "bin", "nemoclaw.js"); +const CLI_DIST_ENTRYPOINT = path.join(REPO_ROOT, "dist", "nemoclaw.js"); +const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? "e2e-msg-compat"; +const COMPAT_MODEL = process.env.NEMOCLAW_COMPAT_MODEL ?? "mock/deepseek-compatible"; +const COMPATIBLE_KEY = process.env.NEMOCLAW_COMPAT_MOCK_API_KEY ?? "fake-compatible-key-e2e"; +const TELEGRAM_TOKEN = process.env.TELEGRAM_BOT_TOKEN ?? "test-fake-telegram-token-e2e"; +const TELEGRAM_IDS = process.env.TELEGRAM_ALLOWED_IDS ?? "123456789"; +const MOCK_PORT = Number(process.env.NEMOCLAW_COMPAT_MOCK_PORT ?? "18089"); +const ONBOARD_TIMEOUT_MS = 25 * 60_000; +const TEST_TIMEOUT_MS = 45 * 60_000; +const liveTest = shouldRunLiveE2EScenarios() ? test : test.skip; + +validateSandboxName(SANDBOX_NAME); + +const HOP_BY_HOP_HEADERS = new Set([ + "proxy-authorization", + "proxy-connection", + "proxy-authenticate", + "connection", + "keep-alive", + "te", + "trailer", + "transfer-encoding", + "upgrade", +]); +const RATE_LIMIT_VALIDATION_RE = + /HTTP\s+429|returned\s+HTTP\s+429|\b429\b|too many requests|rate[- ]?limit|quota/i; + +interface MockRequestLog { + method: string; + path: string; + auth: "ok" | "missing"; + model?: unknown; + stream?: unknown; + hopHeaders: string[]; +} + +interface CompatibleMock { + readonly requests: MockRequestLog[]; + readonly hopHeaderLogs: string[][]; + readonly localBaseUrl: string; + close(): Promise; +} + +type ProcessResult = { exitCode?: number | null; stdout: string; stderr: string }; + +function resultText(result: ProcessResult): string { + return [result.stdout, result.stderr].filter(Boolean).join("\n"); +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function commandEnv(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv { + return { + ...buildAvailabilityProbeEnv(), + ...extra, + NEMOCLAW_NON_INTERACTIVE: "1", + NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1", + OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw", + }; +} + +function redactionValues(): string[] { + return [COMPATIBLE_KEY, TELEGRAM_TOKEN, process.env.GITHUB_TOKEN].filter( + (value): value is string => typeof value === "string" && value.length > 0, + ); +} + +function jsonResponse(res: http.ServerResponse, status: number, payload: unknown): void { + const body = JSON.stringify(payload); + res.writeHead(status, { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(body), + }); + res.end(body); +} + +function sseResponse(res: http.ServerResponse, body: string): void { + res.writeHead(200, { + "Content-Type": "text/event-stream", + "Content-Length": Buffer.byteLength(body), + }); + res.end(body); +} + +function readRequestBody(req: http.IncomingMessage): Promise { + return new Promise((resolve) => { + let body = ""; + req.setEncoding("utf8"); + req.on("data", (chunk: string) => { + body += chunk; + }); + req.on("end", () => resolve(body)); + }); +} + +function parseJsonBody(raw: string): Record { + try { + const parsed = JSON.parse(raw) as unknown; + return parsed && typeof parsed === "object" && !Array.isArray(parsed) + ? (parsed as Record) + : {}; + } catch { + return {}; + } +} + +async function startCompatibleMock( + port: number, + model: string, + apiKey: string, +): Promise { + const requests: MockRequestLog[] = []; + const hopHeaderLogs: string[][] = []; + const server = http.createServer(async (req, res) => { + const requestPath = new URL(req.url ?? "/", "http://compatible.mock").pathname; + const auth = req.headers.authorization === `Bearer ${apiKey}` ? "ok" : "missing"; + const hopHeaders = Object.keys(req.headers).filter((name) => + HOP_BY_HOP_HEADERS.has(name.toLowerCase()), + ); + + if (req.method === "GET" && ["/v1/models", "/models"].includes(requestPath)) { + requests.push({ method: "GET", path: requestPath, auth, hopHeaders: [] }); + jsonResponse(res, 200, { + object: "list", + data: [{ id: model, object: "model" }], + }); + return; + } + + if (req.method !== "POST") { + requests.push({ method: req.method ?? "GET", path: requestPath, auth, hopHeaders }); + jsonResponse(res, 404, { error: { message: "not found" } }); + return; + } + + const payload = parseJsonBody(await readRequestBody(req)); + + if (["/v1/responses", "/responses"].includes(requestPath)) { + requests.push({ + method: "POST", + path: requestPath, + auth, + model: payload.model, + stream: payload.stream, + hopHeaders, + }); + if (auth !== "ok") { + jsonResponse(res, 401, { error: { message: "missing bearer credential" } }); + return; + } + if (payload.stream) { + sseResponse( + res, + [ + "event: response.output_text.delta", + 'data: {"delta":"OK"}', + "", + "event: response.completed", + "data: {}", + "", + ].join("\n"), + ); + return; + } + jsonResponse(res, 200, { + id: "resp-mock", + object: "response", + output: [ + { + type: "message", + role: "assistant", + content: [{ type: "output_text", text: "PONG from compatible endpoint mock" }], + }, + ], + }); + return; + } + + if (["/v1/chat/completions", "/chat/completions"].includes(requestPath)) { + requests.push({ + method: "POST", + path: requestPath, + auth, + model: payload.model, + stream: payload.stream, + hopHeaders, + }); + hopHeaderLogs.push(hopHeaders); + if (auth !== "ok") { + jsonResponse(res, 401, { error: { message: "missing bearer credential" } }); + return; + } + if (payload.stream) { + const chunk = JSON.stringify({ + id: "chatcmpl-mock", + object: "chat.completion.chunk", + choices: [ + { + index: 0, + delta: { role: "assistant", content: "PONG from compatible endpoint mock" }, + finish_reason: null, + }, + ], + }); + const done = JSON.stringify({ + id: "chatcmpl-mock", + object: "chat.completion.chunk", + choices: [{ index: 0, delta: {}, finish_reason: "stop" }], + }); + sseResponse(res, `data: ${chunk}\n\ndata: ${done}\n\ndata: [DONE]\n\n`); + return; + } + jsonResponse(res, 200, { + id: "chatcmpl-mock", + object: "chat.completion", + choices: [ + { + index: 0, + message: { role: "assistant", content: "PONG from compatible endpoint mock" }, + finish_reason: "stop", + }, + ], + }); + return; + } + + requests.push({ + method: "POST", + path: requestPath, + auth, + model: payload.model, + stream: payload.stream, + hopHeaders, + }); + jsonResponse(res, 404, { error: { message: "not found" } }); + }); + + await new Promise((resolve, reject) => { + server.once("error", reject); + server.listen(port, "0.0.0.0", () => { + server.off("error", reject); + resolve(); + }); + }); + + const address = server.address(); + if (!address || typeof address === "string") { + throw new Error("compatible endpoint mock did not bind to a TCP port"); + } + const boundPort = (address as AddressInfo).port; + const mock = { + requests, + hopHeaderLogs, + localBaseUrl: `http://127.0.0.1:${boundPort}/v1`, + close: () => + new Promise((resolve, reject) => { + server.close((error) => (error ? reject(error) : resolve())); + }), + }; + + for (let attempt = 1; attempt <= 30; attempt += 1) { + try { + const response = await fetch(`${mock.localBaseUrl}/models`); + if (response.ok) return mock; + } catch { + // Keep polling until the server accepts connections. + } + await sleep(1_000); + } + + await mock.close(); + throw new Error("compatible endpoint mock failed to answer /v1/models"); +} + +async function hostAddressForSandbox(host: HostCliClient): Promise { + const probe = await host.command( + "bash", + [ + "-lc", + [ + 'ip_addr="$(ip route get 1.1.1.1 2>/dev/null | awk \'{for (i=1;i<=NF;i++) if ($i=="src") {print $(i+1); exit}}\')"', + 'if [ -n "$ip_addr" ]; then echo "$ip_addr"; exit 0; fi', + "ip_addr=\"$(hostname -I 2>/dev/null | awk '{print $1}')\"", + 'if [ -n "$ip_addr" ]; then echo "$ip_addr"; exit 0; fi', + 'if [ "$(uname -s 2>/dev/null)" = "Darwin" ]; then', + " for iface in en0 en1 bridge100; do", + ' ip_addr="$(ipconfig getifaddr "$iface" 2>/dev/null || true)"', + ' if [ -n "$ip_addr" ]; then echo "$ip_addr"; exit 0; fi', + " done", + " ip_addr=\"$(ifconfig 2>/dev/null | awk '/inet / && $2 !~ /^127\\./ {print $2; exit}')\"", + ' if [ -n "$ip_addr" ]; then echo "$ip_addr"; exit 0; fi', + "fi", + "echo 127.0.0.1", + ].join("\n"), + ], + { + artifactName: "host-ip-for-compatible-endpoint", + env: commandEnv(), + timeoutMs: 30_000, + }, + ); + return probe.stdout.trim().split(/\s+/)[0] || "127.0.0.1"; +} + +async function sourceCliAvailable(host: HostCliClient): Promise { + if (!fs.existsSync(CLI_DIST_ENTRYPOINT)) return false; + const result = await host.command( + "bash", + ["-lc", "command -v node >/dev/null 2>&1 && command -v openshell >/dev/null 2>&1"], + { + artifactName: "source-cli-availability", + env: commandEnv(), + timeoutMs: 30_000, + }, + ); + return result.exitCode === 0; +} + +async function bestEffort(run: () => Promise): Promise { + try { + await run(); + } catch { + // Best-effort cleanup mirrors the legacy shell teardown. + } +} + +async function stopGatewayRuntime(host: HostCliClient, artifactName: string): Promise { + await bestEffort(() => + host.command( + "bash", + [ + "-lc", + [ + "set +e", + "openshell forward stop 18789 >/dev/null 2>&1", + "openshell gateway stop -g nemoclaw >/dev/null 2>&1", + 'pid_file="$HOME/.local/state/nemoclaw/openshell-docker-gateway/openshell-gateway.pid"', + 'if [ -f "$pid_file" ]; then', + ' pid="$(tr -d "[:space:]" <"$pid_file" 2>/dev/null || true)"', + ' if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then', + ' kill "$pid" 2>/dev/null || true', + " for _ in $(seq 1 10); do", + ' kill -0 "$pid" 2>/dev/null || break', + " sleep 1", + " done", + ' kill -0 "$pid" 2>/dev/null && kill -9 "$pid" 2>/dev/null || true', + " fi", + "fi", + 'cid="$(docker ps -qf "name=openshell-cluster-nemoclaw" 2>/dev/null | head -1)"', + 'if [ -n "$cid" ]; then docker stop "$cid" >/dev/null 2>&1 || true; fi', + "openshell gateway remove nemoclaw >/dev/null 2>&1", + "openshell gateway destroy -g nemoclaw >/dev/null 2>&1", + "exit 0", + ].join("\n"), + ], + { + artifactName, + env: commandEnv(), + timeoutMs: 90_000, + }, + ), + ); +} + +async function cleanupMessagingState(host: HostCliClient, sandboxName: string): Promise { + // Endpoint-validation skips can happen before the sandbox exists. Keep + // teardown non-throwing so "Sandbox ... does not exist" stays a normal + // pre-contract cleanup outcome instead of masking the original evidence. + await bestEffort(() => + host.command("node", [CLI_ENTRYPOINT, sandboxName, "destroy", "--yes"], { + artifactName: `cleanup-nemoclaw-destroy-${sandboxName}`, + env: commandEnv(), + timeoutMs: 120_000, + }), + ); + await bestEffort(() => + host.command("openshell", ["sandbox", "delete", sandboxName], { + artifactName: `cleanup-openshell-sandbox-delete-${sandboxName}`, + env: commandEnv(), + timeoutMs: 60_000, + }), + ); + await stopGatewayRuntime(host, "cleanup-openshell-gateway-runtime-nemoclaw"); +} + +function hasLegacyCompatibleEndpointEvidence( + result: Pick, + requests: readonly MockRequestLog[], +): boolean { + return ( + resultText(result).includes("Compatible endpoint responds through inference.local") || + requests.some((request) => request.path === "/v1/chat/completions" && request.auth === "ok") + ); +} + +function shouldSkipPreContractProviderRateLimit( + result: Pick, + requests: readonly MockRequestLog[] = [], +): boolean { + return ( + isTransientProviderValidationFailure(result) && + RATE_LIMIT_VALIDATION_RE.test(resultText(result)) && + !hasLegacyCompatibleEndpointEvidence(result, requests) + ); +} + +function onboardEnv(endpointUrl: string): NodeJS.ProcessEnv { + return commandEnv({ + COMPATIBLE_API_KEY: COMPATIBLE_KEY, + DISCORD_BOT_TOKEN: undefined, + NEMOCLAW_ENDPOINT_URL: endpointUrl, + NEMOCLAW_MODEL: COMPAT_MODEL, + NEMOCLAW_POLICY_MODE: "custom", + NEMOCLAW_POLICY_PRESETS: "telegram", + NEMOCLAW_PREFERRED_API: "openai-completions", + NEMOCLAW_PROVIDER: "custom", + NEMOCLAW_RECREATE_SANDBOX: "1", + NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME, + NEMOCLAW_SKIP_TELEGRAM_REACHABILITY: "1", + SLACK_APP_TOKEN: undefined, + SLACK_BOT_TOKEN: undefined, + TELEGRAM_ALLOWED_IDS: TELEGRAM_IDS, + TELEGRAM_BOT_TOKEN: TELEGRAM_TOKEN, + }); +} + +async function runCompatibleOnboard( + host: HostCliClient, + endpointUrl: string, +): Promise<{ result: ShellProbeResult; runner: string }> { + const env = onboardEnv(endpointUrl); + const useSourceCli = await sourceCliAvailable(host); + const runOnce = async ( + attempt: number, + ): Promise<{ result: ShellProbeResult; runner: string }> => { + if (useSourceCli) { + await cleanupMessagingState(host, SANDBOX_NAME); + const result = await host.command( + "node", + [ + CLI_ENTRYPOINT, + "onboard", + "--fresh", + "--non-interactive", + "--yes", + "--yes-i-accept-third-party-software", + ], + { + artifactName: + attempt === 1 + ? "onboard-compatible-endpoint-source-cli" + : `onboard-compatible-endpoint-source-cli-retry-${attempt}`, + env, + redactionValues: redactionValues(), + timeoutMs: ONBOARD_TIMEOUT_MS, + }, + ); + return { result, runner: attempt === 1 ? "source CLI onboard" : "source CLI onboard retry" }; + } + + const result = await host.command( + "bash", + ["install.sh", "--non-interactive", "--yes-i-accept-third-party-software", "--fresh"], + { + artifactName: + attempt === 1 + ? "onboard-compatible-endpoint-install-sh" + : `onboard-compatible-endpoint-install-sh-retry-${attempt}`, + cwd: REPO_ROOT, + env, + redactionValues: redactionValues(), + timeoutMs: ONBOARD_TIMEOUT_MS, + }, + ); + return { result, runner: attempt === 1 ? "install.sh" : "install.sh retry" }; + }; + + const first = await runOnce(1); + if ( + first.result.exitCode === 0 || + !/Connection refused|transport error|tcp connect error|client error \(Connect\)/i.test( + resultText(first.result), + ) + ) { + return first; + } + + await stopGatewayRuntime(host, "onboard-compatible-endpoint-retry-gateway-cleanup"); + await sleep(5_000); + return runOnce(2); +} + +function openAiContent(raw: string): string { + const parsed = JSON.parse(raw) as { + choices?: Array<{ message?: { content?: unknown }; text?: unknown }>; + }; + return (parsed.choices ?? []) + .map((choice) => { + if (typeof choice.message?.content === "string") return choice.message.content; + if (typeof choice.text === "string") return choice.text; + return ""; + }) + .join("\n"); +} + +async function assertOpenClawConfigShape(sandbox: SandboxClient): Promise { + const script = String.raw` +const fs = require("node:fs"); +const model = process.argv[1]; +const cfg = JSON.parse(fs.readFileSync("/sandbox/.openclaw/openclaw.json", "utf8")); +const providers = cfg.models?.providers ?? {}; +const errors = []; +if (Object.hasOwn(providers, "deepinfra")) errors.push("direct deepinfra provider is present"); +const providerKeys = Object.keys(providers).sort(); +if (JSON.stringify(providerKeys) !== JSON.stringify(["inference"])) { + errors.push("provider keys are " + JSON.stringify(providerKeys)); +} +const inference = providers.inference; +if (!inference || typeof inference !== "object") { + errors.push("models.providers.inference is missing"); +} else { + if (inference.baseUrl !== "https://inference.local/v1") { + errors.push("inference baseUrl is " + JSON.stringify(inference.baseUrl)); + } + if (inference.apiKey !== "unused") { + errors.push("inference apiKey is not the non-secret placeholder"); + } +} +const primary = cfg.agents?.defaults?.model?.primary; +if (primary !== "inference/" + model) errors.push("primary model is " + JSON.stringify(primary)); +if (!cfg.channels?.telegram) errors.push("telegram channel config missing"); +console.log(JSON.stringify({ + provider_keys: providerKeys, + inference_base: inference?.baseUrl, + inference_api_key: inference?.apiKey, + primary, + telegram_present: Boolean(cfg.channels?.telegram), + errors, +})); +process.exit(errors.length ? 1 : 0); +`; + const result = await sandbox.exec(SANDBOX_NAME, ["node", "-e", script, COMPAT_MODEL], { + artifactName: "openclaw-config-compatible-endpoint", + env: commandEnv(), + timeoutMs: 60_000, + }); + expect(result.exitCode, resultText(result)).toBe(0); +} + +async function assertGatewayReady(sandbox: SandboxClient): Promise { + const script = String.raw` +const net = require("node:net"); +let done = false; +const sock = net.connect(18789, "127.0.0.1"); +function finish(line, code) { + if (done) return; + done = true; + console.log(line); + sock.destroy(); + process.exit(code); +} +sock.on("connect", () => finish("OPEN", 0)); +sock.on("error", (err) => finish("ERROR " + err.message, 1)); +sock.setTimeout(1000, () => finish("TIMEOUT", 1)); +`; + let last: ShellProbeResult | undefined; + for (let attempt = 1; attempt <= 30; attempt += 1) { + last = await sandbox.exec(SANDBOX_NAME, ["node", "-e", script], { + artifactName: `gateway-ready-compatible-endpoint-${attempt}`, + env: commandEnv(), + timeoutMs: 5_000, + }); + if (last.exitCode === 0 && last.stdout.includes("OPEN")) return; + await sleep(1_000); + } + throw new Error( + `gateway did not open port 18789: ${last ? resultText(last).slice(0, 300) : "no probe"}`, + ); +} + +async function assertSandboxInference(sandbox: SandboxClient): Promise { + const payload = JSON.stringify({ + model: COMPAT_MODEL, + messages: [{ role: "user", content: "Reply with exactly: PONG" }], + max_tokens: 32, + }); + const response = await sandbox.exec( + SANDBOX_NAME, + [ + "curl", + "-sS", + "--max-time", + "60", + "https://inference.local/v1/chat/completions", + "-H", + "Content-Type: application/json", + "--data-raw", + payload, + ], + { + artifactName: "sandbox-inference-local-compatible-chat", + env: commandEnv(), + timeoutMs: 90_000, + }, + ); + expect(response.exitCode, resultText(response)).toBe(0); + expect(openAiContent(response.stdout), response.stdout.slice(0, 500)).toMatch(/PONG/i); +} + +function findJsonObjectEnd(raw: string, start: number): number | null { + let depth = 0; + let inString = false; + let escaped = false; + for (let index = start; index < raw.length; index += 1) { + const char = raw[index]; + if (inString) { + if (escaped) { + escaped = false; + } else if (char === "\\") { + escaped = true; + } else if (char === '"') { + inString = false; + } + continue; + } + if (char === '"') { + inString = true; + } else if (char === "{") { + depth += 1; + } else if (char === "}") { + depth -= 1; + if (depth === 0) return index + 1; + } + } + return null; +} + +function parseOpenClawAgentText(raw: string): string { + if (!raw.trim()) return ""; + const parts: string[] = []; + const visited = new Set(); + const textKeys = new Set(["text", "content", "reasoning_content"]); + const containerKeys = new Set([ + "result", + "payloads", + "payload", + "messages", + "choices", + "response", + "data", + "output", + "outputs", + "items", + "segments", + "delta", + ]); + + const add = (value: unknown) => { + if (typeof value === "string" && value.trim()) parts.push(value.trim()); + }; + const collect = (value: unknown) => { + if (visited.has(value)) return; + visited.add(value); + if (typeof value === "string") { + add(value); + return; + } + if (Array.isArray(value)) { + value.forEach(collect); + return; + } + if (!value || typeof value !== "object") return; + const record = value as Record; + for (const key of textKeys) add(record[key]); + const choices = record.choices; + if (Array.isArray(choices)) { + for (const choice of choices) { + if (!choice || typeof choice !== "object") continue; + collect((choice as Record).message); + collect((choice as Record).delta); + add((choice as Record).text); + } + } + for (const key of containerKeys) { + if (key in record) collect(record[key]); + } + }; + const collectDoc = (doc: unknown) => { + if (doc && typeof doc === "object" && (doc as Record).result) { + collect((doc as Record).result); + } else { + collect(doc); + } + }; + + try { + collectDoc(JSON.parse(raw)); + } catch { + for (const match of raw.matchAll(/{/g)) { + try { + const before = parts.length; + const start = match.index; + const end = findJsonObjectEnd(raw, start); + if (end === null) continue; + collectDoc(JSON.parse(raw.slice(start, end))); + if (parts.length > before) break; + } catch { + // Continue scanning for a later JSON object, matching the legacy parser. + } + } + } + return parts.join("\n"); +} + +async function assertOpenClawAgentTurn( + sandbox: SandboxClient, + compatibleMock: CompatibleMock, +): Promise { + const hopCountBefore = compatibleMock.hopHeaderLogs.length; + const sessionId = `e2e-compat-agent-${Date.now()}-${randomUUID()}`; + const agent = await sandbox.exec( + SANDBOX_NAME, + [ + "openclaw", + "agent", + "--agent", + "main", + "--json", + "--session-id", + sessionId, + "-m", + "Reply with only: PONG", + ], + { + artifactName: "openclaw-agent-compatible-endpoint", + env: commandEnv(), + timeoutMs: 120_000, + }, + ); + const text = resultText(agent); + expect( + /SsrFBlockedError|Blocked hostname|transport error|ECONNREFUSED|EAI_AGAIN|gateway unavailable|network connection error/i.test( + text, + ), + text.slice(0, 500), + ).toBe(false); + expect(agent.exitCode, text.slice(0, 500)).toBe(0); + expect(parseOpenClawAgentText(agent.stdout), text.slice(0, 500)).toMatch(/PONG/i); + + const newHopHeaderLogs = compatibleMock.hopHeaderLogs.slice(hopCountBefore); + expect( + newHopHeaderLogs.length, + "Mock logged no proxy_hop_headers line for the agent turn; agent did not reach /v1/chat/completions", + ).toBeGreaterThan(0); + const leaked = newHopHeaderLogs.flat().filter((name) => name.length > 0); + expect(leaked, `Proxy hop headers leaked to upstream: ${leaked.join(",")}`).toEqual([]); +} + +describe("messaging-compatible-endpoint live test local classifiers", () => { + function output(text: string): Pick { + return { stdout: "", stderr: text }; + } + + it("skips only rate-limited endpoint validation before legacy evidence exists", () => { + expect( + shouldSkipPreContractProviderRateLimit( + output( + "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429", + ), + ), + ).toBe(true); + expect( + shouldSkipPreContractProviderRateLimit( + output("NVIDIA Endpoints endpoint validation failed.\nRequest rate limit exceeded"), + ), + ).toBe(true); + expect( + shouldSkipPreContractProviderRateLimit( + output("Other OpenAI-compatible endpoint endpoint validation failed: invalid credential"), + ), + ).toBe(false); + expect( + shouldSkipPreContractProviderRateLimit( + output( + "Chat Completions API validation returned HTTP 429\n✓ Compatible endpoint responds through inference.local inside the sandbox", + ), + ), + ).toBe(false); + expect( + shouldSkipPreContractProviderRateLimit(output("endpoint validation failed: HTTP 429"), [ + { + auth: "ok", + hopHeaders: [], + method: "POST", + path: "/v1/chat/completions", + }, + ]), + ).toBe(false); + }); +}); + +liveTest( + "messaging compatible endpoint routes Telegram-enabled OpenClaw through inference.local", + { timeout: TEST_TIMEOUT_MS }, + async ({ artifacts, cleanup, host, sandbox, skip }) => { + const docker = await host.command("docker", ["info"], { + artifactName: "prereq-docker-info-messaging-compatible-endpoint", + env: commandEnv(), + timeoutMs: 30_000, + }); + if (docker.exitCode !== 0) { + if (process.env.GITHUB_ACTIONS === "true") { + throw new Error( + `Docker is required for messaging compatible endpoint E2E: ${resultText(docker)}`, + ); + } + skip("Docker is required for messaging compatible endpoint E2E"); + } + + await artifacts.writeJson("scenario.json", { + id: "messaging-compatible-endpoint", + runner: "vitest", + boundary: "direct-cli-onboard-openshell-compatible-endpoint", + legacySource: "test/e2e/test-messaging-compatible-endpoint.sh", + refs: ["#2766", "#2572", "#5098"], + contract: [ + "local OpenAI-compatible mock endpoint starts and is reachable", + "custom provider + Telegram onboard completes", + "onboard runs the compatible endpoint sandbox smoke check", + "gateway registers compatible-endpoint provider", + "openclaw.json uses managed inference.local provider and Telegram config", + "gateway stays up after Telegram provider initialization", + "sandbox inference.local chat completion reaches the mock with auth", + "OpenClaw agent turn completes through the compatible endpoint", + "http-proxy-fix.js strips RFC 7230 hop-by-hop proxy headers", + ], + }); + + cleanup.add(`destroy messaging compatible endpoint state ${SANDBOX_NAME}`, () => + cleanupMessagingState(host, SANDBOX_NAME), + ); + await cleanupMessagingState(host, SANDBOX_NAME); + + const compatibleMock = await startCompatibleMock(MOCK_PORT, COMPAT_MODEL, COMPATIBLE_KEY); + cleanup.add("stop compatible endpoint mock", async () => { + await artifacts.writeJson("compatible-endpoint-mock-requests.json", compatibleMock.requests); + await compatibleMock.close(); + }); + + const hostAddress = await hostAddressForSandbox(host); + const endpointUrl = `http://${hostAddress}:${new URL(compatibleMock.localBaseUrl).port}/v1`; + const hostReachability = await host.command("curl", ["-sf", `${endpointUrl}/models`], { + artifactName: "compatible-endpoint-host-reachability", + env: commandEnv(), + redactionValues: redactionValues(), + timeoutMs: 30_000, + }); + expect(hostReachability.exitCode, resultText(hostReachability)).toBe(0); + + const { result: onboard, runner } = await runCompatibleOnboard(host, endpointUrl); + if ( + onboard.exitCode !== 0 && + shouldSkipPreContractProviderRateLimit(onboard, compatibleMock.requests) + ) { + await artifacts.writeJson("scenario-result.json", { + id: "messaging-compatible-endpoint", + status: "skipped", + reason: "external-provider-rate-limit-before-legacy-contract", + runner, + onboardExitCode: onboard.exitCode, + onboardTimedOut: onboard.timedOut, + onboardArtifacts: onboard.artifacts, + mockRequestsBeforeSkip: compatibleMock.requests.length, + sourceBoundary: "external provider endpoint validation outside the repo", + removalCondition: + "remove once CI endpoint validation is stable for a release cycle or covered by a hermetic provider-validation fixture", + }); + skip( + "External endpoint validation was rate-limited before the messaging-compatible endpoint contract could run", + ); + } + expect(onboard.exitCode, resultText(onboard)).toBe(0); + expect(resultText(onboard)).toContain("Compatible endpoint responds through inference.local"); + + const provider = await host.command("openshell", ["provider", "get", "compatible-endpoint"], { + artifactName: "openshell-provider-get-compatible-endpoint", + env: commandEnv(), + timeoutMs: 30_000, + }); + expect(provider.exitCode, resultText(provider)).toBe(0); + + await assertOpenClawConfigShape(sandbox); + await assertGatewayReady(sandbox); + await assertSandboxInference(sandbox); + await assertOpenClawAgentTurn(sandbox, compatibleMock); + + expect( + compatibleMock.requests.some( + (request) => request.path === "/v1/chat/completions" && request.auth === "ok", + ), + "compatible mock did not record authenticated /v1/chat/completions traffic", + ).toBe(true); + + const telegramRoundTripSecretsAvailable = Boolean( + process.env.TELEGRAM_BOT_TOKEN_REAL && + process.env.TELEGRAM_CHAT_ID_E2E && + process.env.COMPATIBLE_API_KEY && + process.env.NEMOCLAW_ENDPOINT_URL && + process.env.NEMOCLAW_COMPAT_MODEL, + ); + await artifacts.writeJson("telegram-live-round-trip.json", { + status: "skipped", + reason: telegramRoundTripSecretsAvailable + ? "Live Telegram reply requires an inbound user-message driver; hermetic route passed" + : "Live Telegram-compatible round trip secrets not fully set", + }); + + await artifacts.writeJson("scenario-result.json", { + id: "messaging-compatible-endpoint", + runner, + endpointUrl, + assertions: { + dockerRunning: docker.exitCode === 0, + mockReachable: hostReachability.exitCode === 0, + onboardCompleted: onboard.exitCode === 0, + providerRegistered: provider.exitCode === 0, + authenticatedChatTraffic: compatibleMock.requests.some( + (request) => request.path === "/v1/chat/completions" && request.auth === "ok", + ), + proxyHopHeadersStripped: compatibleMock.hopHeaderLogs.every( + (headers) => headers.length === 0, + ), + }, + }); + }, +); diff --git a/test/e2e-scenario/live/network-policy-transient-provider.ts b/test/e2e-scenario/live/network-policy-transient-provider.ts index 04f091cbd4..224b1a73c8 100644 --- a/test/e2e-scenario/live/network-policy-transient-provider.ts +++ b/test/e2e-scenario/live/network-policy-transient-provider.ts @@ -6,7 +6,7 @@ import type { ShellProbeResult } from "../fixtures/shell-probe.ts"; const TRANSIENT_PROVIDER_VALIDATION_RE = /endpoint validation failed|failed to verify inference endpoint|Chat Completions API validation/i; const TRANSIENT_PROVIDER_DETAIL_RE = - /timed? out|timeout|curl failed \(exit (7|28|35|52|56)\)|ETIMEDOUT|ECONNRESET|EAI_AGAIN|ENOTFOUND|failed to connect|error sending request|HTTP (429|502|503|504)|returned HTTP (429|502|503|504)|temporar/i; + /timed? out|timeout|curl failed \(exit (7|28|35|52|56)\)|ETIMEDOUT|ECONNRESET|EAI_AGAIN|ENOTFOUND|failed to connect|error sending request|HTTP (429|502|503|504)|returned HTTP (429|502|503|504)|too many requests|rate[- ]?limit|quota|temporar/i; function resultText(result: Pick): string { return [result.stdout, result.stderr].filter(Boolean).join("\n"); diff --git a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts index 354712ecde..fb62c0c080 100644 --- a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts +++ b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts @@ -165,6 +165,26 @@ describe("e2e-vitest-scenarios workflow boundary", () => { selectedFreeStandingJobs: ["runtime-overrides-vitest"], registryScenarios: [], }); + expect( + evaluateE2eVitestWorkflowDispatchSelectors({ + scenarios: "messaging-compatible-endpoint", + }), + ).toMatchObject({ + valid: true, + liveScenariosRuns: false, + selectedFreeStandingJobs: ["messaging-compatible-endpoint-vitest"], + registryScenarios: [], + }); + expect( + evaluateE2eVitestWorkflowDispatchSelectors({ + jobs: "messaging-compatible-endpoint-vitest", + }), + ).toMatchObject({ + valid: true, + liveScenariosRuns: false, + selectedFreeStandingJobs: ["messaging-compatible-endpoint-vitest"], + registryScenarios: [], + }); expect( evaluateE2eVitestWorkflowDispatchSelectors({ scenarios: "inference-routing" }), ).toMatchObject({ @@ -376,7 +396,7 @@ describe("e2e-vitest-scenarios workflow boundary", () => { registryScenarios: [], }); } - }); + }, 15_000); it("flags direct dispatch-input interpolation and unsafe artifact upload", () => { const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-vitest-workflow-")); @@ -660,6 +680,33 @@ jobs: } }); + it("requires messaging-compatible-endpoint workflow and report coverage", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-vitest-workflow-")); + const workflowPath = path.join(tmp, "workflow.yaml"); + const workflow = fs.readFileSync( + path.join(process.cwd(), ".github/workflows/e2e-vitest-scenarios.yaml"), + "utf8", + ); + fs.writeFileSync( + workflowPath, + workflow + .replace(/messaging-compatible-endpoint-vitest/g, "msg-compatible-missing") + .replace(/messaging-compatible-endpoint/g, "msg-compatible-missing"), + ); + + try { + const errors = validateE2eVitestScenariosWorkflowBoundary(workflowPath); + expect(errors).toEqual( + expect.arrayContaining([ + "workflow missing messaging-compatible-endpoint-vitest job", + "report-to-pr job must wait for messaging-compatible-endpoint-vitest", + ]), + ); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + it("rejects Docker Hub auth and inline secrets in runtime-overrides run steps", () => { const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-vitest-workflow-")); const workflowPath = path.join(tmp, "workflow.yaml"); diff --git a/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts b/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts index 5b5138224a..551ea0a85e 100644 --- a/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts +++ b/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts @@ -20,6 +20,11 @@ describe("network-policy transient provider validation classifier", () => { probeOutput("endpoint validation failed: returned HTTP 503 from provider"), ), ).toBe(true); + expect( + isTransientProviderValidationFailure( + probeOutput("endpoint validation failed: provider rate limit exceeded"), + ), + ).toBe(true); expect( isTransientProviderValidationFailure( diff --git a/tools/e2e-scenarios/free-standing-jobs.env b/tools/e2e-scenarios/free-standing-jobs.env index 68ce23e36d..844630e3a5 100644 --- a/tools/e2e-scenarios/free-standing-jobs.env +++ b/tools/e2e-scenarios/free-standing-jobs.env @@ -1,5 +1,5 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -allowed_jobs=openshell-version-pin-vitest,onboard-negative-paths-vitest,skill-agent-vitest,inference-routing-vitest,credential-migration-vitest,runtime-overrides-vitest,hermes-e2e-vitest,hermes-root-entrypoint-smoke-vitest,network-policy-vitest,shields-config-vitest,rebuild-openclaw-vitest,sandbox-rebuild-vitest,token-rotation-vitest,launchable-smoke-vitest,openclaw-tui-chat-correlation-vitest,gateway-guard-recovery,double-onboard-vitest,issue-4434-tui-unreachable-inference-vitest,model-router-provider-routed-inference-vitest,credential-sanitization-vitest,sandbox-survival-vitest -free_standing_scenarios_csv=openshell-version-pin,onboard-negative-paths,skill-agent,inference-routing,runtime-overrides,hermes-e2e,hermes-root-entrypoint-smoke,network-policy,shields-config,rebuild-openclaw,sandbox-rebuild,token-rotation,openclaw-tui-chat-correlation,double-onboard,issue-4434-tui-unreachable-inference,model-router-provider-routed-inference,credential-sanitization,sandbox-survival -free_standing_scenario_jobs_csv=openshell-version-pin:openshell-version-pin-vitest,onboard-negative-paths:onboard-negative-paths-vitest,skill-agent:skill-agent-vitest,inference-routing:inference-routing-vitest,runtime-overrides:runtime-overrides-vitest,hermes-e2e:hermes-e2e-vitest,hermes-root-entrypoint-smoke:hermes-root-entrypoint-smoke-vitest,network-policy:network-policy-vitest,shields-config:shields-config-vitest,rebuild-openclaw:rebuild-openclaw-vitest,sandbox-rebuild:sandbox-rebuild-vitest,token-rotation:token-rotation-vitest,openclaw-tui-chat-correlation:openclaw-tui-chat-correlation-vitest,double-onboard:double-onboard-vitest,issue-4434-tui-unreachable-inference:issue-4434-tui-unreachable-inference-vitest,model-router-provider-routed-inference:model-router-provider-routed-inference-vitest,credential-sanitization:credential-sanitization-vitest,sandbox-survival:sandbox-survival-vitest +allowed_jobs=openshell-version-pin-vitest,onboard-negative-paths-vitest,skill-agent-vitest,inference-routing-vitest,credential-migration-vitest,runtime-overrides-vitest,hermes-e2e-vitest,hermes-root-entrypoint-smoke-vitest,network-policy-vitest,shields-config-vitest,rebuild-openclaw-vitest,sandbox-rebuild-vitest,token-rotation-vitest,messaging-compatible-endpoint-vitest,launchable-smoke-vitest,openclaw-tui-chat-correlation-vitest,gateway-guard-recovery,double-onboard-vitest,issue-4434-tui-unreachable-inference-vitest,model-router-provider-routed-inference-vitest,credential-sanitization-vitest,sandbox-survival-vitest +free_standing_scenarios_csv=openshell-version-pin,onboard-negative-paths,skill-agent,inference-routing,runtime-overrides,hermes-e2e,hermes-root-entrypoint-smoke,network-policy,shields-config,rebuild-openclaw,sandbox-rebuild,token-rotation,messaging-compatible-endpoint,openclaw-tui-chat-correlation,double-onboard,issue-4434-tui-unreachable-inference,model-router-provider-routed-inference,credential-sanitization,sandbox-survival +free_standing_scenario_jobs_csv=openshell-version-pin:openshell-version-pin-vitest,onboard-negative-paths:onboard-negative-paths-vitest,skill-agent:skill-agent-vitest,inference-routing:inference-routing-vitest,runtime-overrides:runtime-overrides-vitest,hermes-e2e:hermes-e2e-vitest,hermes-root-entrypoint-smoke:hermes-root-entrypoint-smoke-vitest,network-policy:network-policy-vitest,shields-config:shields-config-vitest,rebuild-openclaw:rebuild-openclaw-vitest,sandbox-rebuild:sandbox-rebuild-vitest,token-rotation:token-rotation-vitest,messaging-compatible-endpoint:messaging-compatible-endpoint-vitest,openclaw-tui-chat-correlation:openclaw-tui-chat-correlation-vitest,double-onboard:double-onboard-vitest,issue-4434-tui-unreachable-inference:issue-4434-tui-unreachable-inference-vitest,model-router-provider-routed-inference:model-router-provider-routed-inference-vitest,credential-sanitization:credential-sanitization-vitest,sandbox-survival:sandbox-survival-vitest diff --git a/tools/e2e-scenarios/workflow-boundary.mts b/tools/e2e-scenarios/workflow-boundary.mts index 9e55fc3ff3..0b799260e8 100644 --- a/tools/e2e-scenarios/workflow-boundary.mts +++ b/tools/e2e-scenarios/workflow-boundary.mts @@ -1168,6 +1168,157 @@ function validateTokenRotationVitestJob(errors: string[], jobs: WorkflowRecord): } } +function validateMessagingCompatibleEndpointVitestJob( + errors: string[], + jobs: WorkflowRecord, +): void { + const jobName = "messaging-compatible-endpoint-vitest"; + const job = asRecord(jobs[jobName]); + if (Object.keys(job).length === 0) { + errors.push("workflow missing messaging-compatible-endpoint-vitest job"); + return; + } + + if (job["runs-on"] !== "ubuntu-latest") { + errors.push("messaging-compatible-endpoint-vitest job must run on ubuntu-latest"); + } + validateFreeStandingJobSelector(errors, jobs, jobName, "messaging-compatible-endpoint"); + if (job["timeout-minutes"] !== 45) { + errors.push("messaging-compatible-endpoint-vitest job must keep the legacy 45 minute timeout"); + } + + const jobEnv = asRecord(job.env); + if ( + jobEnv.E2E_ARTIFACT_DIR !== + "${{ github.workspace }}/e2e-artifacts/vitest/messaging-compatible-endpoint" + ) { + errors.push( + "messaging-compatible-endpoint-vitest job must write artifacts under e2e-artifacts/vitest/messaging-compatible-endpoint", + ); + } + if (!stringValue(jobEnv.NEMOCLAW_CLI_BIN).includes("bin/nemoclaw.js")) { + errors.push("messaging-compatible-endpoint-vitest job must point NEMOCLAW_CLI_BIN at the repo CLI"); + } + if (jobEnv.NEMOCLAW_RUN_E2E_SCENARIOS !== "1") { + errors.push("messaging-compatible-endpoint-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1"); + } + if (jobEnv.NEMOCLAW_SANDBOX_NAME !== "e2e-msg-compat") { + errors.push("messaging-compatible-endpoint-vitest job must pin the legacy sandbox name"); + } + if (jobEnv.OPENSHELL_GATEWAY !== "nemoclaw") { + errors.push("messaging-compatible-endpoint-vitest job must force OPENSHELL_GATEWAY=nemoclaw"); + } + requireEnvDoesNotExposeSecret( + errors, + "messaging-compatible-endpoint-vitest job", + jobEnv, + "NVIDIA_API_KEY", + ); + + const steps = asSteps(job.steps); + requireNoDispatchInputInterpolation(errors, steps); + for (const step of steps) { + if (step.name !== "Run messaging compatible endpoint live test") { + requireEnvDoesNotExposeSecret( + errors, + `messaging-compatible-endpoint-vitest step '${step.name ?? step.uses ?? ""}'`, + asRecord(step.env), + "NVIDIA_API_KEY", + ); + } + } + + const checkout = steps.find((step) => stringValue(step.uses).startsWith("actions/checkout@")); + if (!checkout) errors.push("messaging-compatible-endpoint-vitest job missing checkout step"); + requireFullShaAction(errors, checkout, "messaging-compatible-endpoint-vitest checkout"); + if (asRecord(checkout?.with)["persist-credentials"] !== false) { + errors.push("messaging-compatible-endpoint-vitest checkout step must set persist-credentials=false"); + } + + const dockerHubAuth = requireJobStep(errors, jobName, steps, "Authenticate to Docker Hub"); + const dockerHubEnv = asRecord(dockerHubAuth?.env); + if (dockerHubEnv.DOCKERHUB_USERNAME !== "${{ secrets.DOCKERHUB_USERNAME }}") { + errors.push( + "messaging-compatible-endpoint-vitest Docker Hub auth must receive DOCKERHUB_USERNAME from secrets", + ); + } + if (dockerHubEnv.DOCKERHUB_TOKEN !== "${{ secrets.DOCKERHUB_TOKEN }}") { + errors.push( + "messaging-compatible-endpoint-vitest Docker Hub auth must receive DOCKERHUB_TOKEN from secrets", + ); + } + requireRunContains(errors, dockerHubAuth, "docker login docker.io"); + + const setupNode = namedStep(steps, "Set up Node"); + if (!setupNode) errors.push("messaging-compatible-endpoint-vitest job missing step: Set up Node"); + requireFullShaAction(errors, setupNode, "messaging-compatible-endpoint-vitest setup-node"); + + const installRootDependencies = requireJobStep( + errors, + jobName, + steps, + "Install root dependencies", + ); + requireRunContains(errors, installRootDependencies, "npm ci --ignore-scripts"); + + const buildCli = requireJobStep(errors, jobName, steps, "Build CLI"); + requireRunContains(errors, buildCli, "npm run build:cli"); + + const runVitest = requireJobStep( + errors, + jobName, + steps, + "Run messaging compatible endpoint live test", + ); + const runVitestEnv = asRecord(runVitest?.env); + requireEnvDoesNotExposeSecret( + errors, + "messaging-compatible-endpoint-vitest step", + runVitestEnv, + "NVIDIA_API_KEY", + ); + if (runVitestEnv.NEMOCLAW_COMPAT_MOCK_API_KEY !== "fake-compatible-key-e2e") { + errors.push("messaging-compatible-endpoint-vitest step must set a fake compatible endpoint key"); + } + if (runVitestEnv.TELEGRAM_BOT_TOKEN !== "test-fake-telegram-token-e2e") { + errors.push("messaging-compatible-endpoint-vitest step must set a fake Telegram token"); + } + if (runVitestEnv.TELEGRAM_ALLOWED_IDS !== "123456789") { + errors.push("messaging-compatible-endpoint-vitest step must set fake Telegram allowed ids"); + } + requireRunContains(errors, runVitest, "npx vitest run --project e2e-scenarios-live"); + requireRunContains( + errors, + runVitest, + "test/e2e-scenario/live/messaging-compatible-endpoint.test.ts", + ); + + const upload = requireJobStep( + errors, + jobName, + steps, + "Upload messaging compatible endpoint artifacts", + ); + requireFullShaAction(errors, upload, "messaging-compatible-endpoint-vitest upload-artifact"); + const uploadWith = asRecord(upload?.with); + if (uploadWith.name !== "e2e-vitest-scenarios-messaging-compatible-endpoint") { + errors.push("messaging-compatible-endpoint-vitest artifact upload name must be stable"); + } + const uploadPath = stringValue(uploadWith.path); + requireUploadPathContains(errors, uploadPath, "e2e-artifacts/vitest/messaging-compatible-endpoint/"); + if (uploadWith["include-hidden-files"] !== false) { + errors.push("messaging-compatible-endpoint-vitest artifact upload must set include-hidden-files: false"); + } + if (uploadWith["if-no-files-found"] !== "ignore") { + errors.push( + "messaging-compatible-endpoint-vitest artifact upload must ignore missing fixture artifacts", + ); + } + if (uploadWith["retention-days"] !== 14) { + errors.push("messaging-compatible-endpoint-vitest artifact upload retention-days must be 14"); + } +} + function validateOnboardNegativePathsVitestJob(errors: string[], jobs: WorkflowRecord): void { const jobName = "onboard-negative-paths-vitest"; const job = asRecord(jobs[jobName]); @@ -2119,6 +2270,7 @@ export function validateE2eVitestScenariosWorkflowBoundary( validateRebuildOpenClawVitestJob(errors, jobs); validateSandboxRebuildVitestJob(errors, jobs); validateTokenRotationVitestJob(errors, jobs); + validateMessagingCompatibleEndpointVitestJob(errors, jobs); validateFreeStandingJobSelector( errors, jobs, From fe649c63a941af39f1927457fe3cd5abae4e5deb Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 12:33:12 -0700 Subject: [PATCH 02/14] test(e2e): address messaging endpoint review Signed-off-by: Carlos Villela --- .github/workflows/e2e-vitest-scenarios.yaml | 26 -------- .../messaging-compatible-endpoint.test.ts | 50 ++++++++++++-- .../e2e-scenarios-workflow.test.ts | 35 ++++++++++ tools/e2e-scenarios/workflow-boundary.mts | 65 ++++++++++++------- 4 files changed, 121 insertions(+), 55 deletions(-) diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml index b6e3d45a20..00e44224c0 100644 --- a/.github/workflows/e2e-vitest-scenarios.yaml +++ b/.github/workflows/e2e-vitest-scenarios.yaml @@ -1457,32 +1457,6 @@ jobs: with: persist-credentials: false - - name: Authenticate to Docker Hub - env: - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} - shell: bash - run: | - set -euo pipefail - if [[ -z "${DOCKERHUB_USERNAME}" || -z "${DOCKERHUB_TOKEN}" ]]; then - echo "::notice::Docker Hub credentials not configured; continuing with anonymous pulls." - exit 0 - fi - login_succeeded=0 - for attempt in 1 2 3; do - if echo "${DOCKERHUB_TOKEN}" | timeout 30s docker login docker.io --username "${DOCKERHUB_USERNAME}" --password-stdin; then - login_succeeded=1 - break - fi - if [[ "$attempt" -lt 3 ]]; then - echo "::warning::Docker Hub login attempt ${attempt} failed; retrying." - sleep 5 - fi - done - if [[ "$login_succeeded" -ne 1 ]]; then - echo "::warning::Docker Hub login failed after 3 attempts; continuing with anonymous pulls." - fi - - name: Set up Node uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0 with: diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts index 9249849dfc..2c2a5903fb 100644 --- a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts +++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts @@ -54,6 +54,12 @@ const HOP_BY_HOP_HEADERS = new Set([ ]); const RATE_LIMIT_VALIDATION_RE = /HTTP\s+429|returned\s+HTTP\s+429|\b429\b|too many requests|rate[- ]?limit|quota/i; +const DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE = /NVIDIA Endpoints endpoint validation failed/i; +const COMPATIBLE_ENDPOINT_VALIDATION_RE = + /Other OpenAI-compatible endpoint endpoint validation failed|Chat Completions API validation/i; +const COMPAT_AGENT_REPLY = "COMPAT_MOCK_ROUTE_5098_OK"; +const COMPAT_AGENT_PROMPT = + "Call the configured model and report the compatible endpoint route token."; interface MockRequestLog { method: string; @@ -201,7 +207,7 @@ async function startCompatibleMock( { type: "message", role: "assistant", - content: [{ type: "output_text", text: "PONG from compatible endpoint mock" }], + content: [{ type: "output_text", text: COMPAT_AGENT_REPLY }], }, ], }); @@ -229,7 +235,7 @@ async function startCompatibleMock( choices: [ { index: 0, - delta: { role: "assistant", content: "PONG from compatible endpoint mock" }, + delta: { role: "assistant", content: COMPAT_AGENT_REPLY }, finish_reason: null, }, ], @@ -248,7 +254,7 @@ async function startCompatibleMock( choices: [ { index: 0, - message: { role: "assistant", content: "PONG from compatible endpoint mock" }, + message: { role: "assistant", content: COMPAT_AGENT_REPLY }, finish_reason: "stop", }, ], @@ -429,9 +435,12 @@ function shouldSkipPreContractProviderRateLimit( result: Pick, requests: readonly MockRequestLog[] = [], ): boolean { + const text = resultText(result); return ( + COMPATIBLE_ENDPOINT_VALIDATION_RE.test(text) && + !DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE.test(text) && isTransientProviderValidationFailure(result) && - RATE_LIMIT_VALIDATION_RE.test(resultText(result)) && + RATE_LIMIT_VALIDATION_RE.test(text) && !hasLegacyCompatibleEndpointEvidence(result, requests) ); } @@ -761,7 +770,7 @@ async function assertOpenClawAgentTurn( "--session-id", sessionId, "-m", - "Reply with only: PONG", + COMPAT_AGENT_PROMPT, ], { artifactName: "openclaw-agent-compatible-endpoint", @@ -777,7 +786,7 @@ async function assertOpenClawAgentTurn( text.slice(0, 500), ).toBe(false); expect(agent.exitCode, text.slice(0, 500)).toBe(0); - expect(parseOpenClawAgentText(agent.stdout), text.slice(0, 500)).toMatch(/PONG/i); + expect(parseOpenClawAgentText(agent.stdout), text.slice(0, 500)).toContain(COMPAT_AGENT_REPLY); const newHopHeaderLogs = compatibleMock.hopHeaderLogs.slice(hopCountBefore); expect( @@ -805,7 +814,14 @@ describe("messaging-compatible-endpoint live test local classifiers", () => { shouldSkipPreContractProviderRateLimit( output("NVIDIA Endpoints endpoint validation failed.\nRequest rate limit exceeded"), ), - ).toBe(true); + ).toBe(false); + expect( + shouldSkipPreContractProviderRateLimit( + output( + "NVIDIA Endpoints endpoint validation failed.\nChat Completions API validation returned HTTP 429", + ), + ), + ).toBe(false); expect( shouldSkipPreContractProviderRateLimit( output("Other OpenAI-compatible endpoint endpoint validation failed: invalid credential"), @@ -829,6 +845,16 @@ describe("messaging-compatible-endpoint live test local classifiers", () => { ]), ).toBe(false); }); + + it("does not satisfy the agent reply assertion with echoed prompt text", () => { + expect(COMPAT_AGENT_PROMPT).not.toContain(COMPAT_AGENT_REPLY); + expect( + parseOpenClawAgentText(JSON.stringify({ result: { content: COMPAT_AGENT_PROMPT } })), + ).not.toContain(COMPAT_AGENT_REPLY); + expect( + parseOpenClawAgentText(JSON.stringify({ result: { content: COMPAT_AGENT_REPLY } })), + ).toContain(COMPAT_AGENT_REPLY); + }); }); liveTest( @@ -854,6 +880,10 @@ liveTest( runner: "vitest", boundary: "direct-cli-onboard-openshell-compatible-endpoint", legacySource: "test/e2e/test-messaging-compatible-endpoint.sh", + legacyRetirement: { + shellDeletion: "deferred to #5098 Phase 11 cleanup", + nightlyShellWiring: "deferred to #5098 Phase 11 cleanup", + }, refs: ["#2766", "#2572", "#5098"], contract: [ "local OpenAI-compatible mock endpoint starts and is reachable", @@ -904,6 +934,8 @@ liveTest( onboardArtifacts: onboard.artifacts, mockRequestsBeforeSkip: compatibleMock.requests.length, sourceBoundary: "external provider endpoint validation outside the repo", + sourceFixConstraint: + "skip is limited to compatible/custom endpoint validation evidence; NVIDIA/default provider validation remains a test failure", removalCondition: "remove once CI endpoint validation is stable for a release cycle or covered by a hermetic provider-validation fixture", }); @@ -951,6 +983,10 @@ liveTest( id: "messaging-compatible-endpoint", runner, endpointUrl, + legacyRetirement: { + shellDeletion: "deferred to #5098 Phase 11 cleanup", + nightlyShellWiring: "deferred to #5098 Phase 11 cleanup", + }, assertions: { dockerRunning: docker.exitCode === 0, mockReachable: hostReachability.exitCode === 0, diff --git a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts index fb62c0c080..8f4ce78d2f 100644 --- a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts +++ b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts @@ -707,6 +707,41 @@ jobs: } }); + it("rejects Docker Hub auth in the messaging-compatible-endpoint job", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-vitest-workflow-")); + const workflowPath = path.join(tmp, "workflow.yaml"); + const workflow = readWorkflow() as { + jobs: Record> }>; + }; + const steps = workflow.jobs["messaging-compatible-endpoint-vitest"]?.steps; + expect(steps).toEqual(expect.any(Array)); + const setupNodeIndex = steps.findIndex((step) => step.name === "Set up Node"); + expect(setupNodeIndex).toBeGreaterThan(0); + steps.splice(setupNodeIndex, 0, { + name: "Authenticate to Docker Hub", + env: { + DOCKERHUB_USERNAME: "${{ secrets.DOCKERHUB_USERNAME }}", + DOCKERHUB_TOKEN: "${{ secrets.DOCKERHUB_TOKEN }}", + }, + run: "docker login docker.io --username user --password ${{ secrets.DOCKERHUB_TOKEN }}", + }); + fs.writeFileSync(workflowPath, YAML.stringify(workflow)); + + try { + const errors = validateE2eVitestScenariosWorkflowBoundary(workflowPath); + expect(errors).toEqual( + expect.arrayContaining([ + "messaging-compatible-endpoint-vitest must not authenticate to Docker Hub before branch-controlled test code runs", + "messaging-compatible-endpoint-vitest step 'Authenticate to Docker Hub' env must not include DOCKERHUB_USERNAME", + "messaging-compatible-endpoint-vitest step 'Authenticate to Docker Hub' env must not include DOCKERHUB_TOKEN", + "messaging-compatible-endpoint-vitest step 'Authenticate to Docker Hub' run script must not use docker login or inline secret interpolation", + ]), + ); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + it("rejects Docker Hub auth and inline secrets in runtime-overrides run steps", () => { const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-vitest-workflow-")); const workflowPath = path.join(tmp, "workflow.yaml"); diff --git a/tools/e2e-scenarios/workflow-boundary.mts b/tools/e2e-scenarios/workflow-boundary.mts index 0b799260e8..a149b807b8 100644 --- a/tools/e2e-scenarios/workflow-boundary.mts +++ b/tools/e2e-scenarios/workflow-boundary.mts @@ -1214,18 +1214,53 @@ function validateMessagingCompatibleEndpointVitestJob( jobEnv, "NVIDIA_API_KEY", ); + requireEnvDoesNotExposeSecret( + errors, + "messaging-compatible-endpoint-vitest job", + jobEnv, + "DOCKERHUB_USERNAME", + ); + requireEnvDoesNotExposeSecret( + errors, + "messaging-compatible-endpoint-vitest job", + jobEnv, + "DOCKERHUB_TOKEN", + ); const steps = asSteps(job.steps); requireNoDispatchInputInterpolation(errors, steps); for (const step of steps) { - if (step.name !== "Run messaging compatible endpoint live test") { - requireEnvDoesNotExposeSecret( - errors, - `messaging-compatible-endpoint-vitest step '${step.name ?? step.uses ?? ""}'`, - asRecord(step.env), - "NVIDIA_API_KEY", - ); - } + const stepName = step.name ?? step.uses ?? ""; + const stepEnv = asRecord(step.env); + requireEnvDoesNotExposeSecret( + errors, + `messaging-compatible-endpoint-vitest step '${stepName}'`, + stepEnv, + "NVIDIA_API_KEY", + ); + requireEnvDoesNotExposeSecret( + errors, + `messaging-compatible-endpoint-vitest step '${stepName}'`, + stepEnv, + "DOCKERHUB_USERNAME", + ); + requireEnvDoesNotExposeSecret( + errors, + `messaging-compatible-endpoint-vitest step '${stepName}'`, + stepEnv, + "DOCKERHUB_TOKEN", + ); + requireNoDockerHubAuthInRun( + errors, + `messaging-compatible-endpoint-vitest step '${stepName}'`, + stringValue(step.run), + ); + } + + if (namedStep(steps, "Authenticate to Docker Hub")) { + errors.push( + "messaging-compatible-endpoint-vitest must not authenticate to Docker Hub before branch-controlled test code runs", + ); } const checkout = steps.find((step) => stringValue(step.uses).startsWith("actions/checkout@")); @@ -1235,20 +1270,6 @@ function validateMessagingCompatibleEndpointVitestJob( errors.push("messaging-compatible-endpoint-vitest checkout step must set persist-credentials=false"); } - const dockerHubAuth = requireJobStep(errors, jobName, steps, "Authenticate to Docker Hub"); - const dockerHubEnv = asRecord(dockerHubAuth?.env); - if (dockerHubEnv.DOCKERHUB_USERNAME !== "${{ secrets.DOCKERHUB_USERNAME }}") { - errors.push( - "messaging-compatible-endpoint-vitest Docker Hub auth must receive DOCKERHUB_USERNAME from secrets", - ); - } - if (dockerHubEnv.DOCKERHUB_TOKEN !== "${{ secrets.DOCKERHUB_TOKEN }}") { - errors.push( - "messaging-compatible-endpoint-vitest Docker Hub auth must receive DOCKERHUB_TOKEN from secrets", - ); - } - requireRunContains(errors, dockerHubAuth, "docker login docker.io"); - const setupNode = namedStep(steps, "Set up Node"); if (!setupNode) errors.push("messaging-compatible-endpoint-vitest job missing step: Set up Node"); requireFullShaAction(errors, setupNode, "messaging-compatible-endpoint-vitest setup-node"); From 825a2af5dbeab35a95f56436dc64cf814e13b0fb Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 12:40:29 -0700 Subject: [PATCH 03/14] test(e2e): harden messaging compatible endpoint checks Signed-off-by: Carlos Villela --- .../messaging-compatible-endpoint.test.ts | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts index 2c2a5903fb..9c009e0a5a 100644 --- a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts +++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts @@ -61,6 +61,11 @@ const COMPAT_AGENT_REPLY = "COMPAT_MOCK_ROUTE_5098_OK"; const COMPAT_AGENT_PROMPT = "Call the configured model and report the compatible endpoint route token."; +function nodeEvalArg(source: string): string { + const encoded = Buffer.from(source, "utf8").toString("base64"); + return `eval(Buffer.from(${JSON.stringify(encoded)}, "base64").toString("utf8"))`; +} + interface MockRequestLog { method: string; path: string; @@ -580,11 +585,15 @@ console.log(JSON.stringify({ })); process.exit(errors.length ? 1 : 0); `; - const result = await sandbox.exec(SANDBOX_NAME, ["node", "-e", script, COMPAT_MODEL], { - artifactName: "openclaw-config-compatible-endpoint", - env: commandEnv(), - timeoutMs: 60_000, - }); + const result = await sandbox.exec( + SANDBOX_NAME, + ["node", "-e", nodeEvalArg(script), COMPAT_MODEL], + { + artifactName: "openclaw-config-compatible-endpoint", + env: commandEnv(), + timeoutMs: 60_000, + }, + ); expect(result.exitCode, resultText(result)).toBe(0); } @@ -606,7 +615,7 @@ sock.setTimeout(1000, () => finish("TIMEOUT", 1)); `; let last: ShellProbeResult | undefined; for (let attempt = 1; attempt <= 30; attempt += 1) { - last = await sandbox.exec(SANDBOX_NAME, ["node", "-e", script], { + last = await sandbox.exec(SANDBOX_NAME, ["node", "-e", nodeEvalArg(script)], { artifactName: `gateway-ready-compatible-endpoint-${attempt}`, env: commandEnv(), timeoutMs: 5_000, From 9f0d4686db85e7696365add840fc3592bd2d446f Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 12:42:04 -0700 Subject: [PATCH 04/14] test(e2e): tighten messaging endpoint assertions Signed-off-by: Carlos Villela --- .../live/messaging-compatible-endpoint.test.ts | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts index 9c009e0a5a..2fc0e0dcfe 100644 --- a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts +++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts @@ -56,7 +56,7 @@ const RATE_LIMIT_VALIDATION_RE = /HTTP\s+429|returned\s+HTTP\s+429|\b429\b|too many requests|rate[- ]?limit|quota/i; const DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE = /NVIDIA Endpoints endpoint validation failed/i; const COMPATIBLE_ENDPOINT_VALIDATION_RE = - /Other OpenAI-compatible endpoint endpoint validation failed|Chat Completions API validation/i; + /Other OpenAI-compatible endpoint endpoint validation failed/i; const COMPAT_AGENT_REPLY = "COMPAT_MOCK_ROUTE_5098_OK"; const COMPAT_AGENT_PROMPT = "Call the configured model and report the compatible endpoint route token."; @@ -631,7 +631,9 @@ sock.setTimeout(1000, () => finish("TIMEOUT", 1)); async function assertSandboxInference(sandbox: SandboxClient): Promise { const payload = JSON.stringify({ model: COMPAT_MODEL, - messages: [{ role: "user", content: "Reply with exactly: PONG" }], + messages: [ + { role: "user", content: "Return the compatible endpoint route verification value." }, + ], max_tokens: 32, }); const response = await sandbox.exec( @@ -654,7 +656,9 @@ async function assertSandboxInference(sandbox: SandboxClient): Promise { }, ); expect(response.exitCode, resultText(response)).toBe(0); - expect(openAiContent(response.stdout), response.stdout.slice(0, 500)).toMatch(/PONG/i); + expect(openAiContent(response.stdout), response.stdout.slice(0, 500)).toContain( + COMPAT_AGENT_REPLY, + ); } function findJsonObjectEnd(raw: string, start: number): number | null { @@ -831,6 +835,11 @@ describe("messaging-compatible-endpoint live test local classifiers", () => { ), ), ).toBe(false); + expect( + shouldSkipPreContractProviderRateLimit( + output("Chat Completions API validation returned HTTP 429"), + ), + ).toBe(false); expect( shouldSkipPreContractProviderRateLimit( output("Other OpenAI-compatible endpoint endpoint validation failed: invalid credential"), From c316b752d20cfd3a42f05b16c2ee263c5724126b Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 12:54:01 -0700 Subject: [PATCH 05/14] test(e2e): prove messaging endpoint rate-limit source Signed-off-by: Carlos Villela --- .../messaging-compatible-endpoint.test.ts | 75 ++++++++++++++++--- 1 file changed, 66 insertions(+), 9 deletions(-) diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts index 2fc0e0dcfe..535eb73241 100644 --- a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts +++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts @@ -426,27 +426,45 @@ async function cleanupMessagingState(host: HostCliClient, sandboxName: string): await stopGatewayRuntime(host, "cleanup-openshell-gateway-runtime-nemoclaw"); } -function hasLegacyCompatibleEndpointEvidence( +function hasLegacyCompatibleEndpointSmokeEvidence( result: Pick, +): boolean { + return resultText(result).includes("Compatible endpoint responds through inference.local"); +} + +function hasCompatibleMockValidationEvidence( requests: readonly MockRequestLog[], + requestCountBeforeOnboard: number, ): boolean { - return ( - resultText(result).includes("Compatible endpoint responds through inference.local") || - requests.some((request) => request.path === "/v1/chat/completions" && request.auth === "ok") - ); + return requests + .slice(requestCountBeforeOnboard) + .some( + (request) => + request.auth === "ok" && + [ + "/v1/models", + "/models", + "/v1/responses", + "/responses", + "/v1/chat/completions", + "/chat/completions", + ].includes(request.path), + ); } function shouldSkipPreContractProviderRateLimit( result: Pick, requests: readonly MockRequestLog[] = [], + requestCountBeforeOnboard = 0, ): boolean { const text = resultText(result); return ( COMPATIBLE_ENDPOINT_VALIDATION_RE.test(text) && !DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE.test(text) && + hasCompatibleMockValidationEvidence(requests, requestCountBeforeOnboard) && isTransientProviderValidationFailure(result) && RATE_LIMIT_VALIDATION_RE.test(text) && - !hasLegacyCompatibleEndpointEvidence(result, requests) + !hasLegacyCompatibleEndpointSmokeEvidence(result) ); } @@ -822,7 +840,28 @@ describe("messaging-compatible-endpoint live test local classifiers", () => { "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429", ), ), + ).toBe(false); + expect( + shouldSkipPreContractProviderRateLimit( + output( + "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429", + ), + [ + { auth: "missing", hopHeaders: [], method: "GET", path: "/v1/models" }, + { auth: "ok", hopHeaders: [], method: "POST", path: "/v1/chat/completions" }, + ], + 1, + ), ).toBe(true); + expect( + shouldSkipPreContractProviderRateLimit( + output( + "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429", + ), + [{ auth: "ok", hopHeaders: [], method: "POST", path: "/v1/chat/completions" }], + 1, + ), + ).toBe(false); expect( shouldSkipPreContractProviderRateLimit( output("NVIDIA Endpoints endpoint validation failed.\nRequest rate limit exceeded"), @@ -862,6 +901,14 @@ describe("messaging-compatible-endpoint live test local classifiers", () => { }, ]), ).toBe(false); + expect( + shouldSkipPreContractProviderRateLimit( + output( + "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429\n✓ Compatible endpoint responds through inference.local inside the sandbox", + ), + [{ auth: "ok", hopHeaders: [], method: "POST", path: "/v1/chat/completions" }], + ), + ).toBe(false); }); it("does not satisfy the agent reply assertion with echoed prompt text", () => { @@ -937,10 +984,15 @@ liveTest( }); expect(hostReachability.exitCode, resultText(hostReachability)).toBe(0); + const mockRequestCountBeforeOnboard = compatibleMock.requests.length; const { result: onboard, runner } = await runCompatibleOnboard(host, endpointUrl); if ( onboard.exitCode !== 0 && - shouldSkipPreContractProviderRateLimit(onboard, compatibleMock.requests) + shouldSkipPreContractProviderRateLimit( + onboard, + compatibleMock.requests, + mockRequestCountBeforeOnboard, + ) ) { await artifacts.writeJson("scenario-result.json", { id: "messaging-compatible-endpoint", @@ -950,10 +1002,15 @@ liveTest( onboardExitCode: onboard.exitCode, onboardTimedOut: onboard.timedOut, onboardArtifacts: onboard.artifacts, + mockRequestsBeforeOnboard: mockRequestCountBeforeOnboard, mockRequestsBeforeSkip: compatibleMock.requests.length, - sourceBoundary: "external provider endpoint validation outside the repo", + compatibleMockValidationRequests: compatibleMock.requests.slice( + mockRequestCountBeforeOnboard, + ), + sourceBoundary: + "onboard reached the configured compatible mock before an external provider rate limit stopped the pre-contract validation path", sourceFixConstraint: - "skip is limited to compatible/custom endpoint validation evidence; NVIDIA/default provider validation remains a test failure", + "skip requires compatible/custom endpoint validation text plus new authenticated mock traffic; NVIDIA/default provider validation or zero-mock-traffic 429 remains a test failure", removalCondition: "remove once CI endpoint validation is stable for a release cycle or covered by a hermetic provider-validation fixture", }); From 03fdfc01753aae1758355c6d8ba7ef6ace71ccfd Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 13:05:40 -0700 Subject: [PATCH 06/14] test(e2e): narrow messaging endpoint rate-limit skip Signed-off-by: Carlos Villela --- .../messaging-compatible-endpoint.test.ts | 94 ++++++------------- 1 file changed, 31 insertions(+), 63 deletions(-) diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts index 535eb73241..0669731fbb 100644 --- a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts +++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts @@ -55,8 +55,6 @@ const HOP_BY_HOP_HEADERS = new Set([ const RATE_LIMIT_VALIDATION_RE = /HTTP\s+429|returned\s+HTTP\s+429|\b429\b|too many requests|rate[- ]?limit|quota/i; const DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE = /NVIDIA Endpoints endpoint validation failed/i; -const COMPATIBLE_ENDPOINT_VALIDATION_RE = - /Other OpenAI-compatible endpoint endpoint validation failed/i; const COMPAT_AGENT_REPLY = "COMPAT_MOCK_ROUTE_5098_OK"; const COMPAT_AGENT_PROMPT = "Call the configured model and report the compatible endpoint route token."; @@ -432,36 +430,15 @@ function hasLegacyCompatibleEndpointSmokeEvidence( return resultText(result).includes("Compatible endpoint responds through inference.local"); } -function hasCompatibleMockValidationEvidence( - requests: readonly MockRequestLog[], - requestCountBeforeOnboard: number, -): boolean { - return requests - .slice(requestCountBeforeOnboard) - .some( - (request) => - request.auth === "ok" && - [ - "/v1/models", - "/models", - "/v1/responses", - "/responses", - "/v1/chat/completions", - "/chat/completions", - ].includes(request.path), - ); -} - function shouldSkipPreContractProviderRateLimit( result: Pick, - requests: readonly MockRequestLog[] = [], - requestCountBeforeOnboard = 0, + options: { githubActions?: boolean } = {}, ): boolean { const text = resultText(result); + const runningInActions = options.githubActions ?? process.env.GITHUB_ACTIONS === "true"; return ( - COMPATIBLE_ENDPOINT_VALIDATION_RE.test(text) && - !DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE.test(text) && - hasCompatibleMockValidationEvidence(requests, requestCountBeforeOnboard) && + runningInActions && + DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE.test(text) && isTransientProviderValidationFailure(result) && RATE_LIMIT_VALIDATION_RE.test(text) && !hasLegacyCompatibleEndpointSmokeEvidence(result) @@ -839,6 +816,7 @@ describe("messaging-compatible-endpoint live test local classifiers", () => { output( "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429", ), + { githubActions: true }, ), ).toBe(false); expect( @@ -846,42 +824,41 @@ describe("messaging-compatible-endpoint live test local classifiers", () => { output( "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429", ), - [ - { auth: "missing", hopHeaders: [], method: "GET", path: "/v1/models" }, - { auth: "ok", hopHeaders: [], method: "POST", path: "/v1/chat/completions" }, - ], - 1, + { githubActions: false }, ), - ).toBe(true); + ).toBe(false); expect( shouldSkipPreContractProviderRateLimit( output( "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429", ), - [{ auth: "ok", hopHeaders: [], method: "POST", path: "/v1/chat/completions" }], - 1, + { githubActions: true }, ), ).toBe(false); expect( shouldSkipPreContractProviderRateLimit( output("NVIDIA Endpoints endpoint validation failed.\nRequest rate limit exceeded"), + { githubActions: true }, ), - ).toBe(false); + ).toBe(true); expect( shouldSkipPreContractProviderRateLimit( output( "NVIDIA Endpoints endpoint validation failed.\nChat Completions API validation returned HTTP 429", ), + { githubActions: false }, ), ).toBe(false); expect( shouldSkipPreContractProviderRateLimit( output("Chat Completions API validation returned HTTP 429"), + { githubActions: true }, ), ).toBe(false); expect( shouldSkipPreContractProviderRateLimit( output("Other OpenAI-compatible endpoint endpoint validation failed: invalid credential"), + { githubActions: true }, ), ).toBe(false); expect( @@ -889,24 +866,28 @@ describe("messaging-compatible-endpoint live test local classifiers", () => { output( "Chat Completions API validation returned HTTP 429\n✓ Compatible endpoint responds through inference.local inside the sandbox", ), + { githubActions: true }, ), ).toBe(false); expect( - shouldSkipPreContractProviderRateLimit(output("endpoint validation failed: HTTP 429"), [ - { - auth: "ok", - hopHeaders: [], - method: "POST", - path: "/v1/chat/completions", - }, - ]), + shouldSkipPreContractProviderRateLimit(output("endpoint validation failed: HTTP 429"), { + githubActions: true, + }), ).toBe(false); expect( shouldSkipPreContractProviderRateLimit( output( "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429\n✓ Compatible endpoint responds through inference.local inside the sandbox", ), - [{ auth: "ok", hopHeaders: [], method: "POST", path: "/v1/chat/completions" }], + { githubActions: true }, + ), + ).toBe(false); + expect( + shouldSkipPreContractProviderRateLimit( + output( + "NVIDIA Endpoints endpoint validation failed.\nChat Completions API validation returned HTTP 429\n✓ Compatible endpoint responds through inference.local inside the sandbox", + ), + { githubActions: true }, ), ).toBe(false); }); @@ -984,38 +965,25 @@ liveTest( }); expect(hostReachability.exitCode, resultText(hostReachability)).toBe(0); - const mockRequestCountBeforeOnboard = compatibleMock.requests.length; const { result: onboard, runner } = await runCompatibleOnboard(host, endpointUrl); - if ( - onboard.exitCode !== 0 && - shouldSkipPreContractProviderRateLimit( - onboard, - compatibleMock.requests, - mockRequestCountBeforeOnboard, - ) - ) { + if (onboard.exitCode !== 0 && shouldSkipPreContractProviderRateLimit(onboard)) { await artifacts.writeJson("scenario-result.json", { id: "messaging-compatible-endpoint", status: "skipped", - reason: "external-provider-rate-limit-before-legacy-contract", + reason: "external-nvidia-provider-rate-limit-before-legacy-contract", runner, onboardExitCode: onboard.exitCode, onboardTimedOut: onboard.timedOut, onboardArtifacts: onboard.artifacts, - mockRequestsBeforeOnboard: mockRequestCountBeforeOnboard, mockRequestsBeforeSkip: compatibleMock.requests.length, - compatibleMockValidationRequests: compatibleMock.requests.slice( - mockRequestCountBeforeOnboard, - ), - sourceBoundary: - "onboard reached the configured compatible mock before an external provider rate limit stopped the pre-contract validation path", + sourceBoundary: "external NVIDIA Endpoints provider availability", sourceFixConstraint: - "skip requires compatible/custom endpoint validation text plus new authenticated mock traffic; NVIDIA/default provider validation or zero-mock-traffic 429 remains a test failure", + "skip is limited to explicit NVIDIA Endpoints validation in GitHub Actions; compatible endpoint validation failures against the local mock remain test failures", removalCondition: "remove once CI endpoint validation is stable for a release cycle or covered by a hermetic provider-validation fixture", }); skip( - "External endpoint validation was rate-limited before the messaging-compatible endpoint contract could run", + "NVIDIA Endpoints validation was rate-limited before the messaging-compatible endpoint contract could run", ); } expect(onboard.exitCode, resultText(onboard)).toBe(0); From 2956eb551da644ee21d3c532b3735b4b1d830040 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 13:14:08 -0700 Subject: [PATCH 07/14] test(e2e): fail closed for messaging endpoint migration Signed-off-by: Carlos Villela --- .../messaging-compatible-endpoint.test.ts | 131 ------------------ 1 file changed, 131 deletions(-) diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts index 0669731fbb..b3c244f0fe 100644 --- a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts +++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts @@ -24,7 +24,6 @@ import { type SandboxClient, validateSandboxName } from "../fixtures/clients/san import { expect, test } from "../fixtures/e2e-test.ts"; import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts"; import type { ShellProbeResult } from "../fixtures/shell-probe.ts"; -import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts"; const REPO_ROOT = path.resolve(import.meta.dirname, "../../.."); const CLI_ENTRYPOINT = path.join(REPO_ROOT, "bin", "nemoclaw.js"); @@ -52,9 +51,6 @@ const HOP_BY_HOP_HEADERS = new Set([ "transfer-encoding", "upgrade", ]); -const RATE_LIMIT_VALIDATION_RE = - /HTTP\s+429|returned\s+HTTP\s+429|\b429\b|too many requests|rate[- ]?limit|quota/i; -const DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE = /NVIDIA Endpoints endpoint validation failed/i; const COMPAT_AGENT_REPLY = "COMPAT_MOCK_ROUTE_5098_OK"; const COMPAT_AGENT_PROMPT = "Call the configured model and report the compatible endpoint route token."; @@ -424,27 +420,6 @@ async function cleanupMessagingState(host: HostCliClient, sandboxName: string): await stopGatewayRuntime(host, "cleanup-openshell-gateway-runtime-nemoclaw"); } -function hasLegacyCompatibleEndpointSmokeEvidence( - result: Pick, -): boolean { - return resultText(result).includes("Compatible endpoint responds through inference.local"); -} - -function shouldSkipPreContractProviderRateLimit( - result: Pick, - options: { githubActions?: boolean } = {}, -): boolean { - const text = resultText(result); - const runningInActions = options.githubActions ?? process.env.GITHUB_ACTIONS === "true"; - return ( - runningInActions && - DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE.test(text) && - isTransientProviderValidationFailure(result) && - RATE_LIMIT_VALIDATION_RE.test(text) && - !hasLegacyCompatibleEndpointSmokeEvidence(result) - ); -} - function onboardEnv(endpointUrl: string): NodeJS.ProcessEnv { return commandEnv({ COMPATIBLE_API_KEY: COMPATIBLE_KEY, @@ -806,92 +781,6 @@ async function assertOpenClawAgentTurn( } describe("messaging-compatible-endpoint live test local classifiers", () => { - function output(text: string): Pick { - return { stdout: "", stderr: text }; - } - - it("skips only rate-limited endpoint validation before legacy evidence exists", () => { - expect( - shouldSkipPreContractProviderRateLimit( - output( - "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429", - ), - { githubActions: true }, - ), - ).toBe(false); - expect( - shouldSkipPreContractProviderRateLimit( - output( - "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429", - ), - { githubActions: false }, - ), - ).toBe(false); - expect( - shouldSkipPreContractProviderRateLimit( - output( - "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429", - ), - { githubActions: true }, - ), - ).toBe(false); - expect( - shouldSkipPreContractProviderRateLimit( - output("NVIDIA Endpoints endpoint validation failed.\nRequest rate limit exceeded"), - { githubActions: true }, - ), - ).toBe(true); - expect( - shouldSkipPreContractProviderRateLimit( - output( - "NVIDIA Endpoints endpoint validation failed.\nChat Completions API validation returned HTTP 429", - ), - { githubActions: false }, - ), - ).toBe(false); - expect( - shouldSkipPreContractProviderRateLimit( - output("Chat Completions API validation returned HTTP 429"), - { githubActions: true }, - ), - ).toBe(false); - expect( - shouldSkipPreContractProviderRateLimit( - output("Other OpenAI-compatible endpoint endpoint validation failed: invalid credential"), - { githubActions: true }, - ), - ).toBe(false); - expect( - shouldSkipPreContractProviderRateLimit( - output( - "Chat Completions API validation returned HTTP 429\n✓ Compatible endpoint responds through inference.local inside the sandbox", - ), - { githubActions: true }, - ), - ).toBe(false); - expect( - shouldSkipPreContractProviderRateLimit(output("endpoint validation failed: HTTP 429"), { - githubActions: true, - }), - ).toBe(false); - expect( - shouldSkipPreContractProviderRateLimit( - output( - "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429\n✓ Compatible endpoint responds through inference.local inside the sandbox", - ), - { githubActions: true }, - ), - ).toBe(false); - expect( - shouldSkipPreContractProviderRateLimit( - output( - "NVIDIA Endpoints endpoint validation failed.\nChat Completions API validation returned HTTP 429\n✓ Compatible endpoint responds through inference.local inside the sandbox", - ), - { githubActions: true }, - ), - ).toBe(false); - }); - it("does not satisfy the agent reply assertion with echoed prompt text", () => { expect(COMPAT_AGENT_PROMPT).not.toContain(COMPAT_AGENT_REPLY); expect( @@ -966,26 +855,6 @@ liveTest( expect(hostReachability.exitCode, resultText(hostReachability)).toBe(0); const { result: onboard, runner } = await runCompatibleOnboard(host, endpointUrl); - if (onboard.exitCode !== 0 && shouldSkipPreContractProviderRateLimit(onboard)) { - await artifacts.writeJson("scenario-result.json", { - id: "messaging-compatible-endpoint", - status: "skipped", - reason: "external-nvidia-provider-rate-limit-before-legacy-contract", - runner, - onboardExitCode: onboard.exitCode, - onboardTimedOut: onboard.timedOut, - onboardArtifacts: onboard.artifacts, - mockRequestsBeforeSkip: compatibleMock.requests.length, - sourceBoundary: "external NVIDIA Endpoints provider availability", - sourceFixConstraint: - "skip is limited to explicit NVIDIA Endpoints validation in GitHub Actions; compatible endpoint validation failures against the local mock remain test failures", - removalCondition: - "remove once CI endpoint validation is stable for a release cycle or covered by a hermetic provider-validation fixture", - }); - skip( - "NVIDIA Endpoints validation was rate-limited before the messaging-compatible endpoint contract could run", - ); - } expect(onboard.exitCode, resultText(onboard)).toBe(0); expect(resultText(onboard)).toContain("Compatible endpoint responds through inference.local"); From 20475f1bb3aadb556ce06ead0a025ba33df59455 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 13:24:37 -0700 Subject: [PATCH 08/14] test(e2e): guard transient provider classifier Signed-off-by: Carlos Villela --- .../live/network-policy-transient-provider.ts | 8 +++++++- .../network-policy-transient-provider.test.ts | 20 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/test/e2e-scenario/live/network-policy-transient-provider.ts b/test/e2e-scenario/live/network-policy-transient-provider.ts index 224b1a73c8..706fafa085 100644 --- a/test/e2e-scenario/live/network-policy-transient-provider.ts +++ b/test/e2e-scenario/live/network-policy-transient-provider.ts @@ -7,6 +7,8 @@ const TRANSIENT_PROVIDER_VALIDATION_RE = /endpoint validation failed|failed to verify inference endpoint|Chat Completions API validation/i; const TRANSIENT_PROVIDER_DETAIL_RE = /timed? out|timeout|curl failed \(exit (7|28|35|52|56)\)|ETIMEDOUT|ECONNRESET|EAI_AGAIN|ENOTFOUND|failed to connect|error sending request|HTTP (429|502|503|504)|returned HTTP (429|502|503|504)|too many requests|rate[- ]?limit|quota|temporar/i; +const LOCAL_VALIDATION_FAILURE_RE = + /invalid .*credential|invalid .*api[_ -]?key|authorization failed|authentication failed|denied by network policy|policy .*failed|routing .*failed|route .*failed|proxy .*failed|hop-by-hop|header stripping/i; function resultText(result: Pick): string { return [result.stdout, result.stderr].filter(Boolean).join("\n"); @@ -16,5 +18,9 @@ export function isTransientProviderValidationFailure( result: Pick, ): boolean { const output = resultText(result); - return TRANSIENT_PROVIDER_VALIDATION_RE.test(output) && TRANSIENT_PROVIDER_DETAIL_RE.test(output); + return ( + TRANSIENT_PROVIDER_VALIDATION_RE.test(output) && + TRANSIENT_PROVIDER_DETAIL_RE.test(output) && + !LOCAL_VALIDATION_FAILURE_RE.test(output) + ); } diff --git a/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts b/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts index 551ea0a85e..1066725cdd 100644 --- a/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts +++ b/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts @@ -31,6 +31,26 @@ describe("network-policy transient provider validation classifier", () => { probeOutput("endpoint validation failed: invalid NVIDIA_API_KEY credential"), ), ).toBe(false); + expect( + isTransientProviderValidationFailure( + probeOutput("endpoint validation failed: invalid NVIDIA_API_KEY credential quota exceeded"), + ), + ).toBe(false); + expect( + isTransientProviderValidationFailure( + probeOutput("endpoint validation failed: denied by network policy rate-limit preset"), + ), + ).toBe(false); + expect( + isTransientProviderValidationFailure( + probeOutput("endpoint validation failed: routing failed before rate limit check"), + ), + ).toBe(false); + expect( + isTransientProviderValidationFailure( + probeOutput("endpoint validation failed: proxy header stripping quota marker failed"), + ), + ).toBe(false); expect( isTransientProviderValidationFailure( probeOutput("policy update failed: denied by network policy"), From c9342355499c6a6ec20e0109bc917b80efd403c4 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 13:34:32 -0700 Subject: [PATCH 09/14] test(e2e): avoid newline args in network policy probes Signed-off-by: Carlos Villela --- test/e2e-scenario/live/network-policy.test.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/test/e2e-scenario/live/network-policy.test.ts b/test/e2e-scenario/live/network-policy.test.ts index f968bd0bda..698152b12a 100644 --- a/test/e2e-scenario/live/network-policy.test.ts +++ b/test/e2e-scenario/live/network-policy.test.ts @@ -17,13 +17,13 @@ import path from "node:path"; import { isPrivateIp } from "../../../nemoclaw/src/blueprint/private-networks.ts"; import type { ArtifactSink } from "../fixtures/artifacts.ts"; -import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts"; import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts"; import type { HostCliClient } from "../fixtures/clients/host.ts"; import { type SandboxClient, trustedSandboxShellScript } from "../fixtures/clients/sandbox.ts"; import { expect, test } from "../fixtures/e2e-test.ts"; import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts"; import type { ShellProbeResult } from "../fixtures/shell-probe.ts"; +import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts"; const REPO_ROOT = path.resolve(import.meta.dirname, "../../.."); const CLI_ENTRYPOINT = path.join(REPO_ROOT, "bin", "nemoclaw.js"); @@ -64,6 +64,11 @@ function sleep(ms: number): Promise { return new Promise((resolve) => setTimeout(resolve, ms)); } +function shellEvalArg(script: string): string { + const encoded = Buffer.from(script, "utf8").toString("base64"); + return `eval "$(printf %s ${encoded} | base64 -d)"`; +} + async function runNemoclaw( host: HostCliClient, args: string[], @@ -82,7 +87,7 @@ async function sandboxBash( script: string, options: { artifactName: string; timeoutMs?: number } = { artifactName: "sandbox-bash" }, ): Promise { - return sandbox.execShell(SANDBOX_NAME, trustedSandboxShellScript(script), { + return sandbox.execShell(SANDBOX_NAME, trustedSandboxShellScript(shellEvalArg(script)), { artifactName: options.artifactName, env: baseEnv(), timeoutMs: options.timeoutMs ?? SANDBOX_EXEC_TIMEOUT_MS, From 86b87a7e239e2bbd2c71138065ad6fc93856546a Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 13:46:54 -0700 Subject: [PATCH 10/14] test(e2e): accept blocked slack fetch errors Signed-off-by: Carlos Villela --- test/e2e-scenario/live/network-policy.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e-scenario/live/network-policy.test.ts b/test/e2e-scenario/live/network-policy.test.ts index 698152b12a..7897c14302 100644 --- a/test/e2e-scenario/live/network-policy.test.ts +++ b/test/e2e-scenario/live/network-policy.test.ts @@ -560,7 +560,7 @@ hello ).resolves.toBe("403"); const slackBefore = await fetchStatus(sandbox, "https://slack.com/", "tc-net-03-slack-before"); - expect(slackBefore).toMatch(/STATUS_403/); + expect(slackBefore).toMatch(/STATUS_403|ERROR_/); const slackApply = await applyPresetInteractively(host, "slack"); expect(slackApply.exitCode, text(slackApply)).toBe(0); const slackAfter = await fetchStatus(sandbox, "https://slack.com/", "tc-net-03-slack-after"); From bc3b9dad58ac0cfaa06e9934be46dca3e5858371 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 13:56:05 -0700 Subject: [PATCH 11/14] test(e2e): preserve network policy shell quoting Signed-off-by: Carlos Villela --- test/e2e-scenario/live/network-policy.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e-scenario/live/network-policy.test.ts b/test/e2e-scenario/live/network-policy.test.ts index 7897c14302..eb6000b5ce 100644 --- a/test/e2e-scenario/live/network-policy.test.ts +++ b/test/e2e-scenario/live/network-policy.test.ts @@ -66,7 +66,7 @@ function sleep(ms: number): Promise { function shellEvalArg(script: string): string { const encoded = Buffer.from(script, "utf8").toString("base64"); - return `eval "$(printf %s ${encoded} | base64 -d)"`; + return `printf %s ${encoded} | base64 -d | sh`; } async function runNemoclaw( From c97fd9d736a6d2009cf259a2806bc6a15e884aa2 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 14:03:24 -0700 Subject: [PATCH 12/14] test(e2e): cover messaging endpoint helpers Signed-off-by: Carlos Villela --- .../messaging-compatible-endpoint-helpers.ts | 195 ++++++++++++++++++ .../messaging-compatible-endpoint.test.ts | 189 +---------------- ...saging-compatible-endpoint-helpers.test.ts | 69 +++++++ 3 files changed, 270 insertions(+), 183 deletions(-) create mode 100644 test/e2e-scenario/live/messaging-compatible-endpoint-helpers.ts create mode 100644 test/e2e-scenario/support-tests/messaging-compatible-endpoint-helpers.test.ts diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint-helpers.ts b/test/e2e-scenario/live/messaging-compatible-endpoint-helpers.ts new file mode 100644 index 0000000000..de87b56122 --- /dev/null +++ b/test/e2e-scenario/live/messaging-compatible-endpoint-helpers.ts @@ -0,0 +1,195 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import path from "node:path"; + +import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts"; +import type { HostCliClient } from "../fixtures/clients/host.ts"; + +const REPO_ROOT = path.resolve(import.meta.dirname, "../../.."); +const CLI_ENTRYPOINT = path.join(REPO_ROOT, "bin", "nemoclaw.js"); + +export function commandEnv(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv { + return { + ...buildAvailabilityProbeEnv(), + ...extra, + NEMOCLAW_NON_INTERACTIVE: "1", + NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1", + OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw", + }; +} + +async function bestEffort(run: () => Promise): Promise { + try { + await run(); + } catch { + // Best-effort cleanup mirrors the legacy shell teardown. + } +} + +export async function stopGatewayRuntime(host: HostCliClient, artifactName: string): Promise { + await bestEffort(() => + host.command( + "bash", + [ + "-lc", + [ + "set +e", + "openshell forward stop 18789 >/dev/null 2>&1", + "openshell gateway stop -g nemoclaw >/dev/null 2>&1", + 'pid_file="$HOME/.local/state/nemoclaw/openshell-docker-gateway/openshell-gateway.pid"', + 'if [ -f "$pid_file" ]; then', + ' pid="$(tr -d "[:space:]" <"$pid_file" 2>/dev/null || true)"', + ' if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then', + ' kill "$pid" 2>/dev/null || true', + " for _ in $(seq 1 10); do", + ' kill -0 "$pid" 2>/dev/null || break', + " sleep 1", + " done", + ' kill -0 "$pid" 2>/dev/null && kill -9 "$pid" 2>/dev/null || true', + " fi", + "fi", + 'cid="$(docker ps -qf "name=openshell-cluster-nemoclaw" 2>/dev/null | head -1)"', + 'if [ -n "$cid" ]; then docker stop "$cid" >/dev/null 2>&1 || true; fi', + "openshell gateway remove nemoclaw >/dev/null 2>&1", + "openshell gateway destroy -g nemoclaw >/dev/null 2>&1", + "exit 0", + ].join("\n"), + ], + { + artifactName, + env: commandEnv(), + timeoutMs: 90_000, + }, + ), + ); +} + +export async function cleanupMessagingState( + host: HostCliClient, + sandboxName: string, +): Promise { + // Endpoint-validation skips can happen before the sandbox exists. Keep + // teardown non-throwing so "Sandbox ... does not exist" stays a normal + // pre-contract cleanup outcome instead of masking the original evidence. + await bestEffort(() => + host.command("node", [CLI_ENTRYPOINT, sandboxName, "destroy", "--yes"], { + artifactName: `cleanup-nemoclaw-destroy-${sandboxName}`, + env: commandEnv(), + timeoutMs: 120_000, + }), + ); + await bestEffort(() => + host.command("openshell", ["sandbox", "delete", sandboxName], { + artifactName: `cleanup-openshell-sandbox-delete-${sandboxName}`, + env: commandEnv(), + timeoutMs: 60_000, + }), + ); + await stopGatewayRuntime(host, "cleanup-openshell-gateway-runtime-nemoclaw"); +} + +function findJsonObjectEnd(raw: string, start: number): number | null { + let depth = 0; + let inString = false; + let escaped = false; + for (let index = start; index < raw.length; index += 1) { + const char = raw[index]; + if (inString) { + if (escaped) { + escaped = false; + } else if (char === "\\") { + escaped = true; + } else if (char === '"') { + inString = false; + } + continue; + } + if (char === '"') { + inString = true; + } else if (char === "{") { + depth += 1; + } else if (char === "}") { + depth -= 1; + if (depth === 0) return index + 1; + } + } + return null; +} + +export function parseOpenClawAgentText(raw: string): string { + if (!raw.trim()) return ""; + const parts: string[] = []; + const visited = new Set(); + const textKeys = new Set(["text", "content", "reasoning_content"]); + const containerKeys = new Set([ + "result", + "payloads", + "payload", + "messages", + "choices", + "response", + "data", + "output", + "outputs", + "items", + "segments", + "delta", + ]); + + const add = (value: unknown) => { + if (typeof value === "string" && value.trim()) parts.push(value.trim()); + }; + const collect = (value: unknown) => { + if (visited.has(value)) return; + visited.add(value); + if (typeof value === "string") { + add(value); + return; + } + if (Array.isArray(value)) { + value.forEach(collect); + return; + } + if (!value || typeof value !== "object") return; + const record = value as Record; + for (const key of textKeys) add(record[key]); + const choices = record.choices; + if (Array.isArray(choices)) { + for (const choice of choices) { + if (!choice || typeof choice !== "object") continue; + collect((choice as Record).message); + collect((choice as Record).delta); + add((choice as Record).text); + } + } + for (const key of containerKeys) { + if (key in record) collect(record[key]); + } + }; + const collectDoc = (doc: unknown) => { + if (doc && typeof doc === "object" && (doc as Record).result) { + collect((doc as Record).result); + } else { + collect(doc); + } + }; + + try { + collectDoc(JSON.parse(raw)); + } catch { + for (const match of raw.matchAll(/{/g)) { + try { + const before = parts.length; + const start = match.index; + const end = findJsonObjectEnd(raw, start); + if (end === null) continue; + collectDoc(JSON.parse(raw.slice(start, end))); + if (parts.length > before) break; + } catch { + // Continue scanning for a later JSON object, matching the legacy parser. + } + } + } + return parts.join("\n"); +} diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts index b3c244f0fe..3eb9402414 100644 --- a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts +++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts @@ -18,12 +18,17 @@ import path from "node:path"; import { describe, it } from "vitest"; -import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts"; import type { HostCliClient } from "../fixtures/clients/host.ts"; import { type SandboxClient, validateSandboxName } from "../fixtures/clients/sandbox.ts"; import { expect, test } from "../fixtures/e2e-test.ts"; import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts"; import type { ShellProbeResult } from "../fixtures/shell-probe.ts"; +import { + cleanupMessagingState, + commandEnv, + parseOpenClawAgentText, + stopGatewayRuntime, +} from "./messaging-compatible-endpoint-helpers.ts"; const REPO_ROOT = path.resolve(import.meta.dirname, "../../.."); const CLI_ENTRYPOINT = path.join(REPO_ROOT, "bin", "nemoclaw.js"); @@ -86,16 +91,6 @@ function sleep(ms: number): Promise { return new Promise((resolve) => setTimeout(resolve, ms)); } -function commandEnv(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv { - return { - ...buildAvailabilityProbeEnv(), - ...extra, - NEMOCLAW_NON_INTERACTIVE: "1", - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1", - OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw", - }; -} - function redactionValues(): string[] { return [COMPATIBLE_KEY, TELEGRAM_TOKEN, process.env.GITHUB_TOKEN].filter( (value): value is string => typeof value === "string" && value.length > 0, @@ -353,73 +348,6 @@ async function sourceCliAvailable(host: HostCliClient): Promise { return result.exitCode === 0; } -async function bestEffort(run: () => Promise): Promise { - try { - await run(); - } catch { - // Best-effort cleanup mirrors the legacy shell teardown. - } -} - -async function stopGatewayRuntime(host: HostCliClient, artifactName: string): Promise { - await bestEffort(() => - host.command( - "bash", - [ - "-lc", - [ - "set +e", - "openshell forward stop 18789 >/dev/null 2>&1", - "openshell gateway stop -g nemoclaw >/dev/null 2>&1", - 'pid_file="$HOME/.local/state/nemoclaw/openshell-docker-gateway/openshell-gateway.pid"', - 'if [ -f "$pid_file" ]; then', - ' pid="$(tr -d "[:space:]" <"$pid_file" 2>/dev/null || true)"', - ' if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then', - ' kill "$pid" 2>/dev/null || true', - " for _ in $(seq 1 10); do", - ' kill -0 "$pid" 2>/dev/null || break', - " sleep 1", - " done", - ' kill -0 "$pid" 2>/dev/null && kill -9 "$pid" 2>/dev/null || true', - " fi", - "fi", - 'cid="$(docker ps -qf "name=openshell-cluster-nemoclaw" 2>/dev/null | head -1)"', - 'if [ -n "$cid" ]; then docker stop "$cid" >/dev/null 2>&1 || true; fi', - "openshell gateway remove nemoclaw >/dev/null 2>&1", - "openshell gateway destroy -g nemoclaw >/dev/null 2>&1", - "exit 0", - ].join("\n"), - ], - { - artifactName, - env: commandEnv(), - timeoutMs: 90_000, - }, - ), - ); -} - -async function cleanupMessagingState(host: HostCliClient, sandboxName: string): Promise { - // Endpoint-validation skips can happen before the sandbox exists. Keep - // teardown non-throwing so "Sandbox ... does not exist" stays a normal - // pre-contract cleanup outcome instead of masking the original evidence. - await bestEffort(() => - host.command("node", [CLI_ENTRYPOINT, sandboxName, "destroy", "--yes"], { - artifactName: `cleanup-nemoclaw-destroy-${sandboxName}`, - env: commandEnv(), - timeoutMs: 120_000, - }), - ); - await bestEffort(() => - host.command("openshell", ["sandbox", "delete", sandboxName], { - artifactName: `cleanup-openshell-sandbox-delete-${sandboxName}`, - env: commandEnv(), - timeoutMs: 60_000, - }), - ); - await stopGatewayRuntime(host, "cleanup-openshell-gateway-runtime-nemoclaw"); -} - function onboardEnv(endpointUrl: string): NodeJS.ProcessEnv { return commandEnv({ COMPATIBLE_API_KEY: COMPATIBLE_KEY, @@ -631,111 +559,6 @@ async function assertSandboxInference(sandbox: SandboxClient): Promise { ); } -function findJsonObjectEnd(raw: string, start: number): number | null { - let depth = 0; - let inString = false; - let escaped = false; - for (let index = start; index < raw.length; index += 1) { - const char = raw[index]; - if (inString) { - if (escaped) { - escaped = false; - } else if (char === "\\") { - escaped = true; - } else if (char === '"') { - inString = false; - } - continue; - } - if (char === '"') { - inString = true; - } else if (char === "{") { - depth += 1; - } else if (char === "}") { - depth -= 1; - if (depth === 0) return index + 1; - } - } - return null; -} - -function parseOpenClawAgentText(raw: string): string { - if (!raw.trim()) return ""; - const parts: string[] = []; - const visited = new Set(); - const textKeys = new Set(["text", "content", "reasoning_content"]); - const containerKeys = new Set([ - "result", - "payloads", - "payload", - "messages", - "choices", - "response", - "data", - "output", - "outputs", - "items", - "segments", - "delta", - ]); - - const add = (value: unknown) => { - if (typeof value === "string" && value.trim()) parts.push(value.trim()); - }; - const collect = (value: unknown) => { - if (visited.has(value)) return; - visited.add(value); - if (typeof value === "string") { - add(value); - return; - } - if (Array.isArray(value)) { - value.forEach(collect); - return; - } - if (!value || typeof value !== "object") return; - const record = value as Record; - for (const key of textKeys) add(record[key]); - const choices = record.choices; - if (Array.isArray(choices)) { - for (const choice of choices) { - if (!choice || typeof choice !== "object") continue; - collect((choice as Record).message); - collect((choice as Record).delta); - add((choice as Record).text); - } - } - for (const key of containerKeys) { - if (key in record) collect(record[key]); - } - }; - const collectDoc = (doc: unknown) => { - if (doc && typeof doc === "object" && (doc as Record).result) { - collect((doc as Record).result); - } else { - collect(doc); - } - }; - - try { - collectDoc(JSON.parse(raw)); - } catch { - for (const match of raw.matchAll(/{/g)) { - try { - const before = parts.length; - const start = match.index; - const end = findJsonObjectEnd(raw, start); - if (end === null) continue; - collectDoc(JSON.parse(raw.slice(start, end))); - if (parts.length > before) break; - } catch { - // Continue scanning for a later JSON object, matching the legacy parser. - } - } - } - return parts.join("\n"); -} - async function assertOpenClawAgentTurn( sandbox: SandboxClient, compatibleMock: CompatibleMock, diff --git a/test/e2e-scenario/support-tests/messaging-compatible-endpoint-helpers.test.ts b/test/e2e-scenario/support-tests/messaging-compatible-endpoint-helpers.test.ts new file mode 100644 index 0000000000..d42eb8800c --- /dev/null +++ b/test/e2e-scenario/support-tests/messaging-compatible-endpoint-helpers.test.ts @@ -0,0 +1,69 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it } from "vitest"; + +import type { HostCliClient } from "../fixtures/clients/host.ts"; +import { + cleanupMessagingState, + parseOpenClawAgentText, +} from "../live/messaging-compatible-endpoint-helpers.ts"; + +const COMPAT_AGENT_REPLY = "COMPAT_MOCK_ROUTE_5098_OK"; +const COMPAT_AGENT_PROMPT = + "Call the configured model and report the compatible endpoint route token."; + +describe("messaging compatible endpoint helper coverage", () => { + it("keeps missing-sandbox cleanup from masking endpoint validation evidence", async () => { + const calls: Array<{ command: string; args: string[] }> = []; + const host = { + command: async (command: string, args: string[]) => { + calls.push({ command, args }); + throw new Error("Sandbox e2e-msg-compat-missing does not exist"); + }, + } as unknown as HostCliClient; + + await expect( + (async () => { + try { + throw new Error("endpoint validation failed with HTTP 429"); + } catch (error) { + await cleanupMessagingState(host, "e2e-msg-compat-missing"); + throw error; + } + })(), + ).rejects.toThrow(/HTTP 429/); + + expect(calls).toHaveLength(3); + expect(calls[0]?.command).toBe("node"); + expect(calls[0]?.args[0]).toMatch(/bin\/nemoclaw\.js$/); + expect(calls[0]?.args.slice(1)).toEqual(["e2e-msg-compat-missing", "destroy", "--yes"]); + expect(calls[1]).toEqual({ + command: "openshell", + args: ["sandbox", "delete", "e2e-msg-compat-missing"], + }); + expect(calls[2]?.command).toBe("bash"); + expect(calls[2]?.args[0]).toBe("-lc"); + expect(calls[2]?.args[1]).toContain("openshell gateway destroy -g nemoclaw"); + }); + + it("extracts noisy OpenClaw JSON while rejecting prompt echo text", () => { + expect(COMPAT_AGENT_PROMPT).not.toContain(COMPAT_AGENT_REPLY); + expect( + parseOpenClawAgentText(JSON.stringify({ result: { content: COMPAT_AGENT_PROMPT } })), + ).not.toContain(COMPAT_AGENT_REPLY); + + const noisyOutput = [ + "openclaw: session starting", + "debug: {not-json}", + JSON.stringify({ + result: { + messages: [{ role: "assistant", content: COMPAT_AGENT_REPLY }], + }, + }), + "openclaw: session complete", + ].join("\n"); + + expect(parseOpenClawAgentText(noisyOutput)).toContain(COMPAT_AGENT_REPLY); + }); +}); From c1d0f5814731f128a815ed298c9af86b1e5c1708 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 14:07:31 -0700 Subject: [PATCH 13/14] test(e2e): fix network policy preset selector Signed-off-by: Carlos Villela --- test/e2e-scenario/live/network-policy.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e-scenario/live/network-policy.test.ts b/test/e2e-scenario/live/network-policy.test.ts index eb6000b5ce..ce3574a890 100644 --- a/test/e2e-scenario/live/network-policy.test.ts +++ b/test/e2e-scenario/live/network-policy.test.ts @@ -110,7 +110,7 @@ async function applyPresetInteractively( const script = String.raw` set -euo pipefail preset_list="$(env NEMOCLAW_NON_INTERACTIVE= node "$NEMOCLAW_E2E_CLI" "$NEMOCLAW_E2E_SANDBOX" policy-add &1 || true)" -preset_num="$(printf '%s\n' "$preset_list" | python3 -c 'import re,sys; preset=sys.argv[1]; text=sys.stdin.read(); m=re.search(r"(?m)^\\s*(\\d+)\\).*" + re.escape(preset), text); print(m.group(1) if m else "")' "$NEMOCLAW_E2E_PRESET")" +preset_num="$(printf '%s\n' "$preset_list" | python3 -c 'import re,sys; preset=sys.argv[1]; text=sys.stdin.read(); m=re.search(r"(?m)^\s*(\d+)\).*" + re.escape(preset), text); print(m.group(1) if m else "")' "$NEMOCLAW_E2E_PRESET")" if [ -z "$preset_num" ]; then printf 'preset %s not found in list:\n%s\n' "$NEMOCLAW_E2E_PRESET" "$preset_list" >&2 exit 1 From 1a9e58611056a2914210ad0c505d7f10abff71a6 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 14:19:24 -0700 Subject: [PATCH 14/14] test(e2e): refine live scenario helpers Signed-off-by: Carlos Villela --- .../messaging-compatible-endpoint-helpers.ts | 6 ++++- .../messaging-compatible-endpoint.test.ts | 24 +++++++++++++------ test/e2e-scenario/live/network-policy.test.ts | 3 +++ ...saging-compatible-endpoint-helpers.test.ts | 10 ++++++++ 4 files changed, 35 insertions(+), 8 deletions(-) diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint-helpers.ts b/test/e2e-scenario/live/messaging-compatible-endpoint-helpers.ts index de87b56122..9266c6abcc 100644 --- a/test/e2e-scenario/live/messaging-compatible-endpoint-helpers.ts +++ b/test/e2e-scenario/live/messaging-compatible-endpoint-helpers.ts @@ -24,6 +24,8 @@ async function bestEffort(run: () => Promise): Promise { await run(); } catch { // Best-effort cleanup mirrors the legacy shell teardown. + // Narrow this once NemoClaw/OpenShell/gateway teardown treats missing + // resources as successful cleanup. } } @@ -153,7 +155,9 @@ export function parseOpenClawAgentText(raw: string): string { } if (!value || typeof value !== "object") return; const record = value as Record; - for (const key of textKeys) add(record[key]); + for (const key of textKeys) { + if (key in record) collect(record[key]); + } const choices = record.choices; if (Array.isArray(choices)) { for (const choice of choices) { diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts index 3eb9402414..925cba0168 100644 --- a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts +++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts @@ -152,6 +152,10 @@ async function startCompatibleMock( if (req.method === "GET" && ["/v1/models", "/models"].includes(requestPath)) { requests.push({ method: "GET", path: requestPath, auth, hopHeaders: [] }); + if (auth !== "ok") { + jsonResponse(res, 401, { error: { message: "missing bearer credential" } }); + return; + } jsonResponse(res, 200, { object: "list", data: [{ id: model, object: "model" }], @@ -292,7 +296,9 @@ async function startCompatibleMock( for (let attempt = 1; attempt <= 30; attempt += 1) { try { - const response = await fetch(`${mock.localBaseUrl}/models`); + const response = await fetch(`${mock.localBaseUrl}/models`, { + headers: { Authorization: `Bearer ${apiKey}` }, + }); if (response.ok) return mock; } catch { // Keep polling until the server accepts connections. @@ -669,12 +675,16 @@ liveTest( const hostAddress = await hostAddressForSandbox(host); const endpointUrl = `http://${hostAddress}:${new URL(compatibleMock.localBaseUrl).port}/v1`; - const hostReachability = await host.command("curl", ["-sf", `${endpointUrl}/models`], { - artifactName: "compatible-endpoint-host-reachability", - env: commandEnv(), - redactionValues: redactionValues(), - timeoutMs: 30_000, - }); + const hostReachability = await host.command( + "curl", + ["-sf", "-H", `Authorization: Bearer ${COMPATIBLE_KEY}`, `${endpointUrl}/models`], + { + artifactName: "compatible-endpoint-host-reachability", + env: commandEnv(), + redactionValues: redactionValues(), + timeoutMs: 30_000, + }, + ); expect(hostReachability.exitCode, resultText(hostReachability)).toBe(0); const { result: onboard, runner } = await runCompatibleOnboard(host, endpointUrl); diff --git a/test/e2e-scenario/live/network-policy.test.ts b/test/e2e-scenario/live/network-policy.test.ts index ce3574a890..6ba4b57f61 100644 --- a/test/e2e-scenario/live/network-policy.test.ts +++ b/test/e2e-scenario/live/network-policy.test.ts @@ -65,6 +65,9 @@ function sleep(ms: number): Promise { } function shellEvalArg(script: string): string { + if (script.length === 0) { + return ""; + } const encoded = Buffer.from(script, "utf8").toString("base64"); return `printf %s ${encoded} | base64 -d | sh`; } diff --git a/test/e2e-scenario/support-tests/messaging-compatible-endpoint-helpers.test.ts b/test/e2e-scenario/support-tests/messaging-compatible-endpoint-helpers.test.ts index d42eb8800c..536d8e43ca 100644 --- a/test/e2e-scenario/support-tests/messaging-compatible-endpoint-helpers.test.ts +++ b/test/e2e-scenario/support-tests/messaging-compatible-endpoint-helpers.test.ts @@ -66,4 +66,14 @@ describe("messaging compatible endpoint helper coverage", () => { expect(parseOpenClawAgentText(noisyOutput)).toContain(COMPAT_AGENT_REPLY); }); + + it("extracts OpenAI Responses content parts", () => { + const output = JSON.stringify({ + result: { + content: [{ type: "output_text", text: COMPAT_AGENT_REPLY }], + }, + }); + + expect(parseOpenClawAgentText(output)).toContain(COMPAT_AGENT_REPLY); + }); });