From 30d7e034085dc3868d09e9cdb2ce3d276127b5b7 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 12:13:47 -0700
Subject: [PATCH 01/14] test(e2e): migrate messaging compatible endpoint

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .github/workflows/e2e-vitest-scenarios.yaml   |  80 ++
 .../messaging-compatible-endpoint.test.ts     | 968 ++++++++++++++++++
 .../live/network-policy-transient-provider.ts |   2 +-
 .../e2e-scenarios-workflow.test.ts            |  49 +-
 .../network-policy-transient-provider.test.ts |   5 +
 tools/e2e-scenarios/free-standing-jobs.env    |   6 +-
 tools/e2e-scenarios/workflow-boundary.mts     | 152 +++
 7 files changed, 1257 insertions(+), 5 deletions(-)
 create mode 100644 test/e2e-scenario/live/messaging-compatible-endpoint.test.ts

diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml
index f199050c83..b6e3d45a20 100644
--- a/.github/workflows/e2e-vitest-scenarios.yaml
+++ b/.github/workflows/e2e-vitest-scenarios.yaml
@@ -1441,6 +1441,85 @@ jobs:
           if-no-files-found: ignore
           retention-days: 14
 
+  messaging-compatible-endpoint-vitest:
+    needs: generate-matrix
+    if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',messaging-compatible-endpoint-vitest,') || contains(format(',{0},', inputs.scenarios), ',messaging-compatible-endpoint,') }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+    env:
+      E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/messaging-compatible-endpoint
+      NEMOCLAW_CLI_BIN: ${{ github.workspace }}/bin/nemoclaw.js
+      NEMOCLAW_RUN_E2E_SCENARIOS: "1"
+      NEMOCLAW_SANDBOX_NAME: "e2e-msg-compat"
+      OPENSHELL_GATEWAY: "nemoclaw"
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+        with:
+          persist-credentials: false
+
+      - name: Authenticate to Docker Hub
+        env:
+          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
+          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
+        shell: bash
+        run: |
+          set -euo pipefail
+          if [[ -z "${DOCKERHUB_USERNAME}" || -z "${DOCKERHUB_TOKEN}" ]]; then
+            echo "::notice::Docker Hub credentials not configured; continuing with anonymous pulls."
+            exit 0
+          fi
+          login_succeeded=0
+          for attempt in 1 2 3; do
+            if echo "${DOCKERHUB_TOKEN}" | timeout 30s docker login docker.io --username "${DOCKERHUB_USERNAME}" --password-stdin; then
+              login_succeeded=1
+              break
+            fi
+            if [[ "$attempt" -lt 3 ]]; then
+              echo "::warning::Docker Hub login attempt ${attempt} failed; retrying."
+              sleep 5
+            fi
+          done
+          if [[ "$login_succeeded" -ne 1 ]]; then
+            echo "::warning::Docker Hub login failed after 3 attempts; continuing with anonymous pulls."
+          fi
+
+      - name: Set up Node
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0
+        with:
+          node-version: 22
+          cache: npm
+
+      - name: Install root dependencies
+        run: npm ci --ignore-scripts
+
+      - name: Build CLI
+        run: npm run build:cli
+
+      - name: Run messaging compatible endpoint live test
+        # Migrated from test/e2e/test-messaging-compatible-endpoint.sh.
+        # Preserves the fake OpenAI-compatible endpoint, Telegram messaging
+        # config, inference.local, OpenClaw agent-turn, and proxy hop-header
+        # strip boundaries without relying on real messaging/provider secrets.
+        env:
+          NEMOCLAW_COMPAT_MOCK_API_KEY: "fake-compatible-key-e2e"
+          TELEGRAM_ALLOWED_IDS: "123456789"
+          TELEGRAM_BOT_TOKEN: "test-fake-telegram-token-e2e"
+        run: |
+          set -euo pipefail
+          npx vitest run --project e2e-scenarios-live \
+            test/e2e-scenario/live/messaging-compatible-endpoint.test.ts \
+            --silent=false --reporter=default
+
+      - name: Upload messaging compatible endpoint artifacts
+        if: always()
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: e2e-vitest-scenarios-messaging-compatible-endpoint
+          path: e2e-artifacts/vitest/messaging-compatible-endpoint/
+          include-hidden-files: false
+          if-no-files-found: ignore
+          retention-days: 14
+
   launchable-smoke-vitest:
     needs: generate-matrix
     if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',launchable-smoke-vitest,') }}
@@ -1886,6 +1965,7 @@ jobs:
         rebuild-openclaw-vitest,
         sandbox-rebuild-vitest,
         token-rotation-vitest,
+        messaging-compatible-endpoint-vitest,
         launchable-smoke-vitest,
         double-onboard-vitest,
         model-router-provider-routed-inference-vitest,
diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
new file mode 100644
index 0000000000..9249849dfc
--- /dev/null
+++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
@@ -0,0 +1,968 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Live Vitest migration for test/e2e/test-messaging-compatible-endpoint.sh.
+ *
+ * This stays intentionally direct: the legacy contract is the real
+ * Docker/OpenShell/nemoclaw boundary with a local OpenAI-compatible endpoint
+ * mock, Telegram messaging config, sandbox inference.local routing, and an
+ * OpenClaw agent turn through the compatible endpoint proxy path.
+ */
+
+import { randomUUID } from "node:crypto";
+import fs from "node:fs";
+import http from "node:http";
+import type { AddressInfo } from "node:net";
+import path from "node:path";
+
+import { describe, it } from "vitest";
+
+import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
+import type { HostCliClient } from "../fixtures/clients/host.ts";
+import { type SandboxClient, validateSandboxName } from "../fixtures/clients/sandbox.ts";
+import { expect, test } from "../fixtures/e2e-test.ts";
+import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
+import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
+import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts";
+
+const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
+const CLI_ENTRYPOINT = path.join(REPO_ROOT, "bin", "nemoclaw.js");
+const CLI_DIST_ENTRYPOINT = path.join(REPO_ROOT, "dist", "nemoclaw.js");
+const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? "e2e-msg-compat";
+const COMPAT_MODEL = process.env.NEMOCLAW_COMPAT_MODEL ?? "mock/deepseek-compatible";
+const COMPATIBLE_KEY = process.env.NEMOCLAW_COMPAT_MOCK_API_KEY ?? "fake-compatible-key-e2e";
+const TELEGRAM_TOKEN = process.env.TELEGRAM_BOT_TOKEN ?? "test-fake-telegram-token-e2e";
+const TELEGRAM_IDS = process.env.TELEGRAM_ALLOWED_IDS ?? "123456789";
+const MOCK_PORT = Number(process.env.NEMOCLAW_COMPAT_MOCK_PORT ?? "18089");
+const ONBOARD_TIMEOUT_MS = 25 * 60_000;
+const TEST_TIMEOUT_MS = 45 * 60_000;
+const liveTest = shouldRunLiveE2EScenarios() ? test : test.skip;
+
+validateSandboxName(SANDBOX_NAME);
+
+const HOP_BY_HOP_HEADERS = new Set([
+  "proxy-authorization",
+  "proxy-connection",
+  "proxy-authenticate",
+  "connection",
+  "keep-alive",
+  "te",
+  "trailer",
+  "transfer-encoding",
+  "upgrade",
+]);
+const RATE_LIMIT_VALIDATION_RE =
+  /HTTP\s+429|returned\s+HTTP\s+429|\b429\b|too many requests|rate[- ]?limit|quota/i;
+
+interface MockRequestLog {
+  method: string;
+  path: string;
+  auth: "ok" | "missing";
+  model?: unknown;
+  stream?: unknown;
+  hopHeaders: string[];
+}
+
+interface CompatibleMock {
+  readonly requests: MockRequestLog[];
+  readonly hopHeaderLogs: string[][];
+  readonly localBaseUrl: string;
+  close(): Promise<void>;
+}
+
+type ProcessResult = { exitCode?: number | null; stdout: string; stderr: string };
+
+function resultText(result: ProcessResult): string {
+  return [result.stdout, result.stderr].filter(Boolean).join("\n");
+}
+
+function sleep(ms: number): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
+function commandEnv(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv {
+  return {
+    ...buildAvailabilityProbeEnv(),
+    ...extra,
+    NEMOCLAW_NON_INTERACTIVE: "1",
+    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
+    OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw",
+  };
+}
+
+function redactionValues(): string[] {
+  return [COMPATIBLE_KEY, TELEGRAM_TOKEN, process.env.GITHUB_TOKEN].filter(
+    (value): value is string => typeof value === "string" && value.length > 0,
+  );
+}
+
+function jsonResponse(res: http.ServerResponse, status: number, payload: unknown): void {
+  const body = JSON.stringify(payload);
+  res.writeHead(status, {
+    "Content-Type": "application/json",
+    "Content-Length": Buffer.byteLength(body),
+  });
+  res.end(body);
+}
+
+function sseResponse(res: http.ServerResponse, body: string): void {
+  res.writeHead(200, {
+    "Content-Type": "text/event-stream",
+    "Content-Length": Buffer.byteLength(body),
+  });
+  res.end(body);
+}
+
+function readRequestBody(req: http.IncomingMessage): Promise<string> {
+  return new Promise((resolve) => {
+    let body = "";
+    req.setEncoding("utf8");
+    req.on("data", (chunk: string) => {
+      body += chunk;
+    });
+    req.on("end", () => resolve(body));
+  });
+}
+
+function parseJsonBody(raw: string): Record<string, unknown> {
+  try {
+    const parsed = JSON.parse(raw) as unknown;
+    return parsed && typeof parsed === "object" && !Array.isArray(parsed)
+      ? (parsed as Record<string, unknown>)
+      : {};
+  } catch {
+    return {};
+  }
+}
+
+async function startCompatibleMock(
+  port: number,
+  model: string,
+  apiKey: string,
+): Promise<CompatibleMock> {
+  const requests: MockRequestLog[] = [];
+  const hopHeaderLogs: string[][] = [];
+  const server = http.createServer(async (req, res) => {
+    const requestPath = new URL(req.url ?? "/", "http://compatible.mock").pathname;
+    const auth = req.headers.authorization === `Bearer ${apiKey}` ? "ok" : "missing";
+    const hopHeaders = Object.keys(req.headers).filter((name) =>
+      HOP_BY_HOP_HEADERS.has(name.toLowerCase()),
+    );
+
+    if (req.method === "GET" && ["/v1/models", "/models"].includes(requestPath)) {
+      requests.push({ method: "GET", path: requestPath, auth, hopHeaders: [] });
+      jsonResponse(res, 200, {
+        object: "list",
+        data: [{ id: model, object: "model" }],
+      });
+      return;
+    }
+
+    if (req.method !== "POST") {
+      requests.push({ method: req.method ?? "GET", path: requestPath, auth, hopHeaders });
+      jsonResponse(res, 404, { error: { message: "not found" } });
+      return;
+    }
+
+    const payload = parseJsonBody(await readRequestBody(req));
+
+    if (["/v1/responses", "/responses"].includes(requestPath)) {
+      requests.push({
+        method: "POST",
+        path: requestPath,
+        auth,
+        model: payload.model,
+        stream: payload.stream,
+        hopHeaders,
+      });
+      if (auth !== "ok") {
+        jsonResponse(res, 401, { error: { message: "missing bearer credential" } });
+        return;
+      }
+      if (payload.stream) {
+        sseResponse(
+          res,
+          [
+            "event: response.output_text.delta",
+            'data: {"delta":"OK"}',
+            "",
+            "event: response.completed",
+            "data: {}",
+            "",
+          ].join("\n"),
+        );
+        return;
+      }
+      jsonResponse(res, 200, {
+        id: "resp-mock",
+        object: "response",
+        output: [
+          {
+            type: "message",
+            role: "assistant",
+            content: [{ type: "output_text", text: "PONG from compatible endpoint mock" }],
+          },
+        ],
+      });
+      return;
+    }
+
+    if (["/v1/chat/completions", "/chat/completions"].includes(requestPath)) {
+      requests.push({
+        method: "POST",
+        path: requestPath,
+        auth,
+        model: payload.model,
+        stream: payload.stream,
+        hopHeaders,
+      });
+      hopHeaderLogs.push(hopHeaders);
+      if (auth !== "ok") {
+        jsonResponse(res, 401, { error: { message: "missing bearer credential" } });
+        return;
+      }
+      if (payload.stream) {
+        const chunk = JSON.stringify({
+          id: "chatcmpl-mock",
+          object: "chat.completion.chunk",
+          choices: [
+            {
+              index: 0,
+              delta: { role: "assistant", content: "PONG from compatible endpoint mock" },
+              finish_reason: null,
+            },
+          ],
+        });
+        const done = JSON.stringify({
+          id: "chatcmpl-mock",
+          object: "chat.completion.chunk",
+          choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
+        });
+        sseResponse(res, `data: ${chunk}\n\ndata: ${done}\n\ndata: [DONE]\n\n`);
+        return;
+      }
+      jsonResponse(res, 200, {
+        id: "chatcmpl-mock",
+        object: "chat.completion",
+        choices: [
+          {
+            index: 0,
+            message: { role: "assistant", content: "PONG from compatible endpoint mock" },
+            finish_reason: "stop",
+          },
+        ],
+      });
+      return;
+    }
+
+    requests.push({
+      method: "POST",
+      path: requestPath,
+      auth,
+      model: payload.model,
+      stream: payload.stream,
+      hopHeaders,
+    });
+    jsonResponse(res, 404, { error: { message: "not found" } });
+  });
+
+  await new Promise<void>((resolve, reject) => {
+    server.once("error", reject);
+    server.listen(port, "0.0.0.0", () => {
+      server.off("error", reject);
+      resolve();
+    });
+  });
+
+  const address = server.address();
+  if (!address || typeof address === "string") {
+    throw new Error("compatible endpoint mock did not bind to a TCP port");
+  }
+  const boundPort = (address as AddressInfo).port;
+  const mock = {
+    requests,
+    hopHeaderLogs,
+    localBaseUrl: `http://127.0.0.1:${boundPort}/v1`,
+    close: () =>
+      new Promise<void>((resolve, reject) => {
+        server.close((error) => (error ? reject(error) : resolve()));
+      }),
+  };
+
+  for (let attempt = 1; attempt <= 30; attempt += 1) {
+    try {
+      const response = await fetch(`${mock.localBaseUrl}/models`);
+      if (response.ok) return mock;
+    } catch {
+      // Keep polling until the server accepts connections.
+    }
+    await sleep(1_000);
+  }
+
+  await mock.close();
+  throw new Error("compatible endpoint mock failed to answer /v1/models");
+}
+
+async function hostAddressForSandbox(host: HostCliClient): Promise<string> {
+  const probe = await host.command(
+    "bash",
+    [
+      "-lc",
+      [
+        'ip_addr="$(ip route get 1.1.1.1 2>/dev/null | awk \'{for (i=1;i<=NF;i++) if ($i=="src") {print $(i+1); exit}}\')"',
+        'if [ -n "$ip_addr" ]; then echo "$ip_addr"; exit 0; fi',
+        "ip_addr=\"$(hostname -I 2>/dev/null | awk '{print $1}')\"",
+        'if [ -n "$ip_addr" ]; then echo "$ip_addr"; exit 0; fi',
+        'if [ "$(uname -s 2>/dev/null)" = "Darwin" ]; then',
+        "  for iface in en0 en1 bridge100; do",
+        '    ip_addr="$(ipconfig getifaddr "$iface" 2>/dev/null || true)"',
+        '    if [ -n "$ip_addr" ]; then echo "$ip_addr"; exit 0; fi',
+        "  done",
+        "  ip_addr=\"$(ifconfig 2>/dev/null | awk '/inet / && $2 !~ /^127\\./ {print $2; exit}')\"",
+        '  if [ -n "$ip_addr" ]; then echo "$ip_addr"; exit 0; fi',
+        "fi",
+        "echo 127.0.0.1",
+      ].join("\n"),
+    ],
+    {
+      artifactName: "host-ip-for-compatible-endpoint",
+      env: commandEnv(),
+      timeoutMs: 30_000,
+    },
+  );
+  return probe.stdout.trim().split(/\s+/)[0] || "127.0.0.1";
+}
+
+async function sourceCliAvailable(host: HostCliClient): Promise<boolean> {
+  if (!fs.existsSync(CLI_DIST_ENTRYPOINT)) return false;
+  const result = await host.command(
+    "bash",
+    ["-lc", "command -v node >/dev/null 2>&1 && command -v openshell >/dev/null 2>&1"],
+    {
+      artifactName: "source-cli-availability",
+      env: commandEnv(),
+      timeoutMs: 30_000,
+    },
+  );
+  return result.exitCode === 0;
+}
+
+async function bestEffort(run: () => Promise<unknown>): Promise<void> {
+  try {
+    await run();
+  } catch {
+    // Best-effort cleanup mirrors the legacy shell teardown.
+  }
+}
+
+async function stopGatewayRuntime(host: HostCliClient, artifactName: string): Promise<void> {
+  await bestEffort(() =>
+    host.command(
+      "bash",
+      [
+        "-lc",
+        [
+          "set +e",
+          "openshell forward stop 18789 >/dev/null 2>&1",
+          "openshell gateway stop -g nemoclaw >/dev/null 2>&1",
+          'pid_file="$HOME/.local/state/nemoclaw/openshell-docker-gateway/openshell-gateway.pid"',
+          'if [ -f "$pid_file" ]; then',
+          '  pid="$(tr -d "[:space:]" <"$pid_file" 2>/dev/null || true)"',
+          '  if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then',
+          '    kill "$pid" 2>/dev/null || true',
+          "    for _ in $(seq 1 10); do",
+          '      kill -0 "$pid" 2>/dev/null || break',
+          "      sleep 1",
+          "    done",
+          '    kill -0 "$pid" 2>/dev/null && kill -9 "$pid" 2>/dev/null || true',
+          "  fi",
+          "fi",
+          'cid="$(docker ps -qf "name=openshell-cluster-nemoclaw" 2>/dev/null | head -1)"',
+          'if [ -n "$cid" ]; then docker stop "$cid" >/dev/null 2>&1 || true; fi',
+          "openshell gateway remove nemoclaw >/dev/null 2>&1",
+          "openshell gateway destroy -g nemoclaw >/dev/null 2>&1",
+          "exit 0",
+        ].join("\n"),
+      ],
+      {
+        artifactName,
+        env: commandEnv(),
+        timeoutMs: 90_000,
+      },
+    ),
+  );
+}
+
+async function cleanupMessagingState(host: HostCliClient, sandboxName: string): Promise<void> {
+  // Endpoint-validation skips can happen before the sandbox exists. Keep
+  // teardown non-throwing so "Sandbox ... does not exist" stays a normal
+  // pre-contract cleanup outcome instead of masking the original evidence.
+  await bestEffort(() =>
+    host.command("node", [CLI_ENTRYPOINT, sandboxName, "destroy", "--yes"], {
+      artifactName: `cleanup-nemoclaw-destroy-${sandboxName}`,
+      env: commandEnv(),
+      timeoutMs: 120_000,
+    }),
+  );
+  await bestEffort(() =>
+    host.command("openshell", ["sandbox", "delete", sandboxName], {
+      artifactName: `cleanup-openshell-sandbox-delete-${sandboxName}`,
+      env: commandEnv(),
+      timeoutMs: 60_000,
+    }),
+  );
+  await stopGatewayRuntime(host, "cleanup-openshell-gateway-runtime-nemoclaw");
+}
+
+function hasLegacyCompatibleEndpointEvidence(
+  result: Pick<ShellProbeResult, "stdout" | "stderr">,
+  requests: readonly MockRequestLog[],
+): boolean {
+  return (
+    resultText(result).includes("Compatible endpoint responds through inference.local") ||
+    requests.some((request) => request.path === "/v1/chat/completions" && request.auth === "ok")
+  );
+}
+
+function shouldSkipPreContractProviderRateLimit(
+  result: Pick<ShellProbeResult, "stdout" | "stderr">,
+  requests: readonly MockRequestLog[] = [],
+): boolean {
+  return (
+    isTransientProviderValidationFailure(result) &&
+    RATE_LIMIT_VALIDATION_RE.test(resultText(result)) &&
+    !hasLegacyCompatibleEndpointEvidence(result, requests)
+  );
+}
+
+function onboardEnv(endpointUrl: string): NodeJS.ProcessEnv {
+  return commandEnv({
+    COMPATIBLE_API_KEY: COMPATIBLE_KEY,
+    DISCORD_BOT_TOKEN: undefined,
+    NEMOCLAW_ENDPOINT_URL: endpointUrl,
+    NEMOCLAW_MODEL: COMPAT_MODEL,
+    NEMOCLAW_POLICY_MODE: "custom",
+    NEMOCLAW_POLICY_PRESETS: "telegram",
+    NEMOCLAW_PREFERRED_API: "openai-completions",
+    NEMOCLAW_PROVIDER: "custom",
+    NEMOCLAW_RECREATE_SANDBOX: "1",
+    NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
+    NEMOCLAW_SKIP_TELEGRAM_REACHABILITY: "1",
+    SLACK_APP_TOKEN: undefined,
+    SLACK_BOT_TOKEN: undefined,
+    TELEGRAM_ALLOWED_IDS: TELEGRAM_IDS,
+    TELEGRAM_BOT_TOKEN: TELEGRAM_TOKEN,
+  });
+}
+
+async function runCompatibleOnboard(
+  host: HostCliClient,
+  endpointUrl: string,
+): Promise<{ result: ShellProbeResult; runner: string }> {
+  const env = onboardEnv(endpointUrl);
+  const useSourceCli = await sourceCliAvailable(host);
+  const runOnce = async (
+    attempt: number,
+  ): Promise<{ result: ShellProbeResult; runner: string }> => {
+    if (useSourceCli) {
+      await cleanupMessagingState(host, SANDBOX_NAME);
+      const result = await host.command(
+        "node",
+        [
+          CLI_ENTRYPOINT,
+          "onboard",
+          "--fresh",
+          "--non-interactive",
+          "--yes",
+          "--yes-i-accept-third-party-software",
+        ],
+        {
+          artifactName:
+            attempt === 1
+              ? "onboard-compatible-endpoint-source-cli"
+              : `onboard-compatible-endpoint-source-cli-retry-${attempt}`,
+          env,
+          redactionValues: redactionValues(),
+          timeoutMs: ONBOARD_TIMEOUT_MS,
+        },
+      );
+      return { result, runner: attempt === 1 ? "source CLI onboard" : "source CLI onboard retry" };
+    }
+
+    const result = await host.command(
+      "bash",
+      ["install.sh", "--non-interactive", "--yes-i-accept-third-party-software", "--fresh"],
+      {
+        artifactName:
+          attempt === 1
+            ? "onboard-compatible-endpoint-install-sh"
+            : `onboard-compatible-endpoint-install-sh-retry-${attempt}`,
+        cwd: REPO_ROOT,
+        env,
+        redactionValues: redactionValues(),
+        timeoutMs: ONBOARD_TIMEOUT_MS,
+      },
+    );
+    return { result, runner: attempt === 1 ? "install.sh" : "install.sh retry" };
+  };
+
+  const first = await runOnce(1);
+  if (
+    first.result.exitCode === 0 ||
+    !/Connection refused|transport error|tcp connect error|client error \(Connect\)/i.test(
+      resultText(first.result),
+    )
+  ) {
+    return first;
+  }
+
+  await stopGatewayRuntime(host, "onboard-compatible-endpoint-retry-gateway-cleanup");
+  await sleep(5_000);
+  return runOnce(2);
+}
+
+function openAiContent(raw: string): string {
+  const parsed = JSON.parse(raw) as {
+    choices?: Array<{ message?: { content?: unknown }; text?: unknown }>;
+  };
+  return (parsed.choices ?? [])
+    .map((choice) => {
+      if (typeof choice.message?.content === "string") return choice.message.content;
+      if (typeof choice.text === "string") return choice.text;
+      return "";
+    })
+    .join("\n");
+}
+
+async function assertOpenClawConfigShape(sandbox: SandboxClient): Promise<void> {
+  const script = String.raw`
+const fs = require("node:fs");
+const model = process.argv[1];
+const cfg = JSON.parse(fs.readFileSync("/sandbox/.openclaw/openclaw.json", "utf8"));
+const providers = cfg.models?.providers ?? {};
+const errors = [];
+if (Object.hasOwn(providers, "deepinfra")) errors.push("direct deepinfra provider is present");
+const providerKeys = Object.keys(providers).sort();
+if (JSON.stringify(providerKeys) !== JSON.stringify(["inference"])) {
+  errors.push("provider keys are " + JSON.stringify(providerKeys));
+}
+const inference = providers.inference;
+if (!inference || typeof inference !== "object") {
+  errors.push("models.providers.inference is missing");
+} else {
+  if (inference.baseUrl !== "https://inference.local/v1") {
+    errors.push("inference baseUrl is " + JSON.stringify(inference.baseUrl));
+  }
+  if (inference.apiKey !== "unused") {
+    errors.push("inference apiKey is not the non-secret placeholder");
+  }
+}
+const primary = cfg.agents?.defaults?.model?.primary;
+if (primary !== "inference/" + model) errors.push("primary model is " + JSON.stringify(primary));
+if (!cfg.channels?.telegram) errors.push("telegram channel config missing");
+console.log(JSON.stringify({
+  provider_keys: providerKeys,
+  inference_base: inference?.baseUrl,
+  inference_api_key: inference?.apiKey,
+  primary,
+  telegram_present: Boolean(cfg.channels?.telegram),
+  errors,
+}));
+process.exit(errors.length ? 1 : 0);
+`;
+  const result = await sandbox.exec(SANDBOX_NAME, ["node", "-e", script, COMPAT_MODEL], {
+    artifactName: "openclaw-config-compatible-endpoint",
+    env: commandEnv(),
+    timeoutMs: 60_000,
+  });
+  expect(result.exitCode, resultText(result)).toBe(0);
+}
+
+async function assertGatewayReady(sandbox: SandboxClient): Promise<void> {
+  const script = String.raw`
+const net = require("node:net");
+let done = false;
+const sock = net.connect(18789, "127.0.0.1");
+function finish(line, code) {
+  if (done) return;
+  done = true;
+  console.log(line);
+  sock.destroy();
+  process.exit(code);
+}
+sock.on("connect", () => finish("OPEN", 0));
+sock.on("error", (err) => finish("ERROR " + err.message, 1));
+sock.setTimeout(1000, () => finish("TIMEOUT", 1));
+`;
+  let last: ShellProbeResult | undefined;
+  for (let attempt = 1; attempt <= 30; attempt += 1) {
+    last = await sandbox.exec(SANDBOX_NAME, ["node", "-e", script], {
+      artifactName: `gateway-ready-compatible-endpoint-${attempt}`,
+      env: commandEnv(),
+      timeoutMs: 5_000,
+    });
+    if (last.exitCode === 0 && last.stdout.includes("OPEN")) return;
+    await sleep(1_000);
+  }
+  throw new Error(
+    `gateway did not open port 18789: ${last ? resultText(last).slice(0, 300) : "no probe"}`,
+  );
+}
+
+async function assertSandboxInference(sandbox: SandboxClient): Promise<void> {
+  const payload = JSON.stringify({
+    model: COMPAT_MODEL,
+    messages: [{ role: "user", content: "Reply with exactly: PONG" }],
+    max_tokens: 32,
+  });
+  const response = await sandbox.exec(
+    SANDBOX_NAME,
+    [
+      "curl",
+      "-sS",
+      "--max-time",
+      "60",
+      "https://inference.local/v1/chat/completions",
+      "-H",
+      "Content-Type: application/json",
+      "--data-raw",
+      payload,
+    ],
+    {
+      artifactName: "sandbox-inference-local-compatible-chat",
+      env: commandEnv(),
+      timeoutMs: 90_000,
+    },
+  );
+  expect(response.exitCode, resultText(response)).toBe(0);
+  expect(openAiContent(response.stdout), response.stdout.slice(0, 500)).toMatch(/PONG/i);
+}
+
+function findJsonObjectEnd(raw: string, start: number): number | null {
+  let depth = 0;
+  let inString = false;
+  let escaped = false;
+  for (let index = start; index < raw.length; index += 1) {
+    const char = raw[index];
+    if (inString) {
+      if (escaped) {
+        escaped = false;
+      } else if (char === "\\") {
+        escaped = true;
+      } else if (char === '"') {
+        inString = false;
+      }
+      continue;
+    }
+    if (char === '"') {
+      inString = true;
+    } else if (char === "{") {
+      depth += 1;
+    } else if (char === "}") {
+      depth -= 1;
+      if (depth === 0) return index + 1;
+    }
+  }
+  return null;
+}
+
+function parseOpenClawAgentText(raw: string): string {
+  if (!raw.trim()) return "";
+  const parts: string[] = [];
+  const visited = new Set<unknown>();
+  const textKeys = new Set(["text", "content", "reasoning_content"]);
+  const containerKeys = new Set([
+    "result",
+    "payloads",
+    "payload",
+    "messages",
+    "choices",
+    "response",
+    "data",
+    "output",
+    "outputs",
+    "items",
+    "segments",
+    "delta",
+  ]);
+
+  const add = (value: unknown) => {
+    if (typeof value === "string" && value.trim()) parts.push(value.trim());
+  };
+  const collect = (value: unknown) => {
+    if (visited.has(value)) return;
+    visited.add(value);
+    if (typeof value === "string") {
+      add(value);
+      return;
+    }
+    if (Array.isArray(value)) {
+      value.forEach(collect);
+      return;
+    }
+    if (!value || typeof value !== "object") return;
+    const record = value as Record<string, unknown>;
+    for (const key of textKeys) add(record[key]);
+    const choices = record.choices;
+    if (Array.isArray(choices)) {
+      for (const choice of choices) {
+        if (!choice || typeof choice !== "object") continue;
+        collect((choice as Record<string, unknown>).message);
+        collect((choice as Record<string, unknown>).delta);
+        add((choice as Record<string, unknown>).text);
+      }
+    }
+    for (const key of containerKeys) {
+      if (key in record) collect(record[key]);
+    }
+  };
+  const collectDoc = (doc: unknown) => {
+    if (doc && typeof doc === "object" && (doc as Record<string, unknown>).result) {
+      collect((doc as Record<string, unknown>).result);
+    } else {
+      collect(doc);
+    }
+  };
+
+  try {
+    collectDoc(JSON.parse(raw));
+  } catch {
+    for (const match of raw.matchAll(/{/g)) {
+      try {
+        const before = parts.length;
+        const start = match.index;
+        const end = findJsonObjectEnd(raw, start);
+        if (end === null) continue;
+        collectDoc(JSON.parse(raw.slice(start, end)));
+        if (parts.length > before) break;
+      } catch {
+        // Continue scanning for a later JSON object, matching the legacy parser.
+      }
+    }
+  }
+  return parts.join("\n");
+}
+
+async function assertOpenClawAgentTurn(
+  sandbox: SandboxClient,
+  compatibleMock: CompatibleMock,
+): Promise<void> {
+  const hopCountBefore = compatibleMock.hopHeaderLogs.length;
+  const sessionId = `e2e-compat-agent-${Date.now()}-${randomUUID()}`;
+  const agent = await sandbox.exec(
+    SANDBOX_NAME,
+    [
+      "openclaw",
+      "agent",
+      "--agent",
+      "main",
+      "--json",
+      "--session-id",
+      sessionId,
+      "-m",
+      "Reply with only: PONG",
+    ],
+    {
+      artifactName: "openclaw-agent-compatible-endpoint",
+      env: commandEnv(),
+      timeoutMs: 120_000,
+    },
+  );
+  const text = resultText(agent);
+  expect(
+    /SsrFBlockedError|Blocked hostname|transport error|ECONNREFUSED|EAI_AGAIN|gateway unavailable|network connection error/i.test(
+      text,
+    ),
+    text.slice(0, 500),
+  ).toBe(false);
+  expect(agent.exitCode, text.slice(0, 500)).toBe(0);
+  expect(parseOpenClawAgentText(agent.stdout), text.slice(0, 500)).toMatch(/PONG/i);
+
+  const newHopHeaderLogs = compatibleMock.hopHeaderLogs.slice(hopCountBefore);
+  expect(
+    newHopHeaderLogs.length,
+    "Mock logged no proxy_hop_headers line for the agent turn; agent did not reach /v1/chat/completions",
+  ).toBeGreaterThan(0);
+  const leaked = newHopHeaderLogs.flat().filter((name) => name.length > 0);
+  expect(leaked, `Proxy hop headers leaked to upstream: ${leaked.join(",")}`).toEqual([]);
+}
+
+describe("messaging-compatible-endpoint live test local classifiers", () => {
+  function output(text: string): Pick<ShellProbeResult, "stdout" | "stderr"> {
+    return { stdout: "", stderr: text };
+  }
+
+  it("skips only rate-limited endpoint validation before legacy evidence exists", () => {
+    expect(
+      shouldSkipPreContractProviderRateLimit(
+        output(
+          "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429",
+        ),
+      ),
+    ).toBe(true);
+    expect(
+      shouldSkipPreContractProviderRateLimit(
+        output("NVIDIA Endpoints endpoint validation failed.\nRequest rate limit exceeded"),
+      ),
+    ).toBe(true);
+    expect(
+      shouldSkipPreContractProviderRateLimit(
+        output("Other OpenAI-compatible endpoint endpoint validation failed: invalid credential"),
+      ),
+    ).toBe(false);
+    expect(
+      shouldSkipPreContractProviderRateLimit(
+        output(
+          "Chat Completions API validation returned HTTP 429\n✓ Compatible endpoint responds through inference.local inside the sandbox",
+        ),
+      ),
+    ).toBe(false);
+    expect(
+      shouldSkipPreContractProviderRateLimit(output("endpoint validation failed: HTTP 429"), [
+        {
+          auth: "ok",
+          hopHeaders: [],
+          method: "POST",
+          path: "/v1/chat/completions",
+        },
+      ]),
+    ).toBe(false);
+  });
+});
+
+liveTest(
+  "messaging compatible endpoint routes Telegram-enabled OpenClaw through inference.local",
+  { timeout: TEST_TIMEOUT_MS },
+  async ({ artifacts, cleanup, host, sandbox, skip }) => {
+    const docker = await host.command("docker", ["info"], {
+      artifactName: "prereq-docker-info-messaging-compatible-endpoint",
+      env: commandEnv(),
+      timeoutMs: 30_000,
+    });
+    if (docker.exitCode !== 0) {
+      if (process.env.GITHUB_ACTIONS === "true") {
+        throw new Error(
+          `Docker is required for messaging compatible endpoint E2E: ${resultText(docker)}`,
+        );
+      }
+      skip("Docker is required for messaging compatible endpoint E2E");
+    }
+
+    await artifacts.writeJson("scenario.json", {
+      id: "messaging-compatible-endpoint",
+      runner: "vitest",
+      boundary: "direct-cli-onboard-openshell-compatible-endpoint",
+      legacySource: "test/e2e/test-messaging-compatible-endpoint.sh",
+      refs: ["#2766", "#2572", "#5098"],
+      contract: [
+        "local OpenAI-compatible mock endpoint starts and is reachable",
+        "custom provider + Telegram onboard completes",
+        "onboard runs the compatible endpoint sandbox smoke check",
+        "gateway registers compatible-endpoint provider",
+        "openclaw.json uses managed inference.local provider and Telegram config",
+        "gateway stays up after Telegram provider initialization",
+        "sandbox inference.local chat completion reaches the mock with auth",
+        "OpenClaw agent turn completes through the compatible endpoint",
+        "http-proxy-fix.js strips RFC 7230 hop-by-hop proxy headers",
+      ],
+    });
+
+    cleanup.add(`destroy messaging compatible endpoint state ${SANDBOX_NAME}`, () =>
+      cleanupMessagingState(host, SANDBOX_NAME),
+    );
+    await cleanupMessagingState(host, SANDBOX_NAME);
+
+    const compatibleMock = await startCompatibleMock(MOCK_PORT, COMPAT_MODEL, COMPATIBLE_KEY);
+    cleanup.add("stop compatible endpoint mock", async () => {
+      await artifacts.writeJson("compatible-endpoint-mock-requests.json", compatibleMock.requests);
+      await compatibleMock.close();
+    });
+
+    const hostAddress = await hostAddressForSandbox(host);
+    const endpointUrl = `http://${hostAddress}:${new URL(compatibleMock.localBaseUrl).port}/v1`;
+    const hostReachability = await host.command("curl", ["-sf", `${endpointUrl}/models`], {
+      artifactName: "compatible-endpoint-host-reachability",
+      env: commandEnv(),
+      redactionValues: redactionValues(),
+      timeoutMs: 30_000,
+    });
+    expect(hostReachability.exitCode, resultText(hostReachability)).toBe(0);
+
+    const { result: onboard, runner } = await runCompatibleOnboard(host, endpointUrl);
+    if (
+      onboard.exitCode !== 0 &&
+      shouldSkipPreContractProviderRateLimit(onboard, compatibleMock.requests)
+    ) {
+      await artifacts.writeJson("scenario-result.json", {
+        id: "messaging-compatible-endpoint",
+        status: "skipped",
+        reason: "external-provider-rate-limit-before-legacy-contract",
+        runner,
+        onboardExitCode: onboard.exitCode,
+        onboardTimedOut: onboard.timedOut,
+        onboardArtifacts: onboard.artifacts,
+        mockRequestsBeforeSkip: compatibleMock.requests.length,
+        sourceBoundary: "external provider endpoint validation outside the repo",
+        removalCondition:
+          "remove once CI endpoint validation is stable for a release cycle or covered by a hermetic provider-validation fixture",
+      });
+      skip(
+        "External endpoint validation was rate-limited before the messaging-compatible endpoint contract could run",
+      );
+    }
+    expect(onboard.exitCode, resultText(onboard)).toBe(0);
+    expect(resultText(onboard)).toContain("Compatible endpoint responds through inference.local");
+
+    const provider = await host.command("openshell", ["provider", "get", "compatible-endpoint"], {
+      artifactName: "openshell-provider-get-compatible-endpoint",
+      env: commandEnv(),
+      timeoutMs: 30_000,
+    });
+    expect(provider.exitCode, resultText(provider)).toBe(0);
+
+    await assertOpenClawConfigShape(sandbox);
+    await assertGatewayReady(sandbox);
+    await assertSandboxInference(sandbox);
+    await assertOpenClawAgentTurn(sandbox, compatibleMock);
+
+    expect(
+      compatibleMock.requests.some(
+        (request) => request.path === "/v1/chat/completions" && request.auth === "ok",
+      ),
+      "compatible mock did not record authenticated /v1/chat/completions traffic",
+    ).toBe(true);
+
+    const telegramRoundTripSecretsAvailable = Boolean(
+      process.env.TELEGRAM_BOT_TOKEN_REAL &&
+        process.env.TELEGRAM_CHAT_ID_E2E &&
+        process.env.COMPATIBLE_API_KEY &&
+        process.env.NEMOCLAW_ENDPOINT_URL &&
+        process.env.NEMOCLAW_COMPAT_MODEL,
+    );
+    await artifacts.writeJson("telegram-live-round-trip.json", {
+      status: "skipped",
+      reason: telegramRoundTripSecretsAvailable
+        ? "Live Telegram reply requires an inbound user-message driver; hermetic route passed"
+        : "Live Telegram-compatible round trip secrets not fully set",
+    });
+
+    await artifacts.writeJson("scenario-result.json", {
+      id: "messaging-compatible-endpoint",
+      runner,
+      endpointUrl,
+      assertions: {
+        dockerRunning: docker.exitCode === 0,
+        mockReachable: hostReachability.exitCode === 0,
+        onboardCompleted: onboard.exitCode === 0,
+        providerRegistered: provider.exitCode === 0,
+        authenticatedChatTraffic: compatibleMock.requests.some(
+          (request) => request.path === "/v1/chat/completions" && request.auth === "ok",
+        ),
+        proxyHopHeadersStripped: compatibleMock.hopHeaderLogs.every(
+          (headers) => headers.length === 0,
+        ),
+      },
+    });
+  },
+);
diff --git a/test/e2e-scenario/live/network-policy-transient-provider.ts b/test/e2e-scenario/live/network-policy-transient-provider.ts
index 04f091cbd4..224b1a73c8 100644
--- a/test/e2e-scenario/live/network-policy-transient-provider.ts
+++ b/test/e2e-scenario/live/network-policy-transient-provider.ts
@@ -6,7 +6,7 @@ import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
 const TRANSIENT_PROVIDER_VALIDATION_RE =
   /endpoint validation failed|failed to verify inference endpoint|Chat Completions API validation/i;
 const TRANSIENT_PROVIDER_DETAIL_RE =
-  /timed? out|timeout|curl failed \(exit (7|28|35|52|56)\)|ETIMEDOUT|ECONNRESET|EAI_AGAIN|ENOTFOUND|failed to connect|error sending request|HTTP (429|502|503|504)|returned HTTP (429|502|503|504)|temporar/i;
+  /timed? out|timeout|curl failed \(exit (7|28|35|52|56)\)|ETIMEDOUT|ECONNRESET|EAI_AGAIN|ENOTFOUND|failed to connect|error sending request|HTTP (429|502|503|504)|returned HTTP (429|502|503|504)|too many requests|rate[- ]?limit|quota|temporar/i;
 
 function resultText(result: Pick<ShellProbeResult, "stdout" | "stderr">): string {
   return [result.stdout, result.stderr].filter(Boolean).join("\n");
diff --git a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
index 354712ecde..fb62c0c080 100644
--- a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
+++ b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
@@ -165,6 +165,26 @@ describe("e2e-vitest-scenarios workflow boundary", () => {
       selectedFreeStandingJobs: ["runtime-overrides-vitest"],
       registryScenarios: [],
     });
+    expect(
+      evaluateE2eVitestWorkflowDispatchSelectors({
+        scenarios: "messaging-compatible-endpoint",
+      }),
+    ).toMatchObject({
+      valid: true,
+      liveScenariosRuns: false,
+      selectedFreeStandingJobs: ["messaging-compatible-endpoint-vitest"],
+      registryScenarios: [],
+    });
+    expect(
+      evaluateE2eVitestWorkflowDispatchSelectors({
+        jobs: "messaging-compatible-endpoint-vitest",
+      }),
+    ).toMatchObject({
+      valid: true,
+      liveScenariosRuns: false,
+      selectedFreeStandingJobs: ["messaging-compatible-endpoint-vitest"],
+      registryScenarios: [],
+    });
     expect(
       evaluateE2eVitestWorkflowDispatchSelectors({ scenarios: "inference-routing" }),
     ).toMatchObject({
@@ -376,7 +396,7 @@ describe("e2e-vitest-scenarios workflow boundary", () => {
         registryScenarios: [],
       });
     }
-  });
+  }, 15_000);
 
   it("flags direct dispatch-input interpolation and unsafe artifact upload", () => {
     const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-vitest-workflow-"));
@@ -660,6 +680,33 @@ jobs:
     }
   });
 
+  it("requires messaging-compatible-endpoint workflow and report coverage", () => {
+    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-vitest-workflow-"));
+    const workflowPath = path.join(tmp, "workflow.yaml");
+    const workflow = fs.readFileSync(
+      path.join(process.cwd(), ".github/workflows/e2e-vitest-scenarios.yaml"),
+      "utf8",
+    );
+    fs.writeFileSync(
+      workflowPath,
+      workflow
+        .replace(/messaging-compatible-endpoint-vitest/g, "msg-compatible-missing")
+        .replace(/messaging-compatible-endpoint/g, "msg-compatible-missing"),
+    );
+
+    try {
+      const errors = validateE2eVitestScenariosWorkflowBoundary(workflowPath);
+      expect(errors).toEqual(
+        expect.arrayContaining([
+          "workflow missing messaging-compatible-endpoint-vitest job",
+          "report-to-pr job must wait for messaging-compatible-endpoint-vitest",
+        ]),
+      );
+    } finally {
+      fs.rmSync(tmp, { recursive: true, force: true });
+    }
+  });
+
   it("rejects Docker Hub auth and inline secrets in runtime-overrides run steps", () => {
     const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-vitest-workflow-"));
     const workflowPath = path.join(tmp, "workflow.yaml");
diff --git a/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts b/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts
index 5b5138224a..551ea0a85e 100644
--- a/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts
+++ b/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts
@@ -20,6 +20,11 @@ describe("network-policy transient provider validation classifier", () => {
         probeOutput("endpoint validation failed: returned HTTP 503 from provider"),
       ),
     ).toBe(true);
+    expect(
+      isTransientProviderValidationFailure(
+        probeOutput("endpoint validation failed: provider rate limit exceeded"),
+      ),
+    ).toBe(true);
 
     expect(
       isTransientProviderValidationFailure(
diff --git a/tools/e2e-scenarios/free-standing-jobs.env b/tools/e2e-scenarios/free-standing-jobs.env
index 68ce23e36d..844630e3a5 100644
--- a/tools/e2e-scenarios/free-standing-jobs.env
+++ b/tools/e2e-scenarios/free-standing-jobs.env
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
-allowed_jobs=openshell-version-pin-vitest,onboard-negative-paths-vitest,skill-agent-vitest,inference-routing-vitest,credential-migration-vitest,runtime-overrides-vitest,hermes-e2e-vitest,hermes-root-entrypoint-smoke-vitest,network-policy-vitest,shields-config-vitest,rebuild-openclaw-vitest,sandbox-rebuild-vitest,token-rotation-vitest,launchable-smoke-vitest,openclaw-tui-chat-correlation-vitest,gateway-guard-recovery,double-onboard-vitest,issue-4434-tui-unreachable-inference-vitest,model-router-provider-routed-inference-vitest,credential-sanitization-vitest,sandbox-survival-vitest
-free_standing_scenarios_csv=openshell-version-pin,onboard-negative-paths,skill-agent,inference-routing,runtime-overrides,hermes-e2e,hermes-root-entrypoint-smoke,network-policy,shields-config,rebuild-openclaw,sandbox-rebuild,token-rotation,openclaw-tui-chat-correlation,double-onboard,issue-4434-tui-unreachable-inference,model-router-provider-routed-inference,credential-sanitization,sandbox-survival
-free_standing_scenario_jobs_csv=openshell-version-pin:openshell-version-pin-vitest,onboard-negative-paths:onboard-negative-paths-vitest,skill-agent:skill-agent-vitest,inference-routing:inference-routing-vitest,runtime-overrides:runtime-overrides-vitest,hermes-e2e:hermes-e2e-vitest,hermes-root-entrypoint-smoke:hermes-root-entrypoint-smoke-vitest,network-policy:network-policy-vitest,shields-config:shields-config-vitest,rebuild-openclaw:rebuild-openclaw-vitest,sandbox-rebuild:sandbox-rebuild-vitest,token-rotation:token-rotation-vitest,openclaw-tui-chat-correlation:openclaw-tui-chat-correlation-vitest,double-onboard:double-onboard-vitest,issue-4434-tui-unreachable-inference:issue-4434-tui-unreachable-inference-vitest,model-router-provider-routed-inference:model-router-provider-routed-inference-vitest,credential-sanitization:credential-sanitization-vitest,sandbox-survival:sandbox-survival-vitest
+allowed_jobs=openshell-version-pin-vitest,onboard-negative-paths-vitest,skill-agent-vitest,inference-routing-vitest,credential-migration-vitest,runtime-overrides-vitest,hermes-e2e-vitest,hermes-root-entrypoint-smoke-vitest,network-policy-vitest,shields-config-vitest,rebuild-openclaw-vitest,sandbox-rebuild-vitest,token-rotation-vitest,messaging-compatible-endpoint-vitest,launchable-smoke-vitest,openclaw-tui-chat-correlation-vitest,gateway-guard-recovery,double-onboard-vitest,issue-4434-tui-unreachable-inference-vitest,model-router-provider-routed-inference-vitest,credential-sanitization-vitest,sandbox-survival-vitest
+free_standing_scenarios_csv=openshell-version-pin,onboard-negative-paths,skill-agent,inference-routing,runtime-overrides,hermes-e2e,hermes-root-entrypoint-smoke,network-policy,shields-config,rebuild-openclaw,sandbox-rebuild,token-rotation,messaging-compatible-endpoint,openclaw-tui-chat-correlation,double-onboard,issue-4434-tui-unreachable-inference,model-router-provider-routed-inference,credential-sanitization,sandbox-survival
+free_standing_scenario_jobs_csv=openshell-version-pin:openshell-version-pin-vitest,onboard-negative-paths:onboard-negative-paths-vitest,skill-agent:skill-agent-vitest,inference-routing:inference-routing-vitest,runtime-overrides:runtime-overrides-vitest,hermes-e2e:hermes-e2e-vitest,hermes-root-entrypoint-smoke:hermes-root-entrypoint-smoke-vitest,network-policy:network-policy-vitest,shields-config:shields-config-vitest,rebuild-openclaw:rebuild-openclaw-vitest,sandbox-rebuild:sandbox-rebuild-vitest,token-rotation:token-rotation-vitest,messaging-compatible-endpoint:messaging-compatible-endpoint-vitest,openclaw-tui-chat-correlation:openclaw-tui-chat-correlation-vitest,double-onboard:double-onboard-vitest,issue-4434-tui-unreachable-inference:issue-4434-tui-unreachable-inference-vitest,model-router-provider-routed-inference:model-router-provider-routed-inference-vitest,credential-sanitization:credential-sanitization-vitest,sandbox-survival:sandbox-survival-vitest
diff --git a/tools/e2e-scenarios/workflow-boundary.mts b/tools/e2e-scenarios/workflow-boundary.mts
index 9e55fc3ff3..0b799260e8 100644
--- a/tools/e2e-scenarios/workflow-boundary.mts
+++ b/tools/e2e-scenarios/workflow-boundary.mts
@@ -1168,6 +1168,157 @@ function validateTokenRotationVitestJob(errors: string[], jobs: WorkflowRecord):
   }
 }
 
+function validateMessagingCompatibleEndpointVitestJob(
+  errors: string[],
+  jobs: WorkflowRecord,
+): void {
+  const jobName = "messaging-compatible-endpoint-vitest";
+  const job = asRecord(jobs[jobName]);
+  if (Object.keys(job).length === 0) {
+    errors.push("workflow missing messaging-compatible-endpoint-vitest job");
+    return;
+  }
+
+  if (job["runs-on"] !== "ubuntu-latest") {
+    errors.push("messaging-compatible-endpoint-vitest job must run on ubuntu-latest");
+  }
+  validateFreeStandingJobSelector(errors, jobs, jobName, "messaging-compatible-endpoint");
+  if (job["timeout-minutes"] !== 45) {
+    errors.push("messaging-compatible-endpoint-vitest job must keep the legacy 45 minute timeout");
+  }
+
+  const jobEnv = asRecord(job.env);
+  if (
+    jobEnv.E2E_ARTIFACT_DIR !==
+    "${{ github.workspace }}/e2e-artifacts/vitest/messaging-compatible-endpoint"
+  ) {
+    errors.push(
+      "messaging-compatible-endpoint-vitest job must write artifacts under e2e-artifacts/vitest/messaging-compatible-endpoint",
+    );
+  }
+  if (!stringValue(jobEnv.NEMOCLAW_CLI_BIN).includes("bin/nemoclaw.js")) {
+    errors.push("messaging-compatible-endpoint-vitest job must point NEMOCLAW_CLI_BIN at the repo CLI");
+  }
+  if (jobEnv.NEMOCLAW_RUN_E2E_SCENARIOS !== "1") {
+    errors.push("messaging-compatible-endpoint-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1");
+  }
+  if (jobEnv.NEMOCLAW_SANDBOX_NAME !== "e2e-msg-compat") {
+    errors.push("messaging-compatible-endpoint-vitest job must pin the legacy sandbox name");
+  }
+  if (jobEnv.OPENSHELL_GATEWAY !== "nemoclaw") {
+    errors.push("messaging-compatible-endpoint-vitest job must force OPENSHELL_GATEWAY=nemoclaw");
+  }
+  requireEnvDoesNotExposeSecret(
+    errors,
+    "messaging-compatible-endpoint-vitest job",
+    jobEnv,
+    "NVIDIA_API_KEY",
+  );
+
+  const steps = asSteps(job.steps);
+  requireNoDispatchInputInterpolation(errors, steps);
+  for (const step of steps) {
+    if (step.name !== "Run messaging compatible endpoint live test") {
+      requireEnvDoesNotExposeSecret(
+        errors,
+        `messaging-compatible-endpoint-vitest step '${step.name ?? step.uses ?? "<unnamed>"}'`,
+        asRecord(step.env),
+        "NVIDIA_API_KEY",
+      );
+    }
+  }
+
+  const checkout = steps.find((step) => stringValue(step.uses).startsWith("actions/checkout@"));
+  if (!checkout) errors.push("messaging-compatible-endpoint-vitest job missing checkout step");
+  requireFullShaAction(errors, checkout, "messaging-compatible-endpoint-vitest checkout");
+  if (asRecord(checkout?.with)["persist-credentials"] !== false) {
+    errors.push("messaging-compatible-endpoint-vitest checkout step must set persist-credentials=false");
+  }
+
+  const dockerHubAuth = requireJobStep(errors, jobName, steps, "Authenticate to Docker Hub");
+  const dockerHubEnv = asRecord(dockerHubAuth?.env);
+  if (dockerHubEnv.DOCKERHUB_USERNAME !== "${{ secrets.DOCKERHUB_USERNAME }}") {
+    errors.push(
+      "messaging-compatible-endpoint-vitest Docker Hub auth must receive DOCKERHUB_USERNAME from secrets",
+    );
+  }
+  if (dockerHubEnv.DOCKERHUB_TOKEN !== "${{ secrets.DOCKERHUB_TOKEN }}") {
+    errors.push(
+      "messaging-compatible-endpoint-vitest Docker Hub auth must receive DOCKERHUB_TOKEN from secrets",
+    );
+  }
+  requireRunContains(errors, dockerHubAuth, "docker login docker.io");
+
+  const setupNode = namedStep(steps, "Set up Node");
+  if (!setupNode) errors.push("messaging-compatible-endpoint-vitest job missing step: Set up Node");
+  requireFullShaAction(errors, setupNode, "messaging-compatible-endpoint-vitest setup-node");
+
+  const installRootDependencies = requireJobStep(
+    errors,
+    jobName,
+    steps,
+    "Install root dependencies",
+  );
+  requireRunContains(errors, installRootDependencies, "npm ci --ignore-scripts");
+
+  const buildCli = requireJobStep(errors, jobName, steps, "Build CLI");
+  requireRunContains(errors, buildCli, "npm run build:cli");
+
+  const runVitest = requireJobStep(
+    errors,
+    jobName,
+    steps,
+    "Run messaging compatible endpoint live test",
+  );
+  const runVitestEnv = asRecord(runVitest?.env);
+  requireEnvDoesNotExposeSecret(
+    errors,
+    "messaging-compatible-endpoint-vitest step",
+    runVitestEnv,
+    "NVIDIA_API_KEY",
+  );
+  if (runVitestEnv.NEMOCLAW_COMPAT_MOCK_API_KEY !== "fake-compatible-key-e2e") {
+    errors.push("messaging-compatible-endpoint-vitest step must set a fake compatible endpoint key");
+  }
+  if (runVitestEnv.TELEGRAM_BOT_TOKEN !== "test-fake-telegram-token-e2e") {
+    errors.push("messaging-compatible-endpoint-vitest step must set a fake Telegram token");
+  }
+  if (runVitestEnv.TELEGRAM_ALLOWED_IDS !== "123456789") {
+    errors.push("messaging-compatible-endpoint-vitest step must set fake Telegram allowed ids");
+  }
+  requireRunContains(errors, runVitest, "npx vitest run --project e2e-scenarios-live");
+  requireRunContains(
+    errors,
+    runVitest,
+    "test/e2e-scenario/live/messaging-compatible-endpoint.test.ts",
+  );
+
+  const upload = requireJobStep(
+    errors,
+    jobName,
+    steps,
+    "Upload messaging compatible endpoint artifacts",
+  );
+  requireFullShaAction(errors, upload, "messaging-compatible-endpoint-vitest upload-artifact");
+  const uploadWith = asRecord(upload?.with);
+  if (uploadWith.name !== "e2e-vitest-scenarios-messaging-compatible-endpoint") {
+    errors.push("messaging-compatible-endpoint-vitest artifact upload name must be stable");
+  }
+  const uploadPath = stringValue(uploadWith.path);
+  requireUploadPathContains(errors, uploadPath, "e2e-artifacts/vitest/messaging-compatible-endpoint/");
+  if (uploadWith["include-hidden-files"] !== false) {
+    errors.push("messaging-compatible-endpoint-vitest artifact upload must set include-hidden-files: false");
+  }
+  if (uploadWith["if-no-files-found"] !== "ignore") {
+    errors.push(
+      "messaging-compatible-endpoint-vitest artifact upload must ignore missing fixture artifacts",
+    );
+  }
+  if (uploadWith["retention-days"] !== 14) {
+    errors.push("messaging-compatible-endpoint-vitest artifact upload retention-days must be 14");
+  }
+}
+
 function validateOnboardNegativePathsVitestJob(errors: string[], jobs: WorkflowRecord): void {
   const jobName = "onboard-negative-paths-vitest";
   const job = asRecord(jobs[jobName]);
@@ -2119,6 +2270,7 @@ export function validateE2eVitestScenariosWorkflowBoundary(
   validateRebuildOpenClawVitestJob(errors, jobs);
   validateSandboxRebuildVitestJob(errors, jobs);
   validateTokenRotationVitestJob(errors, jobs);
+  validateMessagingCompatibleEndpointVitestJob(errors, jobs);
   validateFreeStandingJobSelector(
     errors,
     jobs,

From fe649c63a941af39f1927457fe3cd5abae4e5deb Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 12:33:12 -0700
Subject: [PATCH 02/14] test(e2e): address messaging endpoint review

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .github/workflows/e2e-vitest-scenarios.yaml   | 26 --------
 .../messaging-compatible-endpoint.test.ts     | 50 ++++++++++++--
 .../e2e-scenarios-workflow.test.ts            | 35 ++++++++++
 tools/e2e-scenarios/workflow-boundary.mts     | 65 ++++++++++++-------
 4 files changed, 121 insertions(+), 55 deletions(-)

diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml
index b6e3d45a20..00e44224c0 100644
--- a/.github/workflows/e2e-vitest-scenarios.yaml
+++ b/.github/workflows/e2e-vitest-scenarios.yaml
@@ -1457,32 +1457,6 @@ jobs:
         with:
           persist-credentials: false
 
-      - name: Authenticate to Docker Hub
-        env:
-          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
-          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
-        shell: bash
-        run: |
-          set -euo pipefail
-          if [[ -z "${DOCKERHUB_USERNAME}" || -z "${DOCKERHUB_TOKEN}" ]]; then
-            echo "::notice::Docker Hub credentials not configured; continuing with anonymous pulls."
-            exit 0
-          fi
-          login_succeeded=0
-          for attempt in 1 2 3; do
-            if echo "${DOCKERHUB_TOKEN}" | timeout 30s docker login docker.io --username "${DOCKERHUB_USERNAME}" --password-stdin; then
-              login_succeeded=1
-              break
-            fi
-            if [[ "$attempt" -lt 3 ]]; then
-              echo "::warning::Docker Hub login attempt ${attempt} failed; retrying."
-              sleep 5
-            fi
-          done
-          if [[ "$login_succeeded" -ne 1 ]]; then
-            echo "::warning::Docker Hub login failed after 3 attempts; continuing with anonymous pulls."
-          fi
-
       - name: Set up Node
         uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0
         with:
diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
index 9249849dfc..2c2a5903fb 100644
--- a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
+++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
@@ -54,6 +54,12 @@ const HOP_BY_HOP_HEADERS = new Set([
 ]);
 const RATE_LIMIT_VALIDATION_RE =
   /HTTP\s+429|returned\s+HTTP\s+429|\b429\b|too many requests|rate[- ]?limit|quota/i;
+const DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE = /NVIDIA Endpoints endpoint validation failed/i;
+const COMPATIBLE_ENDPOINT_VALIDATION_RE =
+  /Other OpenAI-compatible endpoint endpoint validation failed|Chat Completions API validation/i;
+const COMPAT_AGENT_REPLY = "COMPAT_MOCK_ROUTE_5098_OK";
+const COMPAT_AGENT_PROMPT =
+  "Call the configured model and report the compatible endpoint route token.";
 
 interface MockRequestLog {
   method: string;
@@ -201,7 +207,7 @@ async function startCompatibleMock(
           {
             type: "message",
             role: "assistant",
-            content: [{ type: "output_text", text: "PONG from compatible endpoint mock" }],
+            content: [{ type: "output_text", text: COMPAT_AGENT_REPLY }],
           },
         ],
       });
@@ -229,7 +235,7 @@ async function startCompatibleMock(
           choices: [
             {
               index: 0,
-              delta: { role: "assistant", content: "PONG from compatible endpoint mock" },
+              delta: { role: "assistant", content: COMPAT_AGENT_REPLY },
               finish_reason: null,
             },
           ],
@@ -248,7 +254,7 @@ async function startCompatibleMock(
         choices: [
           {
             index: 0,
-            message: { role: "assistant", content: "PONG from compatible endpoint mock" },
+            message: { role: "assistant", content: COMPAT_AGENT_REPLY },
             finish_reason: "stop",
           },
         ],
@@ -429,9 +435,12 @@ function shouldSkipPreContractProviderRateLimit(
   result: Pick<ShellProbeResult, "stdout" | "stderr">,
   requests: readonly MockRequestLog[] = [],
 ): boolean {
+  const text = resultText(result);
   return (
+    COMPATIBLE_ENDPOINT_VALIDATION_RE.test(text) &&
+    !DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE.test(text) &&
     isTransientProviderValidationFailure(result) &&
-    RATE_LIMIT_VALIDATION_RE.test(resultText(result)) &&
+    RATE_LIMIT_VALIDATION_RE.test(text) &&
     !hasLegacyCompatibleEndpointEvidence(result, requests)
   );
 }
@@ -761,7 +770,7 @@ async function assertOpenClawAgentTurn(
       "--session-id",
       sessionId,
       "-m",
-      "Reply with only: PONG",
+      COMPAT_AGENT_PROMPT,
     ],
     {
       artifactName: "openclaw-agent-compatible-endpoint",
@@ -777,7 +786,7 @@ async function assertOpenClawAgentTurn(
     text.slice(0, 500),
   ).toBe(false);
   expect(agent.exitCode, text.slice(0, 500)).toBe(0);
-  expect(parseOpenClawAgentText(agent.stdout), text.slice(0, 500)).toMatch(/PONG/i);
+  expect(parseOpenClawAgentText(agent.stdout), text.slice(0, 500)).toContain(COMPAT_AGENT_REPLY);
 
   const newHopHeaderLogs = compatibleMock.hopHeaderLogs.slice(hopCountBefore);
   expect(
@@ -805,7 +814,14 @@ describe("messaging-compatible-endpoint live test local classifiers", () => {
       shouldSkipPreContractProviderRateLimit(
         output("NVIDIA Endpoints endpoint validation failed.\nRequest rate limit exceeded"),
       ),
-    ).toBe(true);
+    ).toBe(false);
+    expect(
+      shouldSkipPreContractProviderRateLimit(
+        output(
+          "NVIDIA Endpoints endpoint validation failed.\nChat Completions API validation returned HTTP 429",
+        ),
+      ),
+    ).toBe(false);
     expect(
       shouldSkipPreContractProviderRateLimit(
         output("Other OpenAI-compatible endpoint endpoint validation failed: invalid credential"),
@@ -829,6 +845,16 @@ describe("messaging-compatible-endpoint live test local classifiers", () => {
       ]),
     ).toBe(false);
   });
+
+  it("does not satisfy the agent reply assertion with echoed prompt text", () => {
+    expect(COMPAT_AGENT_PROMPT).not.toContain(COMPAT_AGENT_REPLY);
+    expect(
+      parseOpenClawAgentText(JSON.stringify({ result: { content: COMPAT_AGENT_PROMPT } })),
+    ).not.toContain(COMPAT_AGENT_REPLY);
+    expect(
+      parseOpenClawAgentText(JSON.stringify({ result: { content: COMPAT_AGENT_REPLY } })),
+    ).toContain(COMPAT_AGENT_REPLY);
+  });
 });
 
 liveTest(
@@ -854,6 +880,10 @@ liveTest(
       runner: "vitest",
       boundary: "direct-cli-onboard-openshell-compatible-endpoint",
       legacySource: "test/e2e/test-messaging-compatible-endpoint.sh",
+      legacyRetirement: {
+        shellDeletion: "deferred to #5098 Phase 11 cleanup",
+        nightlyShellWiring: "deferred to #5098 Phase 11 cleanup",
+      },
       refs: ["#2766", "#2572", "#5098"],
       contract: [
         "local OpenAI-compatible mock endpoint starts and is reachable",
@@ -904,6 +934,8 @@ liveTest(
         onboardArtifacts: onboard.artifacts,
         mockRequestsBeforeSkip: compatibleMock.requests.length,
         sourceBoundary: "external provider endpoint validation outside the repo",
+        sourceFixConstraint:
+          "skip is limited to compatible/custom endpoint validation evidence; NVIDIA/default provider validation remains a test failure",
         removalCondition:
           "remove once CI endpoint validation is stable for a release cycle or covered by a hermetic provider-validation fixture",
       });
@@ -951,6 +983,10 @@ liveTest(
       id: "messaging-compatible-endpoint",
       runner,
       endpointUrl,
+      legacyRetirement: {
+        shellDeletion: "deferred to #5098 Phase 11 cleanup",
+        nightlyShellWiring: "deferred to #5098 Phase 11 cleanup",
+      },
       assertions: {
         dockerRunning: docker.exitCode === 0,
         mockReachable: hostReachability.exitCode === 0,
diff --git a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
index fb62c0c080..8f4ce78d2f 100644
--- a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
+++ b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
@@ -707,6 +707,41 @@ jobs:
     }
   });
 
+  it("rejects Docker Hub auth in the messaging-compatible-endpoint job", () => {
+    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-vitest-workflow-"));
+    const workflowPath = path.join(tmp, "workflow.yaml");
+    const workflow = readWorkflow() as {
+      jobs: Record<string, { steps: Array<Record<string, unknown>> }>;
+    };
+    const steps = workflow.jobs["messaging-compatible-endpoint-vitest"]?.steps;
+    expect(steps).toEqual(expect.any(Array));
+    const setupNodeIndex = steps.findIndex((step) => step.name === "Set up Node");
+    expect(setupNodeIndex).toBeGreaterThan(0);
+    steps.splice(setupNodeIndex, 0, {
+      name: "Authenticate to Docker Hub",
+      env: {
+        DOCKERHUB_USERNAME: "${{ secrets.DOCKERHUB_USERNAME }}",
+        DOCKERHUB_TOKEN: "${{ secrets.DOCKERHUB_TOKEN }}",
+      },
+      run: "docker login docker.io --username user --password ${{ secrets.DOCKERHUB_TOKEN }}",
+    });
+    fs.writeFileSync(workflowPath, YAML.stringify(workflow));
+
+    try {
+      const errors = validateE2eVitestScenariosWorkflowBoundary(workflowPath);
+      expect(errors).toEqual(
+        expect.arrayContaining([
+          "messaging-compatible-endpoint-vitest must not authenticate to Docker Hub before branch-controlled test code runs",
+          "messaging-compatible-endpoint-vitest step 'Authenticate to Docker Hub' env must not include DOCKERHUB_USERNAME",
+          "messaging-compatible-endpoint-vitest step 'Authenticate to Docker Hub' env must not include DOCKERHUB_TOKEN",
+          "messaging-compatible-endpoint-vitest step 'Authenticate to Docker Hub' run script must not use docker login or inline secret interpolation",
+        ]),
+      );
+    } finally {
+      fs.rmSync(tmp, { recursive: true, force: true });
+    }
+  });
+
   it("rejects Docker Hub auth and inline secrets in runtime-overrides run steps", () => {
     const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-vitest-workflow-"));
     const workflowPath = path.join(tmp, "workflow.yaml");
diff --git a/tools/e2e-scenarios/workflow-boundary.mts b/tools/e2e-scenarios/workflow-boundary.mts
index 0b799260e8..a149b807b8 100644
--- a/tools/e2e-scenarios/workflow-boundary.mts
+++ b/tools/e2e-scenarios/workflow-boundary.mts
@@ -1214,18 +1214,53 @@ function validateMessagingCompatibleEndpointVitestJob(
     jobEnv,
     "NVIDIA_API_KEY",
   );
+  requireEnvDoesNotExposeSecret(
+    errors,
+    "messaging-compatible-endpoint-vitest job",
+    jobEnv,
+    "DOCKERHUB_USERNAME",
+  );
+  requireEnvDoesNotExposeSecret(
+    errors,
+    "messaging-compatible-endpoint-vitest job",
+    jobEnv,
+    "DOCKERHUB_TOKEN",
+  );
 
   const steps = asSteps(job.steps);
   requireNoDispatchInputInterpolation(errors, steps);
   for (const step of steps) {
-    if (step.name !== "Run messaging compatible endpoint live test") {
-      requireEnvDoesNotExposeSecret(
-        errors,
-        `messaging-compatible-endpoint-vitest step '${step.name ?? step.uses ?? "<unnamed>"}'`,
-        asRecord(step.env),
-        "NVIDIA_API_KEY",
-      );
-    }
+    const stepName = step.name ?? step.uses ?? "<unnamed>";
+    const stepEnv = asRecord(step.env);
+    requireEnvDoesNotExposeSecret(
+      errors,
+      `messaging-compatible-endpoint-vitest step '${stepName}'`,
+      stepEnv,
+      "NVIDIA_API_KEY",
+    );
+    requireEnvDoesNotExposeSecret(
+      errors,
+      `messaging-compatible-endpoint-vitest step '${stepName}'`,
+      stepEnv,
+      "DOCKERHUB_USERNAME",
+    );
+    requireEnvDoesNotExposeSecret(
+      errors,
+      `messaging-compatible-endpoint-vitest step '${stepName}'`,
+      stepEnv,
+      "DOCKERHUB_TOKEN",
+    );
+    requireNoDockerHubAuthInRun(
+      errors,
+      `messaging-compatible-endpoint-vitest step '${stepName}'`,
+      stringValue(step.run),
+    );
+  }
+
+  if (namedStep(steps, "Authenticate to Docker Hub")) {
+    errors.push(
+      "messaging-compatible-endpoint-vitest must not authenticate to Docker Hub before branch-controlled test code runs",
+    );
   }
 
   const checkout = steps.find((step) => stringValue(step.uses).startsWith("actions/checkout@"));
@@ -1235,20 +1270,6 @@ function validateMessagingCompatibleEndpointVitestJob(
     errors.push("messaging-compatible-endpoint-vitest checkout step must set persist-credentials=false");
   }
 
-  const dockerHubAuth = requireJobStep(errors, jobName, steps, "Authenticate to Docker Hub");
-  const dockerHubEnv = asRecord(dockerHubAuth?.env);
-  if (dockerHubEnv.DOCKERHUB_USERNAME !== "${{ secrets.DOCKERHUB_USERNAME }}") {
-    errors.push(
-      "messaging-compatible-endpoint-vitest Docker Hub auth must receive DOCKERHUB_USERNAME from secrets",
-    );
-  }
-  if (dockerHubEnv.DOCKERHUB_TOKEN !== "${{ secrets.DOCKERHUB_TOKEN }}") {
-    errors.push(
-      "messaging-compatible-endpoint-vitest Docker Hub auth must receive DOCKERHUB_TOKEN from secrets",
-    );
-  }
-  requireRunContains(errors, dockerHubAuth, "docker login docker.io");
-
   const setupNode = namedStep(steps, "Set up Node");
   if (!setupNode) errors.push("messaging-compatible-endpoint-vitest job missing step: Set up Node");
   requireFullShaAction(errors, setupNode, "messaging-compatible-endpoint-vitest setup-node");

From 825a2af5dbeab35a95f56436dc64cf814e13b0fb Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 12:40:29 -0700
Subject: [PATCH 03/14] test(e2e): harden messaging compatible endpoint checks

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .../messaging-compatible-endpoint.test.ts     | 21 +++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
index 2c2a5903fb..9c009e0a5a 100644
--- a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
+++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
@@ -61,6 +61,11 @@ const COMPAT_AGENT_REPLY = "COMPAT_MOCK_ROUTE_5098_OK";
 const COMPAT_AGENT_PROMPT =
   "Call the configured model and report the compatible endpoint route token.";
 
+function nodeEvalArg(source: string): string {
+  const encoded = Buffer.from(source, "utf8").toString("base64");
+  return `eval(Buffer.from(${JSON.stringify(encoded)}, "base64").toString("utf8"))`;
+}
+
 interface MockRequestLog {
   method: string;
   path: string;
@@ -580,11 +585,15 @@ console.log(JSON.stringify({
 }));
 process.exit(errors.length ? 1 : 0);
 `;
-  const result = await sandbox.exec(SANDBOX_NAME, ["node", "-e", script, COMPAT_MODEL], {
-    artifactName: "openclaw-config-compatible-endpoint",
-    env: commandEnv(),
-    timeoutMs: 60_000,
-  });
+  const result = await sandbox.exec(
+    SANDBOX_NAME,
+    ["node", "-e", nodeEvalArg(script), COMPAT_MODEL],
+    {
+      artifactName: "openclaw-config-compatible-endpoint",
+      env: commandEnv(),
+      timeoutMs: 60_000,
+    },
+  );
   expect(result.exitCode, resultText(result)).toBe(0);
 }
 
@@ -606,7 +615,7 @@ sock.setTimeout(1000, () => finish("TIMEOUT", 1));
 `;
   let last: ShellProbeResult | undefined;
   for (let attempt = 1; attempt <= 30; attempt += 1) {
-    last = await sandbox.exec(SANDBOX_NAME, ["node", "-e", script], {
+    last = await sandbox.exec(SANDBOX_NAME, ["node", "-e", nodeEvalArg(script)], {
       artifactName: `gateway-ready-compatible-endpoint-${attempt}`,
       env: commandEnv(),
       timeoutMs: 5_000,

From 9f0d4686db85e7696365add840fc3592bd2d446f Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 12:42:04 -0700
Subject: [PATCH 04/14] test(e2e): tighten messaging endpoint assertions

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .../live/messaging-compatible-endpoint.test.ts    | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
index 9c009e0a5a..2fc0e0dcfe 100644
--- a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
+++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
@@ -56,7 +56,7 @@ const RATE_LIMIT_VALIDATION_RE =
   /HTTP\s+429|returned\s+HTTP\s+429|\b429\b|too many requests|rate[- ]?limit|quota/i;
 const DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE = /NVIDIA Endpoints endpoint validation failed/i;
 const COMPATIBLE_ENDPOINT_VALIDATION_RE =
-  /Other OpenAI-compatible endpoint endpoint validation failed|Chat Completions API validation/i;
+  /Other OpenAI-compatible endpoint endpoint validation failed/i;
 const COMPAT_AGENT_REPLY = "COMPAT_MOCK_ROUTE_5098_OK";
 const COMPAT_AGENT_PROMPT =
   "Call the configured model and report the compatible endpoint route token.";
@@ -631,7 +631,9 @@ sock.setTimeout(1000, () => finish("TIMEOUT", 1));
 async function assertSandboxInference(sandbox: SandboxClient): Promise<void> {
   const payload = JSON.stringify({
     model: COMPAT_MODEL,
-    messages: [{ role: "user", content: "Reply with exactly: PONG" }],
+    messages: [
+      { role: "user", content: "Return the compatible endpoint route verification value." },
+    ],
     max_tokens: 32,
   });
   const response = await sandbox.exec(
@@ -654,7 +656,9 @@ async function assertSandboxInference(sandbox: SandboxClient): Promise<void> {
     },
   );
   expect(response.exitCode, resultText(response)).toBe(0);
-  expect(openAiContent(response.stdout), response.stdout.slice(0, 500)).toMatch(/PONG/i);
+  expect(openAiContent(response.stdout), response.stdout.slice(0, 500)).toContain(
+    COMPAT_AGENT_REPLY,
+  );
 }
 
 function findJsonObjectEnd(raw: string, start: number): number | null {
@@ -831,6 +835,11 @@ describe("messaging-compatible-endpoint live test local classifiers", () => {
         ),
       ),
     ).toBe(false);
+    expect(
+      shouldSkipPreContractProviderRateLimit(
+        output("Chat Completions API validation returned HTTP 429"),
+      ),
+    ).toBe(false);
     expect(
       shouldSkipPreContractProviderRateLimit(
         output("Other OpenAI-compatible endpoint endpoint validation failed: invalid credential"),

From c316b752d20cfd3a42f05b16c2ee263c5724126b Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 12:54:01 -0700
Subject: [PATCH 05/14] test(e2e): prove messaging endpoint rate-limit source

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .../messaging-compatible-endpoint.test.ts     | 75 ++++++++++++++++---
 1 file changed, 66 insertions(+), 9 deletions(-)

diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
index 2fc0e0dcfe..535eb73241 100644
--- a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
+++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
@@ -426,27 +426,45 @@ async function cleanupMessagingState(host: HostCliClient, sandboxName: string):
   await stopGatewayRuntime(host, "cleanup-openshell-gateway-runtime-nemoclaw");
 }
 
-function hasLegacyCompatibleEndpointEvidence(
+function hasLegacyCompatibleEndpointSmokeEvidence(
   result: Pick<ShellProbeResult, "stdout" | "stderr">,
+): boolean {
+  return resultText(result).includes("Compatible endpoint responds through inference.local");
+}
+
+function hasCompatibleMockValidationEvidence(
   requests: readonly MockRequestLog[],
+  requestCountBeforeOnboard: number,
 ): boolean {
-  return (
-    resultText(result).includes("Compatible endpoint responds through inference.local") ||
-    requests.some((request) => request.path === "/v1/chat/completions" && request.auth === "ok")
-  );
+  return requests
+    .slice(requestCountBeforeOnboard)
+    .some(
+      (request) =>
+        request.auth === "ok" &&
+        [
+          "/v1/models",
+          "/models",
+          "/v1/responses",
+          "/responses",
+          "/v1/chat/completions",
+          "/chat/completions",
+        ].includes(request.path),
+    );
 }
 
 function shouldSkipPreContractProviderRateLimit(
   result: Pick<ShellProbeResult, "stdout" | "stderr">,
   requests: readonly MockRequestLog[] = [],
+  requestCountBeforeOnboard = 0,
 ): boolean {
   const text = resultText(result);
   return (
     COMPATIBLE_ENDPOINT_VALIDATION_RE.test(text) &&
     !DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE.test(text) &&
+    hasCompatibleMockValidationEvidence(requests, requestCountBeforeOnboard) &&
     isTransientProviderValidationFailure(result) &&
     RATE_LIMIT_VALIDATION_RE.test(text) &&
-    !hasLegacyCompatibleEndpointEvidence(result, requests)
+    !hasLegacyCompatibleEndpointSmokeEvidence(result)
   );
 }
 
@@ -822,7 +840,28 @@ describe("messaging-compatible-endpoint live test local classifiers", () => {
           "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429",
         ),
       ),
+    ).toBe(false);
+    expect(
+      shouldSkipPreContractProviderRateLimit(
+        output(
+          "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429",
+        ),
+        [
+          { auth: "missing", hopHeaders: [], method: "GET", path: "/v1/models" },
+          { auth: "ok", hopHeaders: [], method: "POST", path: "/v1/chat/completions" },
+        ],
+        1,
+      ),
     ).toBe(true);
+    expect(
+      shouldSkipPreContractProviderRateLimit(
+        output(
+          "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429",
+        ),
+        [{ auth: "ok", hopHeaders: [], method: "POST", path: "/v1/chat/completions" }],
+        1,
+      ),
+    ).toBe(false);
     expect(
       shouldSkipPreContractProviderRateLimit(
         output("NVIDIA Endpoints endpoint validation failed.\nRequest rate limit exceeded"),
@@ -862,6 +901,14 @@ describe("messaging-compatible-endpoint live test local classifiers", () => {
         },
       ]),
     ).toBe(false);
+    expect(
+      shouldSkipPreContractProviderRateLimit(
+        output(
+          "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429\n✓ Compatible endpoint responds through inference.local inside the sandbox",
+        ),
+        [{ auth: "ok", hopHeaders: [], method: "POST", path: "/v1/chat/completions" }],
+      ),
+    ).toBe(false);
   });
 
   it("does not satisfy the agent reply assertion with echoed prompt text", () => {
@@ -937,10 +984,15 @@ liveTest(
     });
     expect(hostReachability.exitCode, resultText(hostReachability)).toBe(0);
 
+    const mockRequestCountBeforeOnboard = compatibleMock.requests.length;
     const { result: onboard, runner } = await runCompatibleOnboard(host, endpointUrl);
     if (
       onboard.exitCode !== 0 &&
-      shouldSkipPreContractProviderRateLimit(onboard, compatibleMock.requests)
+      shouldSkipPreContractProviderRateLimit(
+        onboard,
+        compatibleMock.requests,
+        mockRequestCountBeforeOnboard,
+      )
     ) {
       await artifacts.writeJson("scenario-result.json", {
         id: "messaging-compatible-endpoint",
@@ -950,10 +1002,15 @@ liveTest(
         onboardExitCode: onboard.exitCode,
         onboardTimedOut: onboard.timedOut,
         onboardArtifacts: onboard.artifacts,
+        mockRequestsBeforeOnboard: mockRequestCountBeforeOnboard,
         mockRequestsBeforeSkip: compatibleMock.requests.length,
-        sourceBoundary: "external provider endpoint validation outside the repo",
+        compatibleMockValidationRequests: compatibleMock.requests.slice(
+          mockRequestCountBeforeOnboard,
+        ),
+        sourceBoundary:
+          "onboard reached the configured compatible mock before an external provider rate limit stopped the pre-contract validation path",
         sourceFixConstraint:
-          "skip is limited to compatible/custom endpoint validation evidence; NVIDIA/default provider validation remains a test failure",
+          "skip requires compatible/custom endpoint validation text plus new authenticated mock traffic; NVIDIA/default provider validation or zero-mock-traffic 429 remains a test failure",
         removalCondition:
           "remove once CI endpoint validation is stable for a release cycle or covered by a hermetic provider-validation fixture",
       });

From 03fdfc01753aae1758355c6d8ba7ef6ace71ccfd Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 13:05:40 -0700
Subject: [PATCH 06/14] test(e2e): narrow messaging endpoint rate-limit skip

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .../messaging-compatible-endpoint.test.ts     | 94 ++++++-------------
 1 file changed, 31 insertions(+), 63 deletions(-)

diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
index 535eb73241..0669731fbb 100644
--- a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
+++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
@@ -55,8 +55,6 @@ const HOP_BY_HOP_HEADERS = new Set([
 const RATE_LIMIT_VALIDATION_RE =
   /HTTP\s+429|returned\s+HTTP\s+429|\b429\b|too many requests|rate[- ]?limit|quota/i;
 const DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE = /NVIDIA Endpoints endpoint validation failed/i;
-const COMPATIBLE_ENDPOINT_VALIDATION_RE =
-  /Other OpenAI-compatible endpoint endpoint validation failed/i;
 const COMPAT_AGENT_REPLY = "COMPAT_MOCK_ROUTE_5098_OK";
 const COMPAT_AGENT_PROMPT =
   "Call the configured model and report the compatible endpoint route token.";
@@ -432,36 +430,15 @@ function hasLegacyCompatibleEndpointSmokeEvidence(
   return resultText(result).includes("Compatible endpoint responds through inference.local");
 }
 
-function hasCompatibleMockValidationEvidence(
-  requests: readonly MockRequestLog[],
-  requestCountBeforeOnboard: number,
-): boolean {
-  return requests
-    .slice(requestCountBeforeOnboard)
-    .some(
-      (request) =>
-        request.auth === "ok" &&
-        [
-          "/v1/models",
-          "/models",
-          "/v1/responses",
-          "/responses",
-          "/v1/chat/completions",
-          "/chat/completions",
-        ].includes(request.path),
-    );
-}
-
 function shouldSkipPreContractProviderRateLimit(
   result: Pick<ShellProbeResult, "stdout" | "stderr">,
-  requests: readonly MockRequestLog[] = [],
-  requestCountBeforeOnboard = 0,
+  options: { githubActions?: boolean } = {},
 ): boolean {
   const text = resultText(result);
+  const runningInActions = options.githubActions ?? process.env.GITHUB_ACTIONS === "true";
   return (
-    COMPATIBLE_ENDPOINT_VALIDATION_RE.test(text) &&
-    !DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE.test(text) &&
-    hasCompatibleMockValidationEvidence(requests, requestCountBeforeOnboard) &&
+    runningInActions &&
+    DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE.test(text) &&
     isTransientProviderValidationFailure(result) &&
     RATE_LIMIT_VALIDATION_RE.test(text) &&
     !hasLegacyCompatibleEndpointSmokeEvidence(result)
@@ -839,6 +816,7 @@ describe("messaging-compatible-endpoint live test local classifiers", () => {
         output(
           "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429",
         ),
+        { githubActions: true },
       ),
     ).toBe(false);
     expect(
@@ -846,42 +824,41 @@ describe("messaging-compatible-endpoint live test local classifiers", () => {
         output(
           "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429",
         ),
-        [
-          { auth: "missing", hopHeaders: [], method: "GET", path: "/v1/models" },
-          { auth: "ok", hopHeaders: [], method: "POST", path: "/v1/chat/completions" },
-        ],
-        1,
+        { githubActions: false },
       ),
-    ).toBe(true);
+    ).toBe(false);
     expect(
       shouldSkipPreContractProviderRateLimit(
         output(
           "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429",
         ),
-        [{ auth: "ok", hopHeaders: [], method: "POST", path: "/v1/chat/completions" }],
-        1,
+        { githubActions: true },
       ),
     ).toBe(false);
     expect(
       shouldSkipPreContractProviderRateLimit(
         output("NVIDIA Endpoints endpoint validation failed.\nRequest rate limit exceeded"),
+        { githubActions: true },
       ),
-    ).toBe(false);
+    ).toBe(true);
     expect(
       shouldSkipPreContractProviderRateLimit(
         output(
           "NVIDIA Endpoints endpoint validation failed.\nChat Completions API validation returned HTTP 429",
         ),
+        { githubActions: false },
       ),
     ).toBe(false);
     expect(
       shouldSkipPreContractProviderRateLimit(
         output("Chat Completions API validation returned HTTP 429"),
+        { githubActions: true },
       ),
     ).toBe(false);
     expect(
       shouldSkipPreContractProviderRateLimit(
         output("Other OpenAI-compatible endpoint endpoint validation failed: invalid credential"),
+        { githubActions: true },
       ),
     ).toBe(false);
     expect(
@@ -889,24 +866,28 @@ describe("messaging-compatible-endpoint live test local classifiers", () => {
         output(
           "Chat Completions API validation returned HTTP 429\n✓ Compatible endpoint responds through inference.local inside the sandbox",
         ),
+        { githubActions: true },
       ),
     ).toBe(false);
     expect(
-      shouldSkipPreContractProviderRateLimit(output("endpoint validation failed: HTTP 429"), [
-        {
-          auth: "ok",
-          hopHeaders: [],
-          method: "POST",
-          path: "/v1/chat/completions",
-        },
-      ]),
+      shouldSkipPreContractProviderRateLimit(output("endpoint validation failed: HTTP 429"), {
+        githubActions: true,
+      }),
     ).toBe(false);
     expect(
       shouldSkipPreContractProviderRateLimit(
         output(
           "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429\n✓ Compatible endpoint responds through inference.local inside the sandbox",
         ),
-        [{ auth: "ok", hopHeaders: [], method: "POST", path: "/v1/chat/completions" }],
+        { githubActions: true },
+      ),
+    ).toBe(false);
+    expect(
+      shouldSkipPreContractProviderRateLimit(
+        output(
+          "NVIDIA Endpoints endpoint validation failed.\nChat Completions API validation returned HTTP 429\n✓ Compatible endpoint responds through inference.local inside the sandbox",
+        ),
+        { githubActions: true },
       ),
     ).toBe(false);
   });
@@ -984,38 +965,25 @@ liveTest(
     });
     expect(hostReachability.exitCode, resultText(hostReachability)).toBe(0);
 
-    const mockRequestCountBeforeOnboard = compatibleMock.requests.length;
     const { result: onboard, runner } = await runCompatibleOnboard(host, endpointUrl);
-    if (
-      onboard.exitCode !== 0 &&
-      shouldSkipPreContractProviderRateLimit(
-        onboard,
-        compatibleMock.requests,
-        mockRequestCountBeforeOnboard,
-      )
-    ) {
+    if (onboard.exitCode !== 0 && shouldSkipPreContractProviderRateLimit(onboard)) {
       await artifacts.writeJson("scenario-result.json", {
         id: "messaging-compatible-endpoint",
         status: "skipped",
-        reason: "external-provider-rate-limit-before-legacy-contract",
+        reason: "external-nvidia-provider-rate-limit-before-legacy-contract",
         runner,
         onboardExitCode: onboard.exitCode,
         onboardTimedOut: onboard.timedOut,
         onboardArtifacts: onboard.artifacts,
-        mockRequestsBeforeOnboard: mockRequestCountBeforeOnboard,
         mockRequestsBeforeSkip: compatibleMock.requests.length,
-        compatibleMockValidationRequests: compatibleMock.requests.slice(
-          mockRequestCountBeforeOnboard,
-        ),
-        sourceBoundary:
-          "onboard reached the configured compatible mock before an external provider rate limit stopped the pre-contract validation path",
+        sourceBoundary: "external NVIDIA Endpoints provider availability",
         sourceFixConstraint:
-          "skip requires compatible/custom endpoint validation text plus new authenticated mock traffic; NVIDIA/default provider validation or zero-mock-traffic 429 remains a test failure",
+          "skip is limited to explicit NVIDIA Endpoints validation in GitHub Actions; compatible endpoint validation failures against the local mock remain test failures",
         removalCondition:
           "remove once CI endpoint validation is stable for a release cycle or covered by a hermetic provider-validation fixture",
       });
       skip(
-        "External endpoint validation was rate-limited before the messaging-compatible endpoint contract could run",
+        "NVIDIA Endpoints validation was rate-limited before the messaging-compatible endpoint contract could run",
       );
     }
     expect(onboard.exitCode, resultText(onboard)).toBe(0);

From 2956eb551da644ee21d3c532b3735b4b1d830040 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 13:14:08 -0700
Subject: [PATCH 07/14] test(e2e): fail closed for messaging endpoint migration

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .../messaging-compatible-endpoint.test.ts     | 131 ------------------
 1 file changed, 131 deletions(-)

diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
index 0669731fbb..b3c244f0fe 100644
--- a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
+++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
@@ -24,7 +24,6 @@ import { type SandboxClient, validateSandboxName } from "../fixtures/clients/san
 import { expect, test } from "../fixtures/e2e-test.ts";
 import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
 import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
-import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts";
 
 const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
 const CLI_ENTRYPOINT = path.join(REPO_ROOT, "bin", "nemoclaw.js");
@@ -52,9 +51,6 @@ const HOP_BY_HOP_HEADERS = new Set([
   "transfer-encoding",
   "upgrade",
 ]);
-const RATE_LIMIT_VALIDATION_RE =
-  /HTTP\s+429|returned\s+HTTP\s+429|\b429\b|too many requests|rate[- ]?limit|quota/i;
-const DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE = /NVIDIA Endpoints endpoint validation failed/i;
 const COMPAT_AGENT_REPLY = "COMPAT_MOCK_ROUTE_5098_OK";
 const COMPAT_AGENT_PROMPT =
   "Call the configured model and report the compatible endpoint route token.";
@@ -424,27 +420,6 @@ async function cleanupMessagingState(host: HostCliClient, sandboxName: string):
   await stopGatewayRuntime(host, "cleanup-openshell-gateway-runtime-nemoclaw");
 }
 
-function hasLegacyCompatibleEndpointSmokeEvidence(
-  result: Pick<ShellProbeResult, "stdout" | "stderr">,
-): boolean {
-  return resultText(result).includes("Compatible endpoint responds through inference.local");
-}
-
-function shouldSkipPreContractProviderRateLimit(
-  result: Pick<ShellProbeResult, "stdout" | "stderr">,
-  options: { githubActions?: boolean } = {},
-): boolean {
-  const text = resultText(result);
-  const runningInActions = options.githubActions ?? process.env.GITHUB_ACTIONS === "true";
-  return (
-    runningInActions &&
-    DEFAULT_NVIDIA_PROVIDER_VALIDATION_RE.test(text) &&
-    isTransientProviderValidationFailure(result) &&
-    RATE_LIMIT_VALIDATION_RE.test(text) &&
-    !hasLegacyCompatibleEndpointSmokeEvidence(result)
-  );
-}
-
 function onboardEnv(endpointUrl: string): NodeJS.ProcessEnv {
   return commandEnv({
     COMPATIBLE_API_KEY: COMPATIBLE_KEY,
@@ -806,92 +781,6 @@ async function assertOpenClawAgentTurn(
 }
 
 describe("messaging-compatible-endpoint live test local classifiers", () => {
-  function output(text: string): Pick<ShellProbeResult, "stdout" | "stderr"> {
-    return { stdout: "", stderr: text };
-  }
-
-  it("skips only rate-limited endpoint validation before legacy evidence exists", () => {
-    expect(
-      shouldSkipPreContractProviderRateLimit(
-        output(
-          "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429",
-        ),
-        { githubActions: true },
-      ),
-    ).toBe(false);
-    expect(
-      shouldSkipPreContractProviderRateLimit(
-        output(
-          "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429",
-        ),
-        { githubActions: false },
-      ),
-    ).toBe(false);
-    expect(
-      shouldSkipPreContractProviderRateLimit(
-        output(
-          "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429",
-        ),
-        { githubActions: true },
-      ),
-    ).toBe(false);
-    expect(
-      shouldSkipPreContractProviderRateLimit(
-        output("NVIDIA Endpoints endpoint validation failed.\nRequest rate limit exceeded"),
-        { githubActions: true },
-      ),
-    ).toBe(true);
-    expect(
-      shouldSkipPreContractProviderRateLimit(
-        output(
-          "NVIDIA Endpoints endpoint validation failed.\nChat Completions API validation returned HTTP 429",
-        ),
-        { githubActions: false },
-      ),
-    ).toBe(false);
-    expect(
-      shouldSkipPreContractProviderRateLimit(
-        output("Chat Completions API validation returned HTTP 429"),
-        { githubActions: true },
-      ),
-    ).toBe(false);
-    expect(
-      shouldSkipPreContractProviderRateLimit(
-        output("Other OpenAI-compatible endpoint endpoint validation failed: invalid credential"),
-        { githubActions: true },
-      ),
-    ).toBe(false);
-    expect(
-      shouldSkipPreContractProviderRateLimit(
-        output(
-          "Chat Completions API validation returned HTTP 429\n✓ Compatible endpoint responds through inference.local inside the sandbox",
-        ),
-        { githubActions: true },
-      ),
-    ).toBe(false);
-    expect(
-      shouldSkipPreContractProviderRateLimit(output("endpoint validation failed: HTTP 429"), {
-        githubActions: true,
-      }),
-    ).toBe(false);
-    expect(
-      shouldSkipPreContractProviderRateLimit(
-        output(
-          "Other OpenAI-compatible endpoint endpoint validation failed.\nChat Completions API validation returned HTTP 429\n✓ Compatible endpoint responds through inference.local inside the sandbox",
-        ),
-        { githubActions: true },
-      ),
-    ).toBe(false);
-    expect(
-      shouldSkipPreContractProviderRateLimit(
-        output(
-          "NVIDIA Endpoints endpoint validation failed.\nChat Completions API validation returned HTTP 429\n✓ Compatible endpoint responds through inference.local inside the sandbox",
-        ),
-        { githubActions: true },
-      ),
-    ).toBe(false);
-  });
-
   it("does not satisfy the agent reply assertion with echoed prompt text", () => {
     expect(COMPAT_AGENT_PROMPT).not.toContain(COMPAT_AGENT_REPLY);
     expect(
@@ -966,26 +855,6 @@ liveTest(
     expect(hostReachability.exitCode, resultText(hostReachability)).toBe(0);
 
     const { result: onboard, runner } = await runCompatibleOnboard(host, endpointUrl);
-    if (onboard.exitCode !== 0 && shouldSkipPreContractProviderRateLimit(onboard)) {
-      await artifacts.writeJson("scenario-result.json", {
-        id: "messaging-compatible-endpoint",
-        status: "skipped",
-        reason: "external-nvidia-provider-rate-limit-before-legacy-contract",
-        runner,
-        onboardExitCode: onboard.exitCode,
-        onboardTimedOut: onboard.timedOut,
-        onboardArtifacts: onboard.artifacts,
-        mockRequestsBeforeSkip: compatibleMock.requests.length,
-        sourceBoundary: "external NVIDIA Endpoints provider availability",
-        sourceFixConstraint:
-          "skip is limited to explicit NVIDIA Endpoints validation in GitHub Actions; compatible endpoint validation failures against the local mock remain test failures",
-        removalCondition:
-          "remove once CI endpoint validation is stable for a release cycle or covered by a hermetic provider-validation fixture",
-      });
-      skip(
-        "NVIDIA Endpoints validation was rate-limited before the messaging-compatible endpoint contract could run",
-      );
-    }
     expect(onboard.exitCode, resultText(onboard)).toBe(0);
     expect(resultText(onboard)).toContain("Compatible endpoint responds through inference.local");
 

From 20475f1bb3aadb556ce06ead0a025ba33df59455 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 13:24:37 -0700
Subject: [PATCH 08/14] test(e2e): guard transient provider classifier

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .../live/network-policy-transient-provider.ts |  8 +++++++-
 .../network-policy-transient-provider.test.ts | 20 +++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/test/e2e-scenario/live/network-policy-transient-provider.ts b/test/e2e-scenario/live/network-policy-transient-provider.ts
index 224b1a73c8..706fafa085 100644
--- a/test/e2e-scenario/live/network-policy-transient-provider.ts
+++ b/test/e2e-scenario/live/network-policy-transient-provider.ts
@@ -7,6 +7,8 @@ const TRANSIENT_PROVIDER_VALIDATION_RE =
   /endpoint validation failed|failed to verify inference endpoint|Chat Completions API validation/i;
 const TRANSIENT_PROVIDER_DETAIL_RE =
   /timed? out|timeout|curl failed \(exit (7|28|35|52|56)\)|ETIMEDOUT|ECONNRESET|EAI_AGAIN|ENOTFOUND|failed to connect|error sending request|HTTP (429|502|503|504)|returned HTTP (429|502|503|504)|too many requests|rate[- ]?limit|quota|temporar/i;
+const LOCAL_VALIDATION_FAILURE_RE =
+  /invalid .*credential|invalid .*api[_ -]?key|authorization failed|authentication failed|denied by network policy|policy .*failed|routing .*failed|route .*failed|proxy .*failed|hop-by-hop|header stripping/i;
 
 function resultText(result: Pick<ShellProbeResult, "stdout" | "stderr">): string {
   return [result.stdout, result.stderr].filter(Boolean).join("\n");
@@ -16,5 +18,9 @@ export function isTransientProviderValidationFailure(
   result: Pick<ShellProbeResult, "stdout" | "stderr">,
 ): boolean {
   const output = resultText(result);
-  return TRANSIENT_PROVIDER_VALIDATION_RE.test(output) && TRANSIENT_PROVIDER_DETAIL_RE.test(output);
+  return (
+    TRANSIENT_PROVIDER_VALIDATION_RE.test(output) &&
+    TRANSIENT_PROVIDER_DETAIL_RE.test(output) &&
+    !LOCAL_VALIDATION_FAILURE_RE.test(output)
+  );
 }
diff --git a/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts b/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts
index 551ea0a85e..1066725cdd 100644
--- a/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts
+++ b/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts
@@ -31,6 +31,26 @@ describe("network-policy transient provider validation classifier", () => {
         probeOutput("endpoint validation failed: invalid NVIDIA_API_KEY credential"),
       ),
     ).toBe(false);
+    expect(
+      isTransientProviderValidationFailure(
+        probeOutput("endpoint validation failed: invalid NVIDIA_API_KEY credential quota exceeded"),
+      ),
+    ).toBe(false);
+    expect(
+      isTransientProviderValidationFailure(
+        probeOutput("endpoint validation failed: denied by network policy rate-limit preset"),
+      ),
+    ).toBe(false);
+    expect(
+      isTransientProviderValidationFailure(
+        probeOutput("endpoint validation failed: routing failed before rate limit check"),
+      ),
+    ).toBe(false);
+    expect(
+      isTransientProviderValidationFailure(
+        probeOutput("endpoint validation failed: proxy header stripping quota marker failed"),
+      ),
+    ).toBe(false);
     expect(
       isTransientProviderValidationFailure(
         probeOutput("policy update failed: denied by network policy"),

From c9342355499c6a6ec20e0109bc917b80efd403c4 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 13:34:32 -0700
Subject: [PATCH 09/14] test(e2e): avoid newline args in network policy probes

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 test/e2e-scenario/live/network-policy.test.ts | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/test/e2e-scenario/live/network-policy.test.ts b/test/e2e-scenario/live/network-policy.test.ts
index f968bd0bda..698152b12a 100644
--- a/test/e2e-scenario/live/network-policy.test.ts
+++ b/test/e2e-scenario/live/network-policy.test.ts
@@ -17,13 +17,13 @@ import path from "node:path";
 
 import { isPrivateIp } from "../../../nemoclaw/src/blueprint/private-networks.ts";
 import type { ArtifactSink } from "../fixtures/artifacts.ts";
-import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts";
 import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
 import type { HostCliClient } from "../fixtures/clients/host.ts";
 import { type SandboxClient, trustedSandboxShellScript } from "../fixtures/clients/sandbox.ts";
 import { expect, test } from "../fixtures/e2e-test.ts";
 import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
 import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
+import { isTransientProviderValidationFailure } from "./network-policy-transient-provider.ts";
 
 const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
 const CLI_ENTRYPOINT = path.join(REPO_ROOT, "bin", "nemoclaw.js");
@@ -64,6 +64,11 @@ function sleep(ms: number): Promise<void> {
   return new Promise((resolve) => setTimeout(resolve, ms));
 }
 
+function shellEvalArg(script: string): string {
+  const encoded = Buffer.from(script, "utf8").toString("base64");
+  return `eval "$(printf %s ${encoded} | base64 -d)"`;
+}
+
 async function runNemoclaw(
   host: HostCliClient,
   args: string[],
@@ -82,7 +87,7 @@ async function sandboxBash(
   script: string,
   options: { artifactName: string; timeoutMs?: number } = { artifactName: "sandbox-bash" },
 ): Promise<ShellProbeResult> {
-  return sandbox.execShell(SANDBOX_NAME, trustedSandboxShellScript(script), {
+  return sandbox.execShell(SANDBOX_NAME, trustedSandboxShellScript(shellEvalArg(script)), {
     artifactName: options.artifactName,
     env: baseEnv(),
     timeoutMs: options.timeoutMs ?? SANDBOX_EXEC_TIMEOUT_MS,

From 86b87a7e239e2bbd2c71138065ad6fc93856546a Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 13:46:54 -0700
Subject: [PATCH 10/14] test(e2e): accept blocked slack fetch errors

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 test/e2e-scenario/live/network-policy.test.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/e2e-scenario/live/network-policy.test.ts b/test/e2e-scenario/live/network-policy.test.ts
index 698152b12a..7897c14302 100644
--- a/test/e2e-scenario/live/network-policy.test.ts
+++ b/test/e2e-scenario/live/network-policy.test.ts
@@ -560,7 +560,7 @@ hello
     ).resolves.toBe("403");
 
     const slackBefore = await fetchStatus(sandbox, "https://slack.com/", "tc-net-03-slack-before");
-    expect(slackBefore).toMatch(/STATUS_403/);
+    expect(slackBefore).toMatch(/STATUS_403|ERROR_/);
     const slackApply = await applyPresetInteractively(host, "slack");
     expect(slackApply.exitCode, text(slackApply)).toBe(0);
     const slackAfter = await fetchStatus(sandbox, "https://slack.com/", "tc-net-03-slack-after");

From bc3b9dad58ac0cfaa06e9934be46dca3e5858371 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 13:56:05 -0700
Subject: [PATCH 11/14] test(e2e): preserve network policy shell quoting

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 test/e2e-scenario/live/network-policy.test.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/e2e-scenario/live/network-policy.test.ts b/test/e2e-scenario/live/network-policy.test.ts
index 7897c14302..eb6000b5ce 100644
--- a/test/e2e-scenario/live/network-policy.test.ts
+++ b/test/e2e-scenario/live/network-policy.test.ts
@@ -66,7 +66,7 @@ function sleep(ms: number): Promise<void> {
 
 function shellEvalArg(script: string): string {
   const encoded = Buffer.from(script, "utf8").toString("base64");
-  return `eval "$(printf %s ${encoded} | base64 -d)"`;
+  return `printf %s ${encoded} | base64 -d | sh`;
 }
 
 async function runNemoclaw(

From c97fd9d736a6d2009cf259a2806bc6a15e884aa2 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 14:03:24 -0700
Subject: [PATCH 12/14] test(e2e): cover messaging endpoint helpers

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .../messaging-compatible-endpoint-helpers.ts  | 195 ++++++++++++++++++
 .../messaging-compatible-endpoint.test.ts     | 189 +----------------
 ...saging-compatible-endpoint-helpers.test.ts |  69 +++++++
 3 files changed, 270 insertions(+), 183 deletions(-)
 create mode 100644 test/e2e-scenario/live/messaging-compatible-endpoint-helpers.ts
 create mode 100644 test/e2e-scenario/support-tests/messaging-compatible-endpoint-helpers.test.ts

diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint-helpers.ts b/test/e2e-scenario/live/messaging-compatible-endpoint-helpers.ts
new file mode 100644
index 0000000000..de87b56122
--- /dev/null
+++ b/test/e2e-scenario/live/messaging-compatible-endpoint-helpers.ts
@@ -0,0 +1,195 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import path from "node:path";
+
+import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
+import type { HostCliClient } from "../fixtures/clients/host.ts";
+
+const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
+const CLI_ENTRYPOINT = path.join(REPO_ROOT, "bin", "nemoclaw.js");
+
+export function commandEnv(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv {
+  return {
+    ...buildAvailabilityProbeEnv(),
+    ...extra,
+    NEMOCLAW_NON_INTERACTIVE: "1",
+    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
+    OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw",
+  };
+}
+
+async function bestEffort(run: () => Promise<unknown>): Promise<void> {
+  try {
+    await run();
+  } catch {
+    // Best-effort cleanup mirrors the legacy shell teardown.
+  }
+}
+
+export async function stopGatewayRuntime(host: HostCliClient, artifactName: string): Promise<void> {
+  await bestEffort(() =>
+    host.command(
+      "bash",
+      [
+        "-lc",
+        [
+          "set +e",
+          "openshell forward stop 18789 >/dev/null 2>&1",
+          "openshell gateway stop -g nemoclaw >/dev/null 2>&1",
+          'pid_file="$HOME/.local/state/nemoclaw/openshell-docker-gateway/openshell-gateway.pid"',
+          'if [ -f "$pid_file" ]; then',
+          '  pid="$(tr -d "[:space:]" <"$pid_file" 2>/dev/null || true)"',
+          '  if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then',
+          '    kill "$pid" 2>/dev/null || true',
+          "    for _ in $(seq 1 10); do",
+          '      kill -0 "$pid" 2>/dev/null || break',
+          "      sleep 1",
+          "    done",
+          '    kill -0 "$pid" 2>/dev/null && kill -9 "$pid" 2>/dev/null || true',
+          "  fi",
+          "fi",
+          'cid="$(docker ps -qf "name=openshell-cluster-nemoclaw" 2>/dev/null | head -1)"',
+          'if [ -n "$cid" ]; then docker stop "$cid" >/dev/null 2>&1 || true; fi',
+          "openshell gateway remove nemoclaw >/dev/null 2>&1",
+          "openshell gateway destroy -g nemoclaw >/dev/null 2>&1",
+          "exit 0",
+        ].join("\n"),
+      ],
+      {
+        artifactName,
+        env: commandEnv(),
+        timeoutMs: 90_000,
+      },
+    ),
+  );
+}
+
+export async function cleanupMessagingState(
+  host: HostCliClient,
+  sandboxName: string,
+): Promise<void> {
+  // Endpoint-validation skips can happen before the sandbox exists. Keep
+  // teardown non-throwing so "Sandbox ... does not exist" stays a normal
+  // pre-contract cleanup outcome instead of masking the original evidence.
+  await bestEffort(() =>
+    host.command("node", [CLI_ENTRYPOINT, sandboxName, "destroy", "--yes"], {
+      artifactName: `cleanup-nemoclaw-destroy-${sandboxName}`,
+      env: commandEnv(),
+      timeoutMs: 120_000,
+    }),
+  );
+  await bestEffort(() =>
+    host.command("openshell", ["sandbox", "delete", sandboxName], {
+      artifactName: `cleanup-openshell-sandbox-delete-${sandboxName}`,
+      env: commandEnv(),
+      timeoutMs: 60_000,
+    }),
+  );
+  await stopGatewayRuntime(host, "cleanup-openshell-gateway-runtime-nemoclaw");
+}
+
+function findJsonObjectEnd(raw: string, start: number): number | null {
+  let depth = 0;
+  let inString = false;
+  let escaped = false;
+  for (let index = start; index < raw.length; index += 1) {
+    const char = raw[index];
+    if (inString) {
+      if (escaped) {
+        escaped = false;
+      } else if (char === "\\") {
+        escaped = true;
+      } else if (char === '"') {
+        inString = false;
+      }
+      continue;
+    }
+    if (char === '"') {
+      inString = true;
+    } else if (char === "{") {
+      depth += 1;
+    } else if (char === "}") {
+      depth -= 1;
+      if (depth === 0) return index + 1;
+    }
+  }
+  return null;
+}
+
+export function parseOpenClawAgentText(raw: string): string {
+  if (!raw.trim()) return "";
+  const parts: string[] = [];
+  const visited = new Set<unknown>();
+  const textKeys = new Set(["text", "content", "reasoning_content"]);
+  const containerKeys = new Set([
+    "result",
+    "payloads",
+    "payload",
+    "messages",
+    "choices",
+    "response",
+    "data",
+    "output",
+    "outputs",
+    "items",
+    "segments",
+    "delta",
+  ]);
+
+  const add = (value: unknown) => {
+    if (typeof value === "string" && value.trim()) parts.push(value.trim());
+  };
+  const collect = (value: unknown) => {
+    if (visited.has(value)) return;
+    visited.add(value);
+    if (typeof value === "string") {
+      add(value);
+      return;
+    }
+    if (Array.isArray(value)) {
+      value.forEach(collect);
+      return;
+    }
+    if (!value || typeof value !== "object") return;
+    const record = value as Record<string, unknown>;
+    for (const key of textKeys) add(record[key]);
+    const choices = record.choices;
+    if (Array.isArray(choices)) {
+      for (const choice of choices) {
+        if (!choice || typeof choice !== "object") continue;
+        collect((choice as Record<string, unknown>).message);
+        collect((choice as Record<string, unknown>).delta);
+        add((choice as Record<string, unknown>).text);
+      }
+    }
+    for (const key of containerKeys) {
+      if (key in record) collect(record[key]);
+    }
+  };
+  const collectDoc = (doc: unknown) => {
+    if (doc && typeof doc === "object" && (doc as Record<string, unknown>).result) {
+      collect((doc as Record<string, unknown>).result);
+    } else {
+      collect(doc);
+    }
+  };
+
+  try {
+    collectDoc(JSON.parse(raw));
+  } catch {
+    for (const match of raw.matchAll(/{/g)) {
+      try {
+        const before = parts.length;
+        const start = match.index;
+        const end = findJsonObjectEnd(raw, start);
+        if (end === null) continue;
+        collectDoc(JSON.parse(raw.slice(start, end)));
+        if (parts.length > before) break;
+      } catch {
+        // Continue scanning for a later JSON object, matching the legacy parser.
+      }
+    }
+  }
+  return parts.join("\n");
+}
diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
index b3c244f0fe..3eb9402414 100644
--- a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
+++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
@@ -18,12 +18,17 @@ import path from "node:path";
 
 import { describe, it } from "vitest";
 
-import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
 import type { HostCliClient } from "../fixtures/clients/host.ts";
 import { type SandboxClient, validateSandboxName } from "../fixtures/clients/sandbox.ts";
 import { expect, test } from "../fixtures/e2e-test.ts";
 import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
 import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
+import {
+  cleanupMessagingState,
+  commandEnv,
+  parseOpenClawAgentText,
+  stopGatewayRuntime,
+} from "./messaging-compatible-endpoint-helpers.ts";
 
 const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
 const CLI_ENTRYPOINT = path.join(REPO_ROOT, "bin", "nemoclaw.js");
@@ -86,16 +91,6 @@ function sleep(ms: number): Promise<void> {
   return new Promise((resolve) => setTimeout(resolve, ms));
 }
 
-function commandEnv(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv {
-  return {
-    ...buildAvailabilityProbeEnv(),
-    ...extra,
-    NEMOCLAW_NON_INTERACTIVE: "1",
-    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
-    OPENSHELL_GATEWAY: process.env.OPENSHELL_GATEWAY ?? "nemoclaw",
-  };
-}
-
 function redactionValues(): string[] {
   return [COMPATIBLE_KEY, TELEGRAM_TOKEN, process.env.GITHUB_TOKEN].filter(
     (value): value is string => typeof value === "string" && value.length > 0,
@@ -353,73 +348,6 @@ async function sourceCliAvailable(host: HostCliClient): Promise<boolean> {
   return result.exitCode === 0;
 }
 
-async function bestEffort(run: () => Promise<unknown>): Promise<void> {
-  try {
-    await run();
-  } catch {
-    // Best-effort cleanup mirrors the legacy shell teardown.
-  }
-}
-
-async function stopGatewayRuntime(host: HostCliClient, artifactName: string): Promise<void> {
-  await bestEffort(() =>
-    host.command(
-      "bash",
-      [
-        "-lc",
-        [
-          "set +e",
-          "openshell forward stop 18789 >/dev/null 2>&1",
-          "openshell gateway stop -g nemoclaw >/dev/null 2>&1",
-          'pid_file="$HOME/.local/state/nemoclaw/openshell-docker-gateway/openshell-gateway.pid"',
-          'if [ -f "$pid_file" ]; then',
-          '  pid="$(tr -d "[:space:]" <"$pid_file" 2>/dev/null || true)"',
-          '  if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then',
-          '    kill "$pid" 2>/dev/null || true',
-          "    for _ in $(seq 1 10); do",
-          '      kill -0 "$pid" 2>/dev/null || break',
-          "      sleep 1",
-          "    done",
-          '    kill -0 "$pid" 2>/dev/null && kill -9 "$pid" 2>/dev/null || true',
-          "  fi",
-          "fi",
-          'cid="$(docker ps -qf "name=openshell-cluster-nemoclaw" 2>/dev/null | head -1)"',
-          'if [ -n "$cid" ]; then docker stop "$cid" >/dev/null 2>&1 || true; fi',
-          "openshell gateway remove nemoclaw >/dev/null 2>&1",
-          "openshell gateway destroy -g nemoclaw >/dev/null 2>&1",
-          "exit 0",
-        ].join("\n"),
-      ],
-      {
-        artifactName,
-        env: commandEnv(),
-        timeoutMs: 90_000,
-      },
-    ),
-  );
-}
-
-async function cleanupMessagingState(host: HostCliClient, sandboxName: string): Promise<void> {
-  // Endpoint-validation skips can happen before the sandbox exists. Keep
-  // teardown non-throwing so "Sandbox ... does not exist" stays a normal
-  // pre-contract cleanup outcome instead of masking the original evidence.
-  await bestEffort(() =>
-    host.command("node", [CLI_ENTRYPOINT, sandboxName, "destroy", "--yes"], {
-      artifactName: `cleanup-nemoclaw-destroy-${sandboxName}`,
-      env: commandEnv(),
-      timeoutMs: 120_000,
-    }),
-  );
-  await bestEffort(() =>
-    host.command("openshell", ["sandbox", "delete", sandboxName], {
-      artifactName: `cleanup-openshell-sandbox-delete-${sandboxName}`,
-      env: commandEnv(),
-      timeoutMs: 60_000,
-    }),
-  );
-  await stopGatewayRuntime(host, "cleanup-openshell-gateway-runtime-nemoclaw");
-}
-
 function onboardEnv(endpointUrl: string): NodeJS.ProcessEnv {
   return commandEnv({
     COMPATIBLE_API_KEY: COMPATIBLE_KEY,
@@ -631,111 +559,6 @@ async function assertSandboxInference(sandbox: SandboxClient): Promise<void> {
   );
 }
 
-function findJsonObjectEnd(raw: string, start: number): number | null {
-  let depth = 0;
-  let inString = false;
-  let escaped = false;
-  for (let index = start; index < raw.length; index += 1) {
-    const char = raw[index];
-    if (inString) {
-      if (escaped) {
-        escaped = false;
-      } else if (char === "\\") {
-        escaped = true;
-      } else if (char === '"') {
-        inString = false;
-      }
-      continue;
-    }
-    if (char === '"') {
-      inString = true;
-    } else if (char === "{") {
-      depth += 1;
-    } else if (char === "}") {
-      depth -= 1;
-      if (depth === 0) return index + 1;
-    }
-  }
-  return null;
-}
-
-function parseOpenClawAgentText(raw: string): string {
-  if (!raw.trim()) return "";
-  const parts: string[] = [];
-  const visited = new Set<unknown>();
-  const textKeys = new Set(["text", "content", "reasoning_content"]);
-  const containerKeys = new Set([
-    "result",
-    "payloads",
-    "payload",
-    "messages",
-    "choices",
-    "response",
-    "data",
-    "output",
-    "outputs",
-    "items",
-    "segments",
-    "delta",
-  ]);
-
-  const add = (value: unknown) => {
-    if (typeof value === "string" && value.trim()) parts.push(value.trim());
-  };
-  const collect = (value: unknown) => {
-    if (visited.has(value)) return;
-    visited.add(value);
-    if (typeof value === "string") {
-      add(value);
-      return;
-    }
-    if (Array.isArray(value)) {
-      value.forEach(collect);
-      return;
-    }
-    if (!value || typeof value !== "object") return;
-    const record = value as Record<string, unknown>;
-    for (const key of textKeys) add(record[key]);
-    const choices = record.choices;
-    if (Array.isArray(choices)) {
-      for (const choice of choices) {
-        if (!choice || typeof choice !== "object") continue;
-        collect((choice as Record<string, unknown>).message);
-        collect((choice as Record<string, unknown>).delta);
-        add((choice as Record<string, unknown>).text);
-      }
-    }
-    for (const key of containerKeys) {
-      if (key in record) collect(record[key]);
-    }
-  };
-  const collectDoc = (doc: unknown) => {
-    if (doc && typeof doc === "object" && (doc as Record<string, unknown>).result) {
-      collect((doc as Record<string, unknown>).result);
-    } else {
-      collect(doc);
-    }
-  };
-
-  try {
-    collectDoc(JSON.parse(raw));
-  } catch {
-    for (const match of raw.matchAll(/{/g)) {
-      try {
-        const before = parts.length;
-        const start = match.index;
-        const end = findJsonObjectEnd(raw, start);
-        if (end === null) continue;
-        collectDoc(JSON.parse(raw.slice(start, end)));
-        if (parts.length > before) break;
-      } catch {
-        // Continue scanning for a later JSON object, matching the legacy parser.
-      }
-    }
-  }
-  return parts.join("\n");
-}
-
 async function assertOpenClawAgentTurn(
   sandbox: SandboxClient,
   compatibleMock: CompatibleMock,
diff --git a/test/e2e-scenario/support-tests/messaging-compatible-endpoint-helpers.test.ts b/test/e2e-scenario/support-tests/messaging-compatible-endpoint-helpers.test.ts
new file mode 100644
index 0000000000..d42eb8800c
--- /dev/null
+++ b/test/e2e-scenario/support-tests/messaging-compatible-endpoint-helpers.test.ts
@@ -0,0 +1,69 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { describe, expect, it } from "vitest";
+
+import type { HostCliClient } from "../fixtures/clients/host.ts";
+import {
+  cleanupMessagingState,
+  parseOpenClawAgentText,
+} from "../live/messaging-compatible-endpoint-helpers.ts";
+
+const COMPAT_AGENT_REPLY = "COMPAT_MOCK_ROUTE_5098_OK";
+const COMPAT_AGENT_PROMPT =
+  "Call the configured model and report the compatible endpoint route token.";
+
+describe("messaging compatible endpoint helper coverage", () => {
+  it("keeps missing-sandbox cleanup from masking endpoint validation evidence", async () => {
+    const calls: Array<{ command: string; args: string[] }> = [];
+    const host = {
+      command: async (command: string, args: string[]) => {
+        calls.push({ command, args });
+        throw new Error("Sandbox e2e-msg-compat-missing does not exist");
+      },
+    } as unknown as HostCliClient;
+
+    await expect(
+      (async () => {
+        try {
+          throw new Error("endpoint validation failed with HTTP 429");
+        } catch (error) {
+          await cleanupMessagingState(host, "e2e-msg-compat-missing");
+          throw error;
+        }
+      })(),
+    ).rejects.toThrow(/HTTP 429/);
+
+    expect(calls).toHaveLength(3);
+    expect(calls[0]?.command).toBe("node");
+    expect(calls[0]?.args[0]).toMatch(/bin\/nemoclaw\.js$/);
+    expect(calls[0]?.args.slice(1)).toEqual(["e2e-msg-compat-missing", "destroy", "--yes"]);
+    expect(calls[1]).toEqual({
+      command: "openshell",
+      args: ["sandbox", "delete", "e2e-msg-compat-missing"],
+    });
+    expect(calls[2]?.command).toBe("bash");
+    expect(calls[2]?.args[0]).toBe("-lc");
+    expect(calls[2]?.args[1]).toContain("openshell gateway destroy -g nemoclaw");
+  });
+
+  it("extracts noisy OpenClaw JSON while rejecting prompt echo text", () => {
+    expect(COMPAT_AGENT_PROMPT).not.toContain(COMPAT_AGENT_REPLY);
+    expect(
+      parseOpenClawAgentText(JSON.stringify({ result: { content: COMPAT_AGENT_PROMPT } })),
+    ).not.toContain(COMPAT_AGENT_REPLY);
+
+    const noisyOutput = [
+      "openclaw: session starting",
+      "debug: {not-json}",
+      JSON.stringify({
+        result: {
+          messages: [{ role: "assistant", content: COMPAT_AGENT_REPLY }],
+        },
+      }),
+      "openclaw: session complete",
+    ].join("\n");
+
+    expect(parseOpenClawAgentText(noisyOutput)).toContain(COMPAT_AGENT_REPLY);
+  });
+});

From c1d0f5814731f128a815ed298c9af86b1e5c1708 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 14:07:31 -0700
Subject: [PATCH 13/14] test(e2e): fix network policy preset selector

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 test/e2e-scenario/live/network-policy.test.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/e2e-scenario/live/network-policy.test.ts b/test/e2e-scenario/live/network-policy.test.ts
index eb6000b5ce..ce3574a890 100644
--- a/test/e2e-scenario/live/network-policy.test.ts
+++ b/test/e2e-scenario/live/network-policy.test.ts
@@ -110,7 +110,7 @@ async function applyPresetInteractively(
   const script = String.raw`
 set -euo pipefail
 preset_list="$(env NEMOCLAW_NON_INTERACTIVE= node "$NEMOCLAW_E2E_CLI" "$NEMOCLAW_E2E_SANDBOX" policy-add </dev/null 2>&1 || true)"
-preset_num="$(printf '%s\n' "$preset_list" | python3 -c 'import re,sys; preset=sys.argv[1]; text=sys.stdin.read(); m=re.search(r"(?m)^\\s*(\\d+)\\).*" + re.escape(preset), text); print(m.group(1) if m else "")' "$NEMOCLAW_E2E_PRESET")"
+preset_num="$(printf '%s\n' "$preset_list" | python3 -c 'import re,sys; preset=sys.argv[1]; text=sys.stdin.read(); m=re.search(r"(?m)^\s*(\d+)\).*" + re.escape(preset), text); print(m.group(1) if m else "")' "$NEMOCLAW_E2E_PRESET")"
 if [ -z "$preset_num" ]; then
   printf 'preset %s not found in list:\n%s\n' "$NEMOCLAW_E2E_PRESET" "$preset_list" >&2
   exit 1

From 1a9e58611056a2914210ad0c505d7f10abff71a6 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 14:19:24 -0700
Subject: [PATCH 14/14] test(e2e): refine live scenario helpers

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .../messaging-compatible-endpoint-helpers.ts  |  6 ++++-
 .../messaging-compatible-endpoint.test.ts     | 24 +++++++++++++------
 test/e2e-scenario/live/network-policy.test.ts |  3 +++
 ...saging-compatible-endpoint-helpers.test.ts | 10 ++++++++
 4 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint-helpers.ts b/test/e2e-scenario/live/messaging-compatible-endpoint-helpers.ts
index de87b56122..9266c6abcc 100644
--- a/test/e2e-scenario/live/messaging-compatible-endpoint-helpers.ts
+++ b/test/e2e-scenario/live/messaging-compatible-endpoint-helpers.ts
@@ -24,6 +24,8 @@ async function bestEffort(run: () => Promise<unknown>): Promise<void> {
     await run();
   } catch {
     // Best-effort cleanup mirrors the legacy shell teardown.
+    // Narrow this once NemoClaw/OpenShell/gateway teardown treats missing
+    // resources as successful cleanup.
   }
 }
 
@@ -153,7 +155,9 @@ export function parseOpenClawAgentText(raw: string): string {
     }
     if (!value || typeof value !== "object") return;
     const record = value as Record<string, unknown>;
-    for (const key of textKeys) add(record[key]);
+    for (const key of textKeys) {
+      if (key in record) collect(record[key]);
+    }
     const choices = record.choices;
     if (Array.isArray(choices)) {
       for (const choice of choices) {
diff --git a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
index 3eb9402414..925cba0168 100644
--- a/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
+++ b/test/e2e-scenario/live/messaging-compatible-endpoint.test.ts
@@ -152,6 +152,10 @@ async function startCompatibleMock(
 
     if (req.method === "GET" && ["/v1/models", "/models"].includes(requestPath)) {
       requests.push({ method: "GET", path: requestPath, auth, hopHeaders: [] });
+      if (auth !== "ok") {
+        jsonResponse(res, 401, { error: { message: "missing bearer credential" } });
+        return;
+      }
       jsonResponse(res, 200, {
         object: "list",
         data: [{ id: model, object: "model" }],
@@ -292,7 +296,9 @@ async function startCompatibleMock(
 
   for (let attempt = 1; attempt <= 30; attempt += 1) {
     try {
-      const response = await fetch(`${mock.localBaseUrl}/models`);
+      const response = await fetch(`${mock.localBaseUrl}/models`, {
+        headers: { Authorization: `Bearer ${apiKey}` },
+      });
       if (response.ok) return mock;
     } catch {
       // Keep polling until the server accepts connections.
@@ -669,12 +675,16 @@ liveTest(
 
     const hostAddress = await hostAddressForSandbox(host);
     const endpointUrl = `http://${hostAddress}:${new URL(compatibleMock.localBaseUrl).port}/v1`;
-    const hostReachability = await host.command("curl", ["-sf", `${endpointUrl}/models`], {
-      artifactName: "compatible-endpoint-host-reachability",
-      env: commandEnv(),
-      redactionValues: redactionValues(),
-      timeoutMs: 30_000,
-    });
+    const hostReachability = await host.command(
+      "curl",
+      ["-sf", "-H", `Authorization: Bearer ${COMPATIBLE_KEY}`, `${endpointUrl}/models`],
+      {
+        artifactName: "compatible-endpoint-host-reachability",
+        env: commandEnv(),
+        redactionValues: redactionValues(),
+        timeoutMs: 30_000,
+      },
+    );
     expect(hostReachability.exitCode, resultText(hostReachability)).toBe(0);
 
     const { result: onboard, runner } = await runCompatibleOnboard(host, endpointUrl);
diff --git a/test/e2e-scenario/live/network-policy.test.ts b/test/e2e-scenario/live/network-policy.test.ts
index ce3574a890..6ba4b57f61 100644
--- a/test/e2e-scenario/live/network-policy.test.ts
+++ b/test/e2e-scenario/live/network-policy.test.ts
@@ -65,6 +65,9 @@ function sleep(ms: number): Promise<void> {
 }
 
 function shellEvalArg(script: string): string {
+  if (script.length === 0) {
+    return "";
+  }
   const encoded = Buffer.from(script, "utf8").toString("base64");
   return `printf %s ${encoded} | base64 -d | sh`;
 }
diff --git a/test/e2e-scenario/support-tests/messaging-compatible-endpoint-helpers.test.ts b/test/e2e-scenario/support-tests/messaging-compatible-endpoint-helpers.test.ts
index d42eb8800c..536d8e43ca 100644
--- a/test/e2e-scenario/support-tests/messaging-compatible-endpoint-helpers.test.ts
+++ b/test/e2e-scenario/support-tests/messaging-compatible-endpoint-helpers.test.ts
@@ -66,4 +66,14 @@ describe("messaging compatible endpoint helper coverage", () => {
 
     expect(parseOpenClawAgentText(noisyOutput)).toContain(COMPAT_AGENT_REPLY);
   });
+
+  it("extracts OpenAI Responses content parts", () => {
+    const output = JSON.stringify({
+      result: {
+        content: [{ type: "output_text", text: COMPAT_AGENT_REPLY }],
+      },
+    });
+
+    expect(parseOpenClawAgentText(output)).toContain(COMPAT_AGENT_REPLY);
+  });
 });