From 49210cdd3ca8ddecd56527bb8cbec502061ef997 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 11:45:38 -0700 Subject: [PATCH 1/8] test(e2e): migrate Bedrock Runtime compatible Anthropic scenario Signed-off-by: Carlos Villela --- .github/workflows/e2e-vitest-scenarios.yaml | 100 ++ ...drock-runtime-compatible-anthropic.test.ts | 1215 +++++++++++++++++ .../e2e-scenarios-workflow.test.ts | 22 +- tools/e2e-scenarios/free-standing-jobs.env | 6 +- 4 files changed, 1339 insertions(+), 4 deletions(-) create mode 100644 test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml index f199050c83..8e8a6c583c 100644 --- a/.github/workflows/e2e-vitest-scenarios.yaml +++ b/.github/workflows/e2e-vitest-scenarios.yaml @@ -1604,6 +1604,105 @@ jobs: docker logout docker.io || true rm -rf "${DOCKER_CONFIG}" + # Focused coverage slice for the Bedrock Runtime compatible Anthropic + # endpoint contract. The retained legacy bash lane remains the source for + # full closeout until #5098 Phase 11 shell retirement. 
+ bedrock-runtime-compatible-anthropic-vitest: + needs: generate-matrix + if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',bedrock-runtime-compatible-anthropic-vitest,') || contains(format(',{0},', inputs.scenarios), ',bedrock-runtime-compatible-anthropic,') }} + runs-on: ubuntu-latest + timeout-minutes: 60 + strategy: + fail-fast: false + matrix: + agent: [openclaw, hermes] + env: + E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/bedrock-runtime-compatible-anthropic/${{ matrix.agent }} + NEMOCLAW_CLI_BIN: ${{ github.workspace }}/bin/nemoclaw.js + NEMOCLAW_RUN_E2E_SCENARIOS: "1" + NEMOCLAW_NON_INTERACTIVE: "1" + NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" + NEMOCLAW_RECREATE_SANDBOX: "1" + NEMOCLAW_AGENT: ${{ matrix.agent }} + NEMOCLAW_SANDBOX_NAME: e2e-bedrock-${{ matrix.agent }} + OPENSHELL_GATEWAY: "nemoclaw" + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + + - name: Configure isolated Docker auth directory + run: echo "DOCKER_CONFIG=${RUNNER_TEMP}/docker-config-bedrock-runtime-compatible-anthropic-${{ matrix.agent }}" >> "$GITHUB_ENV" + + - name: Authenticate to Docker Hub + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + shell: bash + run: | + set -euo pipefail + if [[ -z "${DOCKERHUB_USERNAME}" || -z "${DOCKERHUB_TOKEN}" ]]; then + echo "::notice::Docker Hub credentials not configured; continuing with anonymous pulls." + exit 0 + fi + mkdir -p "${DOCKER_CONFIG}" + chmod 700 "${DOCKER_CONFIG}" + login_succeeded=0 + for attempt in 1 2 3; do + if echo "${DOCKERHUB_TOKEN}" | timeout 30s docker login docker.io --username "${DOCKERHUB_USERNAME}" --password-stdin; then + login_succeeded=1 + break + fi + if [[ "$attempt" -lt 3 ]]; then + echo "::warning::Docker Hub login attempt ${attempt} failed; retrying." 
+ sleep 5 + fi + done + if [[ "$login_succeeded" -ne 1 ]]; then + echo "::warning::Docker Hub login failed after 3 attempts; continuing with anonymous pulls." + fi + + - name: Set up Node + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0 + with: + node-version: 22 + cache: npm + + - name: Install root dependencies + run: npm ci --ignore-scripts + + - name: Build CLI + run: npm run build:cli + + - name: Run Bedrock Runtime compatible Anthropic live test + # Direct Vitest coverage for + # test/e2e/test-bedrock-runtime-compatible-anthropic.sh. Preserves the + # fake Bedrock Runtime endpoint, /etc/hosts mapping, source CLI + # onboard, OpenShell adapter route, agent-specific runtime probes, and + # leak-scan contract for both OpenClaw and Hermes. + run: | + set -euo pipefail + npx vitest run --project e2e-scenarios-live \ + test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts \ + --silent=false --reporter=default + + - name: Upload Bedrock Runtime compatible Anthropic artifacts + if: always() + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: e2e-vitest-scenarios-bedrock-runtime-compatible-anthropic-${{ matrix.agent }} + path: e2e-artifacts/vitest/bedrock-runtime-compatible-anthropic/${{ matrix.agent }}/ + include-hidden-files: false + if-no-files-found: ignore + retention-days: 14 + + - name: Clean up Docker auth + if: always() + run: | + set -euo pipefail + docker logout docker.io || true + rm -rf "${DOCKER_CONFIG}" + sandbox-survival-vitest: needs: generate-matrix if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',sandbox-survival-vitest,') || contains(format(',{0},', inputs.scenarios), ',sandbox-survival,') }} @@ -1889,6 +1988,7 @@ jobs: launchable-smoke-vitest, double-onboard-vitest, model-router-provider-routed-inference-vitest, + bedrock-runtime-compatible-anthropic-vitest, sandbox-survival-vitest, 
openclaw-tui-chat-correlation-vitest, gateway-guard-recovery, diff --git a/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts b/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts new file mode 100644 index 0000000000..bbd0cdb718 --- /dev/null +++ b/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts @@ -0,0 +1,1215 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { spawn } from "node:child_process"; +import { randomUUID } from "node:crypto"; +import fs from "node:fs"; +import * as http2 from "node:http2"; +import { createRequire } from "node:module"; +import net from "node:net"; +import os from "node:os"; +import path from "node:path"; +import type { ArtifactSink } from "../fixtures/artifacts.ts"; +import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts"; +import type { HostCliClient } from "../fixtures/clients/host.ts"; +import { + type SandboxClient, + trustedSandboxShellScript, + validateSandboxName, +} from "../fixtures/clients/sandbox.ts"; +import { expect, test } from "../fixtures/e2e-test.ts"; +import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts"; +import { redactString } from "../fixtures/redaction.ts"; + +// Direct Vitest migration for test/e2e/test-bedrock-runtime-compatible-anthropic.sh. +// Keep the same live system boundary: host fake Bedrock Runtime endpoint, +// /etc/hosts mapping, source CLI onboard, OpenShell provider route, sandbox +// config/runtime probes, adapter breadcrumbs, and leak scan. 
+ +const require = createRequire(import.meta.url); + +const REPO_ROOT = path.resolve(import.meta.dirname, "../../.."); +const CLI_ENTRYPOINT = path.join(REPO_ROOT, "bin", "nemoclaw.js"); +const DIST_ENTRYPOINT = path.join(REPO_ROOT, "dist", "nemoclaw.js"); +const BEDROCK_HOSTNAME = "bedrock-runtime.us-east-1.amazonaws.com"; +const BEDROCK_MOCK_PORT = Number(process.env.NEMOCLAW_BEDROCK_RUNTIME_MOCK_PORT ?? "18147"); +const BEDROCK_ADAPTER_PORT = 11436; +const BEDROCK_ENDPOINT_URL = `http://${BEDROCK_HOSTNAME}:${BEDROCK_MOCK_PORT}`; +const BEDROCK_MODEL = + process.env.NEMOCLAW_BEDROCK_RUNTIME_MODEL ?? "anthropic.claude-3-5-sonnet-20240620-v1:0"; +const COMPATIBLE_KEY = + process.env.NEMOCLAW_BEDROCK_RUNTIME_FAKE_KEY ?? "fake-pasted-bedrock-runtime-key-e2e"; +const AGENT = process.env.NEMOCLAW_AGENT ?? "openclaw"; +const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? `e2e-bedrock-${AGENT}`; +const RUN_BEDROCK_TEST = shouldRunLiveE2EScenarios() ? test : test.skip; +const ONBOARD_TIMEOUT_MS = 30 * 60_000; +const TEST_TIMEOUT_MS = 60 * 60_000; +const SANDBOX_TIMEOUT_MS = 180_000; + +type AgentName = "openclaw" | "hermes"; +type CommandText = { stdout: string; stderr: string }; +type EventHeader = { type: "string"; value: string }; +type EventStreamCodec = { + encode(message: { headers: Record; body: Uint8Array }): Uint8Array; +}; +type EventStreamCodecConstructor = new ( + encoder: (input: string) => Uint8Array, + decoder: (input: Uint8Array) => string, +) => EventStreamCodec; + +interface RawRunResult { + readonly command: readonly string[]; + readonly exitCode: number | null; + readonly signal: NodeJS.Signals | null; + readonly timedOut: boolean; + readonly stdout: string; + readonly stderr: string; + readonly redactedStdout: string; + readonly redactedStderr: string; +} + +interface RawRunOptions { + readonly artifactName: string; + readonly artifacts: ArtifactSink; + readonly cwd?: string; + readonly env?: NodeJS.ProcessEnv; + readonly redactionValues?: 
/**
 * Quote a value as a single POSIX shell word.
 *
 * The value is wrapped in single quotes; every embedded single quote is
 * rewritten as `'\''` (close quote, escaped quote, reopen quote).
 *
 * @param value - Raw text to embed in a shell command line.
 * @returns The single-quoted form of `value`.
 */
function shellQuote(value: string): string {
  const escapedQuote = "'\\''";
  const segments = value.split("'");
  return "'" + segments.join(escapedQuote) + "'";
}
"nemoclaw", + ...extra, + }; +} + +function onboardEnv(home: string, agent: AgentName): NodeJS.ProcessEnv { + return testEnv(home, { + COMPATIBLE_ANTHROPIC_API_KEY: COMPATIBLE_KEY, + NEMOCLAW_AGENT: agent, + NEMOCLAW_ENDPOINT_URL: BEDROCK_ENDPOINT_URL, + NEMOCLAW_MODEL: BEDROCK_MODEL, + NEMOCLAW_POLICY_MODE: "skip", + NEMOCLAW_PREFERRED_API: "openai-completions", + NEMOCLAW_PROVIDER: "anthropicCompatible", + NEMOCLAW_RECREATE_SANDBOX: "1", + NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME, + NEMOCLAW_YES: "1", + }); +} + +function redactedCommand(command: readonly string[], values: readonly string[]): string[] { + return command.map((part) => redactString(part, values)); +} + +async function runRawCommand( + command: string, + args: readonly string[], + options: RawRunOptions, +): Promise { + const timeoutMs = options.timeoutMs ?? 60_000; + const redactionValues = [...(options.redactionValues ?? [])]; + const child = spawn(command, [...args], { + cwd: options.cwd ?? REPO_ROOT, + detached: true, + env: options.env, + stdio: ["ignore", "pipe", "pipe"], + }); + const fullCommand = [command, ...args]; + let stdout = ""; + let stderr = ""; + let timedOut = false; + let spawnError: Error | undefined; + + const killProcessGroup = (signal: NodeJS.Signals): void => { + if (child.pid === undefined) return; + try { + process.kill(-child.pid, signal); + } catch { + child.kill(signal); + } + }; + + const timeout = setTimeout(() => { + timedOut = true; + killProcessGroup("SIGTERM"); + setTimeout(() => killProcessGroup("SIGKILL"), 1_000).unref(); + }, timeoutMs); + timeout.unref(); + + child.stdout?.on("data", (chunk: Buffer) => { + stdout += chunk.toString("utf8"); + }); + child.stderr?.on("data", (chunk: Buffer) => { + stderr += chunk.toString("utf8"); + }); + child.on("error", (error) => { + spawnError = error; + }); + + const { exitCode, signal } = await new Promise<{ + exitCode: number | null; + signal: NodeJS.Signals | null; + }>((resolve) => { + child.on("close", (code, closeSignal) 
/**
 * Parse a request path of the form `/model/<encoded model id>/converse` or
 * `/model/<encoded model id>/converse-stream`.
 *
 * @param pathname - The request path to inspect.
 * @returns The percent-decoded model id and the operation, or `null` when the
 *   path has a different shape or the model id is not valid percent-encoding.
 */
function parseModelPath(
  pathname: string,
): { model: string; operation: "converse" | "converse-stream" } | null {
  const match = /^\/model\/(.+)\/(converse|converse-stream)$/.exec(pathname);
  if (!match) return null;

  const [, encodedModel = "", operation] = match;
  if (operation !== "converse" && operation !== "converse-stream") return null;

  try {
    return { model: decodeURIComponent(encodedModel), operation };
  } catch {
    // decodeURIComponent throws URIError on malformed escapes such as "%zz".
    // Report "no match" instead of throwing so callers can reject the request.
    return null;
  }
}
function startFakeBedrockRuntimeMock(options: { + port: number; + expectedBearer: string; + expectedModel: string; +}): Promise { + const codec = loadEventStreamCodec(); + const logs: string[] = []; + let converseCount = 0; + let streamCount = 0; + const record = (line: string): void => { + logs.push(line); + }; + const server = http2.createServer(); + + server.on("stream", (rawStream, headers) => { + const stream = rawStream as http2.ServerHttp2Stream; + const method = String(headers[http2.constants.HTTP2_HEADER_METHOD] ?? ""); + const pathname = String(headers[http2.constants.HTTP2_HEADER_PATH] ?? ""); + const auth = String(headers[http2.constants.HTTP2_HEADER_AUTHORIZATION] ?? ""); + const chunks: Buffer[] = []; + + stream.on("data", (chunk: Buffer) => { + chunks.push(Buffer.from(chunk)); + }); + stream.on("end", () => { + const parsed = parseModelPath(pathname); + if (method !== "POST" || !parsed) { + sendHttp2Json(stream, 404, { message: "not found" }); + return; + } + + const opLabel = parsed.operation === "converse-stream" ? "converse-stream" : "converse"; + if (auth !== `Bearer ${options.expectedBearer}`) { + record(`POST /model/${opLabel} auth=missing`); + sendHttp2Json(stream, 401, { message: "missing bearer credential" }); + return; + } + + record(`POST /model/${opLabel} auth=ok`); + if (parsed.operation === "converse-stream") streamCount += 1; + else converseCount += 1; + + if (parsed.model !== options.expectedModel) { + sendHttp2Json(stream, 400, { message: "unexpected model id" }); + return; + } + + if (parsed.operation === "converse-stream") { + sendConverseStream(stream, codec); + return; + } + sendHttp2Json(stream, 200, conversePayload()); + }); + }); + server.on("sessionError", (err) => { + record(`session_error=${err && "code" in err ? 
/**
 * Best-effort shutdown of the host Bedrock Runtime adapter recorded under
 * `<home>/.nemoclaw`.
 *
 * When a state file exists and names an endpoint other than
 * `BEDROCK_ENDPOINT_URL`, the recorded process is left alone. Otherwise the
 * pid from the pid file (if any) receives SIGTERM. The pid, token, and state
 * files are removed in every case.
 *
 * This function never throws: a corrupt or unreadable state/pid file only
 * skips the kill step, and file-removal failures are ignored.
 *
 * @param home - Home directory that contains the `.nemoclaw` state directory.
 */
function stopBedrockAdapterBestEffort(home: string): void {
  const stateDir = path.join(home, ".nemoclaw");
  const stateFile = path.join(stateDir, "bedrock-runtime-adapter.json");
  const pidFile = path.join(stateDir, "bedrock-runtime-adapter.pid");
  const tokenFile = path.join(stateDir, "bedrock-runtime-adapter-token");

  try {
    if (fs.existsSync(stateFile)) {
      const state: unknown = JSON.parse(fs.readFileSync(stateFile, "utf8"));
      // Guard against a state file holding `null` or a non-object JSON value.
      const endpointUrl =
        state !== null && typeof state === "object"
          ? (state as { endpointUrl?: unknown }).endpointUrl
          : undefined;
      // NOTE(review): the files below are still removed on this early return,
      // as before — confirm that is intended for an adapter on another endpoint.
      if (endpointUrl !== BEDROCK_ENDPOINT_URL) return;
    }
    if (fs.existsSync(pidFile)) {
      const pid = Number(fs.readFileSync(pidFile, "utf8").trim());
      if (Number.isInteger(pid) && pid > 0) {
        try {
          process.kill(pid, "SIGTERM");
        } catch {
          // Already stopped.
        }
      }
    }
  } catch {
    // Unreadable or corrupt adapter state: ownership cannot be verified, so
    // skip the kill. Cleanup must not hide the primary E2E failure.
  } finally {
    for (const file of [pidFile, tokenFile, stateFile]) {
      try {
        fs.rmSync(file, { force: true });
      } catch {
        // Best-effort removal; `force` already ignores missing files.
      }
    }
  }
}
/**
 * Assert that the host-side onboarding records describe this scenario.
 *
 * Reads `<home>/.nemoclaw/onboard-session.json` and
 * `<home>/.nemoclaw/sandboxes.json` and checks the sandbox name, agent (only
 * when the record carries one), provider, and model. All mismatches and read
 * failures are collected first so one assertion reports every problem.
 *
 * @param home - Home directory that contains the `.nemoclaw` state directory.
 * @param agent - Agent the sandbox was onboarded with.
 */
async function assertOnboardIdentity(home: string, agent: AgentName): Promise<void> {
  const stateDir = path.join(home, ".nemoclaw");
  const sessionPath = path.join(stateDir, "onboard-session.json");
  const registryPath = path.join(stateDir, "sandboxes.json");
  const expectedProvider = "compatible-anthropic-endpoint";
  const errors: string[] = [];

  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // Checks shared by the session record and the registry entry.
  const checkIdentity = (label: string, record: Record<string, unknown>): void => {
    if (record.agent !== undefined && record.agent !== agent)
      errors.push(`${label} agent=${String(record.agent)}`);
    if (record.provider !== expectedProvider)
      errors.push(`${label} provider=${String(record.provider)}`);
    if (record.model !== BEDROCK_MODEL) errors.push(`${label} model=${String(record.model)}`);
  };

  try {
    const session = JSON.parse(fs.readFileSync(sessionPath, "utf8")) as Record<string, unknown>;
    if (session.sandboxName !== SANDBOX_NAME)
      errors.push(`session sandboxName=${String(session.sandboxName)}`);
    checkIdentity("session", session);
  } catch (error) {
    errors.push(`session read failed: ${describeError(error)}`);
  }

  try {
    const registry = JSON.parse(fs.readFileSync(registryPath, "utf8")) as {
      sandboxes?: Record<string, Record<string, unknown>>;
    };
    const sandbox = registry.sandboxes?.[SANDBOX_NAME];
    if (!sandbox) {
      errors.push(`registry sandbox ${SANDBOX_NAME} missing`);
    } else {
      checkIdentity("registry", sandbox);
    }
  } catch (error) {
    errors.push(`registry read failed: ${describeError(error)}`);
  }

  expect(errors).toEqual([]);
}
/**
 * Recover JSON documents from agent output that may be wrapped in non-JSON text.
 *
 * Tries, in order: the whole input; the span from the first `{` to the last
 * `}`; then each line that starts with `{`. Returns an empty array when
 * nothing parses.
 */
function extractOpenClawJsonDocs(raw: string): unknown[] {
  try {
    return [JSON.parse(raw)];
  } catch {
    // Not a single JSON document; try to carve one out below.
  }

  const first = raw.indexOf("{");
  const last = raw.lastIndexOf("}");
  if (first < 0 || last <= first) return [];

  try {
    return [JSON.parse(raw.slice(first, last + 1))];
  } catch {
    // The brace span is not one document; fall back to line-delimited JSON.
  }

  const docs: unknown[] = [];
  for (const line of raw.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed.startsWith("{")) continue;
    try {
      docs.push(JSON.parse(trimmed));
    } catch {
      // Ignore non-JSON wrapper lines.
    }
  }
  return docs;
}

/**
 * Extract the human-readable text from `openclaw agent --json` style output.
 *
 * Every JSON document found in `raw` is walked depth-first. Trimmed non-empty
 * strings reached through the known text keys, `choices`, and the known
 * container keys are collected in visit order. When a document is an object
 * with an object-valued `result`, only that `result` is walked.
 *
 * @param raw - Raw command output (JSON, possibly surrounded by other lines).
 * @returns Collected text fragments joined with newlines; `""` when the input
 *   is blank or contains no parseable JSON.
 */
function parseOpenClawAgentText(raw: string): string {
  if (!raw.trim()) return "";

  const textKeys = ["text", "content", "reasoning_content"];
  const containerKeys = [
    "result",
    "payloads",
    "payload",
    "messages",
    "response",
    "data",
    "output",
    "outputs",
    "items",
    "segments",
    "delta",
    "message",
  ];

  const parts: string[] = [];
  // Tracks visited objects so each one is walked at most once.
  const visited = new Set<unknown>();

  const collect = (value: unknown): void => {
    if (value == null || visited.has(value)) return;
    if (typeof value === "string") {
      const text = value.trim();
      if (text) parts.push(text);
      return;
    }
    if (typeof value !== "object") return;
    visited.add(value);
    if (Array.isArray(value)) {
      for (const item of value) collect(item);
      return;
    }
    const record = value as Record<string, unknown>;
    for (const key of textKeys) collect(record[key]);
    if (Array.isArray(record.choices)) {
      for (const choice of record.choices) collect(choice);
    }
    for (const key of containerKeys) collect(record[key]);
  };

  for (const doc of extractOpenClawJsonDocs(raw)) {
    const result =
      doc !== null && typeof doc === "object" && !Array.isArray(doc)
        ? (doc as Record<string, unknown>).result
        : undefined;
    collect(result && typeof result === "object" ? result : doc);
  }
  return parts.join("\n");
}
/**
 * Send one chat completion from inside the sandbox to
 * `https://inference.local/v1/chat/completions` and assert the reply contains
 * "PONG".
 *
 * curl runs without `--fail`, so it can exit 0 with an error body. Both a
 * non-JSON body and a reply without PONG therefore fail with the redacted
 * response text attached, rather than a bare SyntaxError or a context-free
 * assertion.
 *
 * @param sandbox - Client used to execute commands inside the sandbox.
 * @param home - Home directory used to build the command environment.
 */
async function assertSandboxInference(sandbox: SandboxClient, home: string): Promise<void> {
  const payload = JSON.stringify({
    model: BEDROCK_MODEL,
    messages: [{ role: "user", content: "Reply with exactly one word: PONG" }],
    max_tokens: 32,
  });
  const curlArgs = [
    "curl",
    "-sS",
    "--max-time",
    "90",
    "https://inference.local/v1/chat/completions",
    "-H",
    "Content-Type: application/json",
    "--data-raw",
    payload,
  ];
  const response = await sandbox.exec(SANDBOX_NAME, curlArgs, {
    artifactName: "sandbox-inference-local-bedrock-runtime",
    env: testEnv(home),
    redactionValues: [COMPATIBLE_KEY],
    timeoutMs: 120_000,
  });
  expectExitZero(response, "sandbox inference.local chat completion");

  // Redact before embedding the response in any failure message.
  const responseText = redactString(resultText(response), [COMPATIBLE_KEY]);
  let content: string;
  try {
    content = parseChatContent(response.stdout);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(
      `sandbox inference.local returned a non-JSON body (${reason}):\n${responseText}`,
    );
  }
  expect(content, `sandbox inference.local reply did not contain PONG:\n${responseText}`).toMatch(
    /PONG/i,
  );
}
/sandbox/.hermes/.env; set +a; " + + `if [ -n "\${API_SERVER_KEY:-}" ]; then curl -sS --max-time 120 http://localhost:8642/v1/chat/completions -H 'Content-Type: application/json' -H "Authorization: Bearer \${API_SERVER_KEY}" -d ${shellQuote(payload)}; ` + + `else curl -sS --max-time 120 http://localhost:8642/v1/chat/completions -H 'Content-Type: application/json' -d ${shellQuote(payload)}; fi`; + const response = await sandbox.exec(SANDBOX_NAME, ["sh", "-lc", remote], { + artifactName: "hermes-local-chat-api-bedrock-runtime", + env: testEnv(home), + redactionValues: [COMPATIBLE_KEY], + timeoutMs: 180_000, + }); + expectExitZero(response, "Hermes local chat API through Bedrock adapter"); + expect(parseChatContent(response.stdout)).toMatch(/PONG/i); +} + +function readAdapterToken(home: string): string { + const tokenPath = path.join(home, ".nemoclaw", "bedrock-runtime-adapter-token"); + const token = fs.readFileSync(tokenPath, "utf8").trim(); + expect(token, "adapter token file was not created on the host").not.toBe(""); + return token; +} + +function adapterLogPath(home: string): string { + return path.join(home, ".nemoclaw", "bedrock-runtime-adapter.log"); +} + +function assertAdapterLogBreadcrumbs(home: string, agent: AgentName): void { + const logPath = adapterLogPath(home); + expect(fs.existsSync(logPath), "Bedrock Runtime adapter host log was not written").toBe(true); + const log = fs.readFileSync(logPath, "utf8"); + expect(log).toContain('"event":"request_completed"'); + expect(log).toContain('"operation":"converse"'); + expect(log).toContain(BEDROCK_MODEL); + if (agent === "openclaw") { + expect(log).toContain('"operation":"converse_stream"'); + } +} + +const SNAPSHOT_SCRIPT = trustedSandboxShellScript(` +set +e +emit_file() { + path="$1" + [ -r "$path" ] || return 0 + size=$(wc -c <"$path" 2>/dev/null || echo 0) + [ "$size" -le 1048576 ] || return 0 + printf '\\n@@NEMOCLAW_E2E_FILE@@ %s\\n' "$path" + tr '\\000' '\\n' <"$path" 2>/dev/null || true +} + +for 
root in /sandbox/.openclaw /sandbox/.hermes /etc/nemoclaw /tmp; do + [ -e "$root" ] || continue + find "$root" -maxdepth 4 -type f 2>/dev/null | while IFS= read -r file; do + case "$file" in + */node_modules/*|*/.git/*) continue ;; + esac + emit_file "$file" + done +done + +for proc_dir in /proc/[0-9]*; do + [ -d "$proc_dir" ] || continue + for name in environ cmdline; do + emit_file "$proc_dir/$name" + done +done +`); + +function findForbiddenLeaks( + text: string, + label: string, + patterns: Array<[string, string]>, +): string[] { + const locations: string[] = []; + let current = label; + for (const line of text.split("\n")) { + if (line.startsWith("@@NEMOCLAW_E2E_FILE@@ ")) { + current = line.slice("@@NEMOCLAW_E2E_FILE@@ ".length); + continue; + } + for (const [name, value] of patterns) { + if (value && line.includes(value)) locations.push(`${name}: ${current}`); + } + } + return [...new Set(locations)].sort(); +} + +async function assertNoBedrockLeaks(options: { + artifacts: ArtifactSink; + home: string; + mock: MockBedrockRuntime; + onboarding: RawRunResult; + sandbox: SandboxClient; + redact: (text: string, extraValues?: string[]) => string; +}): Promise { + const adapterToken = readAdapterToken(options.home); + const patterns: Array<[string, string]> = [ + ["fake user key", COMPATIBLE_KEY], + ["adapter token", adapterToken], + ["AWS bearer env name", "AWS_BEARER_TOKEN_BEDROCK"], + ["adapter token env name", "NEMOCLAW_BEDROCK_RUNTIME_ADAPTER_TOKEN"], + ["raw Bedrock hostname", BEDROCK_HOSTNAME], + ]; + const snapshot = await runRawCommand( + "openshell", + ["sandbox", "exec", "-n", SANDBOX_NAME, "--", "sh", "-lc", SNAPSHOT_SCRIPT], + { + artifactName: "sandbox-snapshot-bedrock-runtime", + artifacts: options.artifacts, + env: testEnv(options.home), + redactionValues: [COMPATIBLE_KEY, adapterToken], + timeoutMs: 180_000, + }, + ); + const adapterLog = fs.existsSync(adapterLogPath(options.home)) + ? 
fs.readFileSync(adapterLogPath(options.home), "utf8") + : ""; + const hostLogs = [ + "@@NEMOCLAW_E2E_FILE@@ onboard stdout", + options.onboarding.stdout, + "@@NEMOCLAW_E2E_FILE@@ onboard stderr", + options.onboarding.stderr, + "@@NEMOCLAW_E2E_FILE@@ adapter log", + adapterLog, + "@@NEMOCLAW_E2E_FILE@@ fake Bedrock mock log", + options.mock.logs.join("\n"), + ].join("\n"); + await options.artifacts.writeText( + "host-bedrock-runtime-logs.txt", + options.redact(hostLogs, [COMPATIBLE_KEY, adapterToken]), + ); + + const leaks = [ + ...findForbiddenLeaks(snapshot.stdout, "sandbox snapshot", patterns), + ...findForbiddenLeaks(hostLogs, "host logs", patterns), + ]; + expect(leaks).toEqual([]); +} + +RUN_BEDROCK_TEST( + "bedrock runtime compatible Anthropic endpoint routes through managed inference.local", + { timeout: TEST_TIMEOUT_MS }, + async ({ artifacts, cleanup, host, sandbox, secrets, skip }) => { + assertAgent(AGENT); + validateSandboxName(SANDBOX_NAME); + + const home = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-bedrock-runtime-home-")); + const hostsBackup = path.join( + os.tmpdir(), + `nemoclaw-bedrock-hosts-${process.pid}-${Date.now()}`, + ); + let mock: MockBedrockRuntime | undefined; + let onboarding: RawRunResult | undefined; + + cleanup.add(`remove Bedrock Runtime test home ${home}`, () => + fs.rmSync(home, { recursive: true, force: true }), + ); + cleanup.add(`destroy Bedrock Runtime sandbox ${SANDBOX_NAME}`, () => + cleanupSandboxState(host, home), + ); + cleanup.add("restore /etc/hosts after Bedrock Runtime mapping", () => + restoreHostsFile(host, hostsBackup, home), + ); + cleanup.add("stop Bedrock Runtime adapter", () => stopBedrockAdapterBestEffort(home)); + cleanup.add("stop fake Bedrock Runtime endpoint", async () => { + if (mock) await mock.close(); + }); + cleanup.add("write fake Bedrock Runtime log", async () => { + if (mock) { + await artifacts.writeText( + "fake-bedrock-runtime.log", + secrets.redact(mock.logs.join("\n"), 
[COMPATIBLE_KEY]), + ); + } + }); + + await artifacts.writeJson("scenario.json", { + id: "bedrock-runtime-compatible-anthropic", + runner: "vitest", + legacySource: "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", + refs: ["#3767", "#5098"], + agent: AGENT, + sandboxName: SANDBOX_NAME, + boundary: "host-bedrock-mock-source-cli-onboard-and-sandbox-exec", + contracts: [ + "Docker, python3, source CLI, and OpenShell are available", + "bedrock-runtime.us-east-1.amazonaws.com maps to the host fake endpoint", + "non-interactive anthropicCompatible onboarding selects compatible-anthropic-endpoint", + "OpenShell owns the hidden Bedrock adapter token while sandbox config uses inference.local", + "OpenClaw and Hermes runtime paths return PONG through inference.local", + "fake Bedrock Runtime endpoint observes authenticated Converse traffic", + "adapter host log records safe request breadcrumbs", + "sandbox configs, env, proc, and host logs contain no Bedrock token or hostname leaks", + ], + workflowRetirement: "legacy bash lane remains until #5098 Phase 11 shell retirement", + }); + + const docker = await host.command("docker", ["info"], { + artifactName: "prereq-docker-info-bedrock-runtime", + env: testEnv(home), + timeoutMs: 30_000, + }); + if (docker.exitCode !== 0) { + if (process.env.GITHUB_ACTIONS === "true") { + throw new Error( + `Docker is required for Bedrock Runtime compatible Anthropic E2E: ${resultText(docker)}`, + ); + } + skip("Docker is required for Bedrock Runtime compatible Anthropic E2E"); + } + expectExitZero( + await host.command("python3", ["--version"], { + artifactName: "prereq-python-version-bedrock-runtime", + env: testEnv(home), + timeoutMs: 30_000, + }), + "python3 is available", + ); + + await prepareSourceCliAndOpenShell(host, home); + await mapBedrockHostToLoopback(host, home, hostsBackup, skip); + mock = await startFakeBedrockRuntimeMock({ + port: BEDROCK_MOCK_PORT, + expectedBearer: COMPATIBLE_KEY, + expectedModel: BEDROCK_MODEL, + 
}); + + await cleanupSandboxState(host, home); + onboarding = await runRawCommand( + "node", + [ + CLI_ENTRYPOINT, + "onboard", + "--fresh", + "--non-interactive", + "--yes-i-accept-third-party-software", + ], + { + artifactName: `onboard-bedrock-runtime-${AGENT}`, + artifacts, + env: onboardEnv(home, AGENT), + redactionValues: [COMPATIBLE_KEY], + timeoutMs: ONBOARD_TIMEOUT_MS, + }, + ); + expect(onboarding.exitCode, redactedResultText(onboarding)).toBe(0); + + await assertOnboardIdentity(home, AGENT); + await assertAdapterHealth(host, home); + await assertOpenShellProviderRoute(host, home); + if (AGENT === "hermes") { + await assertHermesConfig(sandbox, home); + } else { + await assertOpenClawConfig(sandbox, home); + } + + await assertSandboxInference(sandbox, home); + if (AGENT === "hermes") { + await assertHermesApiChat(sandbox, home); + } else { + await assertOpenClawAgentTurn(sandbox, home); + } + + expect( + mock.converseCount, + "fake Bedrock Runtime endpoint observed authenticated Converse traffic", + ).toBeGreaterThanOrEqual(1); + if (AGENT === "openclaw") { + expect( + mock.streamCount, + "fake Bedrock Runtime endpoint observed authenticated ConverseStream traffic", + ).toBeGreaterThanOrEqual(1); + } + assertAdapterLogBreadcrumbs(home, AGENT); + await assertNoBedrockLeaks({ + artifacts, + home, + mock, + onboarding, + sandbox, + redact: (text, extraValues) => secrets.redact(text, extraValues), + }); + + await artifacts.writeJson("scenario-result.json", { + id: "bedrock-runtime-compatible-anthropic", + agent: AGENT, + assertions: { + onboardCompleted: onboarding.exitCode === 0, + providerIdentity: "compatible-anthropic-endpoint", + adapterHealthy: true, + converseRequests: mock.converseCount, + converseStreamRequests: mock.streamCount, + leakScanPassed: true, + }, + }); + }, +); diff --git a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts index 354712ecde..f8f1b7fb96 100644 --- 
a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts +++ b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts @@ -255,6 +255,26 @@ describe("e2e-vitest-scenarios workflow boundary", () => { selectedFreeStandingJobs: ["model-router-provider-routed-inference-vitest"], registryScenarios: [], }); + expect( + evaluateE2eVitestWorkflowDispatchSelectors({ + scenarios: "bedrock-runtime-compatible-anthropic", + }), + ).toMatchObject({ + valid: true, + liveScenariosRuns: false, + selectedFreeStandingJobs: ["bedrock-runtime-compatible-anthropic-vitest"], + registryScenarios: [], + }); + expect( + evaluateE2eVitestWorkflowDispatchSelectors({ + jobs: "bedrock-runtime-compatible-anthropic-vitest", + }), + ).toMatchObject({ + valid: true, + liveScenariosRuns: false, + selectedFreeStandingJobs: ["bedrock-runtime-compatible-anthropic-vitest"], + registryScenarios: [], + }); }); it("keeps the free-standing inventory internally consistent and data-only", () => { @@ -356,7 +376,7 @@ describe("e2e-vitest-scenarios workflow boundary", () => { } }); - it("keeps each free-standing scenario out of the registry matrix", () => { + it("keeps each free-standing scenario out of the registry matrix", { timeout: 15_000 }, () => { const inventory = readFreeStandingJobsInventory(); for (const job of inventory.allowedJobs) { expect(generateMatrixForDispatch({ JOBS: job, SCENARIOS: "" })).toMatchObject({ diff --git a/tools/e2e-scenarios/free-standing-jobs.env b/tools/e2e-scenarios/free-standing-jobs.env index 68ce23e36d..f758c02950 100644 --- a/tools/e2e-scenarios/free-standing-jobs.env +++ b/tools/e2e-scenarios/free-standing-jobs.env @@ -1,5 +1,5 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 -allowed_jobs=openshell-version-pin-vitest,onboard-negative-paths-vitest,skill-agent-vitest,inference-routing-vitest,credential-migration-vitest,runtime-overrides-vitest,hermes-e2e-vitest,hermes-root-entrypoint-smoke-vitest,network-policy-vitest,shields-config-vitest,rebuild-openclaw-vitest,sandbox-rebuild-vitest,token-rotation-vitest,launchable-smoke-vitest,openclaw-tui-chat-correlation-vitest,gateway-guard-recovery,double-onboard-vitest,issue-4434-tui-unreachable-inference-vitest,model-router-provider-routed-inference-vitest,credential-sanitization-vitest,sandbox-survival-vitest -free_standing_scenarios_csv=openshell-version-pin,onboard-negative-paths,skill-agent,inference-routing,runtime-overrides,hermes-e2e,hermes-root-entrypoint-smoke,network-policy,shields-config,rebuild-openclaw,sandbox-rebuild,token-rotation,openclaw-tui-chat-correlation,double-onboard,issue-4434-tui-unreachable-inference,model-router-provider-routed-inference,credential-sanitization,sandbox-survival -free_standing_scenario_jobs_csv=openshell-version-pin:openshell-version-pin-vitest,onboard-negative-paths:onboard-negative-paths-vitest,skill-agent:skill-agent-vitest,inference-routing:inference-routing-vitest,runtime-overrides:runtime-overrides-vitest,hermes-e2e:hermes-e2e-vitest,hermes-root-entrypoint-smoke:hermes-root-entrypoint-smoke-vitest,network-policy:network-policy-vitest,shields-config:shields-config-vitest,rebuild-openclaw:rebuild-openclaw-vitest,sandbox-rebuild:sandbox-rebuild-vitest,token-rotation:token-rotation-vitest,openclaw-tui-chat-correlation:openclaw-tui-chat-correlation-vitest,double-onboard:double-onboard-vitest,issue-4434-tui-unreachable-inference:issue-4434-tui-unreachable-inference-vitest,model-router-provider-routed-inference:model-router-provider-routed-inference-vitest,credential-sanitization:credential-sanitization-vitest,sandbox-survival:sandbox-survival-vitest 
+allowed_jobs=openshell-version-pin-vitest,onboard-negative-paths-vitest,skill-agent-vitest,inference-routing-vitest,credential-migration-vitest,runtime-overrides-vitest,hermes-e2e-vitest,hermes-root-entrypoint-smoke-vitest,network-policy-vitest,shields-config-vitest,rebuild-openclaw-vitest,sandbox-rebuild-vitest,token-rotation-vitest,launchable-smoke-vitest,openclaw-tui-chat-correlation-vitest,gateway-guard-recovery,double-onboard-vitest,issue-4434-tui-unreachable-inference-vitest,model-router-provider-routed-inference-vitest,credential-sanitization-vitest,sandbox-survival-vitest,bedrock-runtime-compatible-anthropic-vitest +free_standing_scenarios_csv=openshell-version-pin,onboard-negative-paths,skill-agent,inference-routing,runtime-overrides,hermes-e2e,hermes-root-entrypoint-smoke,network-policy,shields-config,rebuild-openclaw,sandbox-rebuild,token-rotation,openclaw-tui-chat-correlation,double-onboard,issue-4434-tui-unreachable-inference,model-router-provider-routed-inference,credential-sanitization,sandbox-survival,bedrock-runtime-compatible-anthropic 
+free_standing_scenario_jobs_csv=openshell-version-pin:openshell-version-pin-vitest,onboard-negative-paths:onboard-negative-paths-vitest,skill-agent:skill-agent-vitest,inference-routing:inference-routing-vitest,runtime-overrides:runtime-overrides-vitest,hermes-e2e:hermes-e2e-vitest,hermes-root-entrypoint-smoke:hermes-root-entrypoint-smoke-vitest,network-policy:network-policy-vitest,shields-config:shields-config-vitest,rebuild-openclaw:rebuild-openclaw-vitest,sandbox-rebuild:sandbox-rebuild-vitest,token-rotation:token-rotation-vitest,openclaw-tui-chat-correlation:openclaw-tui-chat-correlation-vitest,double-onboard:double-onboard-vitest,issue-4434-tui-unreachable-inference:issue-4434-tui-unreachable-inference-vitest,model-router-provider-routed-inference:model-router-provider-routed-inference-vitest,credential-sanitization:credential-sanitization-vitest,sandbox-survival:sandbox-survival-vitest,bedrock-runtime-compatible-anthropic:bedrock-runtime-compatible-anthropic-vitest From 4fe9e840f9b6a41ed753ee83932fbc22419d0dfc Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 12:10:12 -0700 Subject: [PATCH 2/8] test(e2e): migrate Bedrock Runtime compatible Anthropic scenario Signed-off-by: Carlos Villela --- ...drock-runtime-compatible-anthropic.test.ts | 149 +++++++++-- tools/e2e-scenarios/workflow-boundary.mts | 235 ++++++++++++++++++ 2 files changed, 361 insertions(+), 23 deletions(-) diff --git a/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts b/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts index bbd0cdb718..66b3933935 100644 --- a/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts +++ b/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts @@ -95,10 +95,20 @@ function redactedResultText( return [result.redactedStdout, result.redactedStderr].filter(Boolean).join("\n"); } +function evidenceTail(text: string): string { + return text.slice(-4_000); +} + function expectExitZero(result: 
CommandText & { exitCode: number | null }, label: string): void { expect(result.exitCode, `${label} failed:\n${resultText(result)}`).toBe(0); } +function isMissingSandboxCleanupOutput(text: string): boolean { + return /Sandbox '.+' does not exist|Run 'nemoclaw onboard' to create one|sandbox .* not found|no such sandbox/i.test( + text, + ); +} + function shellQuote(value: string): string { return `'${value.replaceAll("'", "'\\''")}'`; } @@ -429,31 +439,56 @@ async function bestEffort(run: () => Promise | unknown): Promise } } -async function cleanupSandboxState(host: HostCliClient, home: string): Promise { - const env = testEnv(home); - await bestEffort(() => - host.command("node", [CLI_ENTRYPOINT, SANDBOX_NAME, "destroy", "--yes"], { - artifactName: "cleanup-nemoclaw-destroy-bedrock-runtime", - env, - timeoutMs: 180_000, - }), - ); - await bestEffort(() => - host.command("openshell", ["sandbox", "delete", SANDBOX_NAME], { +async function cleanupNemoClawSandbox(host: HostCliClient, home: string): Promise { + const result = await host.command("node", [CLI_ENTRYPOINT, SANDBOX_NAME, "destroy", "--yes"], { + artifactName: "cleanup-nemoclaw-destroy-bedrock-runtime", + env: testEnv(home), + timeoutMs: 180_000, + }); + if (result.exitCode === 0 || isMissingSandboxCleanupOutput(resultText(result))) return; + expectExitZero(result, `cleanup NemoClaw sandbox ${SANDBOX_NAME}`); +} + +async function cleanupOpenShellSandbox(host: HostCliClient, home: string): Promise { + const result = await host.command( + "bash", + [ + "-lc", + 'if ! 
command -v openshell >/dev/null 2>&1; then exit 0; fi; openshell sandbox delete "$1"', + "cleanup-openshell-sandbox-delete", + SANDBOX_NAME, + ], + { artifactName: "cleanup-openshell-sandbox-delete-bedrock-runtime", - env, + env: testEnv(home), timeoutMs: 60_000, - }), + }, ); - await bestEffort(() => - host.command("openshell", ["gateway", "destroy", "-g", "nemoclaw"], { + if (result.exitCode === 0 || isMissingSandboxCleanupOutput(resultText(result))) return; + expectExitZero(result, `cleanup OpenShell sandbox ${SANDBOX_NAME}`); +} + +async function cleanupOpenShellGateway(host: HostCliClient, home: string): Promise { + await host.command( + "bash", + [ + "-lc", + "if ! command -v openshell >/dev/null 2>&1; then exit 0; fi; openshell gateway destroy -g nemoclaw", + ], + { artifactName: "cleanup-openshell-gateway-destroy-bedrock-runtime", - env, + env: testEnv(home), timeoutMs: 120_000, - }), + }, ); } +async function cleanupSandboxState(host: HostCliClient, home: string): Promise { + await bestEffort(() => cleanupNemoClawSandbox(host, home)); + await bestEffort(() => cleanupOpenShellSandbox(host, home)); + await bestEffort(() => cleanupOpenShellGateway(host, home)); +} + function stopBedrockAdapterBestEffort(home: string): void { const stateFile = path.join(home, ".nemoclaw", "bedrock-runtime-adapter.json"); const pidFile = path.join(home, ".nemoclaw", "bedrock-runtime-adapter.pid"); @@ -574,11 +609,15 @@ async function prepareSourceCliAndOpenShell(host: HostCliClient, home: string): "source CLI version", ); - const openshell = await host.command("openshell", ["--version"], { - artifactName: "prereq-openshell-version-bedrock-runtime", - env: testEnv(home), - timeoutMs: 30_000, - }); + const openshell = await host.command( + "bash", + ["-lc", "command -v openshell >/dev/null && openshell --version"], + { + artifactName: "prereq-openshell-version-bedrock-runtime", + env: testEnv(home), + timeoutMs: 30_000, + }, + ); if (openshell.exitCode === 0) return; const 
install = await host.command( @@ -592,7 +631,7 @@ async function prepareSourceCliAndOpenShell(host: HostCliClient, home: string): ); expectExitZero(install, "Install OpenShell CLI"); expectExitZero( - await host.command("openshell", ["--version"], { + await host.command("bash", ["-lc", "openshell --version"], { artifactName: "post-install-openshell-version-bedrock-runtime", env: testEnv(home), timeoutMs: 30_000, @@ -1003,6 +1042,64 @@ function findForbiddenLeaks( return [...new Set(locations)].sort(); } +function isPreContractEndpointValidationRateLimit(options: { + mock: MockBedrockRuntime | undefined; + onboarding: RawRunResult; +}): boolean { + if (options.onboarding.exitCode === 0) return false; + if ((options.mock?.converseCount ?? 0) > 0 || (options.mock?.streamCount ?? 0) > 0) { + return false; + } + + const text = redactedResultText(options.onboarding); + const endpointValidation = + /NVIDIA Endpoints endpoint validation failed|endpoint validation failed|failed to verify inference endpoint|Chat Completions API validation/i.test( + text, + ); + const rateLimited = + /HTTP 429|\b429\b|Too Many Requests|rate[- ]?limit|quota|temporarily unavailable|timed? 
out|timeout/i.test( + text, + ); + const sanitizedNvidiaValidation = + /NVIDIA Endpoints endpoint validation failed/i.test(text) && + /Validation details were omitted to avoid exposing credentials/i.test(text); + return endpointValidation && (rateLimited || sanitizedNvidiaValidation); +} + +async function skipPreContractEndpointValidationRateLimit(options: { + artifacts: ArtifactSink; + mock: MockBedrockRuntime | undefined; + onboarding: RawRunResult; + skip: (note?: string) => never; +}): Promise { + if (!isPreContractEndpointValidationRateLimit(options)) return; + await options.artifacts.writeJson("transient-provider-validation.skip.json", { + id: "bedrock-runtime-compatible-anthropic", + status: "skipped", + reason: "external-provider-validation-unavailable-before-bedrock-runtime-contract", + sourceBoundary: + "external NVIDIA Endpoints provider availability before Bedrock Runtime contract", + removalCondition: + "remove once CI endpoint validation is stable for a release cycle or covered by a hermetic provider-validation fixture", + onboardExitCode: options.onboarding.exitCode, + onboardSignal: options.onboarding.signal, + onboardTimedOut: options.onboarding.timedOut, + mockConverseCount: options.mock?.converseCount ?? 0, + mockConverseStreamCount: options.mock?.streamCount ?? 
0, + redactedStdoutTail: evidenceTail(options.onboarding.redactedStdout), + redactedStderrTail: evidenceTail(options.onboarding.redactedStderr), + }); + await options.artifacts.writeJson("scenario-result.json", { + id: "bedrock-runtime-compatible-anthropic", + status: "skipped", + reason: "external-provider-validation-unavailable-before-bedrock-runtime-contract", + onboardExitCode: options.onboarding.exitCode, + }); + options.skip( + "NVIDIA endpoint validation was rate-limited/unavailable before the Bedrock Runtime contract could run", + ); +} + async function assertNoBedrockLeaks(options: { artifacts: ArtifactSink; home: string; @@ -1161,6 +1258,12 @@ RUN_BEDROCK_TEST( timeoutMs: ONBOARD_TIMEOUT_MS, }, ); + await skipPreContractEndpointValidationRateLimit({ + artifacts, + mock, + onboarding, + skip, + }); expect(onboarding.exitCode, redactedResultText(onboarding)).toBe(0); await assertOnboardIdentity(home, AGENT); diff --git a/tools/e2e-scenarios/workflow-boundary.mts b/tools/e2e-scenarios/workflow-boundary.mts index 9e55fc3ff3..fc3fe4cbab 100644 --- a/tools/e2e-scenarios/workflow-boundary.mts +++ b/tools/e2e-scenarios/workflow-boundary.mts @@ -1897,6 +1897,240 @@ function validateModelRouterProviderRoutedInferenceVitestJob( requireRunContains(errors, cleanup, 'rm -rf "${DOCKER_CONFIG}"'); } +function validateBedrockRuntimeCompatibleAnthropicVitestJob( + errors: string[], + jobs: WorkflowRecord, +): void { + const jobName = "bedrock-runtime-compatible-anthropic-vitest"; + const scenarioName = "bedrock-runtime-compatible-anthropic"; + const job = asRecord(jobs[jobName]); + if (Object.keys(job).length === 0) { + errors.push("workflow missing bedrock-runtime-compatible-anthropic-vitest job"); + return; + } + + if (job["runs-on"] !== "ubuntu-latest") { + errors.push("bedrock-runtime-compatible-anthropic-vitest job must run on ubuntu-latest"); + } + if (job["timeout-minutes"] !== 60) { + errors.push("bedrock-runtime-compatible-anthropic-vitest timeout-minutes must be 
60"); + } + validateFreeStandingJobSelector(errors, jobs, jobName, scenarioName); + + const strategy = asRecord(job.strategy); + if (strategy["fail-fast"] !== false) { + errors.push("bedrock-runtime-compatible-anthropic-vitest strategy.fail-fast must be false"); + } + const matrix = asRecord(strategy.matrix); + if (!Array.isArray(matrix.agent) || matrix.agent.join(",") !== "openclaw,hermes") { + errors.push("bedrock-runtime-compatible-anthropic-vitest matrix.agent must be openclaw,hermes"); + } + + const jobEnv = asRecord(job.env); + if ("DOCKER_CONFIG" in jobEnv) { + errors.push( + "bedrock-runtime-compatible-anthropic-vitest job must not set DOCKER_CONFIG at job level", + ); + } + if ( + jobEnv.E2E_ARTIFACT_DIR !== + "${{ github.workspace }}/e2e-artifacts/vitest/bedrock-runtime-compatible-anthropic/${{ matrix.agent }}" + ) { + errors.push( + "bedrock-runtime-compatible-anthropic-vitest job must write artifacts under e2e-artifacts/vitest/bedrock-runtime-compatible-anthropic/${{ matrix.agent }}", + ); + } + if (jobEnv.NEMOCLAW_CLI_BIN !== "${{ github.workspace }}/bin/nemoclaw.js") { + errors.push( + "bedrock-runtime-compatible-anthropic-vitest job must point NEMOCLAW_CLI_BIN at the repo CLI", + ); + } + if (jobEnv.NEMOCLAW_RUN_E2E_SCENARIOS !== "1") { + errors.push( + "bedrock-runtime-compatible-anthropic-vitest job must set NEMOCLAW_RUN_E2E_SCENARIOS=1", + ); + } + if (jobEnv.NEMOCLAW_NON_INTERACTIVE !== "1") { + errors.push("bedrock-runtime-compatible-anthropic-vitest job must set NEMOCLAW_NON_INTERACTIVE=1"); + } + if (jobEnv.NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE !== "1") { + errors.push( + "bedrock-runtime-compatible-anthropic-vitest job must set NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1", + ); + } + if (jobEnv.NEMOCLAW_RECREATE_SANDBOX !== "1") { + errors.push("bedrock-runtime-compatible-anthropic-vitest job must set NEMOCLAW_RECREATE_SANDBOX=1"); + } + if (jobEnv.NEMOCLAW_AGENT !== "${{ matrix.agent }}") { + errors.push( + 
"bedrock-runtime-compatible-anthropic-vitest job must pass matrix.agent through NEMOCLAW_AGENT", + ); + } + if (jobEnv.NEMOCLAW_SANDBOX_NAME !== "e2e-bedrock-${{ matrix.agent }}") { + errors.push( + "bedrock-runtime-compatible-anthropic-vitest job must derive NEMOCLAW_SANDBOX_NAME from matrix.agent", + ); + } + if (jobEnv.OPENSHELL_GATEWAY !== "nemoclaw") { + errors.push( + "bedrock-runtime-compatible-anthropic-vitest job must force OPENSHELL_GATEWAY=nemoclaw", + ); + } + for (const secret of [ + "NVIDIA_API_KEY", + "DOCKERHUB_USERNAME", + "DOCKERHUB_TOKEN", + "GITHUB_TOKEN", + ]) { + requireEnvDoesNotExposeSecret( + errors, + "bedrock-runtime-compatible-anthropic-vitest job", + jobEnv, + secret, + ); + } + + const steps = asSteps(job.steps); + requireNoDispatchInputInterpolation(errors, steps); + for (const step of steps) { + const stepName = `bedrock-runtime-compatible-anthropic-vitest step '${step.name ?? step.uses ?? ""}'`; + const stepEnv = asRecord(step.env); + requireEnvDoesNotExposeSecret(errors, stepName, stepEnv, "NVIDIA_API_KEY"); + requireEnvDoesNotExposeSecret(errors, stepName, stepEnv, "GITHUB_TOKEN"); + if (step.name !== "Authenticate to Docker Hub") { + requireEnvDoesNotExposeSecret(errors, stepName, stepEnv, "DOCKERHUB_USERNAME"); + requireEnvDoesNotExposeSecret(errors, stepName, stepEnv, "DOCKERHUB_TOKEN"); + requireNoDockerHubAuthInRun(errors, stepName, stringValue(step.run)); + } + } + + const checkout = steps.find((step) => stringValue(step.uses).startsWith("actions/checkout@")); + if (!checkout) { + errors.push("bedrock-runtime-compatible-anthropic-vitest job missing checkout step"); + } + requireFullShaAction(errors, checkout, "bedrock-runtime-compatible-anthropic-vitest checkout"); + if (asRecord(checkout?.with)["persist-credentials"] !== false) { + errors.push( + "bedrock-runtime-compatible-anthropic-vitest checkout step must set persist-credentials=false", + ); + } + + const configureDockerAuth = requireJobStep( + errors, + jobName, + 
steps, + "Configure isolated Docker auth directory", + ); + requireRunContains( + errors, + configureDockerAuth, + 'echo "DOCKER_CONFIG=${RUNNER_TEMP}/docker-config-bedrock-runtime-compatible-anthropic-${{ matrix.agent }}" >> "$GITHUB_ENV"', + ); + requireRunDoesNotContain(errors, configureDockerAuth, "${{ runner.temp }}"); + + const dockerLogin = requireJobStep(errors, jobName, steps, "Authenticate to Docker Hub"); + const dockerLoginEnv = asRecord(dockerLogin?.env); + if (dockerLoginEnv.DOCKERHUB_USERNAME !== "${{ secrets.DOCKERHUB_USERNAME }}") { + errors.push( + "bedrock-runtime-compatible-anthropic-vitest Docker Hub auth must receive DOCKERHUB_USERNAME from secrets", + ); + } + if (dockerLoginEnv.DOCKERHUB_TOKEN !== "${{ secrets.DOCKERHUB_TOKEN }}") { + errors.push( + "bedrock-runtime-compatible-anthropic-vitest Docker Hub auth must receive DOCKERHUB_TOKEN from secrets", + ); + } + requireRunContains(errors, dockerLogin, 'mkdir -p "${DOCKER_CONFIG}"'); + requireRunContains(errors, dockerLogin, 'chmod 700 "${DOCKER_CONFIG}"'); + requireRunContains(errors, dockerLogin, "docker login docker.io"); + requireRunContains(errors, dockerLogin, "--password-stdin"); + requireRunContains(errors, dockerLogin, "continuing with anonymous pulls"); + + const setupNode = namedStep(steps, "Set up Node"); + if (!setupNode) { + errors.push("bedrock-runtime-compatible-anthropic-vitest job missing step: Set up Node"); + } + requireFullShaAction( + errors, + setupNode, + "bedrock-runtime-compatible-anthropic-vitest setup-node", + ); + + const installRootDependencies = requireJobStep( + errors, + jobName, + steps, + "Install root dependencies", + ); + requireRunContains(errors, installRootDependencies, "npm ci --ignore-scripts"); + + const buildCli = requireJobStep(errors, jobName, steps, "Build CLI"); + requireRunContains(errors, buildCli, "npm run build:cli"); + + const runVitest = requireJobStep( + errors, + jobName, + steps, + "Run Bedrock Runtime compatible Anthropic live test", 
+ ); + requireRunContains(errors, runVitest, "npx vitest run --project e2e-scenarios-live"); + requireRunContains( + errors, + runVitest, + "test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts", + ); + requireRunDoesNotContain(errors, runVitest, "${{ inputs."); + + const upload = requireJobStep( + errors, + jobName, + steps, + "Upload Bedrock Runtime compatible Anthropic artifacts", + ); + requireFullShaAction( + errors, + upload, + "bedrock-runtime-compatible-anthropic-vitest upload-artifact", + ); + const uploadWith = asRecord(upload?.with); + if ( + uploadWith.name !== + "e2e-vitest-scenarios-bedrock-runtime-compatible-anthropic-${{ matrix.agent }}" + ) { + errors.push( + "bedrock-runtime-compatible-anthropic-vitest artifact upload name must include matrix.agent", + ); + } + const uploadPath = stringValue(uploadWith.path); + requireUploadPathContains( + errors, + uploadPath, + "e2e-artifacts/vitest/bedrock-runtime-compatible-anthropic/${{ matrix.agent }}/", + ); + if (uploadWith["include-hidden-files"] !== false) { + errors.push( + "bedrock-runtime-compatible-anthropic-vitest artifact upload must set include-hidden-files: false", + ); + } + if (uploadWith["if-no-files-found"] !== "ignore") { + errors.push( + "bedrock-runtime-compatible-anthropic-vitest artifact upload must ignore missing fixture artifacts", + ); + } + if (uploadWith["retention-days"] !== 14) { + errors.push( + "bedrock-runtime-compatible-anthropic-vitest artifact upload retention-days must be 14", + ); + } + + const cleanup = requireJobStep(errors, jobName, steps, "Clean up Docker auth"); + if (cleanup?.if !== "always()") { + errors.push("bedrock-runtime-compatible-anthropic-vitest Docker auth cleanup must always run"); + } + requireRunContains(errors, cleanup, "docker logout docker.io"); + requireRunContains(errors, cleanup, 'rm -rf "${DOCKER_CONFIG}"'); +} + export function validateE2eVitestScenariosWorkflowBoundary( workflowPath = DEFAULT_VITEST_WORKFLOW_PATH, ): string[] { @@ 
-2133,6 +2367,7 @@ export function validateE2eVitestScenariosWorkflowBoundary( "issue-4434-tui-unreachable-inference", ); validateModelRouterProviderRoutedInferenceVitestJob(errors, jobs); + validateBedrockRuntimeCompatibleAnthropicVitestJob(errors, jobs); const reportToPr = asRecord(jobs["report-to-pr"]); if (Object.keys(reportToPr).length === 0) { From 26d462f102a355742312083786f71a653c753caa Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 12:23:17 -0700 Subject: [PATCH 3/8] test(e2e): harden Bedrock Runtime migration guard Signed-off-by: Carlos Villela --- ...drock-runtime-compatible-anthropic.test.ts | 96 +++++++++++++++---- 1 file changed, 79 insertions(+), 17 deletions(-) diff --git a/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts b/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts index 66b3933935..2377e2b5cd 100644 --- a/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts +++ b/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0 -import { spawn } from "node:child_process"; +import { spawn, spawnSync } from "node:child_process"; import { randomUUID } from "node:crypto"; import fs from "node:fs"; import * as http2 from "node:http2"; @@ -500,7 +500,7 @@ function stopBedrockAdapterBestEffort(home: string): void { } if (fs.existsSync(pidFile)) { const pid = Number(fs.readFileSync(pidFile, "utf8").trim()); - if (Number.isInteger(pid) && pid > 0) { + if (Number.isInteger(pid) && pid > 0 && isBedrockAdapterProcess(pid)) { try { process.kill(pid, "SIGTERM"); } catch { @@ -515,9 +515,26 @@ function stopBedrockAdapterBestEffort(home: string): void { } } +function isBedrockAdapterProcess(pid: number): boolean { + const expectedScript = "bedrock-runtime-adapter.js"; + try { + const cmdline = fs.readFileSync(`/proc/${pid}/cmdline`, "utf8").replaceAll("\0", " "); + if (cmdline.includes(expectedScript)) return true; + } catch { + // Fall back to ps on platforms without procfs. + } + + const ps = spawnSync("ps", ["-p", String(pid), "-o", "args="], { + encoding: "utf8", + stdio: ["ignore", "pipe", "ignore"], + }); + return ps.status === 0 && ps.stdout.includes(expectedScript); +} + async function restoreHostsFile( host: HostCliClient, backupPath: string, + backupDir: string, home: string, ): Promise { await bestEffort(() => @@ -528,13 +545,13 @@ async function restoreHostsFile( }), ); await bestEffort(() => - host.command("sudo", ["rm", "-f", backupPath], { + host.command("sudo", ["rm", "-rf", backupDir], { artifactName: "remove-etc-hosts-backup-bedrock-runtime", env: testEnv(home), timeoutMs: 30_000, }), ); - await bestEffort(() => fs.rmSync(backupPath, { force: true })); + await bestEffort(() => fs.rmSync(backupDir, { recursive: true, force: true })); } async function mapBedrockHostToLoopback( @@ -650,7 +667,7 @@ async function assertOnboardIdentity(home: string, agent: AgentName): Promise; if (session.sandboxName !== SANDBOX_NAME) errors.push(`session 
sandboxName=${String(session.sandboxName)}`); - if (session.agent !== undefined && session.agent !== agent) + if (session.agent != null && session.agent !== agent) errors.push(`session agent=${String(session.agent)}`); if (session.provider !== expectedProvider) errors.push(`session provider=${String(session.provider)}`); @@ -667,7 +684,7 @@ async function assertOnboardIdentity(home: string, agent: AgentName): Promise") { + expectExitZero( + await sandbox.execShell(SANDBOX_NAME, hermesConfigApiKeyProbeScript(), { + artifactName: "hermes-config-api-key-probe-bedrock-runtime", + env: testEnv(home), + timeoutMs: SANDBOX_TIMEOUT_MS, + }), + "Hermes config api_key uses sk- placeholder", + ); + } else if (!model.api_key?.startsWith("sk-")) { + errors.push(`model.api_key=${String(model.api_key)}`); + } if (/^models:\s*\n(?:[ \t].*\n)*?[ \t]+providers:/m.test(config.stdout)) { errors.push("OpenClaw-style models.providers block present"); } @@ -901,6 +929,39 @@ async function assertHermesConfig(sandbox: SandboxClient, home: string): Promise expect(errors).toEqual([]); } +function hermesConfigApiKeyProbeScript(): ReturnType { + return trustedSandboxShellScript(` +python3 - <<'PY' +import re +from pathlib import Path + +text = Path("/sandbox/.hermes/config.yaml").read_text(encoding="utf-8") +model = {} +in_model = False +for line in text.splitlines(): + if re.match(r"^model:\\s*$", line): + in_model = True + continue + if in_model and re.match(r"^[A-Za-z0-9_-]+:", line): + break + if not in_model: + continue + match = re.match(r"^\\s+([A-Za-z0-9_-]+):\\s*(.*?)\\s*$", line) + if match: + value = match.group(2).strip() + if len(value) >= 2 and value[0] == value[-1] and value[0] in "\\"'": + value = value[1:-1] + model[match.group(1)] = value + +api_key = model.get("api_key") +if not isinstance(api_key, str) or not api_key.startswith("sk-"): + print("model.api_key missing sk- placeholder") + raise SystemExit(1) +print("OK") +PY +`); +} + async function 
assertSandboxInference(sandbox: SandboxClient, home: string): Promise { const payload = JSON.stringify({ model: BEDROCK_MODEL, @@ -1056,14 +1117,17 @@ function isPreContractEndpointValidationRateLimit(options: { /NVIDIA Endpoints endpoint validation failed|endpoint validation failed|failed to verify inference endpoint|Chat Completions API validation/i.test( text, ); - const rateLimited = - /HTTP 429|\b429\b|Too Many Requests|rate[- ]?limit|quota|temporarily unavailable|timed? out|timeout/i.test( - text, - ); + const explicitRateLimit = /HTTP 429|\b429\b|Too Many Requests/i.test(text); + const transientProviderFailure = + explicitRateLimit || + /rate[- ]?limit|quota|temporarily unavailable|timed? out|timeout/i.test(text); const sanitizedNvidiaValidation = /NVIDIA Endpoints endpoint validation failed/i.test(text) && /Validation details were omitted to avoid exposing credentials/i.test(text); - return endpointValidation && (rateLimited || sanitizedNvidiaValidation); + return ( + endpointValidation && + (explicitRateLimit || (sanitizedNvidiaValidation && transientProviderFailure)) + ); } async function skipPreContractEndpointValidationRateLimit(options: { @@ -1160,10 +1224,8 @@ RUN_BEDROCK_TEST( validateSandboxName(SANDBOX_NAME); const home = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-bedrock-runtime-home-")); - const hostsBackup = path.join( - os.tmpdir(), - `nemoclaw-bedrock-hosts-${process.pid}-${Date.now()}`, - ); + const hostsBackupDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-bedrock-hosts-")); + const hostsBackup = path.join(hostsBackupDir, "hosts"); let mock: MockBedrockRuntime | undefined; let onboarding: RawRunResult | undefined; @@ -1174,7 +1236,7 @@ RUN_BEDROCK_TEST( cleanupSandboxState(host, home), ); cleanup.add("restore /etc/hosts after Bedrock Runtime mapping", () => - restoreHostsFile(host, hostsBackup, home), + restoreHostsFile(host, hostsBackup, hostsBackupDir, home), ); cleanup.add("stop Bedrock Runtime adapter", () => 
stopBedrockAdapterBestEffort(home)); cleanup.add("stop fake Bedrock Runtime endpoint", async () => { From 7ad3069884c81fc466a67fa15eb56c410457a5e4 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 12:34:02 -0700 Subject: [PATCH 4/8] test(e2e): avoid newline sandbox commands in Bedrock scenario Signed-off-by: Carlos Villela --- ...drock-runtime-compatible-anthropic.test.ts | 122 +++++++----------- 1 file changed, 48 insertions(+), 74 deletions(-) diff --git a/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts b/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts index 2377e2b5cd..3207137ca5 100644 --- a/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts +++ b/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts @@ -104,7 +104,7 @@ function expectExitZero(result: CommandText & { exitCode: number | null }, label } function isMissingSandboxCleanupOutput(text: string): boolean { - return /Sandbox '.+' does not exist|Run 'nemoclaw onboard' to create one|sandbox .* not found|no such sandbox/i.test( + return /Sandbox '.+' does not exist|Run 'nemoclaw onboard' to create one|sandbox (?:.* )?not found|no such sandbox/i.test( text, ); } @@ -113,6 +113,11 @@ function shellQuote(value: string): string { return `'${value.replaceAll("'", "'\\''")}'`; } +function sandboxShellArgs(script: string): string[] { + const encoded = Buffer.from(script, "utf8").toString("base64"); + return ["sh", "-lc", `printf %s ${shellQuote(encoded)} | base64 -d | sh`]; +} + function assertAgent(value: string): asserts value is AgentName { if (value !== "openclaw" && value !== "hermes") { throw new Error(`NEMOCLAW_AGENT must be openclaw or hermes, got ${value}`); @@ -835,48 +840,37 @@ function parseOpenClawAgentText(raw: string): string { } async function assertOpenClawConfig(sandbox: SandboxClient, home: string): Promise { - const script = trustedSandboxShellScript(` -python3 - ${shellQuote(BEDROCK_MODEL)} <<'PY' -import 
json -import sys - -model = sys.argv[1] -cfg = json.load(open("/sandbox/.openclaw/openclaw.json", encoding="utf-8")) -errors = [] -providers = cfg.get("models", {}).get("providers", {}) -inference = providers.get("inference") if isinstance(providers, dict) else None -if sorted(providers.keys()) != ["inference"]: - errors.append("provider keys are %r" % sorted(providers.keys())) -if not isinstance(inference, dict): - errors.append("models.providers.inference is missing") -else: - if inference.get("baseUrl") != "https://inference.local/v1": - errors.append("inference baseUrl is %r" % inference.get("baseUrl")) - if inference.get("apiKey") != "unused": - errors.append("inference apiKey is not the non-secret placeholder") - if inference.get("api") != "openai-completions": - errors.append("inference api is %r" % inference.get("api")) -primary = cfg.get("agents", {}).get("defaults", {}).get("model", {}).get("primary") -if primary != "inference/" + model: - errors.append("primary model is %r" % primary) -print(json.dumps({ - "provider_keys": sorted(providers.keys()) if isinstance(providers, dict) else [], - "inference_base": inference.get("baseUrl") if isinstance(inference, dict) else None, - "inference_api_key": inference.get("apiKey") if isinstance(inference, dict) else None, - "primary": primary, - "errors": errors, -})) -sys.exit(1 if errors else 0) -PY -`); - const output = await sandbox.execShell(SANDBOX_NAME, script, { - artifactName: "openclaw-config-summary-bedrock-runtime", - env: testEnv(home), - timeoutMs: SANDBOX_TIMEOUT_MS, - }); + const output = await sandbox.exec( + SANDBOX_NAME, + ["python3", "-c", OPENCLAW_CONFIG_PROBE, BEDROCK_MODEL], + { + artifactName: "openclaw-config-summary-bedrock-runtime", + env: testEnv(home), + timeoutMs: SANDBOX_TIMEOUT_MS, + }, + ); expectExitZero(output, "OpenClaw config uses only managed inference.local provider"); } +const OPENCLAW_CONFIG_PROBE = [ + "import json,sys", + "model=sys.argv[1]", + 
'cfg=json.load(open("/sandbox/.openclaw/openclaw.json", encoding="utf-8"))', + "errors=[]", + 'providers=cfg.get("models",{}).get("providers",{})', + "provider_keys=sorted(providers.keys()) if isinstance(providers,dict) else []", + 'inference=providers.get("inference") if isinstance(providers,dict) else None', + 'errors.append("provider keys are %r" % provider_keys) if provider_keys != ["inference"] else None', + 'errors.append("models.providers.inference is missing") if not isinstance(inference,dict) else None', + 'errors.append("inference baseUrl is %r" % inference.get("baseUrl")) if isinstance(inference,dict) and inference.get("baseUrl") != "https://inference.local/v1" else None', + 'errors.append("inference apiKey is not the non-secret placeholder") if isinstance(inference,dict) and inference.get("apiKey") != "unused" else None', + 'errors.append("inference api is %r" % inference.get("api")) if isinstance(inference,dict) and inference.get("api") != "openai-completions" else None', + 'primary=cfg.get("agents",{}).get("defaults",{}).get("model",{}).get("primary")', + 'errors.append("primary model is %r" % primary) if primary != "inference/" + model else None', + 'print(json.dumps({"provider_keys":provider_keys,"inference_base":inference.get("baseUrl") if isinstance(inference,dict) else None,"inference_api_key":inference.get("apiKey") if isinstance(inference,dict) else None,"primary":primary,"errors":errors}))', + "sys.exit(1 if errors else 0)", +].join("; "); + async function assertHermesConfig(sandbox: SandboxClient, home: string): Promise { const config = await sandbox.exec(SANDBOX_NAME, ["cat", "/sandbox/.hermes/config.yaml"], { artifactName: "hermes-config-bedrock-runtime", @@ -912,7 +906,7 @@ async function assertHermesConfig(sandbox: SandboxClient, home: string): Promise errors.push(`model.base_url=${String(model.base_url)}`); if (model.api_key === "") { expectExitZero( - await sandbox.execShell(SANDBOX_NAME, hermesConfigApiKeyProbeScript(), { + await 
sandbox.exec(SANDBOX_NAME, ["python3", "-c", HERMES_CONFIG_API_KEY_PROBE], { artifactName: "hermes-config-api-key-probe-bedrock-runtime", env: testEnv(home), timeoutMs: SANDBOX_TIMEOUT_MS, @@ -929,38 +923,18 @@ async function assertHermesConfig(sandbox: SandboxClient, home: string): Promise expect(errors).toEqual([]); } -function hermesConfigApiKeyProbeScript(): ReturnType { - return trustedSandboxShellScript(` -python3 - <<'PY' -import re -from pathlib import Path - -text = Path("/sandbox/.hermes/config.yaml").read_text(encoding="utf-8") -model = {} -in_model = False -for line in text.splitlines(): - if re.match(r"^model:\\s*$", line): - in_model = True - continue - if in_model and re.match(r"^[A-Za-z0-9_-]+:", line): - break - if not in_model: - continue - match = re.match(r"^\\s+([A-Za-z0-9_-]+):\\s*(.*?)\\s*$", line) - if match: - value = match.group(2).strip() - if len(value) >= 2 and value[0] == value[-1] and value[0] in "\\"'": - value = value[1:-1] - model[match.group(1)] = value - -api_key = model.get("api_key") -if not isinstance(api_key, str) or not api_key.startswith("sk-"): - print("model.api_key missing sk- placeholder") - raise SystemExit(1) -print("OK") -PY -`); -} +const HERMES_CONFIG_API_KEY_PROBE = [ + "import pathlib,re,sys", + 'text=pathlib.Path("/sandbox/.hermes/config.yaml").read_text(encoding="utf-8")', + 'section=re.search(r"(?ms)^model:\\s*\\n((?:[ \\t].*\\n)*)", text)', + 'body=section.group(1) if section else ""', + 'match=re.search(r"(?m)^[ \\t]+api_key:\\s*(.*?)\\s*$", body)', + 'value=match.group(1).strip() if match else ""', + 'value=value[1:-1] if len(value)>=2 and value[0]==value[-1] and value[0] in "\\"\'" else value', + 'ok=value.startswith("sk-")', + 'print("OK" if ok else "model.api_key missing sk- placeholder")', + "sys.exit(0 if ok else 1)", +].join("; "); async function assertSandboxInference(sandbox: SandboxClient, home: string): Promise { const payload = JSON.stringify({ @@ -1182,7 +1156,7 @@ async function 
assertNoBedrockLeaks(options: { ]; const snapshot = await runRawCommand( "openshell", - ["sandbox", "exec", "-n", SANDBOX_NAME, "--", "sh", "-lc", SNAPSHOT_SCRIPT], + ["sandbox", "exec", "-n", SANDBOX_NAME, "--", ...sandboxShellArgs(SNAPSHOT_SCRIPT)], { artifactName: "sandbox-snapshot-bedrock-runtime", artifacts: options.artifacts, From 1aedf039db6c0a5e54ba169b1d6d57d463232f16 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 12:44:01 -0700 Subject: [PATCH 5/8] test(e2e): document Bedrock rate-limit skip guard Signed-off-by: Carlos Villela --- ...runtime-compatible-anthropic-rate-limit.ts | 44 +++++++++ ...drock-runtime-compatible-anthropic.test.ts | 47 ++++------ ...me-compatible-anthropic-rate-limit.test.ts | 91 +++++++++++++++++++ 3 files changed, 155 insertions(+), 27 deletions(-) create mode 100644 test/e2e-scenario/live/bedrock-runtime-compatible-anthropic-rate-limit.ts create mode 100644 test/e2e-scenario/support-tests/bedrock-runtime-compatible-anthropic-rate-limit.test.ts diff --git a/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic-rate-limit.ts b/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic-rate-limit.ts new file mode 100644 index 0000000000..3ce6ced6f5 --- /dev/null +++ b/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic-rate-limit.ts @@ -0,0 +1,44 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +export const BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_SKIP_REASON = + "external-provider-validation-unavailable-before-bedrock-runtime-contract"; +export const BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_SOURCE_BOUNDARY = + "external NVIDIA Endpoints provider availability before Bedrock Runtime contract"; +export const BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_INVALID_STATE = + "onboarding failed before any fake Bedrock Runtime Converse traffic because unrelated external provider validation was rate-limited or unavailable"; +export const BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_REMOVAL_CONDITION = + "remove once CI endpoint validation is stable for a release cycle or covered by a hermetic provider-validation fixture"; + +export interface PreContractEndpointValidationEvidence { + readonly onboardingExitCode: number | null; + readonly redactedStdout: string; + readonly redactedStderr: string; + readonly mockConverseCount: number; + readonly mockConverseStreamCount: number; +} + +export function isPreContractEndpointValidationRateLimitEvidence( + evidence: PreContractEndpointValidationEvidence, +): boolean { + if (evidence.onboardingExitCode === 0) return false; + if (evidence.mockConverseCount > 0 || evidence.mockConverseStreamCount > 0) return false; + + const text = [evidence.redactedStdout, evidence.redactedStderr].filter(Boolean).join("\n"); + const endpointValidation = + /NVIDIA Endpoints endpoint validation failed|endpoint validation failed|failed to verify inference endpoint|Chat Completions API validation/i.test( + text, + ); + const explicitRateLimit = /HTTP 429|\b429\b|Too Many Requests/i.test(text); + const transientProviderFailure = + explicitRateLimit || + /rate[- ]?limit|quota|temporarily unavailable|timed? 
out|timeout/i.test(text); + const sanitizedNvidiaValidation = + /NVIDIA Endpoints endpoint validation failed/i.test(text) && + /Validation details were omitted to avoid exposing credentials/i.test(text); + + return ( + endpointValidation && + (explicitRateLimit || (sanitizedNvidiaValidation && transientProviderFailure)) + ); +} diff --git a/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts b/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts index 3207137ca5..e483b9a578 100644 --- a/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts +++ b/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts @@ -20,6 +20,13 @@ import { import { expect, test } from "../fixtures/e2e-test.ts"; import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts"; import { redactString } from "../fixtures/redaction.ts"; +import { + BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_INVALID_STATE, + BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_REMOVAL_CONDITION, + BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_SKIP_REASON, + BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_SOURCE_BOUNDARY, + isPreContractEndpointValidationRateLimitEvidence, +} from "./bedrock-runtime-compatible-anthropic-rate-limit.ts"; // Direct Vitest migration for test/e2e/test-bedrock-runtime-compatible-anthropic.sh. // Keep the same live system boundary: host fake Bedrock Runtime endpoint, @@ -1081,27 +1088,13 @@ function isPreContractEndpointValidationRateLimit(options: { mock: MockBedrockRuntime | undefined; onboarding: RawRunResult; }): boolean { - if (options.onboarding.exitCode === 0) return false; - if ((options.mock?.converseCount ?? 0) > 0 || (options.mock?.streamCount ?? 
0) > 0) { - return false; - } - - const text = redactedResultText(options.onboarding); - const endpointValidation = - /NVIDIA Endpoints endpoint validation failed|endpoint validation failed|failed to verify inference endpoint|Chat Completions API validation/i.test( - text, - ); - const explicitRateLimit = /HTTP 429|\b429\b|Too Many Requests/i.test(text); - const transientProviderFailure = - explicitRateLimit || - /rate[- ]?limit|quota|temporarily unavailable|timed? out|timeout/i.test(text); - const sanitizedNvidiaValidation = - /NVIDIA Endpoints endpoint validation failed/i.test(text) && - /Validation details were omitted to avoid exposing credentials/i.test(text); - return ( - endpointValidation && - (explicitRateLimit || (sanitizedNvidiaValidation && transientProviderFailure)) - ); + return isPreContractEndpointValidationRateLimitEvidence({ + onboardingExitCode: options.onboarding.exitCode, + redactedStdout: options.onboarding.redactedStdout, + redactedStderr: options.onboarding.redactedStderr, + mockConverseCount: options.mock?.converseCount ?? 0, + mockConverseStreamCount: options.mock?.streamCount ?? 
0, + }); } async function skipPreContractEndpointValidationRateLimit(options: { @@ -1114,11 +1107,11 @@ async function skipPreContractEndpointValidationRateLimit(options: { await options.artifacts.writeJson("transient-provider-validation.skip.json", { id: "bedrock-runtime-compatible-anthropic", status: "skipped", - reason: "external-provider-validation-unavailable-before-bedrock-runtime-contract", - sourceBoundary: - "external NVIDIA Endpoints provider availability before Bedrock Runtime contract", - removalCondition: - "remove once CI endpoint validation is stable for a release cycle or covered by a hermetic provider-validation fixture", + reason: BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_SKIP_REASON, + sourceBoundary: BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_SOURCE_BOUNDARY, + invalidState: BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_INVALID_STATE, + removalCondition: BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_REMOVAL_CONDITION, + legacyContractNotExecuted: true, onboardExitCode: options.onboarding.exitCode, onboardSignal: options.onboarding.signal, onboardTimedOut: options.onboarding.timedOut, @@ -1130,7 +1123,7 @@ async function skipPreContractEndpointValidationRateLimit(options: { await options.artifacts.writeJson("scenario-result.json", { id: "bedrock-runtime-compatible-anthropic", status: "skipped", - reason: "external-provider-validation-unavailable-before-bedrock-runtime-contract", + reason: BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_SKIP_REASON, onboardExitCode: options.onboarding.exitCode, }); options.skip( diff --git a/test/e2e-scenario/support-tests/bedrock-runtime-compatible-anthropic-rate-limit.test.ts b/test/e2e-scenario/support-tests/bedrock-runtime-compatible-anthropic-rate-limit.test.ts new file mode 100644 index 0000000000..86d03a280b --- /dev/null +++ b/test/e2e-scenario/support-tests/bedrock-runtime-compatible-anthropic-rate-limit.test.ts @@ -0,0 +1,91 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it } from "vitest"; +import { + BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_INVALID_STATE, + BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_REMOVAL_CONDITION, + BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_SOURCE_BOUNDARY, + isPreContractEndpointValidationRateLimitEvidence, + type PreContractEndpointValidationEvidence, +} from "../live/bedrock-runtime-compatible-anthropic-rate-limit.ts"; + +function evidence( + overrides: Partial = {}, +): PreContractEndpointValidationEvidence { + return { + mockConverseCount: 0, + mockConverseStreamCount: 0, + onboardingExitCode: 1, + redactedStderr: "", + redactedStdout: "", + ...overrides, + }; +} + +describe("Bedrock Runtime pre-contract endpoint-validation skip evidence", () => { + it("classifies explicit HTTP 429 endpoint validation before fake Bedrock traffic", () => { + expect( + isPreContractEndpointValidationRateLimitEvidence( + evidence({ + redactedStderr: "NVIDIA Endpoints endpoint validation failed: HTTP 429 Too Many Requests", + }), + ), + ).toBe(true); + }); + + it("classifies sanitized transient NVIDIA validation before fake Bedrock traffic", () => { + expect( + isPreContractEndpointValidationRateLimitEvidence( + evidence({ + redactedStderr: + "NVIDIA Endpoints endpoint validation failed. Validation details were omitted to avoid exposing credentials. 
Request timed out.", + }), + ), + ).toBe(true); + }); + + it("does not skip once the fake Bedrock contract has begun", () => { + expect( + isPreContractEndpointValidationRateLimitEvidence( + evidence({ + mockConverseCount: 1, + redactedStderr: "NVIDIA Endpoints endpoint validation failed: HTTP 429 Too Many Requests", + }), + ), + ).toBe(false); + }); + + it("does not skip successful onboarding", () => { + expect( + isPreContractEndpointValidationRateLimitEvidence( + evidence({ + onboardingExitCode: 0, + redactedStderr: "NVIDIA Endpoints endpoint validation failed: HTTP 429 Too Many Requests", + }), + ), + ).toBe(false); + }); + + it("does not skip non-transient endpoint-validation failures", () => { + expect( + isPreContractEndpointValidationRateLimitEvidence( + evidence({ + redactedStderr: "NVIDIA Endpoints endpoint validation failed: invalid model", + }), + ), + ).toBe(false); + }); + + it("documents the external source boundary and removal condition", () => { + expect(BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_SOURCE_BOUNDARY).toContain( + "external NVIDIA Endpoints", + ); + expect(BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_INVALID_STATE).toContain( + "before any fake Bedrock Runtime Converse traffic", + ); + expect(BEDROCK_PRE_CONTRACT_ENDPOINT_VALIDATION_REMOVAL_CONDITION).toContain( + "hermetic provider-validation fixture", + ); + }); +}); From e8d659e45f5e79740f4e001bd5d9a9fd10066ad7 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 13:49:58 -0700 Subject: [PATCH 6/8] test(e2e): close Bedrock mock sessions Signed-off-by: Carlos Villela --- ...drock-runtime-compatible-anthropic.test.ts | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts b/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts index e483b9a578..4d78473fb7 100644 --- a/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts +++ 
b/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts @@ -320,6 +320,7 @@ function sendConverseStream(stream: http2.ServerHttp2Stream, codec: EventStreamC delta: { text: "PONG" }, }), ); + stream.write(eventMessage(codec, "contentBlockStop", { contentBlockIndex: 0 })); stream.write(eventMessage(codec, "messageStop", { stopReason: "end_turn" })); stream.write( eventMessage(codec, "metadata", { @@ -364,6 +365,12 @@ async function startFakeBedrockRuntimeMock(options: { logs.push(line); }; const server = http2.createServer(); + const sessions = new Set(); + + server.on("session", (session) => { + sessions.add(session); + session.once("close", () => sessions.delete(session)); + }); server.on("stream", (rawStream, headers) => { const stream = rawStream as http2.ServerHttp2Stream; @@ -438,7 +445,18 @@ async function startFakeBedrockRuntimeMock(options: { }, close: () => new Promise((resolve) => { - server.close(() => resolve()); + let done = false; + const finish = () => { + if (done) return; + done = true; + resolve(); + }; + server.close(finish); + for (const session of sessions) session.close(); + setTimeout(() => { + for (const session of sessions) session.destroy(); + finish(); + }, 1_000).unref(); }), }; } From 211a8e96d491160f12fc434a48d1c38e45d11bcc Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 13:54:39 -0700 Subject: [PATCH 7/8] test(e2e): fix Bedrock stream mock encoding Signed-off-by: Carlos Villela --- ...drock-runtime-compatible-anthropic.test.ts | 56 ++++++++++--------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts b/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts index 4d78473fb7..44ee3733ba 100644 --- a/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts +++ b/test/e2e-scenario/live/bedrock-runtime-compatible-anthropic.test.ts @@ -60,8 +60,8 @@ type EventStreamCodec = { encode(message: { headers: 
Record; body: Uint8Array }): Uint8Array; }; type EventStreamCodecConstructor = new ( - encoder: (input: string) => Uint8Array, - decoder: (input: Uint8Array) => string, + toUtf8: (input: Uint8Array) => string, + fromUtf8: (input: string) => Uint8Array, ) => EventStreamCodec; interface RawRunResult { @@ -251,8 +251,8 @@ function loadEventStreamCodec(): EventStreamCodec { EventStreamCodec: EventStreamCodecConstructor; }; return new loaded.EventStreamCodec( - (input) => Buffer.from(input, "utf8"), (input) => Buffer.from(input).toString("utf8"), + (input) => Buffer.from(input, "utf8"), ); } @@ -312,6 +312,7 @@ function sendConverseStream(stream: http2.ServerHttp2Stream, codec: EventStreamC stream.respond({ [http2.constants.HTTP2_HEADER_STATUS]: 200, [http2.constants.HTTP2_HEADER_CONTENT_TYPE]: "application/vnd.amazon.eventstream", + "x-amzn-bedrock-content-type": "application/json", }); stream.write(eventMessage(codec, "messageStart", { role: "assistant" })); stream.write( @@ -383,33 +384,38 @@ async function startFakeBedrockRuntimeMock(options: { chunks.push(Buffer.from(chunk)); }); stream.on("end", () => { - const parsed = parseModelPath(pathname); - if (method !== "POST" || !parsed) { - sendHttp2Json(stream, 404, { message: "not found" }); - return; - } + try { + const parsed = parseModelPath(pathname); + if (method !== "POST" || !parsed) { + sendHttp2Json(stream, 404, { message: "not found" }); + return; + } - const opLabel = parsed.operation === "converse-stream" ? "converse-stream" : "converse"; - if (auth !== `Bearer ${options.expectedBearer}`) { - record(`POST /model/${opLabel} auth=missing`); - sendHttp2Json(stream, 401, { message: "missing bearer credential" }); - return; - } + const opLabel = parsed.operation === "converse-stream" ? 
"converse-stream" : "converse"; + if (auth !== `Bearer ${options.expectedBearer}`) { + record(`POST /model/${opLabel} auth=missing`); + sendHttp2Json(stream, 401, { message: "missing bearer credential" }); + return; + } - record(`POST /model/${opLabel} auth=ok`); - if (parsed.operation === "converse-stream") streamCount += 1; - else converseCount += 1; + record(`POST /model/${opLabel} auth=ok`); + if (parsed.operation === "converse-stream") streamCount += 1; + else converseCount += 1; - if (parsed.model !== options.expectedModel) { - sendHttp2Json(stream, 400, { message: "unexpected model id" }); - return; - } + if (parsed.model !== options.expectedModel) { + sendHttp2Json(stream, 400, { message: "unexpected model id" }); + return; + } - if (parsed.operation === "converse-stream") { - sendConverseStream(stream, codec); - return; + if (parsed.operation === "converse-stream") { + sendConverseStream(stream, codec); + return; + } + sendHttp2Json(stream, 200, conversePayload()); + } catch (error) { + record(`stream_handler_error=${error instanceof Error ? error.message : String(error)}`); + stream.destroy(error instanceof Error ? error : undefined); } - sendHttp2Json(stream, 200, conversePayload()); }); }); server.on("sessionError", (err) => { From caff2abea1acd65115fa6f84ac3a24a7a8aed5f5 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Fri, 12 Jun 2026 14:10:27 -0700 Subject: [PATCH 8/8] test(e2e): keep scenario job inventory data-only Signed-off-by: Carlos Villela --- tools/e2e-scenarios/free-standing-jobs.env | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/e2e-scenarios/free-standing-jobs.env b/tools/e2e-scenarios/free-standing-jobs.env index f758c02950..302a2e7f5b 100644 --- a/tools/e2e-scenarios/free-standing-jobs.env +++ b/tools/e2e-scenarios/free-standing-jobs.env @@ -1,5 +1,3 @@ -# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# SPDX-License-Identifier: Apache-2.0 allowed_jobs=openshell-version-pin-vitest,onboard-negative-paths-vitest,skill-agent-vitest,inference-routing-vitest,credential-migration-vitest,runtime-overrides-vitest,hermes-e2e-vitest,hermes-root-entrypoint-smoke-vitest,network-policy-vitest,shields-config-vitest,rebuild-openclaw-vitest,sandbox-rebuild-vitest,token-rotation-vitest,launchable-smoke-vitest,openclaw-tui-chat-correlation-vitest,gateway-guard-recovery,double-onboard-vitest,issue-4434-tui-unreachable-inference-vitest,model-router-provider-routed-inference-vitest,credential-sanitization-vitest,sandbox-survival-vitest,bedrock-runtime-compatible-anthropic-vitest free_standing_scenarios_csv=openshell-version-pin,onboard-negative-paths,skill-agent,inference-routing,runtime-overrides,hermes-e2e,hermes-root-entrypoint-smoke,network-policy,shields-config,rebuild-openclaw,sandbox-rebuild,token-rotation,openclaw-tui-chat-correlation,double-onboard,issue-4434-tui-unreachable-inference,model-router-provider-routed-inference,credential-sanitization,sandbox-survival,bedrock-runtime-compatible-anthropic 
free_standing_scenario_jobs_csv=openshell-version-pin:openshell-version-pin-vitest,onboard-negative-paths:onboard-negative-paths-vitest,skill-agent:skill-agent-vitest,inference-routing:inference-routing-vitest,runtime-overrides:runtime-overrides-vitest,hermes-e2e:hermes-e2e-vitest,hermes-root-entrypoint-smoke:hermes-root-entrypoint-smoke-vitest,network-policy:network-policy-vitest,shields-config:shields-config-vitest,rebuild-openclaw:rebuild-openclaw-vitest,sandbox-rebuild:sandbox-rebuild-vitest,token-rotation:token-rotation-vitest,openclaw-tui-chat-correlation:openclaw-tui-chat-correlation-vitest,double-onboard:double-onboard-vitest,issue-4434-tui-unreachable-inference:issue-4434-tui-unreachable-inference-vitest,model-router-provider-routed-inference:model-router-provider-routed-inference-vitest,credential-sanitization:credential-sanitization-vitest,sandbox-survival:sandbox-survival-vitest,bedrock-runtime-compatible-anthropic:bedrock-runtime-compatible-anthropic-vitest