From 974943ef661f1d14237932a47c18ee8d3bcc6537 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 14:19:21 -0700
Subject: [PATCH 01/11] fix(inference): use NVIDIA inference endpoint

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 .github/workflows/brev-nightly-e2e.yaml       |   2 +-
 .github/workflows/e2e-branch-validation.yaml  |   6 +-
 .github/workflows/e2e-script.yaml             |   6 +-
 .github/workflows/e2e-vitest-scenarios.yaml   |  42 +++---
 .github/workflows/macos-e2e.yaml              |   2 +-
 .github/workflows/nightly-e2e.yaml            |  68 +++++-----
 .github/workflows/regression-e2e.yaml         |   4 +-
 .github/workflows/wsl-e2e.yaml                |   4 +-
 agents/hermes/policy-additions.yaml           |  10 --
 agents/hermes/policy-permissive.yaml          |   5 -
 agents/openclaw/policy-permissive.yaml        |   5 -
 docs/_components/StarterPromptButton.tsx      |   4 +-
 docs/about/release-notes.mdx                  |   4 +-
 docs/get-started/quickstart-hermes.mdx        |   2 +-
 docs/get-started/quickstart.mdx               |   6 +-
 docs/inference/inference-options.mdx          |  10 +-
 .../approve-network-requests.mdx              |   2 +-
 docs/reference/network-policies.mdx           |   2 +-
 docs/reference/troubleshooting.mdx            |   4 +-
 docs/security/best-practices.mdx              |   2 +-
 docs/security/credential-storage.mdx          |   6 +-
 nemoclaw-blueprint/blueprint.yaml             |   6 +-
 .../policies/openclaw-sandbox-permissive.yaml |   5 -
 .../policies/openclaw-sandbox.yaml            |  10 --
 nemoclaw-blueprint/router/pool-config.yaml    |   4 +-
 .../scripts/nemotron-inference-fix.js         |   2 +-
 nemoclaw/src/banner.test.ts                   |   2 +-
 nemoclaw/src/blueprint/runner.test.ts         |  26 ++--
 nemoclaw/src/blueprint/ssrf.test.ts           |   2 +-
 nemoclaw/src/commands/config-show.test.ts     |  24 ++--
 nemoclaw/src/commands/slash.test.ts           |   8 +-
 nemoclaw/src/index.ts                         |   4 +-
 nemoclaw/src/lib/subprocess-env.ts            |   2 +-
 nemoclaw/src/onboard/config.test.ts           |   2 +-
 nemoclaw/src/register.test.ts                 |   4 +-
 nemoclaw/src/security/secret-scanner.test.ts  |   2 +-
 scripts/checks/direct-credential-env.ts       |   3 +-
 scripts/install.sh                            |   2 +-
 scripts/nemoclaw-start.sh                     |   6 +-
 scripts/smoke-macos-install.sh                |   4 +-
 scripts/validate-configs.ts                   |   2 +-
 scripts/walkthrough.sh                        |  10 +-
 src/commands/sandbox/config/rotate-token.ts   |   2 +-
 src/lib/actions/dev/npm-link-or-shim.test.ts  |   4 +-
 src/lib/actions/sandbox/status.test.ts        |   4 +-
 src/lib/credentials/store.ts                  |  29 ++++-
 src/lib/deploy/index.test.ts                  |  12 +-
 src/lib/deploy/index.ts                       |   8 +-
 src/lib/diagnostics/debug.test.ts             |   2 +-
 src/lib/diagnostics/debug.ts                  |   4 +-
 src/lib/inference/health.test.ts              |   5 +-
 src/lib/inference/health.ts                   |   2 +-
 src/lib/inference/model-prompts.test.ts       |   6 +-
 src/lib/inference/model-prompts.ts            |   4 +-
 src/lib/inference/nim.test.ts                 |  22 ++--
 src/lib/inference/nim.ts                      |   5 +-
 src/lib/inference/onboard-probes.test.ts      |  14 +-
 src/lib/inference/provider-models.ts          |   2 +-
 src/lib/messaging-channel-config.test.ts      |   2 +-
 src/lib/onboard.ts                            |  13 +-
 src/lib/onboard/bridge-dns-preflight.ts       |   4 +-
 src/lib/onboard/docker-gpu-patch.test.ts      |  10 +-
 src/lib/onboard/host-dns-preflight.test.ts    |  44 +++----
 src/lib/onboard/initial-policy.test.ts        |   2 +-
 .../onboard/machine/core-flow-phases.test.ts  |   4 +-
 .../flow-phases/provider-sandbox.test.ts      |   2 +-
 .../machine/handlers/finalization.test.ts     |   8 +-
 .../onboard/machine/handlers/policies.test.ts |   4 +-
 .../handlers/provider-inference.test.ts       |  14 +-
 src/lib/onboard/machine/runtime.test.ts       |   8 +-
 src/lib/onboard/missing-credential-hints.ts   |   4 +-
 src/lib/onboard/model-router.ts               |   2 +-
 src/lib/onboard/preflight.ts                  |   4 +-
 src/lib/onboard/providers.test.ts             |  36 ++++--
 src/lib/onboard/providers.ts                  |   4 +-
 src/lib/onboard/routed-inference.test.ts      |  18 +--
 src/lib/onboard/routed-inference.ts           |   4 +-
 src/lib/onboard/summary.test.ts               |   2 +-
 src/lib/onboard/validation-recovery-prompt.ts |   2 +-
 src/lib/security/credential-filter.test.ts    |   2 +-
 src/lib/security/redact.test.ts               |   2 +-
 src/lib/security/redact.ts                    |   7 +-
 src/lib/state/onboard-session.test.ts         |  16 +--
 src/lib/state/onboard-step-mutation.test.ts   |   7 +-
 src/lib/subprocess-env.ts                     |   2 +-
 src/lib/trace.test.ts                         |   4 +-
 src/lib/validation.test.ts                    |   2 +-
 src/lib/validation.ts                         |   7 +-
 test/canonical-credential-resolution.test.ts  |  66 ++++++++--
 test/check-env-var-docs.test.ts               |   2 +-
 test/cli/dispatch-basics.test.ts              |   4 +-
 test/config-set-nested-ssrf.test.ts           |   9 +-
 test/credential-exposure.test.ts              |  20 +--
 test/credentials-cli-command.test.ts          |   4 +-
 test/credentials-shim.test.ts                 |  20 +--
 test/credentials.test.ts                      | 120 ++++++++++--------
 .../4851-ultra-toolless-validation.md         |  20 +--
 .../fixtures/phases/onboarding.ts             |   8 +-
 .../live/credential-migration.test.ts         |  12 +-
 .../live/credential-sanitization.test.ts      |  10 +-
 .../live/gateway-guard-recovery.test.ts       |   2 +-
 test/e2e-scenario/live/hermes-e2e.test.ts     |  18 +--
 .../live/inference-routing.test.ts            |  23 ++--
 ...sue-4434-tui-unreachable-inference.test.ts |  12 +-
 .../live/launchable-smoke.test.ts             |  16 ++-
 ...l-router-provider-routed-inference.test.ts |  10 +-
 test/e2e-scenario/live/network-policy.test.ts |  10 +-
 .../live/onboard-negative-paths.test.ts       |   6 +-
 test/e2e-scenario/live/onboard-resume.test.ts |  22 ++--
 .../openclaw-tui-chat-correlation.test.ts     |   2 +-
 .../live/rebuild-openclaw.test.ts             |  14 +-
 .../live/sandbox-operations.test.ts           |   6 +-
 .../e2e-scenario/live/sandbox-rebuild.test.ts |   8 +-
 .../live/sandbox-survival.test.ts             |  12 +-
 test/e2e-scenario/live/shields-config.test.ts |   8 +-
 test/e2e-scenario/live/skill-agent.test.ts    |  12 +-
 test/e2e-scenario/live/token-rotation.test.ts |   4 +-
 .../live/whatsapp-qr-compact.test.ts          |   2 +-
 .../manifests/hermes-nvidia-discord.yaml      |   2 +-
 .../manifests/hermes-nvidia-slack.yaml        |   2 +-
 .../e2e-scenario/manifests/hermes-nvidia.yaml |   2 +-
 .../manifests/openclaw-nvidia-brave.yaml      |   2 +-
 .../openclaw-nvidia-brev-launchable.yaml      |   2 +-
 .../openclaw-nvidia-custom-policies.yaml      |   2 +-
 .../manifests/openclaw-nvidia-discord.yaml    |   2 +-
 ...penclaw-nvidia-double-provider-switch.yaml |   2 +-
 .../openclaw-nvidia-double-same-provider.yaml |   2 +-
 ...openclaw-nvidia-gateway-port-conflict.yaml |   2 +-
 .../openclaw-nvidia-invalid-key.yaml          |   2 +-
 .../manifests/openclaw-nvidia-macos.yaml      |   2 +-
 .../openclaw-nvidia-no-docker-negative.yaml   |   2 +-
 .../openclaw-nvidia-post-reboot-recovery.yaml |   2 +-
 .../manifests/openclaw-nvidia-rebuild.yaml    |   2 +-
 .../manifests/openclaw-nvidia-repair.yaml     |   2 +-
 .../manifests/openclaw-nvidia-resume.yaml     |   2 +-
 .../manifests/openclaw-nvidia-slack.yaml      |   2 +-
 .../manifests/openclaw-nvidia-telegram.yaml   |   2 +-
 .../openclaw-nvidia-token-rotation.yaml       |   2 +-
 .../manifests/openclaw-nvidia-wsl.yaml        |   2 +-
 .../manifests/openclaw-nvidia.yaml            |   2 +-
 .../scenarios/scenarios/baseline.ts           |  44 +++----
 test/e2e-scenario/scenarios/types.ts          |   2 +-
 .../support-tests/docker-probe.test.ts        |   8 +-
 .../support-tests/e2e-fixture-context.test.ts |   2 +-
 .../support-tests/e2e-manifests.test.ts       |   2 +-
 .../e2e-phase-environment.test.ts             |  12 +-
 .../e2e-phase-onboarding.test.ts              |  40 +++---
 .../e2e-phase-state-validation.test.ts        |  10 +-
 .../support-tests/e2e-scenario-matrix.test.ts |   4 +-
 .../e2e-scenarios-workflow.test.ts            |  30 ++---
 .../network-policy-transient-provider.test.ts |   2 +-
 test/e2e-script-workflow.test.ts              |   4 +-
 test/e2e/brev-e2e.test.ts                     |   6 +-
 .../expect-interactive-install.sh             |   4 +-
 .../features/skill/add-sandbox-skill.sh       |   2 +-
 .../skill/verify-sandbox-skill-via-agent.sh   |   8 +-
 .../test-port8080-conflict.sh                 |   4 +-
 test/e2e/test-agent-turn-latency-e2e.sh       |   8 +-
 ...st-bedrock-runtime-compatible-anthropic.sh |   2 +-
 test/e2e/test-brave-search-e2e.sh             |   4 +-
 test/e2e/test-channels-add-remove.sh          |  32 ++---
 test/e2e/test-channels-stop-start.sh          |  10 +-
 test/e2e/test-cloud-inference-e2e.sh          |  10 +-
 test/e2e/test-cloud-onboard-e2e.sh            |  20 +--
 test/e2e/test-common-egress-agent-e2e.sh      |   8 +-
 test/e2e/test-credential-migration.sh         |  20 +--
 test/e2e/test-credential-sanitization.sh      |  18 +--
 ...test-cron-preflight-inference-local-e2e.sh |  12 +-
 test/e2e/test-device-auth-health.sh           |  12 +-
 test/e2e/test-diagnostics.sh                  |  26 ++--
 test/e2e/test-double-onboard.sh               |   2 +-
 test/e2e/test-full-e2e.sh                     |  26 ++--
 test/e2e/test-hermes-discord-e2e.sh           |  30 ++---
 test/e2e/test-hermes-e2e.sh                   |  26 ++--
 test/e2e/test-hermes-inference-switch.sh      |  11 +-
 test/e2e/test-hermes-slack-e2e.sh             |  10 +-
 test/e2e/test-inference-routing.sh            |  17 +--
 .../test-issue-2478-crash-loop-recovery.sh    |  14 +-
 ...st-issue-4434-tui-unreachable-inference.sh |  10 +-
 .../test-issue-4462-scope-upgrade-approval.sh |   8 +-
 test/e2e/test-kimi-inference-compat.sh        |   2 +-
 test/e2e/test-launchable-smoke.sh             |  28 ++--
 test/e2e/test-messaging-providers.sh          |  12 +-
 ...-model-router-provider-routed-inference.sh |  12 +-
 test/e2e/test-network-policy.sh               |  10 +-
 test/e2e/test-onboard-negative-paths.sh       |  18 +--
 test/e2e/test-onboard-repair.sh               |  20 +--
 test/e2e/test-onboard-resume.sh               |  24 ++--
 test/e2e/test-openclaw-discord-pairing.sh     |  10 +-
 test/e2e/test-openclaw-inference-switch.sh    |   8 +-
 .../e2e/test-openclaw-plugin-runtime-exdev.sh |  12 +-
 test/e2e/test-openclaw-skill-cli-e2e.sh       |  10 +-
 test/e2e/test-openclaw-slack-pairing.sh       |  10 +-
 test/e2e/test-overlayfs-autofix.sh            |  14 +-
 test/e2e/test-rebuild-hermes.sh               |   6 +-
 test/e2e/test-rebuild-openclaw.sh             |   6 +-
 test/e2e/test-sandbox-operations.sh           |   2 +-
 test/e2e/test-sandbox-rebuild.sh              |  10 +-
 test/e2e/test-sandbox-survival.sh             |  20 +--
 test/e2e/test-sessions-agents-cli.sh          |  10 +-
 test/e2e/test-shields-config.sh               |  10 +-
 test/e2e/test-skill-agent-e2e.sh              |  12 +-
 test/e2e/test-snapshot-commands.sh            |   6 +-
 test/e2e/test-state-backup-restore.sh         |  10 +-
 test/e2e/test-telegram-injection.sh           |  36 +++---
 test/e2e/test-token-rotation.sh               |   4 +-
 test/e2e/test-tunnel-lifecycle.sh             |  10 +-
 test/e2e/test-upgrade-stale-sandbox.sh        |   4 +-
 test/gateway-state-reconcile-2276.test.ts     |   2 +-
 test/generate-openclaw-config.test.ts         |   4 +-
 test/helpers/onboard-final-flow-phases.ts     |   2 +-
 test/host-artifact-cleanup.test.ts            |   2 +-
 test/nemoclaw-start.test.ts                   |  14 +-
 test/nemotron-inference-fix.test.ts           |   2 +-
 test/no-direct-credential-env.test.ts         |  24 ++--
 test/ollama-proxy-recovery.test.ts            |   4 +-
 test/onboard-messaging.test.ts                |   4 +-
 test/onboard-model-router.test.ts             |  24 ++--
 test/onboard-selection-vllm.test.ts           |   4 +-
 test/onboard-selection.test.ts                |  46 +++----
 test/onboard.test.ts                          |   8 +-
 test/openclaw-config-snapshot.test.ts         |   2 +-
 test/rebuild-credential-hydration.test.ts     |   6 +-
 test/rebuild-credential-preflight.test.ts     |  38 +++---
 test/rebuild-shields-auto-unlock.test.ts      |   4 +-
 test/rebuild-stale-recovery.test.ts           |   2 +-
 test/regression-e2e-workflow.test.ts          |   5 +-
 test/runner.test.ts                           |  10 +-
 test/secret-redaction.test.ts                 |   6 +-
 test/smoke-macos-install.test.ts              |  16 +--
 test/validate-blueprint.test.ts               |  21 ++-
 test/validate-config-schemas.test.ts          |   2 +-
 test/validate-configs-dangerous-hosts.test.ts |   6 +-
 tools/e2e-scenarios/workflow-boundary.mts     |  96 +++++++-------
 234 files changed, 1264 insertions(+), 1163 deletions(-)

diff --git a/.github/workflows/brev-nightly-e2e.yaml b/.github/workflows/brev-nightly-e2e.yaml
index a98d8666b8..4b3c1093d7 100644
--- a/.github/workflows/brev-nightly-e2e.yaml
+++ b/.github/workflows/brev-nightly-e2e.yaml
@@ -49,4 +49,4 @@ jobs:
     secrets:
       BREV_API_KEY: ${{ secrets.BREV_API_KEY }}
       BREV_ORG_ID: ${{ secrets.BREV_ORG_ID }}
-      NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+      NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
diff --git a/.github/workflows/e2e-branch-validation.yaml b/.github/workflows/e2e-branch-validation.yaml
index 4524405a2b..5110a6a527 100644
--- a/.github/workflows/e2e-branch-validation.yaml
+++ b/.github/workflows/e2e-branch-validation.yaml
@@ -48,7 +48,7 @@ name: E2E / Branch Validation
 #   all                      — Runs credential-sanitization + telegram-injection (NOT full,
 #                              which destroys the sandbox the security tests need).
 #
-# Required secrets: BREV_API_KEY + BREV_ORG_ID (or legacy BREV_API_TOKEN), NVIDIA_API_KEY
+# Required secrets: BREV_API_KEY + BREV_ORG_ID (or legacy BREV_API_TOKEN), NVIDIA_INFERENCE_API_KEY
 # Instance cost: Brev CPU credits (~$0.10/run for 4x16 instance)
 
 on:
@@ -157,7 +157,7 @@ on:
         required: false
       BREV_ORG_ID:
         required: false
-      NVIDIA_API_KEY:
+      NVIDIA_INFERENCE_API_KEY:
         required: true
 
 permissions:
@@ -253,7 +253,7 @@ jobs:
         env:
           NEMOCLAW_RUN_BRANCH_VALIDATION_E2E: "1"
           BREV_API_TOKEN: ${{ inputs.brev_token || secrets.BREV_API_TOKEN }}
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           GITHUB_TOKEN: ${{ github.token }}
           INSTANCE_NAME: ${{ env.BREV_E2E_INSTANCE_NAME }}
           TEST_SUITE: ${{ inputs.test_suite }}
diff --git a/.github/workflows/e2e-script.yaml b/.github/workflows/e2e-script.yaml
index 761af8c30d..4a721b68fc 100644
--- a/.github/workflows/e2e-script.yaml
+++ b/.github/workflows/e2e-script.yaml
@@ -43,7 +43,7 @@ on:
         type: string
         default: ""
       nvidia_api_key:
-        description: Pass the NVIDIA_API_KEY secret to the script.
+        description: Pass the NVIDIA_INFERENCE_API_KEY secret to the script.
         required: false
         type: boolean
         default: false
@@ -63,7 +63,7 @@ on:
         type: boolean
         default: false
     secrets:
-      NVIDIA_API_KEY:
+      NVIDIA_INFERENCE_API_KEY:
         required: false
       BRAVE_API_KEY:
         required: false
@@ -206,7 +206,7 @@ jobs:
         env:
           BRAVE_API_KEY: ${{ inputs.brave_api_key && secrets.BRAVE_API_KEY || '' }}
           GITHUB_TOKEN: ${{ inputs.github_token && github.token || '' }}
-          NVIDIA_API_KEY: ${{ inputs.nvidia_api_key && secrets.NVIDIA_API_KEY || '' }}
+          NVIDIA_INFERENCE_API_KEY: ${{ inputs.nvidia_api_key && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           TELEGRAM_BOT_TOKEN_REAL: ${{ inputs.messaging_live_secrets && secrets.TELEGRAM_BOT_TOKEN_REAL || '' }}
           TELEGRAM_CHAT_ID_E2E: ${{ inputs.messaging_live_secrets && secrets.TELEGRAM_CHAT_ID_E2E || '' }}
           DISCORD_BOT_TOKEN_REAL: ${{ inputs.messaging_live_secrets && secrets.DISCORD_BOT_TOKEN_REAL || '' }}
diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml
index f199050c83..bf029ebfcf 100644
--- a/.github/workflows/e2e-vitest-scenarios.yaml
+++ b/.github/workflows/e2e-vitest-scenarios.yaml
@@ -247,7 +247,7 @@ jobs:
 
       - name: Run Vitest live E2E scenarios
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           SCENARIO_ID: ${{ matrix.id }}
         run: |
           set -euo pipefail
@@ -467,7 +467,7 @@ jobs:
 
       - name: Run skill-agent live test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: |
           set -euo pipefail
           export PATH="$HOME/.local/bin:$HOME/.npm-global/bin:$PATH"
@@ -662,7 +662,7 @@ jobs:
 
       - name: Run issue #4434 TUI unreachable inference live test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: |
           set -euo pipefail
           export PATH="$HOME/.local/bin:$HOME/.npm-global/bin:$PATH"
@@ -749,11 +749,11 @@ jobs:
 
       - name: Run credential sanitization live test
         # Migrated from test/e2e/test-credential-sanitization.sh. Preserves the
-        # same ubuntu-latest + Docker/OpenShell + NVIDIA_API_KEY lane by running
+        # same ubuntu-latest + Docker/OpenShell + NVIDIA_INFERENCE_API_KEY lane by running
         # install.sh, onboarding a real sandbox, and probing sandbox state from
         # Vitest while fixture redaction owns evidence logs.
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: |
           set -euo pipefail
           npx vitest run --project e2e-scenarios-live \
@@ -832,11 +832,11 @@ jobs:
 
       - name: Run credential migration live test
         # Migrated from test/e2e/test-credential-migration.sh. This live test
-        # needs NVIDIA_API_KEY only as the staged legacy credential value; it
+        # needs NVIDIA_INFERENCE_API_KEY only as the staged legacy credential value; it
         # preserves the default NVIDIA provider/key migration path while
         # pinning a lower-quota catalog model in the test fixture.
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: |
           set -euo pipefail
           npx vitest run --project e2e-scenarios-live \
@@ -931,7 +931,7 @@ jobs:
 
       - name: Run Hermes live Vitest test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: |
           set -euo pipefail
           npx vitest run --project e2e-scenarios-live \
@@ -979,21 +979,21 @@ jobs:
         run: npm run build:cli
 
       - name: Install OpenShell
-        # Runs without workflow tokens, Docker credentials, or NVIDIA_API_KEY.
+        # Runs without workflow tokens, Docker credentials, or NVIDIA_INFERENCE_API_KEY.
         # scripts/install-openshell.sh pins the OpenShell version and verifies
         # release SHA-256 checksums before installation.
         env:
           NEMOCLAW_NON_INTERACTIVE: "1"
         run: |
           set -euo pipefail
-          env -u DOCKER_CONFIG -u DOCKERHUB_USERNAME -u DOCKERHUB_TOKEN -u NVIDIA_API_KEY -u GITHUB_TOKEN bash scripts/install-openshell.sh
+          env -u DOCKER_CONFIG -u DOCKERHUB_USERNAME -u DOCKERHUB_TOKEN -u NVIDIA_INFERENCE_API_KEY -u GITHUB_TOKEN bash scripts/install-openshell.sh
 
       - name: Run network-policy live test
         # Migrated from test/e2e/test-network-policy.sh. Free-standing anchor
         # for live network policy allow/deny probes; shell retirement remains
         # deferred to #5098 Phase 11.
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: |
           set -euo pipefail
           npx vitest run --project e2e-scenarios-live \
@@ -1067,7 +1067,7 @@ jobs:
         # bash install.sh to preserve installer/onboard fidelity, then probes
         # real shields/config behavior against the live sandbox.
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: |
           set -euo pipefail
           npx vitest run --project e2e-scenarios-live \
@@ -1145,11 +1145,11 @@ jobs:
           NEMOCLAW_NON_INTERACTIVE: "1"
         run: |
           set -euo pipefail
-          env -u DOCKER_CONFIG -u DOCKERHUB_USERNAME -u DOCKERHUB_TOKEN -u NVIDIA_API_KEY -u GITHUB_TOKEN bash scripts/install-openshell.sh
+          env -u DOCKER_CONFIG -u DOCKERHUB_USERNAME -u DOCKERHUB_TOKEN -u NVIDIA_INFERENCE_API_KEY -u GITHUB_TOKEN bash scripts/install-openshell.sh
 
       - name: Run OpenClaw rebuild live test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: |
           set -euo pipefail
           export PATH="$HOME/.local/bin:$HOME/.npm-global/bin:$PATH"
@@ -1238,11 +1238,11 @@ jobs:
           NEMOCLAW_NON_INTERACTIVE: "1"
         run: |
           set -euo pipefail
-          env -u DOCKER_CONFIG -u DOCKERHUB_USERNAME -u DOCKERHUB_TOKEN -u NVIDIA_API_KEY -u GITHUB_TOKEN bash scripts/install-openshell.sh
+          env -u DOCKER_CONFIG -u DOCKERHUB_USERNAME -u DOCKERHUB_TOKEN -u NVIDIA_INFERENCE_API_KEY -u GITHUB_TOKEN bash scripts/install-openshell.sh
 
       - name: Run sandbox rebuild live test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: |
           set -euo pipefail
           export PATH="$HOME/.local/bin:$HOME/.npm-global/bin:$PATH"
@@ -1495,7 +1495,7 @@ jobs:
 
       - name: Run launchable smoke live test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: |
           set -euo pipefail
           npx vitest run --project e2e-scenarios-live \
@@ -1580,7 +1580,7 @@ jobs:
         # sandbox inference.local completion boundaries without adding registry
         # or migration-ledger wiring.
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: |
           set -euo pipefail
           npx vitest run --project e2e-scenarios-live \
@@ -1668,7 +1668,7 @@ jobs:
         # fidelity before exercising gateway restart, state survival, and live
         # inference.local before and after restart.
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: |
           set -euo pipefail
           npx vitest run --project e2e-scenarios-live \
@@ -1749,7 +1749,7 @@ jobs:
 
       - name: Run OpenClaw TUI chat correlation live test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: |
           set -euo pipefail
           npx vitest run --project e2e-scenarios-live \
@@ -1784,7 +1784,7 @@ jobs:
       E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/gateway-guard-recovery
       NEMOCLAW_CLI_BIN: ${{ github.workspace }}/bin/nemoclaw.js
       NEMOCLAW_RUN_E2E_SCENARIOS: "1"
-      NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+      NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
       NEMOCLAW_NON_INTERACTIVE: "1"
       NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
       # nemoclaw onboard registers the gateway under the canonical name
diff --git a/.github/workflows/macos-e2e.yaml b/.github/workflows/macos-e2e.yaml
index fca322e431..d3e9b5cf5d 100644
--- a/.github/workflows/macos-e2e.yaml
+++ b/.github/workflows/macos-e2e.yaml
@@ -85,7 +85,7 @@ jobs:
       - name: Run macOS full E2E
         if: steps.docker.outputs.docker_ok == 'true'
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           GITHUB_TOKEN: ${{ github.token }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
diff --git a/.github/workflows/nightly-e2e.yaml b/.github/workflows/nightly-e2e.yaml
index d186ea2c88..42c9dab91f 100644
--- a/.github/workflows/nightly-e2e.yaml
+++ b/.github/workflows/nightly-e2e.yaml
@@ -99,10 +99,10 @@
 # Runs directly on the runner (not inside Docker) because OpenShell bootstraps
 # a K3s cluster inside a privileged Docker container — nesting would break networking.
 #
-# NVIDIA_API_KEY for cloud-e2e:
+# NVIDIA_INFERENCE_API_KEY for cloud-e2e:
 #   - Repository secret: Settings → Secrets and variables → Actions → Repository secrets.
 #   - Environment secret: only available if the job sets `environment: <that environment name>`.
-#     (Storing the key under Environments / NVIDIA_API_KEY without `environment:` here leaves the
+#     (Storing the key under Environments / NVIDIA_INFERENCE_API_KEY without `environment:` here leaves the
 #     variable empty in the job — repository secrets and environment secrets are separate.)
 # Only runs on schedule and manual dispatch — never on PRs (secret protection).
 
@@ -206,7 +206,7 @@ jobs:
       nvidia_api_key: true
       github_token: true
     secrets: &nightly-e2e-default-secrets
-      NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+      NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
       BRAVE_API_KEY: ${{ secrets.BRAVE_API_KEY }}
       DOCKERHUB_USERNAME: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.DOCKERHUB_USERNAME || '' }}
       DOCKERHUB_TOKEN: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.DOCKERHUB_TOKEN || '' }}
@@ -376,7 +376,7 @@ jobs:
       github_token: true
       messaging_live_secrets: ${{ github.event_name != 'workflow_dispatch' || inputs.target_ref == '' }}
     secrets:
-      NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+      NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
       BRAVE_API_KEY: ${{ secrets.BRAVE_API_KEY }}
       DOCKERHUB_USERNAME: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.DOCKERHUB_USERNAME || '' }}
       DOCKERHUB_TOKEN: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.DOCKERHUB_TOKEN || '' }}
@@ -461,7 +461,7 @@ jobs:
 
       - name: Run OpenClaw TUI chat correlation E2E test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
           NEMOCLAW_RECREATE_SANDBOX: "1"
@@ -491,7 +491,7 @@ jobs:
         uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
         with:
           # This privileged proof mutates host firewall state and receives
-          # NVIDIA_API_KEY. Keep the runner script from the trusted workflow ref;
+          # NVIDIA_INFERENCE_API_KEY. Keep the runner script from the trusted workflow ref;
           # the product under test is selected separately via
           # NEMOCLAW_PUBLIC_INSTALL_REF.
           ref: ${{ github.ref }}
@@ -530,7 +530,7 @@ jobs:
 
       - name: "Run issue #4434 TUI unreachable inference E2E test"
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_ISSUE_4434_LIVE: "1"
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
@@ -544,19 +544,19 @@ jobs:
         if: failure()
         shell: bash
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           GITHUB_TOKEN: ${{ github.token }}
         run: |
           set -euo pipefail
           for file in /tmp/nemoclaw-e2e-issue-4434-install.log /tmp/nemoclaw-issue-4434.*; do
             [ -f "$file" ] || continue
-            if [ -n "${NVIDIA_API_KEY:-}" ]; then
-              perl -0pi -e 's/\Q$ENV{NVIDIA_API_KEY}\E/[REDACTED_NVIDIA_API_KEY]/g' "$file"
+            if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
+              perl -0pi -e 's/\Q$ENV{NVIDIA_INFERENCE_API_KEY}\E/[REDACTED_NVIDIA_INFERENCE_API_KEY]/g' "$file"
             fi
             if [ -n "${GITHUB_TOKEN:-}" ]; then
               perl -0pi -e 's/\Q$ENV{GITHUB_TOKEN}\E/[REDACTED_GITHUB_TOKEN]/g' "$file"
             fi
-            perl -0pi -e 's/nvapi-[A-Za-z0-9._-]+/[REDACTED_NVIDIA_API_KEY]/g; s/gh[pousr]_[A-Za-z0-9_]+/[REDACTED_GITHUB_TOKEN]/g' "$file"
+            perl -0pi -e 's/nvapi-[A-Za-z0-9._-]+/[REDACTED_NVIDIA_INFERENCE_API_KEY]/g; s/gh[pousr]_[A-Za-z0-9_]+/[REDACTED_GITHUB_TOKEN]/g' "$file"
           done
 
       - name: "Upload issue #4434 logs on failure"
@@ -697,12 +697,12 @@ jobs:
   #           chooses a `target_ref`. Both paths are "trusted-ref": the
   #           code reached at runtime has already passed maintainer review
   #           or is the default branch itself.
-  #   * `NVIDIA_API_KEY` is the repo-scoped E2E credential — purposefully
+  #   * `NVIDIA_INFERENCE_API_KEY` is the repo-scoped E2E credential — purposefully
   #     not a production key. It is wired only to the inference quota
   #     allocated to this repository's E2E lane, with no IAM / billing
   #     authority outside that quota. Treat exposure as "rotate at the
   #     quota boundary," not "rotate at the production boundary." Audit
-  #     trail: `gh api repos/NVIDIA/NemoClaw/actions/secrets/NVIDIA_API_KEY`
+  #     trail: `gh api repos/NVIDIA/NemoClaw/actions/secrets/NVIDIA_INFERENCE_API_KEY`
   #     shows the secret scope.
   #   * The top-level `github.repository == 'NVIDIA/NemoClaw'` check
   #     additionally guards repo-forked schedules.
@@ -952,7 +952,7 @@ jobs:
 
       - name: Run token rotation E2E test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
           NEMOCLAW_POLICY_TIER: "open"
@@ -1244,7 +1244,7 @@ jobs:
 
       - name: Run sandbox operations E2E test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
           NEMOCLAW_POLICY_TIER: "open"
@@ -1538,11 +1538,11 @@ jobs:
 
       - name: Run credential migration Vitest test
         # Trusted-code boundary: this job runs the checked-out target ref with
-        # NVIDIA_API_KEY because it validates live credential migration into the
+        # NVIDIA_INFERENCE_API_KEY because it validates live credential migration into the
         # OpenShell gateway. Keep checkout credentials disabled, do not pass
         # GITHUB_TOKEN, and rely on reviewed/maintainer-dispatched refs.
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/credential-migration
           NEMOCLAW_RUN_E2E_SCENARIOS: "1"
           NEMOCLAW_SANDBOX_NAME: "e2e-cred-migration"
@@ -1715,13 +1715,13 @@ jobs:
       - *dockerhub-auth-step
       - name: Install NemoClaw
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: bash install.sh --non-interactive --yes-i-accept-third-party-software
       - name: Run double onboard E2E test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: |
@@ -1753,13 +1753,13 @@ jobs:
       - *dockerhub-auth-step
       - name: Install NemoClaw
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: bash install.sh --non-interactive --yes-i-accept-third-party-software
       - name: Run onboard repair E2E test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: |
@@ -1791,13 +1791,13 @@ jobs:
       - *dockerhub-auth-step
       - name: Install NemoClaw
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: bash install.sh --non-interactive --yes-i-accept-third-party-software
       - name: Run onboard resume E2E test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: |
@@ -1829,13 +1829,13 @@ jobs:
       - *dockerhub-auth-step
       - name: Install NemoClaw
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: bash install.sh --non-interactive --yes-i-accept-third-party-software
       - name: Run onboard negative-path E2E test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: |
@@ -1868,13 +1868,13 @@ jobs:
       - *dockerhub-auth-step
       - name: Install NemoClaw
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: bash install.sh --non-interactive --yes-i-accept-third-party-software
       - name: Run runtime overrides E2E test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: |
@@ -1907,14 +1907,14 @@ jobs:
       - *dockerhub-auth-step
       - name: Install NemoClaw and onboard sandbox
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
           NEMOCLAW_SANDBOX_NAME: "e2e-test"
         run: bash install.sh --non-interactive --yes-i-accept-third-party-software
       - name: Run credential sanitization E2E test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
           NEMOCLAW_SANDBOX_NAME: "e2e-test"
@@ -1949,14 +1949,14 @@ jobs:
       - *dockerhub-auth-step
       - name: Install NemoClaw and onboard sandbox
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
           NEMOCLAW_SANDBOX_NAME: "e2e-test"
         run: bash install.sh --non-interactive --yes-i-accept-third-party-software
       - name: Run telegram injection E2E test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
           NEMOCLAW_SANDBOX_NAME: "e2e-test"
@@ -2029,7 +2029,7 @@ jobs:
 
       - name: Run launchable install-flow smoke test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
           NEMOCLAW_SANDBOX_NAME: "e2e-launchable"
@@ -2255,13 +2255,13 @@ jobs:
       - *dockerhub-auth-step
       - name: Install NemoClaw
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: bash install.sh --non-interactive --yes-i-accept-third-party-software
       - name: Run concurrent gateway ports E2E test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: |
           [ -f "$HOME/.bashrc" ] && source "$HOME/.bashrc" 2>/dev/null || true
           export NVM_DIR="${NVM_DIR:-$HOME/.nvm}"
diff --git a/.github/workflows/regression-e2e.yaml b/.github/workflows/regression-e2e.yaml
index 09a48a0cab..7180ef6582 100644
--- a/.github/workflows/regression-e2e.yaml
+++ b/.github/workflows/regression-e2e.yaml
@@ -248,7 +248,7 @@ jobs:
 
       - name: Run Model Router provider-routed inference E2E test
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: bash test/e2e/test-model-router-provider-routed-inference.sh
@@ -322,7 +322,7 @@ jobs:
   # `openclaw channels login --channel whatsapp` onQr callback invokes)
   # at the version bundled in Dockerfile.base, and asserts the pairing QR
   # renders compact with the NemoClaw preload and oversized without it.
-  # Hermetic: only needs node + npm (no Docker, GPU, or NVIDIA_API_KEY).
+  # Hermetic: only needs node + npm (no Docker, GPU, or NVIDIA_INFERENCE_API_KEY).
   whatsapp-qr-compact-e2e:
     needs: select_regression_jobs
     if: >-
diff --git a/.github/workflows/wsl-e2e.yaml b/.github/workflows/wsl-e2e.yaml
index f3b633bd95..9d6bae556e 100644
--- a/.github/workflows/wsl-e2e.yaml
+++ b/.github/workflows/wsl-e2e.yaml
@@ -227,13 +227,13 @@ jobs:
         if: steps.docker.outputs.docker_ok == 'true'
         shell: powershell
         env:
-          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           GITHUB_TOKEN: ${{ github.token }}
         run: |
           $script = @"
           set -euo pipefail
           cd '$env:WSL_WORKDIR'
-          export NVIDIA_API_KEY='$env:NVIDIA_API_KEY'
+          export NVIDIA_INFERENCE_API_KEY='$env:NVIDIA_INFERENCE_API_KEY'
           export GITHUB_TOKEN='$env:GITHUB_TOKEN'
           export NEMOCLAW_NON_INTERACTIVE='$env:NEMOCLAW_NON_INTERACTIVE'
           export NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE='$env:NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE'
diff --git a/agents/hermes/policy-additions.yaml b/agents/hermes/policy-additions.yaml
index 6151ebc8dc..38b24e6805 100644
--- a/agents/hermes/policy-additions.yaml
+++ b/agents/hermes/policy-additions.yaml
@@ -65,16 +65,6 @@ network_policies:
   nvidia:
     name: nvidia
     endpoints:
-      - host: integrate.api.nvidia.com
-        port: 443
-        protocol: rest
-        enforcement: enforce
-        rules:
-          - allow: { method: POST, path: "/v1/chat/completions" }
-          - allow: { method: POST, path: "/v1/completions" }
-          - allow: { method: POST, path: "/v1/embeddings" }
-          - allow: { method: GET, path: "/v1/models" }
-          - allow: { method: GET, path: "/v1/models/**" }
       - host: inference-api.nvidia.com
         port: 443
         protocol: rest
diff --git a/agents/hermes/policy-permissive.yaml b/agents/hermes/policy-permissive.yaml
index 5ed6a1b2ac..ce57a8b466 100644
--- a/agents/hermes/policy-permissive.yaml
+++ b/agents/hermes/policy-permissive.yaml
@@ -43,11 +43,6 @@ network_policies:
   nvidia:
     name: nvidia
     endpoints:
-      - host: integrate.api.nvidia.com
-        port: 443
-        protocol: rest
-        enforcement: enforce
-        access: full
       - host: inference-api.nvidia.com
         port: 443
         protocol: rest
diff --git a/agents/openclaw/policy-permissive.yaml b/agents/openclaw/policy-permissive.yaml
index 19e6b19374..d7cdf6c972 100644
--- a/agents/openclaw/policy-permissive.yaml
+++ b/agents/openclaw/policy-permissive.yaml
@@ -43,11 +43,6 @@ network_policies:
   nvidia:
     name: nvidia
     endpoints:
-      - host: integrate.api.nvidia.com
-        port: 443
-        protocol: rest
-        enforcement: enforce
-        access: full
       - host: inference-api.nvidia.com
         port: 443
         protocol: rest
diff --git a/docs/_components/StarterPromptButton.tsx b/docs/_components/StarterPromptButton.tsx
index d4f6411e63..168f6543fd 100644
--- a/docs/_components/StarterPromptButton.tsx
+++ b/docs/_components/StarterPromptButton.tsx
@@ -81,7 +81,7 @@ Use this provider mapping for non-interactive setup:
 
 | User choice | \`NEMOCLAW_PROVIDER\` | Other required values |
 |---|---|---|
-| NVIDIA Endpoints | \`build\` | \`NVIDIA_API_KEY\` |
+| NVIDIA Endpoints | \`build\` | \`NVIDIA_INFERENCE_API_KEY\` |
 | OpenAI | \`openai\` | \`OPENAI_API_KEY\` |
 | Other OpenAI-compatible endpoint | \`custom\` | \`NEMOCLAW_ENDPOINT_URL\`, \`NEMOCLAW_MODEL\`, \`COMPATIBLE_API_KEY\` |
 | Anthropic | \`anthropic\` | \`ANTHROPIC_API_KEY\` |
@@ -89,7 +89,7 @@ Use this provider mapping for non-interactive setup:
 | Google Gemini | \`gemini\` | \`GEMINI_API_KEY\` |
 | Hermes Provider | \`hermes-provider\` | Hermes-only; ask for the provider credential as documented |
 | Local Ollama | \`ollama\` | Optional \`NEMOCLAW_MODEL\`; set \`NEMOCLAW_YES=1\` only if I approve model download |
-| Model Router | \`routed\` | \`NVIDIA_API_KEY\` |
+| Model Router | \`routed\` | \`NVIDIA_INFERENCE_API_KEY\` |
 
 When you have the approved values, run the installer with the environment variables on the \`bash\` side of the pipe, not before \`curl\`.
 
diff --git a/docs/about/release-notes.mdx b/docs/about/release-notes.mdx
index 389ec1c24d..71e7e76901 100644
--- a/docs/about/release-notes.mdx
+++ b/docs/about/release-notes.mdx
@@ -132,7 +132,7 @@ NemoClaw v0.0.54 updates messaging activation, Windows WSL onboarding, NemoHerme
 - The Windows bootstrap flow waits for Ubuntu account creation before touching Docker settings, enables Docker Desktop WSL integration for the target distro, avoids changing the global WSL default distro, and adds WSL-specific Docker reachability hints during onboarding. For more information, refer to [Prepare Windows for NemoClaw](../get-started/prerequisites/windows-preparation).
 - Windows-host Ollama setup inside WSL now requires the Docker Desktop WSL integration path. NemoClaw still shows Windows-host Ollama options when it detects them, but labels the Docker Desktop requirement and blocks unsupported native Docker-in-WSL selections before it tries to start or install Ollama. For more information, refer to [Use a Local Inference Server](../inference/use-local-inference).
 - NemoHermes can expose the optional native Hermes web dashboard separately from the OpenAI-compatible API. Set `NEMOCLAW_HERMES_DASHBOARD=1` before onboarding to start and forward the dashboard on port `9119`, with `NEMOCLAW_HERMES_DASHBOARD_PORT` and `NEMOCLAW_HERMES_DASHBOARD_TUI` available for port and TUI tab control. For more information, refer to [NemoClaw Quickstart with Hermes](../../hermes/get-started/quickstart).
-- Onboarding diagnostics include more copy-paste-ready recovery hints. Invalid sandbox names now include a `Try: <suggested-slug>` line when NemoClaw can derive a valid name, and non-interactive NVIDIA Endpoints setup prints the exact `export NVIDIA_API_KEY=nvapi-...` shape when the key is missing. For more information, refer to [NemoClaw CLI Commands Reference](../reference/commands).
+- Onboarding diagnostics include more copy-paste-ready recovery hints. Invalid sandbox names now include a `Try: <suggested-slug>` line when NemoClaw can derive a valid name, and non-interactive NVIDIA Endpoints setup prints the exact `export NVIDIA_INFERENCE_API_KEY=nvapi-...` shape when the key is missing. For more information, refer to [NemoClaw CLI Commands Reference](../reference/commands).
 - Homebrew stays on the Linuxbrew prefix while exposing installed formula commands in sandbox shell sessions, the `/nemoclaw` slash command activates at OpenClaw startup again, Hermes rebuilds tolerate older release tarballs that lack optional UI package lockfiles, and device scope-upgrade approvals recover without being pinned to the old gateway-scoped request. For more information, refer to [Common NemoClaw Integration Policy Examples](../network-policy/integration-policy-examples).
 - The host-gateway allowance for OpenClaw `web_fetch` is confined to the trusted proxy path, while strict and direct paths continue to block host-gateway names. Hermes Provider onboarding skips the host-side smoke probe only for OAuth-backed setup and keeps direct validation for Nous API key setup. For more information, refer to [NemoClaw Inference Options](../inference/inference-options).
 
@@ -328,7 +328,7 @@ NemoClaw v0.0.39 improves several day-two workflows:
 - `$$nemoclaw <name> destroy` preserves the shared gateway by default unless `--cleanup-gateway` is selected.
 - `$$nemoclaw <name> connect` repairs stale `inference.local` DNS proxy routes before opening the session.
 - Windows-host Ollama onboarding relaunches the daemon with the reachable binding after install or restart.
-- Local NVIDIA NIM onboarding passes `NGC_API_KEY` or `NVIDIA_API_KEY` into the managed container without putting the secret in process arguments, detects early container exits during health checks, and prints a per-GPU preflight breakdown on mixed-model hosts.
+- Local NVIDIA NIM onboarding passes `NGC_API_KEY` or `NVIDIA_INFERENCE_API_KEY` into the managed container without putting the secret in process arguments, detects early container exits during health checks, and prints a per-GPU preflight breakdown on mixed-model hosts.
 - The sandbox startup path strips additional Linux capabilities before and during privilege step-down.
 - OpenClaw workspace template files are seeded when bootstrap is skipped and the workspace is still empty.
 - Kimi K2.6 and related NVIDIA-hosted chat-completions paths include model-specific compatibility handling for reasoning output.
diff --git a/docs/get-started/quickstart-hermes.mdx b/docs/get-started/quickstart-hermes.mdx
index c9e4e4d6c4..3ddcd39fa3 100644
--- a/docs/get-started/quickstart-hermes.mdx
+++ b/docs/get-started/quickstart-hermes.mdx
@@ -93,7 +93,7 @@ export NEMOCLAW_AGENT=hermes
 export NEMOCLAW_NON_INTERACTIVE=1
 export NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 export NEMOCLAW_SANDBOX_NAME=my-hermes
-export NVIDIA_API_KEY=<your-key>
+export NVIDIA_INFERENCE_API_KEY=<your-key>
 curl -fsSL https://www.nvidia.com/nemoclaw.sh | bash
 ```
 
diff --git a/docs/get-started/quickstart.mdx b/docs/get-started/quickstart.mdx
index 12a8d5a8bb..f291d6b9e0 100644
--- a/docs/get-started/quickstart.mdx
+++ b/docs/get-started/quickstart.mdx
@@ -119,7 +119,7 @@ A configured blueprint router profile makes the Model Router option appear.
 
 <Tip>
 Export the API key before launching the installer so the wizard does not have to ask for it.
-For example, run `export NVIDIA_API_KEY=<your-key>` before `curl ... | bash`.
+For example, run `export NVIDIA_INFERENCE_API_KEY=<your-key>` before `curl ... | bash`.
 If you entered a key incorrectly, refer to [Reset a Stored Credential](../manage-sandboxes/lifecycle#reset-a-stored-credential) to clear and re-enter it.
 </Tip>
 
@@ -131,14 +131,14 @@ For Ollama, vLLM, NIM, and compatible local servers, refer to [Use a Local Infer
 
 | Option | Use when | Credential variable |
 |---|---|---|
-| NVIDIA Endpoints | You want hosted models from `build.nvidia.com`, including hosted Nemotron models. | `NVIDIA_API_KEY` |
+| NVIDIA Endpoints | You want hosted models from `build.nvidia.com`, including hosted Nemotron models. | `NVIDIA_INFERENCE_API_KEY` |
 | OpenAI | You want the OpenAI API at `https://api.openai.com/v1`. | `OPENAI_API_KEY` |
 | Other OpenAI-compatible endpoint | You have OpenRouter, LocalAI, llama.cpp, vLLM, NIM, SGLang, an enterprise gateway, or another `/v1/chat/completions` endpoint. | `COMPATIBLE_API_KEY` |
 | Anthropic | You want the Anthropic Messages API. | `ANTHROPIC_API_KEY` |
 | Other Anthropic-compatible endpoint | You have a Claude proxy, Bedrock-compatible gateway, or self-hosted `/v1/messages` endpoint. | `COMPATIBLE_ANTHROPIC_API_KEY` |
 | Google Gemini | You want Google's OpenAI-compatible Gemini endpoint. | `GEMINI_API_KEY` |
 | Local Ollama | You want a host-local Ollama model. | None |
-| Model Router | You want NemoClaw to start the host-side model router. | `NVIDIA_API_KEY` |
+| Model Router | You want NemoClaw to start the host-side model router. | `NVIDIA_INFERENCE_API_KEY` |
 
 Export the relevant key before launching the installer when possible.
 If your compatible endpoint does not require authentication, set its credential variable to any non-empty placeholder.
diff --git a/docs/inference/inference-options.mdx b/docs/inference/inference-options.mdx
index a6620c6c9e..7ad7caa7f2 100644
--- a/docs/inference/inference-options.mdx
+++ b/docs/inference/inference-options.mdx
@@ -64,7 +64,7 @@ The managed install/start vLLM entry appears by default on DGX Spark and DGX Sta
 
 | Option | Description | Curated models |
 |--------|-------------|----------------|
-| NVIDIA Endpoints | Routes to models hosted on [build.nvidia.com](https://build.nvidia.com). You can also enter any model ID from the catalog. Set `NVIDIA_API_KEY`. | Nemotron 3 Super 120B, Nemotron 3 Ultra 550B, GLM-5.1, MiniMax M2.7, GPT-OSS 120B, DeepSeek V4 Pro |
+| NVIDIA Endpoints | Routes to models hosted on [build.nvidia.com](https://build.nvidia.com). You can also enter any model ID from the catalog. Set `NVIDIA_INFERENCE_API_KEY`. | Nemotron 3 Super 120B, Nemotron 3 Ultra 550B, GLM-5.1, MiniMax M2.7, GPT-OSS 120B, DeepSeek V4 Pro |
 | OpenAI | Routes to the OpenAI API. Set `OPENAI_API_KEY`. | `gpt-5.4`, `gpt-5.4-mini`, `gpt-5.4-nano`, `gpt-5.4-pro-2026-03-05` |
 | Other OpenAI-compatible endpoint | Routes to any server that implements `/v1/chat/completions`. NemoClaw uses `/v1/chat/completions` at runtime by default; set `NEMOCLAW_PREFERRED_API=openai-responses` to allow `/v1/responses` for proxies that implement it, such as some llama.cpp builds. The wizard prompts for a base URL and model name. Works with OpenRouter, LocalAI, llama.cpp, or any compatible proxy. When you enable Telegram messaging, onboarding also runs a bounded sandbox-side smoke check through `https://inference.local/v1/chat/completions`. Set `COMPATIBLE_API_KEY`. | You provide the model name. |
 | Anthropic | Routes to the Anthropic Messages API. Set `ANTHROPIC_API_KEY`. | `claude-sonnet-4-6`, `claude-haiku-4-5`, `claude-opus-4-6` |
@@ -80,7 +80,7 @@ NVIDIA Nemotron models expose OpenAI-compatible APIs across every supported depl
 
 | Nemotron Host | Onboard Wizard Option | Why |
 |---|---|---|
-| `build.nvidia.com` (NVIDIA-hosted) | **Option 1: NVIDIA Endpoints** | NemoClaw sets the base URL to `https://integrate.api.nvidia.com/v1` for you and validates the model against the build catalog. |
+| `build.nvidia.com` (NVIDIA-hosted) | **Option 1: NVIDIA Endpoints** | NemoClaw sets the base URL to `https://inference-api.nvidia.com/v1` for you and validates the model against the build catalog. |
 | Self-hosted NIM container | **Option 3: Other OpenAI-compatible endpoint** | NIM exposes an OpenAI-compatible `/v1/chat/completions` route. Point the base URL at your NIM service and enter the Nemotron model ID. |
 | Enterprise NVIDIA AI Enterprise gateway | **Option 3: Other OpenAI-compatible endpoint** | Enterprise gateways front Nemotron with the same OpenAI-compatible contract. Use the gateway's base URL and your enterprise token. |
 | vLLM, SGLang, or TRT-LLM serving Nemotron weights | **Option 3: Other OpenAI-compatible endpoint** | Each runtime exposes Nemotron through `/v1/chat/completions`. Use the runtime's base URL and the model ID it reports. |
@@ -138,7 +138,7 @@ The sandbox never sees raw API keys.
 To use the router in scripted setup, set:
 
 ```bash
-NEMOCLAW_PROVIDER=routed NVIDIA_API_KEY=<your-key> $$nemoclaw onboard --non-interactive
+NEMOCLAW_PROVIDER=routed NVIDIA_INFERENCE_API_KEY=<your-key> $$nemoclaw onboard --non-interactive
 ```
 
 ### Host Python Requirement
@@ -182,7 +182,7 @@ For setup instructions, refer to [Use a Local Inference Server](use-local-infere
 NemoClaw validates the selected provider and model before creating the sandbox.
 If credential validation fails, the wizard asks whether to re-enter the API key, choose a different provider, retry, or exit.
 The wizard retries transient upstream validation failures before it reports a provider failure.
-The `nvapi-` prefix check applies only to `NVIDIA_API_KEY`.
+The `nvapi-` prefix check applies only to `NVIDIA_INFERENCE_API_KEY`.
 Other provider credentials, such as `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GEMINI_API_KEY`, and compatible endpoint keys, use provider-aware validation during retry.
 
 | Provider type | Validation method |
@@ -401,7 +401,7 @@ NVIDIA hosts NIM container images on `nvcr.io`, and `docker pull` requires NGC r
 If Docker is not already logged in to `nvcr.io`, onboard prompts for an [NGC API key](https://org.ngc.nvidia.com/setup/api-key) and runs `docker login nvcr.io` over `--password-stdin` so the key is never written to disk or shell history.
 The prompt masks the key during input and retries one time on a bad key before failing.
 In non-interactive mode, onboard exits with login instructions if Docker is not already authenticated; run `docker login nvcr.io` yourself, then re-run `$$nemoclaw onboard --non-interactive`.
-If `NGC_API_KEY` or `NVIDIA_API_KEY` is already exported, NemoClaw passes it into the managed NIM container through the process environment instead of command-line arguments.
+If `NGC_API_KEY` or `NVIDIA_INFERENCE_API_KEY` is already exported, NemoClaw passes it into the managed NIM container through the process environment instead of command-line arguments.
 If the NIM container exits before the health endpoint becomes ready, onboarding stops early and prints the last container log lines.
 After NIM becomes healthy, NemoClaw reads `/v1/models` and uses the served model id for validation when it differs from the catalog name.
 Unsafe served ids are rejected instead of being written into the sandbox config.
diff --git a/docs/network-policy/approve-network-requests.mdx b/docs/network-policy/approve-network-requests.mdx
index 4766a54db9..9dc0038996 100644
--- a/docs/network-policy/approve-network-requests.mdx
+++ b/docs/network-policy/approve-network-requests.mdx
@@ -64,7 +64,7 @@ From the NemoClaw repository root, run the walkthrough script after you have onb
 ```
 
 This script opens a split tmux session with the TUI on the left and the agent on the right.
-The walkthrough requires tmux and the `NVIDIA_API_KEY` environment variable, and it assumes an existing sandbox to attach to.
+The walkthrough requires tmux and the `NVIDIA_INFERENCE_API_KEY` environment variable, and it assumes an existing sandbox to attach to.
 
 ## Related Topics
 
diff --git a/docs/reference/network-policies.mdx b/docs/reference/network-policies.mdx
index d92916384d..000049a8a3 100644
--- a/docs/reference/network-policies.mdx
+++ b/docs/reference/network-policies.mdx
@@ -41,7 +41,7 @@ The following endpoint groups are allowed by default:
 
 | Policy | Endpoints | Binaries | Rules |
 | --- | --- | --- | --- |
-| `nvidia` | `integrate.api.nvidia.com:443`, `inference-api.nvidia.com:443` | `/usr/local/bin/openclaw` | POST to inference and embedding paths, GET to model listings |
+| `nvidia` | `inference-api.nvidia.com:443` | `/usr/local/bin/openclaw` | POST to inference and embedding paths, GET to model listings |
 | `clawhub` | `clawhub.ai:443` | `/usr/local/bin/openclaw`, `/usr/local/bin/node` | GET, POST |
 | `openclaw_api` | `openclaw.ai:443` | `/usr/local/bin/openclaw`, `/usr/local/bin/node` | GET, POST |
 | `openclaw_docs` | `docs.openclaw.ai:443` | `/usr/local/bin/openclaw` | GET only |
diff --git a/docs/reference/troubleshooting.mdx b/docs/reference/troubleshooting.mdx
index 6c5f900453..4f8bb46baf 100644
--- a/docs/reference/troubleshooting.mdx
+++ b/docs/reference/troubleshooting.mdx
@@ -182,13 +182,13 @@ When the lookup returns an answer, retry onboarding.
 ### Host DNS resolution is blocked before provider validation
 
 NemoClaw also checks that the host process can resolve the provider host before it starts NVIDIA provider validation.
-A firewall rule that blocks host DNS traffic on port `53` can make later validation fail with `curl: (6) Could not resolve host: integrate.api.nvidia.com` even when container DNS probes look healthy.
+A firewall rule that blocks host DNS traffic on port `53` can make later validation fail with `curl: (6) Could not resolve host: inference-api.nvidia.com` even when container DNS probes look healthy.
 Current onboarding stops earlier with a host DNS diagnostic and remediation hints.
 
 Verify host DNS outside NemoClaw:
 
 ```bash
-node -e 'require("node:dns").resolve4("integrate.api.nvidia.com", (err, addrs) => { if (err) { console.error(err); process.exit(1); } console.log(addrs.join(",")); })'
+node -e 'require("node:dns").resolve4("inference-api.nvidia.com", (err, addrs) => { if (err) { console.error(err); process.exit(1); } console.log(addrs.join(",")); })'
 ```
 
 Fix the host firewall, VPN, or DNS policy so the host can resolve the provider endpoint, then rerun onboarding.
diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx
index bab83df377..7a9e69a681 100644
--- a/docs/security/best-practices.mdx
+++ b/docs/security/best-practices.mdx
@@ -129,7 +129,7 @@ Endpoint rules restrict allowed HTTP methods and URL paths.
 
 | Aspect | Detail |
 |---|---|
-| Default | Some endpoints allow GET and POST on `/**` (for example, `clawhub.ai`). Others restrict methods and paths to specific API routes (for example, `integrate.api.nvidia.com` allows POST only to inference and embedding paths and GET to model listings). Read-only endpoints such as `docs.openclaw.ai`, the `npm_registry` baseline entry, and the `pypi` preset allow GET only (PyPI also allows HEAD). The `npm` preset is an intentional exception: npm/Yarn registry traffic uses L4 pass-through for Node 22 undici CONNECT compatibility. |
+| Default | Some endpoints allow GET and POST on `/**` (for example, `clawhub.ai`). Others restrict methods and paths to specific API routes (for example, `inference-api.nvidia.com` allows POST only to inference and embedding paths and GET to model listings). Read-only endpoints such as `docs.openclaw.ai`, the `npm_registry` baseline entry, and the `pypi` preset allow GET only (PyPI also allows HEAD). The `npm` preset is an intentional exception: npm/Yarn registry traffic uses L4 pass-through for Node 22 undici CONNECT compatibility. |
 | What you can change | Add methods (PUT, DELETE, PATCH) or restrict paths to specific prefixes. |
 | Risk if relaxed | Allowing all methods on an API endpoint gives the agent write and delete access. For example, allowing DELETE on `api.github.com` lets the agent delete repositories. |
 | Recommendation | Use GET-only rules for endpoints that the agent only reads. Add write methods only for endpoints where the agent must create or modify resources. Restrict paths to specific API routes when possible. |
diff --git a/docs/security/credential-storage.mdx b/docs/security/credential-storage.mdx
index 7929751214..036a1507ba 100644
--- a/docs/security/credential-storage.mdx
+++ b/docs/security/credential-storage.mdx
@@ -53,7 +53,7 @@ That directory is created with mode `0700` and contains no credential material.
 When a NemoClaw command needs a credential value during a single run (for example to forward it to an `openshell provider` registration), it reads from `process.env` first.
 This means you can:
 
-- Prefix any command with the credential to override the gateway-stored value: `NVIDIA_API_KEY=nvapi-... $$nemoclaw onboard`
+- Prefix any command with the credential to override the gateway-stored value: `NVIDIA_INFERENCE_API_KEY=nvapi-... $$nemoclaw onboard`
 - Use short-lived or rotated credentials in CI by exporting them once per pipeline run
 - Avoid registering credentials in the gateway entirely if your environment supplies them
 
@@ -66,7 +66,7 @@ Export the credential only when you want to create, replace, or rotate the store
 A typical deploy invocation looks like:
 
 ```bash
-NVIDIA_API_KEY=nvapi-... \
+NVIDIA_INFERENCE_API_KEY=nvapi-... \
     HF_TOKEN=hf_... \
     TELEGRAM_BOT_TOKEN=... \
     $$nemoclaw deploy my-instance
@@ -107,7 +107,7 @@ If `~/.nemoclaw/credentials.json` remains after a rebuild or other credential lo
 The simplest way to replace a stored value is to rerun onboarding with the new value in your environment:
 
 ```bash
-NVIDIA_API_KEY=nvapi-new-value $$nemoclaw onboard
+NVIDIA_INFERENCE_API_KEY=nvapi-new-value $$nemoclaw onboard
 ```
 
 To remove a credential from the gateway entirely:
diff --git a/nemoclaw-blueprint/blueprint.yaml b/nemoclaw-blueprint/blueprint.yaml
index c34323bf51..c6336ab1d8 100644
--- a/nemoclaw-blueprint/blueprint.yaml
+++ b/nemoclaw-blueprint/blueprint.yaml
@@ -54,7 +54,7 @@ components:
       default:
         provider_type: "nvidia"
         provider_name: "nvidia-inference"
-        endpoint: "https://integrate.api.nvidia.com/v1"
+        endpoint: "https://inference-api.nvidia.com/v1"
         model: "nvidia/nemotron-3-super-120b-a12b"
 
       ncp:
@@ -62,7 +62,7 @@ components:
         provider_name: "nvidia-ncp"
         endpoint: ""
         model: "nvidia/nemotron-3-super-120b-a12b"
-        credential_env: "NVIDIA_API_KEY"
+        credential_env: "NVIDIA_INFERENCE_API_KEY"
         dynamic_endpoint: true
 
       nim-local:
@@ -89,7 +89,7 @@ components:
         provider_name: "nvidia-router"
         endpoint: "http://localhost:4000/v1"
         model: "nvidia-routed"
-        credential_env: "NVIDIA_API_KEY"
+        credential_env: "NVIDIA_INFERENCE_API_KEY"
         timeout_secs: 180
 
   router:
diff --git a/nemoclaw-blueprint/policies/openclaw-sandbox-permissive.yaml b/nemoclaw-blueprint/policies/openclaw-sandbox-permissive.yaml
index a85ab6d8c6..160bfaea30 100644
--- a/nemoclaw-blueprint/policies/openclaw-sandbox-permissive.yaml
+++ b/nemoclaw-blueprint/policies/openclaw-sandbox-permissive.yaml
@@ -49,11 +49,6 @@ network_policies:
   nvidia:
     name: nvidia
     endpoints:
-      - host: integrate.api.nvidia.com
-        port: 443
-        protocol: rest
-        enforcement: enforce
-        access: full
       - host: inference-api.nvidia.com
         port: 443
         protocol: rest
diff --git a/nemoclaw-blueprint/policies/openclaw-sandbox.yaml b/nemoclaw-blueprint/policies/openclaw-sandbox.yaml
index 0c907130b5..5bc856cb5f 100644
--- a/nemoclaw-blueprint/policies/openclaw-sandbox.yaml
+++ b/nemoclaw-blueprint/policies/openclaw-sandbox.yaml
@@ -73,16 +73,6 @@ network_policies:
   nvidia:
     name: nvidia
     endpoints:
-      - host: integrate.api.nvidia.com
-        port: 443
-        protocol: rest
-        enforcement: enforce
-        rules:
-          - allow: { method: POST, path: "/v1/chat/completions" }
-          - allow: { method: POST, path: "/v1/completions" }
-          - allow: { method: POST, path: "/v1/embeddings" }
-          - allow: { method: GET, path: "/v1/models" }
-          - allow: { method: GET, path: "/v1/models/**" }
       - host: inference-api.nvidia.com
         port: 443
         protocol: rest
diff --git a/nemoclaw-blueprint/router/pool-config.yaml b/nemoclaw-blueprint/router/pool-config.yaml
index ddd9af5300..a457f79d33 100644
--- a/nemoclaw-blueprint/router/pool-config.yaml
+++ b/nemoclaw-blueprint/router/pool-config.yaml
@@ -26,11 +26,11 @@ models:
     litellm_model: "openai/nvidia/nemotron-3-nano-30b-a3b"
     cost_per_m_input_tokens: 0.05
     cost_per_m_output_tokens: 0.20
-    api_base: "https://integrate.api.nvidia.com/v1"
+    api_base: "https://inference-api.nvidia.com/v1"
 
   - name: nemotron-3-super
     display_name: "Nemotron 3 Super 120B"
     litellm_model: "openai/nvidia/nemotron-3-super-120b-a12b"
     cost_per_m_input_tokens: 0.10
     cost_per_m_output_tokens: 0.40
-    api_base: "https://integrate.api.nvidia.com/v1"
+    api_base: "https://inference-api.nvidia.com/v1"
diff --git a/nemoclaw-blueprint/scripts/nemotron-inference-fix.js b/nemoclaw-blueprint/scripts/nemotron-inference-fix.js
index f3871ce99e..efd844eb44 100644
--- a/nemoclaw-blueprint/scripts/nemotron-inference-fix.js
+++ b/nemoclaw-blueprint/scripts/nemotron-inference-fix.js
@@ -78,7 +78,7 @@
 //   against a local OpenAI-compatible endpoint, asserting both the injected
 //   system message and the refreshed Content-Length. The runtime model-
 //   output behavior (acceptance criteria from #4851) is validated against
-//   integrate.api.nvidia.com via the checked-in runbook at
+//   inference-api.nvidia.com via the checked-in runbook at
 //   test/e2e-runtime/4851-ultra-toolless-validation.md — anyone reviewing
 //   acceptance can re-run it directly. Re-run when this preload changes
 //   or when OpenClaw bumps a version that may shift Ultra's chat template.
diff --git a/nemoclaw/src/banner.test.ts b/nemoclaw/src/banner.test.ts
index 512345759e..ceb896a40a 100644
--- a/nemoclaw/src/banner.test.ts
+++ b/nemoclaw/src/banner.test.ts
@@ -10,7 +10,7 @@ describe("renderBox (plugin)", () => {
       [
         "  NemoClaw registered",
         null,
-        "  Endpoint:  https://integrate.api.nvidia.com/v1",
+        "  Endpoint:  https://inference-api.nvidia.com/v1",
         "  Provider:  NVIDIA Endpoints",
         "  Model:     nvidia/nemotron-3-super-120b-a12b",
         "  Slash:     /nemoclaw",
diff --git a/nemoclaw/src/blueprint/runner.test.ts b/nemoclaw/src/blueprint/runner.test.ts
index 54cd1367c8..0505b260b7 100644
--- a/nemoclaw/src/blueprint/runner.test.ts
+++ b/nemoclaw/src/blueprint/runner.test.ts
@@ -141,7 +141,7 @@ function routedBlueprint(): Record<string, unknown> {
             provider_name: "nvidia-router",
             endpoint: "http://localhost:4000/v1",
             model: "routed",
-            credential_env: "NVIDIA_API_KEY",
+            credential_env: "NVIDIA_INFERENCE_API_KEY",
             credential_default: "router-local",
             timeout_secs: 180,
           },
@@ -664,7 +664,7 @@ describe("runner", () => {
                 name: "nim_service",
                 endpoints: [
                   {
-                    host: "integrate.api.nvidia.com",
+                    host: "inference-api.nvidia.com",
                     port: 443,
                     access: "full",
                   },
@@ -736,7 +736,7 @@ describe("runner", () => {
           name: "nim_service",
           endpoints: [
             {
-              host: "integrate.api.nvidia.com",
+              host: "inference-api.nvidia.com",
               port: 443,
               access: "full",
             },
@@ -759,7 +759,7 @@ describe("runner", () => {
       const bp = blueprintWithPolicyAdditions({
         nim_service: {
           name: "nim_service",
-          endpoints: [{ host: "integrate.api.nvidia.com", port: 443, access: "full" }],
+          endpoints: [{ host: "inference-api.nvidia.com", port: 443, access: "full" }],
         },
       });
       mockCurrentPolicy(
@@ -779,7 +779,7 @@ describe("runner", () => {
       const bp = blueprintWithPolicyAdditions({
         nim_service: {
           name: "nim_service",
-          endpoints: [{ host: "integrate.api.nvidia.com", port: 443, access: "full" }],
+          endpoints: [{ host: "inference-api.nvidia.com", port: 443, access: "full" }],
         },
       });
       mockCurrentPolicy(["Version: 1", "Hash: sha256:test"].join("\n"));
@@ -797,7 +797,7 @@ describe("runner", () => {
       const bp = blueprintWithPolicyAdditions({
         nim_service: {
           name: "nim_service",
-          endpoints: [{ host: "integrate.api.nvidia.com", port: 443, access: "full" }],
+          endpoints: [{ host: "inference-api.nvidia.com", port: 443, access: "full" }],
         },
       });
       mockCurrentPolicy(["Version: 1", "Hash: sha256:test", "---"].join("\n"));
@@ -1005,13 +1005,13 @@ describe("runner", () => {
       const prevMyApiKey = process.env.MY_API_KEY;
       const prevGithubToken = process.env.GITHUB_TOKEN;
       const prevAwsKey = process.env.AWS_ACCESS_KEY_ID;
-      const prevNvidiaKey = process.env.NVIDIA_API_KEY;
+      const prevNvidiaKey = process.env.NVIDIA_INFERENCE_API_KEY;
       const prevProxy = process.env.HTTPS_PROXY;
       const prevOsDebug = process.env.OPENSHELL_DEBUG;
       process.env.MY_API_KEY = "secret-key-123";
       process.env.GITHUB_TOKEN = "ghp_leaked";
       process.env.AWS_ACCESS_KEY_ID = "AKIA_leaked";
-      process.env.NVIDIA_API_KEY = "nvapi-leaked";
+      process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-leaked";
       process.env.HTTPS_PROXY = "http://proxy.corp:8080";
       process.env.OPENSHELL_DEBUG = "1";
       try {
@@ -1029,7 +1029,7 @@ describe("runner", () => {
         // Secrets from the parent process must NOT be present
         expect(subEnv).not.toHaveProperty("GITHUB_TOKEN");
         expect(subEnv).not.toHaveProperty("AWS_ACCESS_KEY_ID");
-        expect(subEnv).not.toHaveProperty("NVIDIA_API_KEY");
+        expect(subEnv).not.toHaveProperty("NVIDIA_INFERENCE_API_KEY");
         expect(subEnv).not.toHaveProperty("MY_API_KEY");
 
         // Allowed system vars should still be present
@@ -1046,8 +1046,8 @@ describe("runner", () => {
         else process.env.GITHUB_TOKEN = prevGithubToken;
         if (prevAwsKey === undefined) delete process.env.AWS_ACCESS_KEY_ID;
         else process.env.AWS_ACCESS_KEY_ID = prevAwsKey;
-        if (prevNvidiaKey === undefined) delete process.env.NVIDIA_API_KEY;
-        else process.env.NVIDIA_API_KEY = prevNvidiaKey;
+        if (prevNvidiaKey === undefined) delete process.env.NVIDIA_INFERENCE_API_KEY;
+        else process.env.NVIDIA_INFERENCE_API_KEY = prevNvidiaKey;
         if (prevProxy === undefined) delete process.env.HTTPS_PROXY;
         else process.env.HTTPS_PROXY = prevProxy;
         if (prevOsDebug === undefined) delete process.env.OPENSHELL_DEBUG;
@@ -1131,7 +1131,7 @@ describe("runner", () => {
     });
 
     it("passes endpoint as-is from blueprint (no rewriting)", async () => {
-      process.env.NVIDIA_API_KEY = "test-key";
+      process.env.NVIDIA_INFERENCE_API_KEY = "test-key";
       try {
         await actionApply("routed", routedBlueprint());
 
@@ -1144,7 +1144,7 @@ describe("runner", () => {
         );
         expect(configArg).toBe("OPENAI_BASE_URL=http://localhost:4000/v1");
       } finally {
-        delete process.env.NVIDIA_API_KEY;
+        delete process.env.NVIDIA_INFERENCE_API_KEY;
       }
     });
   });
diff --git a/nemoclaw/src/blueprint/ssrf.test.ts b/nemoclaw/src/blueprint/ssrf.test.ts
index 5d52b3dd26..6722feadfb 100644
--- a/nemoclaw/src/blueprint/ssrf.test.ts
+++ b/nemoclaw/src/blueprint/ssrf.test.ts
@@ -218,7 +218,7 @@ describe("validateEndpointUrl", () => {
 
   it("allows NVIDIA API endpoint", async () => {
     mockPublicDns();
-    const url = "https://integrate.api.nvidia.com/v1";
+    const url = "https://inference-api.nvidia.com/v1";
     const result = await validateEndpointUrl(url);
     expect(result.url).toBe(url);
     expect(result.pinnedUrl).toBe("https://93.184.216.34/v1");
diff --git a/nemoclaw/src/commands/config-show.test.ts b/nemoclaw/src/commands/config-show.test.ts
index 12cf488434..07fccd58bb 100644
--- a/nemoclaw/src/commands/config-show.test.ts
+++ b/nemoclaw/src/commands/config-show.test.ts
@@ -36,21 +36,21 @@ describe("commands/config-show", () => {
   it("shows config with redacted credentials when config exists", () => {
     const config: NemoClawOnboardConfig = {
       endpointType: "build",
-      endpointUrl: "https://integrate.api.nvidia.com/v1",
+      endpointUrl: "https://inference-api.nvidia.com/v1",
       ncpPartner: null,
       model: "nvidia/nemotron-3-super-120b-a12b",
       profile: "default",
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
       onboardedAt: "2026-04-10T14:22:00Z",
     };
     mockedLoadOnboardConfig.mockReturnValue(config);
-    mockedDescribeOnboardEndpoint.mockReturnValue("build (https://integrate.api.nvidia.com/v1)");
+    mockedDescribeOnboardEndpoint.mockReturnValue("build (https://inference-api.nvidia.com/v1)");
     mockedDescribeOnboardProvider.mockReturnValue("NVIDIA Endpoint API");
 
     const result = slashConfigShow();
     expect(result.text).toContain("NemoClaw Config");
-    expect(result.text).toContain("build (https://integrate.api.nvidia.com/v1)");
-    expect(result.text).toContain("$NVIDIA_API_KEY");
+    expect(result.text).toContain("build (https://inference-api.nvidia.com/v1)");
+    expect(result.text).toContain("$NVIDIA_INFERENCE_API_KEY");
     expect(result.text).toContain("NVIDIA Endpoint API");
     expect(result.text).toContain("nvidia/nemotron-3-super-120b-a12b");
     expect(result.text).toContain("2026-04-10T14:22:00Z");
@@ -59,11 +59,11 @@ describe("commands/config-show", () => {
   it("does not expose raw credential values", () => {
     const config: NemoClawOnboardConfig = {
       endpointType: "build",
-      endpointUrl: "https://integrate.api.nvidia.com/v1",
+      endpointUrl: "https://inference-api.nvidia.com/v1",
       ncpPartner: null,
       model: "nvidia/nemotron-3-super-120b-a12b",
       profile: "default",
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
       onboardedAt: "2026-04-10T14:22:00Z",
     };
     mockedLoadOnboardConfig.mockReturnValue(config);
@@ -72,7 +72,7 @@ describe("commands/config-show", () => {
 
     const result = slashConfigShow();
     // Should show env var name, not the actual key value
-    expect(result.text).toContain("$NVIDIA_API_KEY");
+    expect(result.text).toContain("$NVIDIA_INFERENCE_API_KEY");
     expect(result.text).not.toContain("nvapi-");
   });
 
@@ -83,7 +83,7 @@ describe("commands/config-show", () => {
       ncpPartner: "PartnerCo",
       model: "nvidia/nemotron-3-super-120b-a12b",
       profile: "default",
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
       onboardedAt: "2026-04-10T14:22:00Z",
     };
     mockedLoadOnboardConfig.mockReturnValue(config);
@@ -97,7 +97,7 @@ describe("commands/config-show", () => {
   it("shows not configured when credentialEnv is empty", () => {
     const config: NemoClawOnboardConfig = {
       endpointType: "build",
-      endpointUrl: "https://integrate.api.nvidia.com/v1",
+      endpointUrl: "https://inference-api.nvidia.com/v1",
       ncpPartner: null,
       model: "nvidia/nemotron-3-super-120b-a12b",
       profile: "default",
@@ -115,11 +115,11 @@ describe("commands/config-show", () => {
   it("notes that config is host-only modifiable", () => {
     const config: NemoClawOnboardConfig = {
       endpointType: "build",
-      endpointUrl: "https://integrate.api.nvidia.com/v1",
+      endpointUrl: "https://inference-api.nvidia.com/v1",
       ncpPartner: null,
       model: "nvidia/nemotron-3-super-120b-a12b",
       profile: "default",
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
       onboardedAt: "2026-04-10T14:22:00Z",
     };
     mockedLoadOnboardConfig.mockReturnValue(config);
diff --git a/nemoclaw/src/commands/slash.test.ts b/nemoclaw/src/commands/slash.test.ts
index fe7d402c11..2b9928886b 100644
--- a/nemoclaw/src/commands/slash.test.ts
+++ b/nemoclaw/src/commands/slash.test.ts
@@ -173,7 +173,7 @@ describe("commands/slash", () => {
       ncpPartner: null,
       model: "nvidia/nemotron-3-super-120b-a12b",
       profile: "default",
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
       onboardedAt: "2026-03-01T00:00:00.000Z",
     };
 
@@ -309,7 +309,7 @@ describe("commands/slash", () => {
         ncpPartner: null,
         model: "nvidia/nemotron-3-super-120b-a12b",
         profile: "default",
-        credentialEnv: "NVIDIA_API_KEY",
+        credentialEnv: "NVIDIA_INFERENCE_API_KEY",
         onboardedAt: "2026-03-01T00:00:00.000Z",
       };
       mockedLoadOnboardConfig.mockReturnValue(config);
@@ -319,7 +319,7 @@ describe("commands/slash", () => {
       expect(result.text).toContain("NemoClaw Onboard Status");
       expect(result.text).toContain("NVIDIA Endpoint API");
       expect(result.text).toContain("nvidia/nemotron-3-super-120b-a12b");
-      expect(result.text).toContain("NVIDIA_API_KEY");
+      expect(result.text).toContain("NVIDIA_INFERENCE_API_KEY");
     });
 
     it("includes NCP partner when set", () => {
@@ -329,7 +329,7 @@ describe("commands/slash", () => {
         ncpPartner: "PartnerCo",
         model: "nvidia/nemotron-3-super-120b-a12b",
         profile: "default",
-        credentialEnv: "NVIDIA_API_KEY",
+        credentialEnv: "NVIDIA_INFERENCE_API_KEY",
         onboardedAt: "2026-03-01T00:00:00.000Z",
       };
       mockedLoadOnboardConfig.mockReturnValue(config);
diff --git a/nemoclaw/src/index.ts b/nemoclaw/src/index.ts
index dc9f25f622..3c24bd761d 100644
--- a/nemoclaw/src/index.ts
+++ b/nemoclaw/src/index.ts
@@ -283,7 +283,7 @@ function registeredProviderForConfig(
   providerCredentialEnv: string,
 ): ProviderPlugin {
   const authLabel =
-    providerCredentialEnv === "NVIDIA_API_KEY"
+    providerCredentialEnv === "NVIDIA_INFERENCE_API_KEY"
       ? `NVIDIA API Key (${providerCredentialEnv})`
       : `OpenAI API Key (${providerCredentialEnv})`;
 
@@ -369,7 +369,7 @@ export default function register(api: OpenClawPluginApi): void {
   const bannerProvider = onboardCfg ? describeOnboardProvider(onboardCfg) : "NVIDIA Endpoints";
   const bannerModel = activeModel || DEFAULT_INFERENCE_MODEL;
 
-  const providerCredentialEnv = onboardCfg?.credentialEnv ?? "NVIDIA_API_KEY";
+  const providerCredentialEnv = onboardCfg?.credentialEnv ?? "NVIDIA_INFERENCE_API_KEY";
   api.registerProvider(registeredProviderForConfig(activeModel, providerCredentialEnv));
 
   // 3. Register before_tool_call hook to block secrets in memory writes (#1233)
diff --git a/nemoclaw/src/lib/subprocess-env.ts b/nemoclaw/src/lib/subprocess-env.ts
index de497710b5..90927a186c 100644
--- a/nemoclaw/src/lib/subprocess-env.ts
+++ b/nemoclaw/src/lib/subprocess-env.ts
@@ -5,7 +5,7 @@
  * Subprocess environment allowlist.
  *
  * Subprocesses spawned by the CLI or plugin must NOT inherit the full
- * parent process.env — that leaks secrets (NVIDIA_API_KEY, GITHUB_TOKEN,
+ * parent process.env — that leaks secrets (NVIDIA_INFERENCE_API_KEY, GITHUB_TOKEN,
  * AWS_ACCESS_KEY_ID, etc.) to child processes where they can be read and
  * exfiltrated. Instead, only forward the categories below.
  *
diff --git a/nemoclaw/src/onboard/config.test.ts b/nemoclaw/src/onboard/config.test.ts
index 9eed8ca7f2..c539b3b455 100644
--- a/nemoclaw/src/onboard/config.test.ts
+++ b/nemoclaw/src/onboard/config.test.ts
@@ -45,7 +45,7 @@ function makeConfig(overrides: Partial<NemoClawOnboardConfig> = {}): NemoClawOnb
     ncpPartner: null,
     model: "nvidia/nemotron-3-super-120b-a12b",
     profile: "default",
-    credentialEnv: "NVIDIA_API_KEY",
+    credentialEnv: "NVIDIA_INFERENCE_API_KEY",
     onboardedAt: "2026-03-01T00:00:00.000Z",
     ...overrides,
   };
diff --git a/nemoclaw/src/register.test.ts b/nemoclaw/src/register.test.ts
index acaaa73398..ed66e916df 100644
--- a/nemoclaw/src/register.test.ts
+++ b/nemoclaw/src/register.test.ts
@@ -117,7 +117,7 @@ describe("plugin registration", () => {
       ncpPartner: null,
       model: "nvidia/stale-model",
       profile: "default",
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
       onboardedAt: "2026-03-01T00:00:00.000Z",
     });
     mockedReadFileSync.mockReset();
@@ -151,7 +151,7 @@ describe("plugin registration", () => {
       ncpPartner: null,
       model: "nvidia/custom-model",
       profile: "default",
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
       onboardedAt: "2026-03-01T00:00:00.000Z",
     });
     mockedReadFileSync.mockReset();
diff --git a/nemoclaw/src/security/secret-scanner.test.ts b/nemoclaw/src/security/secret-scanner.test.ts
index 0c088c2038..f87eef7133 100644
--- a/nemoclaw/src/security/secret-scanner.test.ts
+++ b/nemoclaw/src/security/secret-scanner.test.ts
@@ -168,7 +168,7 @@ describe("scanForSecrets", () => {
 
   describe("multiple secrets in one content", () => {
     it("detects multiple different secrets", () => {
-      const content = `NVIDIA_API_KEY=${FAKE.nvidia}\nOPENAI_KEY=${FAKE.openai}`;
+      const content = `NVIDIA_INFERENCE_API_KEY=${FAKE.nvidia}\nOPENAI_KEY=${FAKE.openai}`;
       const matches = scanForSecrets(content);
       expect(matches.length).toBeGreaterThanOrEqual(2);
     });
diff --git a/scripts/checks/direct-credential-env.ts b/scripts/checks/direct-credential-env.ts
index 0d8b317f0a..2045a08164 100644
--- a/scripts/checks/direct-credential-env.ts
+++ b/scripts/checks/direct-credential-env.ts
@@ -4,7 +4,7 @@
 /**
  * Guards src/lib/onboard.ts against direct reads of provider credential env vars.
  *
- * Direct `process.env.NVIDIA_API_KEY`-style reads bypass credentials.json. Use
+ * Direct `process.env.NVIDIA_INFERENCE_API_KEY`-style reads bypass credentials.json. Use
  * resolveProviderCredential() or getCredential() for credential resolution unless
  * a narrowly-scoped raw env check is intentional and explicitly suppressed.
  */
@@ -15,6 +15,7 @@ import { fileURLToPath } from "node:url";
 import * as ts from "typescript";
 
 const CREDENTIAL_ENV_KEYS = new Set([
+  "NVIDIA_INFERENCE_API_KEY",
   "NVIDIA_API_KEY",
   "OPENAI_API_KEY",
   "ANTHROPIC_API_KEY",
diff --git a/scripts/install.sh b/scripts/install.sh
index bf35b94fd8..53e3c710f6 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -543,7 +543,7 @@ usage() {
   printf "    --version, -v        Print installer version and exit\n"
   printf "    --help, -h           Show this help message and exit\n\n"
   printf "  ${C_DIM}Environment:${C_RESET}\n"
-  printf "    NVIDIA_API_KEY                API key (skips credential prompt)\n"
+  printf "    NVIDIA_INFERENCE_API_KEY                API key (skips credential prompt)\n"
   printf "    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 Same as --yes-i-accept-third-party-software\n"
   printf "    NEMOCLAW_NON_INTERACTIVE=1    Same as --non-interactive\n"
   printf "    NEMOCLAW_NON_INTERACTIVE_SUDO_MODE=prompt Allow sudo prompts during non-interactive onboarding\n"
diff --git a/scripts/nemoclaw-start.sh b/scripts/nemoclaw-start.sh
index 7b70d6b4ed..d3924c7080 100755
--- a/scripts/nemoclaw-start.sh
+++ b/scripts/nemoclaw-start.sh
@@ -10,7 +10,7 @@
 # The config hash is verified at startup to detect tampering.
 #
 # Optional env:
-#   NVIDIA_API_KEY                API key for NVIDIA-hosted inference
+#   NVIDIA_INFERENCE_API_KEY      API key for NVIDIA-hosted inference
 #   CHAT_UI_URL                   Browser origin that will access the forwarded dashboard
 #   NEMOCLAW_DISABLE_DEVICE_AUTH  Build-time only. Set to "1" to skip device-pairing auth.
 #                                  Also auto-disabled when CHAT_UI_URL is non-loopback.
@@ -1700,7 +1700,7 @@ prepare_gateway_token_for_current_command() {
 
 # Write an auth profile JSON for the NVIDIA API key so the gateway can authenticate.
 write_auth_profile() {
-  if [ -z "${NVIDIA_API_KEY:-}" ]; then
+  if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
     return
   fi
 
@@ -1723,7 +1723,7 @@ json.dump({
     f'{provider_key}:manual': {
         'type': 'api_key',
         'provider': provider_key,
-        'keyRef': {'source': 'env', 'id': 'NVIDIA_API_KEY'},
+        'keyRef': {'source': 'env', 'id': 'NVIDIA_INFERENCE_API_KEY'},
         'profileId': f'{provider_key}:manual',
     }
 }, open(path, 'w'))
diff --git a/scripts/smoke-macos-install.sh b/scripts/smoke-macos-install.sh
index 414fb82872..7dcbf4c8e3 100755
--- a/scripts/smoke-macos-install.sh
+++ b/scripts/smoke-macos-install.sh
@@ -63,7 +63,7 @@ Options:
   -h, --help                  Show this help
 
 Environment:
-  NVIDIA_API_KEY              Required for the cloud install path
+  NVIDIA_INFERENCE_API_KEY              Required for the cloud install path
 EOF
 }
 
@@ -110,7 +110,7 @@ while [ $# -gt 0 ]; do
   esac
 done
 
-[ -n "${NVIDIA_API_KEY:-}" ] || fail "NVIDIA_API_KEY must be set for the smoke install flow."
+[ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] || fail "NVIDIA_INFERENCE_API_KEY must be set for the smoke install flow."
 [ -x "$REPO_DIR/install.sh" ] || fail "install.sh not found at repo root."
 [ -x "$REPO_DIR/uninstall.sh" ] || fail "uninstall.sh not found at repo root."
 
diff --git a/scripts/validate-configs.ts b/scripts/validate-configs.ts
index cd24310fd4..f0b42bb571 100755
--- a/scripts/validate-configs.ts
+++ b/scripts/validate-configs.ts
@@ -209,7 +209,7 @@ interface DangerousHostFinding {
   host: string;
 }
 
-const ROUTER_API_BASE_HOST_ALLOWLIST: ReadonlySet<string> = new Set(["integrate.api.nvidia.com"]);
+const ROUTER_API_BASE_HOST_ALLOWLIST: ReadonlySet<string> = new Set(["inference-api.nvidia.com"]);
 
 /**
  * Walk a parsed policy document (full `network_policies` map or a preset
diff --git a/scripts/walkthrough.sh b/scripts/walkthrough.sh
index 3a02ec6381..02c1b2c516 100755
--- a/scripts/walkthrough.sh
+++ b/scripts/walkthrough.sh
@@ -14,7 +14,7 @@
 #
 # Prerequisites:
 #   - NemoClaw setup complete (nemoclaw onboard)
-#   - NVIDIA_API_KEY in environment
+#   - NVIDIA_INFERENCE_API_KEY in environment
 #
 # Suggested prompts that trigger the approval flow:
 #
@@ -37,14 +37,14 @@
 #
 #   Terminal 2 (Agent):
 #     openshell sandbox connect nemoclaw
-#     export NVIDIA_API_KEY=nvapi-...
+#     export NVIDIA_INFERENCE_API_KEY=nvapi-...
 #     nemoclaw-start
 #     openclaw agent --agent main --local --session-id live
 
 set -euo pipefail
 
-[ -n "${NVIDIA_API_KEY:-}" ] || {
-  echo "NVIDIA_API_KEY required"
+[ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] || {
+  echo "NVIDIA_INFERENCE_API_KEY required"
   exit 1
 }
 
@@ -85,7 +85,7 @@ tmux kill-session -t "$SESSION" 2>/dev/null || true
 tmux new-session -d -s "$SESSION" -x 200 -y 50 "openshell term"
 
 # Split right pane for the agent
-# NVIDIA_API_KEY is not needed inside the sandbox — inference is proxied
+# NVIDIA_INFERENCE_API_KEY is not needed inside the sandbox — inference is proxied
 # through the OpenShell gateway which injects credentials server-side.
 tmux split-window -h -t "$SESSION" \
   "openshell sandbox connect nemoclaw -- bash -c 'nemoclaw-start openclaw agent --agent main --local --session-id live'"
diff --git a/src/commands/sandbox/config/rotate-token.ts b/src/commands/sandbox/config/rotate-token.ts
index 544857b967..3162aece6a 100644
--- a/src/commands/sandbox/config/rotate-token.ts
+++ b/src/commands/sandbox/config/rotate-token.ts
@@ -22,7 +22,7 @@ export default class SandboxConfigRotateTokenCommand extends NemoClawCommand {
   static usage = ["<name> [--from-env <VAR>] [--stdin]"];
   static examples = [
     "<%= config.bin %> sandbox config rotate-token alpha",
-    "<%= config.bin %> sandbox config rotate-token alpha --from-env NVIDIA_API_KEY",
+    "<%= config.bin %> sandbox config rotate-token alpha --from-env NVIDIA_INFERENCE_API_KEY",
   ];
   static args = {
     sandboxName: sandboxNameArg,
diff --git a/src/lib/actions/dev/npm-link-or-shim.test.ts b/src/lib/actions/dev/npm-link-or-shim.test.ts
index 78329802ac..9494819108 100644
--- a/src/lib/actions/dev/npm-link-or-shim.test.ts
+++ b/src/lib/actions/dev/npm-link-or-shim.test.ts
@@ -87,7 +87,7 @@ describe("runNpmLinkOrShim", () => {
         commandPath: () => process.execPath,
         logError: (message) => errors.push(message),
         run: failingNpm(
-          `npm failed in ${repoDir} under ${homeDir}\nNVIDIA_API_KEY=${token}\nAuthorization: Bearer ${token}\n`,
+          `npm failed in ${repoDir} under ${homeDir}\nNVIDIA_INFERENCE_API_KEY=${token}\nAuthorization: Bearer ${token}\n`,
         ),
       },
     );
@@ -97,7 +97,7 @@ describe("runNpmLinkOrShim", () => {
     expect(logOutput).toContain("npm link failed");
     expect(logOutput).toContain("<repo-root>");
     expect(logOutput).toContain("~");
-    expect(logOutput).toContain("NVIDIA_API_KEY=[REDACTED]");
+    expect(logOutput).toContain("NVIDIA_INFERENCE_API_KEY=[REDACTED]");
     expect(logOutput).toContain("Bearer [REDACTED]");
     expect(logOutput).not.toContain(homeDir);
     expect(logOutput).not.toContain(repoDir);
diff --git a/src/lib/actions/sandbox/status.test.ts b/src/lib/actions/sandbox/status.test.ts
index 66d3c6f7a0..1a2894cad1 100644
--- a/src/lib/actions/sandbox/status.test.ts
+++ b/src/lib/actions/sandbox/status.test.ts
@@ -27,7 +27,7 @@ describe("sandbox status inference health", () => {
           ok: true,
           probed: true,
           providerLabel: "NVIDIA Endpoints",
-          endpoint: "https://integrate.api.nvidia.com/v1/chat/completions",
+          endpoint: "https://inference-api.nvidia.com/v1/chat/completions",
           detail: "healthy",
         };
       },
@@ -184,7 +184,7 @@ describe("maybeGetSandboxStatusInferenceHealth", () => {
           ok: true,
           probed: true,
           providerLabel: "NVIDIA Endpoints",
-          endpoint: "https://integrate.api.nvidia.com/v1/chat/completions",
+          endpoint: "https://inference-api.nvidia.com/v1/chat/completions",
           detail: "healthy",
         };
       },
diff --git a/src/lib/credentials/store.ts b/src/lib/credentials/store.ts
index 8464633e7c..6758ff2310 100644
--- a/src/lib/credentials/store.ts
+++ b/src/lib/credentials/store.ts
@@ -31,6 +31,7 @@ export type CredentialPromptIntent =
 // Exported so tests can import the same source-of-truth list and stay in
 // sync without a second hand-maintained copy.
 export const KNOWN_CREDENTIAL_ENV_KEYS: readonly string[] = [
+  "NVIDIA_INFERENCE_API_KEY",
   "NVIDIA_API_KEY",
   "OPENAI_API_KEY",
   "ANTHROPIC_API_KEY",
@@ -49,6 +50,10 @@ export const KNOWN_CREDENTIAL_ENV_KEYS: readonly string[] = [
   "WECHAT_BOT_TOKEN",
 ];
 
+const LEGACY_CREDENTIAL_ENV_ALIASES: Partial<Record<string, readonly string[]>> = {
+  NVIDIA_INFERENCE_API_KEY: ["NVIDIA_API_KEY"],
+};
+
 // Hard upper bound on the legacy credentials.json size we are willing to
 // read into memory. The largest realistic credential set NemoClaw has ever
 // shipped is well under 1 KiB; the cap exists purely so an attacker who
@@ -182,6 +187,14 @@ export function getCredential(key: string): string | null {
   return normalized || null;
 }
 
+function getLegacyCredentialAlias(envName: string): string | null {
+  for (const alias of LEGACY_CREDENTIAL_ENV_ALIASES[envName] ?? []) {
+    const value = getCredential(alias);
+    if (value) return value;
+  }
+  return null;
+}
+
 /**
  * Canonical entry point for provider credential resolution (PR #2306).
  * Resolves the credential for `envName` from `process.env`, falling back
@@ -202,10 +215,10 @@ export function getCredential(key: string): string | null {
  * guard inside the staging helper itself.
  */
 export function resolveProviderCredential(envName: string): string | null {
-  let value = getCredential(envName);
+  let value = getCredential(envName) || getLegacyCredentialAlias(envName);
   if (!value) {
     stageLegacyCredentialsToEnv();
-    value = getCredential(envName);
+    value = getCredential(envName) || getLegacyCredentialAlias(envName);
   }
   if (value) {
     process.env[envName] = value;
@@ -684,15 +697,17 @@ export async function readCredentialPrompt(
 }
 
 /**
- * Ensure `NVIDIA_API_KEY` is staged for this process. Returns immediately
+ * Ensure `NVIDIA_INFERENCE_API_KEY` is staged (legacy `NVIDIA_API_KEY` is honored). Returns immediately
  * if it is already in env, otherwise prompts interactively (validating
  * the `nvapi-` prefix) and stages the result. Onboarding registers the
  * value with the OpenShell gateway later in the flow.
  */
 export async function ensureApiKey(): Promise<CredentialPromptIntent> {
-  let key = getCredential("NVIDIA_API_KEY");
+  let key =
+    getCredential("NVIDIA_INFERENCE_API_KEY") ||
+    getLegacyCredentialAlias("NVIDIA_INFERENCE_API_KEY");
   if (key) {
-    process.env.NVIDIA_API_KEY = key;
+    process.env.NVIDIA_INFERENCE_API_KEY = key;
     return { kind: "credential", value: key };
   }
 
@@ -729,8 +744,8 @@ export async function ensureApiKey(): Promise<CredentialPromptIntent> {
     break;
   }
 
-  saveCredential("NVIDIA_API_KEY", key);
-  process.env.NVIDIA_API_KEY = key;
+  saveCredential("NVIDIA_INFERENCE_API_KEY", key);
+  process.env.NVIDIA_INFERENCE_API_KEY = key;
   console.log("");
   console.log("  Key staged for the OpenShell gateway. It is held in process memory only;");
   console.log("  onboarding registers it with the gateway and nothing is written to disk.");
diff --git a/src/lib/deploy/index.test.ts b/src/lib/deploy/index.test.ts
index c4226b7a36..1fa4418967 100644
--- a/src/lib/deploy/index.test.ts
+++ b/src/lib/deploy/index.test.ts
@@ -15,7 +15,7 @@ import { validateName } from "../../../dist/lib/runner";
 describe("inferDeployProvider", () => {
   it("prefers an explicit provider override", () => {
     const provider = inferDeployProvider("openai", {
-      NVIDIA_API_KEY: "nvapi-test",
+      NVIDIA_INFERENCE_API_KEY: "nvapi-test",
     });
 
     expect(provider).toBe("openai");
@@ -31,7 +31,7 @@ describe("inferDeployProvider", () => {
 
   it("returns null when multiple provider credentials are present without an override", () => {
     const provider = inferDeployProvider("", {
-      NVIDIA_API_KEY: "nvapi-test",
+      NVIDIA_INFERENCE_API_KEY: "nvapi-test",
       OPENAI_API_KEY: "sk-openai-test",
     });
 
@@ -49,7 +49,7 @@ describe("buildDeployEnvLines", () => {
       sandboxName: "my-assistant",
       provider: "build",
       credentials: {
-        NVIDIA_API_KEY: "nvapi-test",
+        NVIDIA_INFERENCE_API_KEY: "nvapi-test",
       },
       shellQuote: (value: string) => `'${value}'`,
     });
@@ -60,7 +60,7 @@ describe("buildDeployEnvLines", () => {
     expect(envLines).toContain("NEMOCLAW_PROVIDER='build'");
     expect(envLines).toContain("CHAT_UI_URL='https://chat.example.com'");
     expect(envLines).toContain("NEMOCLAW_POLICY_MODE='suggested'");
-    expect(envLines).toContain("NVIDIA_API_KEY='nvapi-test'");
+    expect(envLines).toContain("NVIDIA_INFERENCE_API_KEY='nvapi-test'");
   });
 
   it("passes ALLOWED_CHAT_IDS through when Telegram is configured", () => {
@@ -85,7 +85,7 @@ describe("buildDeployEnvLines", () => {
       sandboxName: "my-assistant",
       provider: "build",
       credentials: {
-        NVIDIA_API_KEY: "nvapi-test",
+        NVIDIA_INFERENCE_API_KEY: "nvapi-test",
         HF_TOKEN: "hf_abc123",
         HUGGING_FACE_HUB_TOKEN: "hf_def456",
       },
@@ -126,7 +126,7 @@ describe("executeDeploy", () => {
         NEMOCLAW_SANDBOX_NAME: "my-box",
       },
       rootDir: "/repo/root",
-      getCredential: (key: string) => (key === "NVIDIA_API_KEY" ? "nvapi-test" : null),
+      getCredential: (key: string) => (key === "NVIDIA_INFERENCE_API_KEY" ? "nvapi-test" : null),
       validateName: (value: string) => value,
       shellQuote: (value: string) => `'${value}'`,
       run: (command: readonly string[]) => {
diff --git a/src/lib/deploy/index.ts b/src/lib/deploy/index.ts
index 8133f86b2f..65733f1a65 100644
--- a/src/lib/deploy/index.ts
+++ b/src/lib/deploy/index.ts
@@ -28,7 +28,7 @@ function readCommandOutput(error: object | null, key: "stdout" | "stderr"): stri
 }
 
 export interface DeployCredentials {
-  NVIDIA_API_KEY?: string | null;
+  NVIDIA_INFERENCE_API_KEY?: string | null;
   OPENAI_API_KEY?: string | null;
   ANTHROPIC_API_KEY?: string | null;
   GEMINI_API_KEY?: string | null;
@@ -117,7 +117,7 @@ export function inferDeployProvider(
   if (explicit) return explicit;
 
   const providerByCredential: Array<[keyof DeployCredentials, string]> = [
-    ["NVIDIA_API_KEY", "build"],
+    ["NVIDIA_INFERENCE_API_KEY", "build"],
     ["OPENAI_API_KEY", "openai"],
     ["ANTHROPIC_API_KEY", "anthropic"],
     ["GEMINI_API_KEY", "gemini"],
@@ -308,7 +308,9 @@ export async function executeDeploy(opts: DeployExecutionOptions): Promise<void>
     exit,
   });
   const credentials: DeployCredentials = {
-    NVIDIA_API_KEY: getCredential("NVIDIA_API_KEY"),
+    // Accept the legacy NVIDIA_API_KEY name so existing setups can still deploy.
+    NVIDIA_INFERENCE_API_KEY:
+      getCredential("NVIDIA_INFERENCE_API_KEY") || getCredential("NVIDIA_API_KEY"),
     OPENAI_API_KEY: getCredential("OPENAI_API_KEY"),
     ANTHROPIC_API_KEY: getCredential("ANTHROPIC_API_KEY"),
     GEMINI_API_KEY: getCredential("GEMINI_API_KEY"),
@@ -328,7 +330,7 @@ export async function executeDeploy(opts: DeployExecutionOptions): Promise<void>
       [
         "  Could not determine which inference provider to configure for remote onboarding.",
         "  Set `NEMOCLAW_PROVIDER` explicitly or provide exactly one matching provider credential.",
-        "  Supported provider credentials: NVIDIA_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY, COMPATIBLE_API_KEY, COMPATIBLE_ANTHROPIC_API_KEY.",
+        "  Supported provider credentials: NVIDIA_INFERENCE_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY, COMPATIBLE_API_KEY, COMPATIBLE_ANTHROPIC_API_KEY.",
       ],
       error,
       exit,
diff --git a/src/lib/diagnostics/debug.test.ts b/src/lib/diagnostics/debug.test.ts
index 054b4f6492..23716555e9 100644
--- a/src/lib/diagnostics/debug.test.ts
+++ b/src/lib/diagnostics/debug.test.ts
@@ -17,7 +17,7 @@ import {
 } from "../../../dist/lib/diagnostics/debug";
 
 describe("redact", () => {
-  it("redacts NVIDIA_API_KEY=value patterns", () => {
-    const key = ["NVIDIA", "API", "KEY"].join("_");
+  it("redacts NVIDIA_INFERENCE_API_KEY=value patterns", () => {
+    const key = ["NVIDIA", "INFERENCE", "API", "KEY"].join("_");
     expect(redact(`${key}=some-value`)).toBe(`${key}=<REDACTED>`);
   });
diff --git a/src/lib/diagnostics/debug.ts b/src/lib/diagnostics/debug.ts
index a41e70d98b..862c07d3c1 100644
--- a/src/lib/diagnostics/debug.ts
+++ b/src/lib/diagnostics/debug.ts
@@ -440,11 +440,11 @@ function collectNetwork(collectDir: string): void {
     collect(collectDir, "ip-route", "ip", ["route"]);
     collectShell(collectDir, "resolv-conf", "cat /etc/resolv.conf");
   }
-  collect(collectDir, "nslookup", "nslookup", ["integrate.api.nvidia.com"]);
+  collect(collectDir, "nslookup", "nslookup", ["inference-api.nvidia.com"]);
   collectShell(
     collectDir,
     "curl-models",
-    'code=$(curl -s -o /dev/null -w "%{http_code}" https://integrate.api.nvidia.com/v1/models); echo "HTTP $code"; if [ "$code" -ge 200 ] && [ "$code" -lt 500 ]; then echo "NIM API reachable"; else echo "NIM API unreachable"; exit 1; fi',
+    'code=$(curl -s -o /dev/null -w "%{http_code}" https://inference-api.nvidia.com/v1/models); echo "HTTP $code"; if [ "$code" -ge 200 ] && [ "$code" -lt 500 ]; then echo "NIM API reachable"; else echo "NIM API unreachable"; exit 1; fi',
   );
   collectShell(collectDir, "lsof-net", "lsof -i -P -n 2>/dev/null | head -50");
   collect(collectDir, "lsof-18789", "lsof", ["-i", `:${DASHBOARD_PORT}`]);
diff --git a/src/lib/inference/health.test.ts b/src/lib/inference/health.test.ts
index 65c337aede..6e3c6f8569 100644
--- a/src/lib/inference/health.test.ts
+++ b/src/lib/inference/health.test.ts
@@ -206,7 +206,8 @@ describe("inference health", () => {
       let authConfigContent = "";
       const result = probeRemoteProviderHealth("nvidia-prod", {
         model: "moonshotai/kimi-k2.6",
-        getCredentialImpl: (envName) => (envName === "NVIDIA_API_KEY" ? "nvapi-test" : null),
+        getCredentialImpl: (envName) =>
+          envName === "NVIDIA_INFERENCE_API_KEY" ? "nvapi-test" : null,
         runCurlProbeImpl: (argv) => {
           capturedArgv = argv;
           const configIndex = argv.indexOf("--config");
@@ -268,7 +269,7 @@ describe("inference health", () => {
       expect(result?.ok).toBe(true);
       expect(result?.probed).toBe(false);
       expect(result?.endpoint).toBe(`${BUILD_ENDPOINT_URL}/chat/completions`);
-      expect(result?.detail).toContain("NVIDIA_API_KEY");
+      expect(result?.detail).toContain("NVIDIA_INFERENCE_API_KEY");
       expect(result?.detail).toContain("provider-level /models");
     });
 
diff --git a/src/lib/inference/health.ts b/src/lib/inference/health.ts
index 2cdb7b70ff..8ca7518cc1 100644
--- a/src/lib/inference/health.ts
+++ b/src/lib/inference/health.ts
@@ -50,7 +50,7 @@ export interface ProviderHealthProbeOptions {
 
 const COMPATIBLE_PROVIDERS = new Set(["compatible-endpoint", "compatible-anthropic-endpoint"]);
 const NVIDIA_MANAGED_PROVIDERS = new Set(["nvidia-prod", "nvidia-nim"]);
-const NVIDIA_HEALTH_CREDENTIAL_ENV = "NVIDIA_API_KEY";
+const NVIDIA_HEALTH_CREDENTIAL_ENV = "NVIDIA_INFERENCE_API_KEY";
 const KIMI_K26_MODEL = "moonshotai/kimi-k2.6";
 const KIMI_STATUS_CONNECT_TIMEOUT_SECONDS = "3";
 const KIMI_STATUS_MAX_TIME_SECONDS = "5";
diff --git a/src/lib/inference/model-prompts.test.ts b/src/lib/inference/model-prompts.test.ts
index 05e6a5e725..9f16f524a6 100644
--- a/src/lib/inference/model-prompts.test.ts
+++ b/src/lib/inference/model-prompts.test.ts
@@ -54,13 +54,13 @@ describe("model prompt helpers", () => {
       getCredentialFn: () => "nvapi-test",
       validateNvidiaEndpointModelFn: (model) => ({
         ok: model === "nemotron-custom",
-        message: `Model '${model}' is not available from NVIDIA Endpoints. Checked https://integrate.api.nvidia.com/v1/models.`,
+        message: `Model '${model}' is not available from NVIDIA Endpoints. Checked https://inference-api.nvidia.com/v1/models.`,
       }),
     });
 
     expect(result).toBe("nemotron-custom");
     expect(errorLine).toHaveBeenCalledWith(
-      "  Model 'bad-model' is not available from NVIDIA Endpoints. Checked https://integrate.api.nvidia.com/v1/models.",
+      "  Model 'bad-model' is not available from NVIDIA Endpoints. Checked https://inference-api.nvidia.com/v1/models.",
     );
   });
 
@@ -76,7 +76,7 @@ describe("model prompt helpers", () => {
 
     expect(result).toBe(BACK_TO_SELECTION);
     expect(errorLine).toHaveBeenCalledWith(
-      "  NVIDIA_API_KEY is required before validating a custom NVIDIA Endpoints model.",
+      "  NVIDIA_INFERENCE_API_KEY is required before validating a custom NVIDIA Endpoints model.",
     );
   });
 
diff --git a/src/lib/inference/model-prompts.ts b/src/lib/inference/model-prompts.ts
index 0784e62630..ad173656c6 100644
--- a/src/lib/inference/model-prompts.ts
+++ b/src/lib/inference/model-prompts.ts
@@ -162,10 +162,10 @@ export async function promptCloudModel(
     return deps.cloudModelOptions[index].id;
   }
 
-  const nvidiaApiKey = deps.getCredentialFn("NVIDIA_API_KEY");
+  const nvidiaApiKey = deps.getCredentialFn("NVIDIA_INFERENCE_API_KEY");
   if (!nvidiaApiKey) {
     deps.errorLine(
-      "  NVIDIA_API_KEY is required before validating a custom NVIDIA Endpoints model.",
+      "  NVIDIA_INFERENCE_API_KEY is required before validating a custom NVIDIA Endpoints model.",
     );
     return deps.backToSelection;
   }
diff --git a/src/lib/inference/nim.test.ts b/src/lib/inference/nim.test.ts
index 2ce0f9bb53..34591592a0 100644
--- a/src/lib/inference/nim.test.ts
+++ b/src/lib/inference/nim.test.ts
@@ -1705,9 +1705,9 @@ describe("nim", () => {
     });
 
     it("falls back to process.env.NGC_API_KEY when no opts key is supplied", () => {
-      const prev = { ngc: process.env.NGC_API_KEY, nv: process.env.NVIDIA_API_KEY };
+      const prev = { ngc: process.env.NGC_API_KEY, nv: process.env.NVIDIA_INFERENCE_API_KEY };
       process.env.NGC_API_KEY = "nvapi-env-ngc";
-      delete process.env.NVIDIA_API_KEY;
+      delete process.env.NVIDIA_INFERENCE_API_KEY;
       const run = vi.fn();
       const { nimModule, restore } = loadNimWithMockedRunner(
         vi.fn(() => ""),
@@ -1728,14 +1728,14 @@ describe("nim", () => {
         restore();
         if (prev.ngc === undefined) delete process.env.NGC_API_KEY;
         else process.env.NGC_API_KEY = prev.ngc;
-        if (prev.nv !== undefined) process.env.NVIDIA_API_KEY = prev.nv;
+        if (prev.nv !== undefined) process.env.NVIDIA_INFERENCE_API_KEY = prev.nv;
       }
     });
 
-    it("falls back to process.env.NVIDIA_API_KEY when NGC_API_KEY is unset", () => {
-      const prev = { ngc: process.env.NGC_API_KEY, nv: process.env.NVIDIA_API_KEY };
+    it("falls back to process.env.NVIDIA_INFERENCE_API_KEY when NGC_API_KEY is unset", () => {
+      const prev = { ngc: process.env.NGC_API_KEY, nv: process.env.NVIDIA_INFERENCE_API_KEY };
       delete process.env.NGC_API_KEY;
-      process.env.NVIDIA_API_KEY = "nvapi-env-nvidia";
+      process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-env-nvidia";
       const run = vi.fn();
       const { nimModule, restore } = loadNimWithMockedRunner(
         vi.fn(() => ""),
@@ -1752,15 +1752,15 @@ describe("nim", () => {
       } finally {
         restore();
         if (prev.ngc !== undefined) process.env.NGC_API_KEY = prev.ngc;
-        if (prev.nv === undefined) delete process.env.NVIDIA_API_KEY;
-        else process.env.NVIDIA_API_KEY = prev.nv;
+        if (prev.nv === undefined) delete process.env.NVIDIA_INFERENCE_API_KEY;
+        else process.env.NVIDIA_INFERENCE_API_KEY = prev.nv;
       }
     });
 
     it("omits env flags when no key is available", () => {
-      const prev = { ngc: process.env.NGC_API_KEY, nv: process.env.NVIDIA_API_KEY };
+      const prev = { ngc: process.env.NGC_API_KEY, nv: process.env.NVIDIA_INFERENCE_API_KEY };
       delete process.env.NGC_API_KEY;
-      delete process.env.NVIDIA_API_KEY;
+      delete process.env.NVIDIA_INFERENCE_API_KEY;
       const run = vi.fn();
       const { nimModule, restore } = loadNimWithMockedRunner(
         vi.fn(() => ""),
@@ -1779,7 +1779,7 @@ describe("nim", () => {
       } finally {
         restore();
         if (prev.ngc !== undefined) process.env.NGC_API_KEY = prev.ngc;
-        if (prev.nv !== undefined) process.env.NVIDIA_API_KEY = prev.nv;
+        if (prev.nv !== undefined) process.env.NVIDIA_INFERENCE_API_KEY = prev.nv;
       }
     });
   });
diff --git a/src/lib/inference/nim.ts b/src/lib/inference/nim.ts
index 5c35fd9b4c..5503eeed51 100644
--- a/src/lib/inference/nim.ts
+++ b/src/lib/inference/nim.ts
@@ -821,12 +821,13 @@ export function startNimContainerByName(
     process.exit(1);
   }
 
-  // Resolve the NGC key: explicit arg wins, then NGC_API_KEY, then NVIDIA_API_KEY
+  // Resolve the NGC key: explicit arg wins, then NGC_API_KEY, then NVIDIA_INFERENCE_API_KEY
   // (covers users who only set the NVIDIA key for cloud inference but reuse it
   // against NGC). Without this, NIM's in-container model-manifest download
   // returns "Authentication Error" and the container exits 0 a few seconds in.
   // Regression of #210 — see #3333.
-  const ngcApiKey = opts.ngcApiKey ?? process.env.NGC_API_KEY ?? process.env.NVIDIA_API_KEY ?? "";
+  const ngcApiKey =
+    opts.ngcApiKey ?? process.env.NGC_API_KEY ?? process.env.NVIDIA_INFERENCE_API_KEY ?? "";
   // Use `-e KEY` (no value) so the secret never appears in argv; pass the
   // value through the spawn env instead. Docker reads each named var from
   // its own process env and forwards it to the container.
diff --git a/src/lib/inference/onboard-probes.test.ts b/src/lib/inference/onboard-probes.test.ts
index 8fbfacfd63..808db27181 100644
--- a/src/lib/inference/onboard-probes.test.ts
+++ b/src/lib/inference/onboard-probes.test.ts
@@ -321,7 +321,7 @@ describe("OpenAI-compatible inference probes", () => {
       const args = getChatCompletionsProbeCurlArgs({
         authHeader: ["-H", "Authorization: Bearer nvapi-test"],
         model,
-        url: "https://integrate.api.nvidia.com/v1/chat/completions",
+        url: "https://inference-api.nvidia.com/v1/chat/completions",
         isWsl: false,
       });
       expect(args[args.indexOf("--connect-timeout") + 1]).toBe("10");
@@ -331,7 +331,7 @@ describe("OpenAI-compatible inference probes", () => {
     const wslArgs = getChatCompletionsProbeCurlArgs({
       authHeader: ["-H", "Authorization: Bearer nvapi-test"],
       model: "qwen/qwen3.5-397b-a17b",
-      url: "https://integrate.api.nvidia.com/v1/chat/completions",
+      url: "https://inference-api.nvidia.com/v1/chat/completions",
       isWsl: true,
     });
     expect(wslArgs[wslArgs.indexOf("--connect-timeout") + 1]).toBe("30");
@@ -362,7 +362,7 @@ describe("OpenAI-compatible inference probes", () => {
     const args = getChatCompletionsProbeCurlArgs({
       authHeader: ["-H", "Authorization: Bearer nvapi-test"],
       model: "moonshotai/kimi-k2.6",
-      url: "https://integrate.api.nvidia.com/v1/chat/completions",
+      url: "https://inference-api.nvidia.com/v1/chat/completions",
       isWsl: false,
     });
 
@@ -388,7 +388,7 @@ describe("OpenAI-compatible inference probes", () => {
     const args = getChatCompletionsProbeCurlArgs({
       authHeader: ["-H", "Authorization: Bearer nvapi-test"],
       model: "deepseek-ai/deepseek-v4-pro",
-      url: "https://integrate.api.nvidia.com/v1/chat/completions",
+      url: "https://inference-api.nvidia.com/v1/chat/completions",
       isWsl: false,
     });
 
@@ -504,7 +504,7 @@ exit 0
       console.log = (...args) => lines.push(args.join(" "));
       try {
         const result = probeOpenAiLikeEndpoint(
-          "https://integrate.api.nvidia.com/v1",
+          "https://inference-api.nvidia.com/v1",
           "nvidia/nemotron-3-super-120b-a12b",
           "nvapi-test",
           { skipResponsesProbe: true },
@@ -858,7 +858,7 @@ exit 0
       console.log = (...args) => lines.push(args.join(" "));
       try {
         const result = probeOpenAiLikeEndpoint(
-          "https://integrate.api.nvidia.com/v1",
+          "https://inference-api.nvidia.com/v1",
           "nvidia/nemotron-3-super-120b-a12b",
           "nvapi-test",
           { skipResponsesProbe: true },
@@ -911,7 +911,7 @@ exit 28
     console.log = (...args) => lines.push(args.join(" "));
     try {
       const result = probeOpenAiLikeEndpoint(
-        "https://integrate.api.nvidia.com/v1",
+        "https://inference-api.nvidia.com/v1",
         "deepseek-ai/deepseek-v4-pro",
         "nvapi-test",
         { skipResponsesProbe: true },
diff --git a/src/lib/inference/provider-models.ts b/src/lib/inference/provider-models.ts
index 2d219599fa..f78e543b8d 100644
--- a/src/lib/inference/provider-models.ts
+++ b/src/lib/inference/provider-models.ts
@@ -8,7 +8,7 @@ import type { ModelCatalogFetchResult, ModelValidationResult } from "../onboard/
 // credentials.ts still uses CommonJS-style exports.
 const { normalizeCredentialValue } = require("../credentials/store");
 
-export const BUILD_ENDPOINT_URL = "https://integrate.api.nvidia.com/v1";
+export const BUILD_ENDPOINT_URL = "https://inference-api.nvidia.com/v1";
 
 export interface ProviderModelOptions {
   runCurlProbeImpl?: (argv: string[]) => CurlProbeResult;
diff --git a/src/lib/messaging-channel-config.test.ts b/src/lib/messaging-channel-config.test.ts
index 50a98af1fc..b02268d270 100644
--- a/src/lib/messaging-channel-config.test.ts
+++ b/src/lib/messaging-channel-config.test.ts
@@ -33,7 +33,7 @@ describe("messaging channel config", () => {
         DISCORD_REQUIRE_MENTION: "0",
         SLACK_ALLOWED_USERS: "  U01ABC2DEF3, U04GHI5JKL6  ",
         SLACK_ALLOWED_CHANNELS: "  C012AB3CD, C987ZY6XW  ",
-        NVIDIA_API_KEY: "not-channel-config",
+        NVIDIA_INFERENCE_API_KEY: "not-channel-config",
       }),
     ).toEqual({
       TELEGRAM_ALLOWED_IDS: "123,456",
diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts
index 4b82dfb7d6..9ec40237d9 100644
--- a/src/lib/onboard.ts
+++ b/src/lib/onboard.ts
@@ -3646,17 +3646,17 @@ async function setupNim(
         hydrateCredentialEnv(credentialEnv);
 
         if (selected.key === "build") {
-          // Allow NEMOCLAW_PROVIDER_KEY as a fallback for NVIDIA_API_KEY.
+          // Allow NEMOCLAW_PROVIDER_KEY as a fallback for NVIDIA_INFERENCE_API_KEY.
           // Check raw process.env first — NEMOCLAW_PROVIDER_KEY is a user-facing
           // override that should take precedence before resolving from credentials.json.
           const _nvProviderKey = (process.env.NEMOCLAW_PROVIDER_KEY || "").trim();
-          // check-direct-credential-env-ignore -- intentional: checking if env is already set before applying NEMOCLAW_PROVIDER_KEY override
-          const existingNvidiaKey = normalizeCredentialValue(process.env.NVIDIA_API_KEY ?? "");
+          const existingNvidiaKey =
+            getCredential("NVIDIA_INFERENCE_API_KEY") || getCredential("NVIDIA_API_KEY") || "";
           if (_nvProviderKey && !existingNvidiaKey) {
-            process.env.NVIDIA_API_KEY = _nvProviderKey;
+            process.env.NVIDIA_INFERENCE_API_KEY = _nvProviderKey;
           }
           if (isNonInteractive()) {
-            const resolvedNvidiaKey = resolveProviderCredential("NVIDIA_API_KEY");
+            const resolvedNvidiaKey = resolveProviderCredential("NVIDIA_INFERENCE_API_KEY");
             if (resolvedNvidiaKey) {
               const keyError = validateNvidiaApiKeyValue(resolvedNvidiaKey);
               if (keyError) {
@@ -4015,7 +4015,8 @@ async function setupNim(
             // answer falls through to startNimContainerByName's warning so
             // we don't double-fail in non-interactive callers.
             ngcApiKey =
-              hydrateCredentialEnv("NGC_API_KEY") || hydrateCredentialEnv("NVIDIA_API_KEY");
+              hydrateCredentialEnv("NGC_API_KEY") ||
+              hydrateCredentialEnv("NVIDIA_INFERENCE_API_KEY");
             if (!ngcApiKey && !isNonInteractive()) {
               console.log("");
               console.log("  NGC API Key required to download NIM model weights at runtime.");
diff --git a/src/lib/onboard/bridge-dns-preflight.ts b/src/lib/onboard/bridge-dns-preflight.ts
index e37cce3f77..9d346c2465 100644
--- a/src/lib/onboard/bridge-dns-preflight.ts
+++ b/src/lib/onboard/bridge-dns-preflight.ts
@@ -278,7 +278,7 @@ function hostDnsPreflightSkipped(env: NodeJS.ProcessEnv = process.env): boolean
 }
 
 // `NEMOCLAW_PROVIDER` keys that resolve to NVIDIA-hosted endpoints
-// (integrate.api.nvidia.com). Mirrors the aliases in
+// (inference-api.nvidia.com). Mirrors the aliases in
 // `onboard/providers.ts::getNonInteractiveProvider`. Local/custom and
 // other hosted providers (ollama, vllm, openai, anthropic, nim-local, …)
 // do not need this host, so the NVIDIA host DNS probe must not gate them.
@@ -286,7 +286,7 @@ const NVIDIA_ENDPOINT_PROVIDER_KEYS = new Set(["build", "cloud", "routed"]);
 
 /**
  * Whether onboarding's effective inference provider is NVIDIA Endpoints,
- * so the `integrate.api.nvidia.com` host DNS probe is relevant.
+ * so the `inference-api.nvidia.com` host DNS probe is relevant.
  *
  * `NEMOCLAW_PROVIDER` is honored only in non-interactive mode (mirroring
  * `getRequestedProviderHint`), where an unset value defaults to NVIDIA
diff --git a/src/lib/onboard/docker-gpu-patch.test.ts b/src/lib/onboard/docker-gpu-patch.test.ts
index b52ade24a9..29dd4c81b7 100644
--- a/src/lib/onboard/docker-gpu-patch.test.ts
+++ b/src/lib/onboard/docker-gpu-patch.test.ts
@@ -239,7 +239,7 @@ describe("docker-gpu-patch", () => {
 
   it("formats sanitized network diagnostics without dumping provider secrets", () => {
     const inspect = inspectFixture();
-    inspect.Config?.Env?.push("NVIDIA_API_KEY=secret");
+    inspect.Config?.Env?.push("NVIDIA_INFERENCE_API_KEY=secret");
 
     const summary = formatDockerInspectNetworkSummary("old-container-id", inspect);
 
@@ -248,7 +248,7 @@ describe("docker-gpu-patch", () => {
     expect(summary).toContain("host.openshell.internal:172.17.0.1");
     expect(summary).toContain("env.OPENSHELL_ENDPOINT=http://host.openshell.internal:8080/");
     expect(summary).toContain("openshell-docker: ip=172.18.0.2 gateway=172.18.0.1");
-    expect(summary).not.toContain("NVIDIA_API_KEY");
+    expect(summary).not.toContain("NVIDIA_INFERENCE_API_KEY");
     expect(summary).not.toContain("secret");
   });
 
@@ -765,12 +765,12 @@ describe("docker-gpu-patch sandbox DNS fallback (#3579)", () => {
     );
   });
 
-  it("regression manifest: host.openshell.internal + google.com + gateway.discord.gg + integrate.api.nvidia.com (#3579 manager spec)", () => {
+  it("regression manifest: host.openshell.internal + google.com + gateway.discord.gg + inference-api.nvidia.com (#3579 manager spec)", () => {
     // The four hostnames called out in #3579's manager-provided spec:
     //   host.openshell.internal      → resolved via --add-host (mount namespace)
     //   google.com                   → public DNS via embedded Docker resolver
     //   gateway.discord.gg           → public DNS via embedded Docker resolver
-    //   integrate.api.nvidia.com     → public DNS via embedded Docker resolver
+    //   inference-api.nvidia.com     → public DNS via embedded Docker resolver
     //
     // Unit-testable invariants that together cover all four:
     //   1. --add-host preserves the host.openshell.internal mapping
@@ -788,7 +788,7 @@ describe("docker-gpu-patch sandbox DNS fallback (#3579)", () => {
     expect(args).toEqual(
       expect.arrayContaining(["--add-host", "host.openshell.internal:172.17.0.1"]),
     );
-    // google.com / gateway.discord.gg / integrate.api.nvidia.com — covered by
+    // google.com / gateway.discord.gg / inference-api.nvidia.com — covered by
     // (a) not pinning --network=host and (b) injecting --dns when the host
     // has a loopback-only resolver.
     expect(args).not.toEqual(expect.arrayContaining(["--network", "host"]));
diff --git a/src/lib/onboard/host-dns-preflight.test.ts b/src/lib/onboard/host-dns-preflight.test.ts
index c522875e0c..fde6432bb5 100644
--- a/src/lib/onboard/host-dns-preflight.test.ts
+++ b/src/lib/onboard/host-dns-preflight.test.ts
@@ -4,7 +4,7 @@
 // Host DNS preflight (#4784): the CLI process must be able to resolve the
 // provider endpoint over port 53. A host OUTPUT chain that drops tcp/udp:53
 // lets the container DNS probe pass while later provider validation dies with
-// `curl: (6) Could not resolve host: integrate.api.nvidia.com`. These tests
+// `curl: (6) Could not resolve host: inference-api.nvidia.com`. These tests
 // cover the host-side probe (preflight.ts) plus the gate and remediation that
 // surface it before provider validation (bridge-dns-preflight.ts).
 
@@ -34,7 +34,7 @@ describe("probeHostDns (#4784)", () => {
       runProbeImpl: exec({ stdout: "HOSTDNS_OK 1.2.3.4,5.6.7.8", exitCode: 0 }),
     });
     expect(result.ok).toBe(true);
-    expect(result.hostname).toBe("integrate.api.nvidia.com");
+    expect(result.hostname).toBe("inference-api.nvidia.com");
     expect(result.reason).toBeUndefined();
     expect(isFatalHostDnsProbeFailure(result)).toBe(false);
   });
@@ -144,11 +144,11 @@ describe("printHostDnsRemediation (#4784)", () => {
     vi.spyOn(console, "error").mockImplementation((arg?: unknown) => {
       messages.push(String(arg ?? ""));
     });
-    printHostDnsRemediation({ platform: "linux", isWsl: false }, "integrate.api.nvidia.com");
+    printHostDnsRemediation({ platform: "linux", isWsl: false }, "inference-api.nvidia.com");
     const blob = messages.join("\n");
-    expect(blob).toContain("could not resolve integrate.api.nvidia.com");
+    expect(blob).toContain("could not resolve inference-api.nvidia.com");
     expect(blob).toContain("Container DNS may still look healthy");
-    expect(blob).toContain("curl: (6) Could not resolve host: integrate.api.nvidia.com");
+    expect(blob).toContain("curl: (6) Could not resolve host: inference-api.nvidia.com");
     expect(blob).toContain("--dport 53");
     expect(blob).toContain("NEMOCLAW_SKIP_HOST_DNS_PREFLIGHT=1");
     expect(blob).toContain("#4784");
@@ -187,7 +187,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
       env: {},
       nonInteractive: true,
       exit,
-      probeHostDnsImpl: () => ({ ok: true, hostname: "integrate.api.nvidia.com" }),
+      probeHostDnsImpl: () => ({ ok: true, hostname: "inference-api.nvidia.com" }),
     });
     expect(logs.join("\n")).toContain("✓ Host DNS resolution works");
     expect(exit).not.toHaveBeenCalled();
@@ -205,15 +205,15 @@ describe("assertHostDnsHealthy (#4784)", () => {
       exit,
       probeHostDnsImpl: () => ({
         ok: false,
-        hostname: "integrate.api.nvidia.com",
+        hostname: "inference-api.nvidia.com",
         reason: "servers_unreachable",
-        details: "dns.resolve integrate.api.nvidia.com: ECONNREFUSED",
+        details: "dns.resolve inference-api.nvidia.com: ECONNREFUSED",
       }),
     });
     expect(exit).toHaveBeenCalledWith(1);
     const blob = errors.join("\n");
     expect(blob).toContain("✗ Host DNS resolution failed");
-    expect(blob).toContain("could not resolve integrate.api.nvidia.com");
+    expect(blob).toContain("could not resolve inference-api.nvidia.com");
     expect(blob).toContain("--dport 53");
   });
 
@@ -229,9 +229,9 @@ describe("assertHostDnsHealthy (#4784)", () => {
       exit,
       probeHostDnsImpl: () => ({
         ok: false,
-        hostname: "integrate.api.nvidia.com",
+        hostname: "inference-api.nvidia.com",
         reason: "resolution_failed",
-        details: "dns.resolve integrate.api.nvidia.com: ENOTFOUND",
+        details: "dns.resolve inference-api.nvidia.com: ENOTFOUND",
       }),
     });
     expect(exit).toHaveBeenCalledWith(1);
@@ -248,7 +248,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
       exit,
       probeHostDnsImpl: () => ({
         ok: false,
-        hostname: "integrate.api.nvidia.com",
+        hostname: "inference-api.nvidia.com",
         reason: "error",
         details: "spawn node ENOENT",
       }),
@@ -261,7 +261,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
     const logs: string[] = [];
     vi.spyOn(console, "log").mockImplementation((arg?: unknown) => logs.push(String(arg ?? "")));
     const exit = vi.fn();
-    const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
+    const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
     assertHostDnsHealthy(host, {
       env: { NEMOCLAW_SKIP_HOST_DNS_PREFLIGHT: "1" },
       exit,
@@ -274,7 +274,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
 
   it("skips silently (no probe, no exit) when a non-NVIDIA provider is selected (codex P2)", () => {
     const exit = vi.fn();
-    const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
+    const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
     // A user who picked a local/non-NVIDIA provider must not be blocked by
     // NVIDIA-domain DNS even if their host cannot resolve it — including in
     // non-interactive mode where the choice is explicit.
@@ -292,7 +292,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
 
   it("skips an unset provider in interactive mode (provider not yet chosen — codex P2)", () => {
     const exit = vi.fn();
-    const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
+    const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
     // Fresh interactive onboarding hits preflight before the provider menu;
     // it may end up on Ollama/vLLM, so an NVIDIA-DNS block must not abort here.
     assertHostDnsHealthy(host, {
@@ -308,7 +308,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
   it("runs for an unset provider only in non-interactive mode (NVIDIA Endpoints default)", () => {
     const exit = vi.fn();
     vi.spyOn(console, "log").mockImplementation(() => {});
-    const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
+    const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
     assertHostDnsHealthy(host, {
       env: {},
       nonInteractive: true,
@@ -323,7 +323,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
     const exit = vi.fn();
     vi.spyOn(console, "log").mockImplementation(() => {});
     for (const provider of ["build", "cloud", "routed"]) {
-      const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
+      const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
       assertHostDnsHealthy(host, {
         env: { NEMOCLAW_PROVIDER: provider },
         nonInteractive: true,
@@ -337,7 +337,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
 
   it("ignores NEMOCLAW_PROVIDER in interactive mode (onboard ignores it there too)", () => {
     const exit = vi.fn();
-    const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
+    const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
     // Interactive onboarding ignores NEMOCLAW_PROVIDER and shows the menu, so
     // we must not assume NVIDIA from it before the user has chosen.
     assertHostDnsHealthy(host, {
@@ -352,9 +352,9 @@ describe("assertHostDnsHealthy (#4784)", () => {
 
   it("skips an explicit local NIM provider (nim-local) in non-interactive mode", () => {
     const exit = vi.fn();
-    const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
+    const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
     // `nim-local` runs NIM locally and validates against localhost, not
-    // integrate.api.nvidia.com, so the NVIDIA host DNS probe must not gate it.
+    // inference-api.nvidia.com, so the NVIDIA host DNS probe must not gate it.
     assertHostDnsHealthy(host, {
       env: { NEMOCLAW_PROVIDER: "nim-local" },
       nonInteractive: true,
@@ -369,7 +369,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
     const logs: string[] = [];
     vi.spyOn(console, "log").mockImplementation((arg?: unknown) => logs.push(String(arg ?? "")));
     const exit = vi.fn();
-    const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
+    const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
     assertHostDnsHealthy(host, {
       env: { NEMOCLAW_PROVIDER: "build", HTTPS_PROXY: "http://proxy.corp:3128" },
       nonInteractive: true,
@@ -384,7 +384,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
   it("still runs when NO_PROXY exempts the provider host from the proxy", () => {
     vi.spyOn(console, "log").mockImplementation(() => {});
     const exit = vi.fn();
-    const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
+    const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
     assertHostDnsHealthy(host, {
       env: {
         NEMOCLAW_PROVIDER: "build",
diff --git a/src/lib/onboard/initial-policy.test.ts b/src/lib/onboard/initial-policy.test.ts
index 4343b3e8fe..72d8971a25 100644
--- a/src/lib/onboard/initial-policy.test.ts
+++ b/src/lib/onboard/initial-policy.test.ts
@@ -116,7 +116,7 @@ network_policies:
   nvidia:
     name: nvidia
     endpoints:
-      - host: integrate.api.nvidia.com
+      - host: inference-api.nvidia.com
         port: 443
 `);
     const gpuDoc = YAML.parse(gpuPolicy);
diff --git a/src/lib/onboard/machine/core-flow-phases.test.ts b/src/lib/onboard/machine/core-flow-phases.test.ts
index 712c92ad84..c7b262c6ec 100644
--- a/src/lib/onboard/machine/core-flow-phases.test.ts
+++ b/src/lib/onboard/machine/core-flow-phases.test.ts
@@ -87,7 +87,7 @@ function createPhases(
         model: "nvidia/test",
         provider: "nim",
         endpointUrl: "https://example.test/v1",
-        credentialEnv: "NVIDIA_API_KEY",
+        credentialEnv: "NVIDIA_INFERENCE_API_KEY",
         hermesAuthMethod: null,
         hermesToolGateways: ["local"],
         preferredInferenceApi: "chat",
@@ -196,7 +196,7 @@ describe("core onboard flow phases", () => {
       model: "nvidia/test",
       provider: "nim",
       endpointUrl: "https://example.test/v1",
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
       hermesToolGateways: ["local"],
       preferredInferenceApi: "chat",
       nimContainer: "nim-test",
diff --git a/src/lib/onboard/machine/flow-phases/provider-sandbox.test.ts b/src/lib/onboard/machine/flow-phases/provider-sandbox.test.ts
index e01dbdf54f..1431a4bebf 100644
--- a/src/lib/onboard/machine/flow-phases/provider-sandbox.test.ts
+++ b/src/lib/onboard/machine/flow-phases/provider-sandbox.test.ts
@@ -43,7 +43,7 @@ describe("provider/sandbox flow phases", () => {
         provider: "nvidia-prod",
         model: "model",
         endpointUrl: "https://example.com/v1",
-        credentialEnv: "NVIDIA_API_KEY",
+        credentialEnv: "NVIDIA_INFERENCE_API_KEY",
         preferredInferenceApi: "openai-responses",
       },
       result: [advanceTo("inference"), advanceTo("sandbox")],
diff --git a/src/lib/onboard/machine/handlers/finalization.test.ts b/src/lib/onboard/machine/handlers/finalization.test.ts
index 74cc073884..93f3d0e5d4 100644
--- a/src/lib/onboard/machine/handlers/finalization.test.ts
+++ b/src/lib/onboard/machine/handlers/finalization.test.ts
@@ -142,8 +142,8 @@ describe("handleFinalizationState", () => {
 
     await handleFinalizationState({
       ...baseOptions(deps),
-      stagedLegacyKeys: ["NVIDIA_API_KEY", "SLACK_BOT_TOKEN"],
-      migratedLegacyKeys: new Set(["NVIDIA_API_KEY", "SLACK_BOT_TOKEN"]),
+      stagedLegacyKeys: ["NVIDIA_INFERENCE_API_KEY", "SLACK_BOT_TOKEN"],
+      migratedLegacyKeys: new Set(["NVIDIA_INFERENCE_API_KEY", "SLACK_BOT_TOKEN"]),
     });
 
     expect(calls.removeLegacy).toHaveBeenCalledOnce();
@@ -155,8 +155,8 @@ describe("handleFinalizationState", () => {
 
     const result = await handleFinalizationState({
       ...baseOptions(deps),
-      stagedLegacyKeys: ["NVIDIA_API_KEY", "SLACK_BOT_TOKEN"],
-      migratedLegacyKeys: new Set(["NVIDIA_API_KEY"]),
+      stagedLegacyKeys: ["NVIDIA_INFERENCE_API_KEY", "SLACK_BOT_TOKEN"],
+      migratedLegacyKeys: new Set(["NVIDIA_INFERENCE_API_KEY"]),
     });
 
     expect(calls.removeLegacy).not.toHaveBeenCalled();
diff --git a/src/lib/onboard/machine/handlers/policies.test.ts b/src/lib/onboard/machine/handlers/policies.test.ts
index a5882446a1..3b1889119d 100644
--- a/src/lib/onboard/machine/handlers/policies.test.ts
+++ b/src/lib/onboard/machine/handlers/policies.test.ts
@@ -118,7 +118,7 @@ function baseOptions(
     provider: "provider",
     model: "model",
     endpointUrl: "https://example.com/v1",
-    credentialEnv: "NVIDIA_API_KEY",
+    credentialEnv: "NVIDIA_INFERENCE_API_KEY",
     selectedMessagingChannels: [],
     webSearchConfig: null,
     webSearchSupported: true,
@@ -139,7 +139,7 @@ describe("handlePoliciesState", () => {
       provider: "provider",
       model: "model",
       endpointUrl: "https://example.com/v1",
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
       messagingChannels: ["telegram"],
       agent: null,
     });
diff --git a/src/lib/onboard/machine/handlers/provider-inference.test.ts b/src/lib/onboard/machine/handlers/provider-inference.test.ts
index d6d2401464..b0b618c527 100644
--- a/src/lib/onboard/machine/handlers/provider-inference.test.ts
+++ b/src/lib/onboard/machine/handlers/provider-inference.test.ts
@@ -17,8 +17,8 @@ type Host = { cpus?: number };
 const baseSelection: ProviderSelectionResult = {
   model: "nvidia/test",
   provider: "nvidia-prod",
-  endpointUrl: "https://integrate.api.nvidia.com/v1",
-  credentialEnv: "NVIDIA_API_KEY",
+  endpointUrl: "https://inference-api.nvidia.com/v1",
+  credentialEnv: "NVIDIA_INFERENCE_API_KEY",
   hermesAuthMethod: null,
   hermesToolGateways: [],
   preferredInferenceApi: "openai-responses",
@@ -158,13 +158,13 @@ describe("handleProviderInferenceState", () => {
       "my-assistant",
       "nvidia/test",
       "nvidia-prod",
-      "https://integrate.api.nvidia.com/v1",
-      "NVIDIA_API_KEY",
+      "https://inference-api.nvidia.com/v1",
+      "NVIDIA_INFERENCE_API_KEY",
       null,
       [],
       { allowToolsIncompatible: false },
     );
-    expect(calls.deleteEnv).toHaveBeenCalledWith("NVIDIA_API_KEY");
+    expect(calls.deleteEnv).toHaveBeenCalledWith("NVIDIA_INFERENCE_API_KEY");
     expect(result).toMatchObject({
       sandboxName: "my-assistant",
       model: "nvidia/test",
@@ -374,7 +374,7 @@ describe("handleProviderInferenceState", () => {
       provider: "nvidia-router",
       model: "router/model",
       endpointUrl: "http://localhost:4000/v1",
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
     });
     session.steps.provider_selection.status = "complete";
     const { deps, calls } = createDeps({ isInferenceRouteReady: vi.fn(() => true) });
@@ -389,7 +389,7 @@ describe("handleProviderInferenceState", () => {
     expect(calls.reupsertRoutedProvider).toHaveBeenCalledWith(
       "nvidia-router",
       "http://localhost:4000/v1",
-      "NVIDIA_API_KEY",
+      "NVIDIA_INFERENCE_API_KEY",
     );
     expect(calls.setupInference).not.toHaveBeenCalled();
     expect(result.endpointUrl).toBe("http://host.openshell.internal:4000/v1");
diff --git a/src/lib/onboard/machine/runtime.test.ts b/src/lib/onboard/machine/runtime.test.ts
index cbff3a6f2a..fc9d4a81ad 100644
--- a/src/lib/onboard/machine/runtime.test.ts
+++ b/src/lib/onboard/machine/runtime.test.ts
@@ -193,14 +193,14 @@ describe("OnboardRuntime", () => {
     await runtime.updateContext({
       provider: "nvidia-prod",
       endpointUrl: "https://alice:secret@example.com/v1?token=super-secret&keep=yes#token=frag",
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
       apiKey: "super-secret",
     } as Parameters<typeof runtime.updateContext>[0] & { apiKey: string });
 
     expect(getSession()).toMatchObject({
       provider: "nvidia-prod",
       endpointUrl: "https://example.com/v1?token=%3CREDACTED%3E&keep=yes",
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
     });
     expect("apiKey" in getSession()).toBe(false);
     expect(events).toHaveLength(1);
@@ -295,11 +295,11 @@ describe("OnboardRuntime", () => {
   it("fails non-terminal sessions with redacted failure events", async () => {
     const { runtime, events, getSession } = createHarness(sessionInState("gateway"));
 
-    await runtime.fail("NVIDIA_API_KEY=super-secret", { step: "gateway" });
+    await runtime.fail("NVIDIA_INFERENCE_API_KEY=super-secret", { step: "gateway" });
 
     expect(getSession()).toMatchObject({
       status: "failed",
-      failure: { step: "gateway", message: "NVIDIA_API_KEY=<REDACTED>" },
+      failure: { step: "gateway", message: "NVIDIA_INFERENCE_API_KEY=<REDACTED>" },
       machine: { state: "failed", revision: 8 },
     });
     expect(events.map((event) => event.type)).toEqual(["state.failed", "onboard.failed"]);
diff --git a/src/lib/onboard/missing-credential-hints.ts b/src/lib/onboard/missing-credential-hints.ts
index bd154491ad..dbe9ca1c88 100644
--- a/src/lib/onboard/missing-credential-hints.ts
+++ b/src/lib/onboard/missing-credential-hints.ts
@@ -3,10 +3,10 @@
 
 export function logMissingNvidiaApiKeyHelp(helpUrl: string | null | undefined): void {
   console.error(
-    "  NVIDIA_API_KEY (or NEMOCLAW_PROVIDER_KEY) is required for NVIDIA Endpoints in non-interactive mode.",
+    "  NVIDIA_INFERENCE_API_KEY (or NEMOCLAW_PROVIDER_KEY) is required for NVIDIA Endpoints in non-interactive mode.",
   );
   console.error("  Set with:");
-  console.error("  export NVIDIA_API_KEY=nvapi-...");
+  console.error("  export NVIDIA_INFERENCE_API_KEY=nvapi-...");
   if (helpUrl) {
     console.error(`  Get a key from ${helpUrl}`);
   }
diff --git a/src/lib/onboard/model-router.ts b/src/lib/onboard/model-router.ts
index 05ec9bb250..cd95fdac01 100644
--- a/src/lib/onboard/model-router.ts
+++ b/src/lib/onboard/model-router.ts
@@ -48,7 +48,7 @@ const MODEL_ROUTER_FINGERPRINT_IGNORED_NAMES = new Set([
   "node_modules",
   "venv",
 ]);
-export const DEFAULT_MODEL_ROUTER_CREDENTIAL_ENV = "NVIDIA_API_KEY";
+export const DEFAULT_MODEL_ROUTER_CREDENTIAL_ENV = "NVIDIA_INFERENCE_API_KEY";
 
 export type BlueprintRouterConfig = {
   enabled?: boolean;
diff --git a/src/lib/onboard/preflight.ts b/src/lib/onboard/preflight.ts
index e2bfaa35de..8e260dc165 100644
--- a/src/lib/onboard/preflight.ts
+++ b/src/lib/onboard/preflight.ts
@@ -1926,13 +1926,13 @@ export function isFatalContainerDnsProbeFailure(result: DnsProbeResult): boolean
 // process itself cannot resolve the provider endpoint. That gap let
 // onboarding print "Container DNS resolution works" and then fail much
 // later at NVIDIA Endpoints validation with the cryptic
-// `curl: (6) Could not resolve host: integrate.api.nvidia.com`. This
+// `curl: (6) Could not resolve host: inference-api.nvidia.com`. This
 // probe resolves the provider hostname from the host (CLI) process so
 // the blocked-DNS condition surfaces up front, distinct from the
 // container-DNS path.
 
 /** The NVIDIA Endpoints provider host onboarding validates by default. */
-export const DEFAULT_HOST_DNS_PROBE_HOSTNAME = "integrate.api.nvidia.com";
+export const DEFAULT_HOST_DNS_PROBE_HOSTNAME = "inference-api.nvidia.com";
 
 /**
  * Host DNS probe budget (ms). Shorter than the container probe: there is
diff --git a/src/lib/onboard/providers.test.ts b/src/lib/onboard/providers.test.ts
index e4a390741b..edca6afaa5 100644
--- a/src/lib/onboard/providers.test.ts
+++ b/src/lib/onboard/providers.test.ts
@@ -60,8 +60,20 @@ describe("onboard provider helpers", () => {
   });
 
   it("builds update arguments", () => {
-    const args = buildProviderArgs("update", "inference", "openai", "NVIDIA_API_KEY", null);
-    expect(args).toEqual(["provider", "update", "inference", "--credential", "NVIDIA_API_KEY"]);
+    const args = buildProviderArgs(
+      "update",
+      "inference",
+      "openai",
+      "NVIDIA_INFERENCE_API_KEY",
+      null,
+    );
+    expect(args).toEqual([
+      "provider",
+      "update",
+      "inference",
+      "--credential",
+      "NVIDIA_INFERENCE_API_KEY",
+    ]);
   });
 
   it("appends OPENAI_BASE_URL config for openai providers with a base URL", () => {
@@ -69,7 +81,7 @@ describe("onboard provider helpers", () => {
       "create",
       "inference",
       "openai",
-      "NVIDIA_API_KEY",
+      "NVIDIA_INFERENCE_API_KEY",
       "https://api.example.com/v1",
     );
     expect(args).toContain("--config");
@@ -151,8 +163,8 @@ describe("onboard provider helpers", () => {
     const result = upsertProvider(
       "inference",
       "openai",
-      "NVIDIA_API_KEY",
-      "https://integrate.api.nvidia.com/v1",
+      "NVIDIA_INFERENCE_API_KEY",
+      "https://inference-api.nvidia.com/v1",
       {},
       (command) => {
         commands.push(command.join(" "));
@@ -165,7 +177,7 @@ describe("onboard provider helpers", () => {
     expect(commands[0]).toMatch(/provider get/);
     expect(commands[1]).toMatch(/provider update/);
     expect(commands[1]).toMatch(
-      /--config OPENAI_BASE_URL=https:\/\/integrate\.api\.nvidia\.com\/v1/,
+      /--config OPENAI_BASE_URL=https:\/\/inference-api\.nvidia\.com\/v1/,
     );
   });
 
@@ -174,8 +186,8 @@ describe("onboard provider helpers", () => {
     const result = upsertProvider(
       "nvidia-prod",
       "openai",
-      "NVIDIA_API_KEY",
-      "https://integrate.api.nvidia.com/v1",
+      "NVIDIA_INFERENCE_API_KEY",
+      "https://inference-api.nvidia.com/v1",
       {},
       (command) => {
         commands.push(command.join(" "));
@@ -190,7 +202,7 @@ describe("onboard provider helpers", () => {
     // OpenShell CLI rejects `--credential KEY` when the host env is empty;
     // dropping the flag turns the call into a no-op merge that succeeds.
     expect(commands[1]).not.toMatch(/--credential/);
-    expect(commands[1]).toMatch(/OPENAI_BASE_URL=https:\/\/integrate\.api\.nvidia\.com\/v1/);
+    expect(commands[1]).toMatch(/OPENAI_BASE_URL=https:\/\/inference-api\.nvidia\.com\/v1/);
   });
 
   it("keeps --credential on the create path even when env is empty", () => {
@@ -213,9 +225,9 @@ describe("onboard provider helpers", () => {
     upsertProvider(
       "nvidia-prod",
       "openai",
-      "NVIDIA_API_KEY",
+      "NVIDIA_INFERENCE_API_KEY",
       null,
-      { NVIDIA_API_KEY: "nvapi-staged" },
+      { NVIDIA_INFERENCE_API_KEY: "nvapi-staged" },
       (command) => {
         commands.push(command.join(" "));
         return { status: 0, stdout: "", stderr: "" };
@@ -224,7 +236,7 @@ describe("onboard provider helpers", () => {
 
     expect(commands).toHaveLength(2);
     expect(commands[1]).toMatch(/^provider update nvidia-prod /);
-    expect(commands[1]).toMatch(/--credential NVIDIA_API_KEY/);
+    expect(commands[1]).toMatch(/--credential NVIDIA_INFERENCE_API_KEY/);
   });
 
   it("returns redacted error details when create or update fails", () => {
diff --git a/src/lib/onboard/providers.ts b/src/lib/onboard/providers.ts
index b307da4f7f..4dc46313ff 100644
--- a/src/lib/onboard/providers.ts
+++ b/src/lib/onboard/providers.ts
@@ -17,7 +17,7 @@ const { compactText } = require("../core/url-utils");
 
 // ── Constants ────────────────────────────────────────────────────
 
-const BUILD_ENDPOINT_URL = "https://integrate.api.nvidia.com/v1";
+const BUILD_ENDPOINT_URL = "https://inference-api.nvidia.com/v1";
 const OPENAI_ENDPOINT_URL = "https://api.openai.com/v1";
 const ANTHROPIC_ENDPOINT_URL = "https://api.anthropic.com";
 const GEMINI_ENDPOINT_URL = "https://generativelanguage.googleapis.com/v1beta/openai/";
@@ -28,7 +28,7 @@ const REMOTE_PROVIDER_CONFIG = {
     label: "NVIDIA Endpoints",
     providerName: "nvidia-prod",
     providerType: "nvidia",
-    credentialEnv: "NVIDIA_API_KEY",
+    credentialEnv: "NVIDIA_INFERENCE_API_KEY",
     endpointUrl: BUILD_ENDPOINT_URL,
     helpUrl: "https://build.nvidia.com/settings/api-keys",
     modelMode: "catalog",
diff --git a/src/lib/onboard/routed-inference.test.ts b/src/lib/onboard/routed-inference.test.ts
index f23509c56c..63353540b4 100644
--- a/src/lib/onboard/routed-inference.test.ts
+++ b/src/lib/onboard/routed-inference.test.ts
@@ -12,7 +12,7 @@ vi.mock("../inference/local", () => ({
   HOST_GATEWAY_URL: "http://host.openshell.internal",
 }));
 vi.mock("./model-router", () => ({
-  DEFAULT_MODEL_ROUTER_CREDENTIAL_ENV: "NVIDIA_API_KEY",
+  DEFAULT_MODEL_ROUTER_CREDENTIAL_ENV: "NVIDIA_INFERENCE_API_KEY",
   loadBlueprintProfile: vi.fn(() => ({ endpoint: "http://localhost:4000/v1" })),
 }));
 
@@ -76,7 +76,7 @@ describe("resolveRoutedCredentialEnv (#4564)", () => {
 
   it("uses the NVIDIA default when no profile credential env is set", () => {
     const loadProfile = vi.fn(() => ({ endpoint: "http://localhost:4000/v1" })) as never;
-    expect(resolveRoutedCredentialEnv(null, loadProfile)).toBe("NVIDIA_API_KEY");
+    expect(resolveRoutedCredentialEnv(null, loadProfile)).toBe("NVIDIA_INFERENCE_API_KEY");
   });
 });
 
@@ -88,7 +88,7 @@ describe("upsertRoutedProvider (#4564)", () => {
     const result = upsertRoutedProvider(
       "nvidia-router",
       "http://localhost:4000/v1",
-      "NVIDIA_API_KEY",
+      "NVIDIA_INFERENCE_API_KEY",
       {
         upsertProvider,
         hydrateCredentialEnv,
@@ -97,13 +97,13 @@ describe("upsertRoutedProvider (#4564)", () => {
 
     expect(result.ok).toBe(true);
     expect(result.endpointUrl).toBe("http://host.openshell.internal:4000/v1");
-    expect(result.resolvedCredentialEnv).toBe("NVIDIA_API_KEY");
+    expect(result.resolvedCredentialEnv).toBe("NVIDIA_INFERENCE_API_KEY");
     expect(upsertProvider).toHaveBeenCalledWith(
       "nvidia-router",
       "openai",
-      "NVIDIA_API_KEY",
+      "NVIDIA_INFERENCE_API_KEY",
       "http://host.openshell.internal:4000/v1",
-      { NVIDIA_API_KEY: "nvapi-secret" },
+      { NVIDIA_INFERENCE_API_KEY: "nvapi-secret" },
     );
   });
 
@@ -116,11 +116,11 @@ describe("upsertRoutedProvider (#4564)", () => {
       hydrateCredentialEnv,
     });
 
-    expect(result.resolvedCredentialEnv).toBe("NVIDIA_API_KEY");
+    expect(result.resolvedCredentialEnv).toBe("NVIDIA_INFERENCE_API_KEY");
     expect(upsertProvider).toHaveBeenCalledWith(
       "nvidia-router",
       "openai",
-      "NVIDIA_API_KEY",
+      "NVIDIA_INFERENCE_API_KEY",
       "http://host.openshell.internal:4000/v1",
       {},
     );
@@ -133,7 +133,7 @@ describe("upsertRoutedProvider (#4564)", () => {
     const result = upsertRoutedProvider(
       "nvidia-router",
       "http://localhost:4000/v1",
-      "NVIDIA_API_KEY",
+      "NVIDIA_INFERENCE_API_KEY",
       {
         upsertProvider,
         hydrateCredentialEnv,
diff --git a/src/lib/onboard/routed-inference.ts b/src/lib/onboard/routed-inference.ts
index ebdc56f9c6..c9e866234a 100644
--- a/src/lib/onboard/routed-inference.ts
+++ b/src/lib/onboard/routed-inference.ts
@@ -77,8 +77,8 @@ export function normalizeRoutedEndpointUrl(
  * Mirrors `reconcileModelRouter()`'s resolution order so the gateway provider
  * is bound to the same key the router process reads: an explicit recorded env
  * first, then the routed blueprint profile's credential env, and only then the
- * `NVIDIA_API_KEY` default. Without the profile step a resume with no recorded
- * credential env would re-upsert the provider against `NVIDIA_API_KEY` even
+ * `NVIDIA_INFERENCE_API_KEY` default. Without the profile step a resume with no recorded
+ * credential env would re-upsert the provider against `NVIDIA_INFERENCE_API_KEY` even
  * when the routed profile defines a custom `credential_env`, breaking
  * inference.local (#4564).
  */
diff --git a/src/lib/onboard/summary.test.ts b/src/lib/onboard/summary.test.ts
index 35dd384593..03721ab790 100644
--- a/src/lib/onboard/summary.test.ts
+++ b/src/lib/onboard/summary.test.ts
@@ -40,7 +40,7 @@ describe("onboard summary helpers", () => {
     const bareSummary = formatOnboardConfigSummary({
       provider: "nvidia-prod",
       model: "nvidia/nemotron-3-super-120b-a12b",
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
       webSearchConfig: null,
       enabledChannels: [],
       sandboxName: "test",
diff --git a/src/lib/onboard/validation-recovery-prompt.ts b/src/lib/onboard/validation-recovery-prompt.ts
index bdf73e8d20..bb39aca867 100644
--- a/src/lib/onboard/validation-recovery-prompt.ts
+++ b/src/lib/onboard/validation-recovery-prompt.ts
@@ -97,7 +97,7 @@ export function createValidationRecoveryPromptHelpers(
         // Regex fallback: base64-safe token pattern (20+ chars, no spaces, mixed alphanum)
         /^[A-Za-z0-9_\-.]{20,}$/.test(choice);
       // validateNvidiaApiKeyValue is provider-aware: it only enforces the
-      // nvapi- prefix when credentialEnv === "NVIDIA_API_KEY", so passing it
+      // nvapi- prefix when credentialEnv is NVIDIA_INFERENCE_API_KEY or NVIDIA_API_KEY, so passing it
       // unconditionally here is safe for Anthropic/OpenAI/Gemini too.
       const validator = (key: string) => deps.validateNvidiaApiKeyValue(key, credentialEnv);
       if (looksLikeToken) {
diff --git a/src/lib/security/credential-filter.test.ts b/src/lib/security/credential-filter.test.ts
index 22f64914bf..a85806954f 100644
--- a/src/lib/security/credential-filter.test.ts
+++ b/src/lib/security/credential-filter.test.ts
@@ -102,7 +102,7 @@ describe("valueLooksLikeSecret", () => {
 
   it("does not match benign values", () => {
     expect(valueLooksLikeSecret("npx")).toBe(false);
-    expect(valueLooksLikeSecret("https://integrate.api.nvidia.com/v1")).toBe(false);
+    expect(valueLooksLikeSecret("https://inference-api.nvidia.com/v1")).toBe(false);
     expect(valueLooksLikeSecret("moonshotai/kimi-k2")).toBe(false);
     expect(valueLooksLikeSecret("production")).toBe(false);
   });
diff --git a/src/lib/security/redact.test.ts b/src/lib/security/redact.test.ts
index f183f802b6..471b8d8d17 100644
--- a/src/lib/security/redact.test.ts
+++ b/src/lib/security/redact.test.ts
@@ -14,7 +14,7 @@ describe("redactForLog", () => {
         model: "gpt-4o",
         refreshToken: "refresh-token-value",
       },
-      items: [{ name: "safe" }, { credentialEnv: "NVIDIA_API_KEY" }],
+      items: [{ name: "safe" }, { credentialEnv: "NVIDIA_INFERENCE_API_KEY" }],
     });
 
     expect(result).toEqual({
diff --git a/src/lib/security/redact.ts b/src/lib/security/redact.ts
index 8503e6b834..a1787b6c04 100644
--- a/src/lib/security/redact.ts
+++ b/src/lib/security/redact.ts
@@ -84,7 +84,10 @@ export function writeRedactedResult(
 // ── Full redaction (debug.ts style) ─────────────────────────────
 
 const FULL_REDACT_PATTERNS: [RegExp, string][] = [
-  [/(NVIDIA_API_KEY|API_KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|_KEY)=\S+/gi, "$1=<REDACTED>"],
+  [
+    /(NVIDIA_INFERENCE_API_KEY|NVIDIA_API_KEY|API_KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|_KEY)=\S+/gi,
+    "$1=<REDACTED>",
+  ],
   ...TOKEN_PREFIX_PATTERNS.map((p): [RegExp, string] => [
     new RegExp(p.source, p.flags),
     "<REDACTED>",
@@ -108,7 +111,7 @@ export function redactSensitiveText(value: unknown): string | null {
   if (typeof value !== "string") return null;
   let result = value
     .replace(
-      /(NVIDIA_API_KEY|NOUS_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GEMINI_API_KEY|COMPATIBLE_API_KEY|COMPATIBLE_ANTHROPIC_API_KEY|BRAVE_API_KEY|SLACK_BOT_TOKEN|SLACK_APP_TOKEN|DISCORD_BOT_TOKEN|TELEGRAM_BOT_TOKEN)=\S+/gi,
+      /(NVIDIA_INFERENCE_API_KEY|NVIDIA_API_KEY|NOUS_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GEMINI_API_KEY|COMPATIBLE_API_KEY|COMPATIBLE_ANTHROPIC_API_KEY|BRAVE_API_KEY|SLACK_BOT_TOKEN|SLACK_APP_TOKEN|DISCORD_BOT_TOKEN|TELEGRAM_BOT_TOKEN)=\S+/gi,
       "$1=<REDACTED>",
     )
     .replace(/Bearer\s+\S+/gi, "Bearer <REDACTED>");
diff --git a/src/lib/state/onboard-session.test.ts b/src/lib/state/onboard-session.test.ts
index 6ea64d4679..a633eaa4e7 100644
--- a/src/lib/state/onboard-session.test.ts
+++ b/src/lib/state/onboard-session.test.ts
@@ -320,10 +320,10 @@ describe("onboard session", () => {
       sandboxName: "my-assistant",
       endpointUrl:
         "https://alice:super-secret-token@example.com/v1?token=super-secret-token&keep=yes#token=super-secret-token",
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
     });
     session.markStepSkipped("openclaw");
-    session.markStepFailed("sandbox", "NVIDIA_API_KEY=super-secret-token");
+    session.markStepFailed("sandbox", "NVIDIA_INFERENCE_API_KEY=super-secret-token");
     session.completeSession({ provider: "ollama-local", credentialEnv: null });
 
     expect(emitted.map((event) => event.type)).toEqual([
@@ -345,7 +345,7 @@ describe("onboard session", () => {
     });
     expect(emitted[1].context).toMatchObject({
       sandboxName: "my-assistant",
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
     });
     expect(emitted[1].context.endpointOrigin).toBe("https://example.com");
     expect(emitted[1].metadata.fields).toEqual(["sandboxName", "endpointUrl", "credentialEnv"]);
@@ -353,7 +353,7 @@ describe("onboard session", () => {
       type: "state.failed",
       state: "sandbox",
       step: "sandbox",
-      error: "NVIDIA_API_KEY=<REDACTED>",
+      error: "NVIDIA_INFERENCE_API_KEY=<REDACTED>",
     });
     expect(emitted[5]).toMatchObject({ type: "onboard.failed", state: "failed" });
     expect(emitted.at(-1)).toMatchObject({ type: "onboard.completed", state: "complete" });
@@ -409,7 +409,7 @@ describe("onboard session", () => {
       model: "nvidia/test-model",
       sandboxName: "my-assistant",
       endpointUrl: "https://example.com/v1",
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
       preferredInferenceApi: "openai-completions",
       nimContainer: "nim-123",
       policyPresets: ["pypi", "npm"],
@@ -426,7 +426,7 @@ describe("onboard session", () => {
     expect(loaded.model).toBe("nvidia/test-model");
     expect(loaded.sandboxName).toBe("my-assistant");
     expect(loaded.endpointUrl).toBe("https://example.com/v1");
-    expect(loaded.credentialEnv).toBe("NVIDIA_API_KEY");
+    expect(loaded.credentialEnv).toBe("NVIDIA_INFERENCE_API_KEY");
     expect(loaded.preferredInferenceApi).toBe("openai-completions");
     expect(loaded.nimContainer).toBe("nim-123");
     expect(loaded.policyPresets).toEqual(["pypi", "npm"]);
@@ -1000,11 +1000,11 @@ describe("onboard session", () => {
     session.saveSession(session.createSession());
     session.markStepFailed(
       "inference",
-      "provider auth failed with NVIDIA_API_KEY=nvapi-secret Bearer topsecret sk-secret-value-that-is-long-enough ghp_1234567890123456789012345",
+      "provider auth failed with NVIDIA_INFERENCE_API_KEY=nvapi-secret Bearer topsecret sk-secret-value-that-is-long-enough ghp_1234567890123456789012345",
     );
 
     const loaded = requireLoadedSession(session.loadSession());
-    expect(loaded.steps.inference.error).toContain("NVIDIA_API_KEY=<REDACTED>");
+    expect(loaded.steps.inference.error).toContain("NVIDIA_INFERENCE_API_KEY=<REDACTED>");
     expect(loaded.steps.inference.error).toContain("Bearer <REDACTED>");
     expect(loaded.steps.inference.error).not.toContain("nvapi-secret");
     expect(loaded.steps.inference.error).not.toContain("topsecret");
diff --git a/src/lib/state/onboard-step-mutation.test.ts b/src/lib/state/onboard-step-mutation.test.ts
index 859f7f6105..71cbb4c628 100644
--- a/src/lib/state/onboard-step-mutation.test.ts
+++ b/src/lib/state/onboard-step-mutation.test.ts
@@ -59,10 +59,13 @@ describe("record-only onboard step mutation", () => {
     expect(loaded.sandboxName).toBe("my-assistant");
     expect(loaded.machine).toMatchObject({ state: "init", revision: 0 });
 
-    session.markStepFailedRecordOnly("gateway", "Gateway failed: NVIDIA_API_KEY=nvapi-secret");
+    session.markStepFailedRecordOnly(
+      "gateway",
+      "Gateway failed: NVIDIA_INFERENCE_API_KEY=nvapi-secret",
+    );
     loaded = requireLoadedSession(session.loadSession());
     expect(loaded.steps.gateway.status).toBe("failed");
-    expect(loaded.steps.gateway.error).toBe("Gateway failed: NVIDIA_API_KEY=<REDACTED>");
+    expect(loaded.steps.gateway.error).toBe("Gateway failed: NVIDIA_INFERENCE_API_KEY=<REDACTED>");
     expect(loaded.steps.gateway.error).not.toContain("nvapi-secret");
     expect(loaded.status).toBe("in_progress");
     expect(loaded.failure).toBeNull();
diff --git a/src/lib/subprocess-env.ts b/src/lib/subprocess-env.ts
index 0ff8b59bdc..9387aa5e61 100644
--- a/src/lib/subprocess-env.ts
+++ b/src/lib/subprocess-env.ts
@@ -5,7 +5,7 @@
  * Subprocess environment allowlist.
  *
  * Subprocesses spawned by the CLI or plugin must NOT inherit the full
- * parent process.env — that leaks secrets (NVIDIA_API_KEY, GITHUB_TOKEN,
+ * parent process.env — that leaks secrets (NVIDIA_INFERENCE_API_KEY, GITHUB_TOKEN,
  * AWS_ACCESS_KEY_ID, etc.) to child processes where they can be read and
  * exfiltrated. Instead, only forward the categories below.
  *
diff --git a/src/lib/trace.test.ts b/src/lib/trace.test.ts
index 430cd59213..8112a6fa9a 100644
--- a/src/lib/trace.test.ts
+++ b/src/lib/trace.test.ts
@@ -122,11 +122,11 @@ describe("onboard trace artifacts", () => {
     expect(
       sanitizeTraceAttributes({
         nested: { token: "xoxb-secret", ok: true },
-        credential_env: "NVIDIA_API_KEY",
+        credential_env: "NVIDIA_INFERENCE_API_KEY",
       }),
     ).toMatchObject({
       nested: '{"token":"<REDACTED>","ok":true}',
-      credential_env: "NVIDIA_API_KEY",
+      credential_env: "NVIDIA_INFERENCE_API_KEY",
     });
   });
 
diff --git a/src/lib/validation.test.ts b/src/lib/validation.test.ts
index 6b0620ea9b..f4c671c84c 100644
--- a/src/lib/validation.test.ts
+++ b/src/lib/validation.test.ts
@@ -340,7 +340,7 @@ describe("validateNvidiaApiKeyValue", () => {
     expect(validateNvidiaApiKeyValue("sk-abc123")).toBeTruthy();
   });
 
-  it("accepts non-nvapi keys when credentialEnv is not NVIDIA_API_KEY", () => {
+  it("accepts non-nvapi keys when credentialEnv is not NVIDIA_INFERENCE_API_KEY", () => {
     expect(validateNvidiaApiKeyValue("sk-ant-abc123", "ANTHROPIC_API_KEY")).toBeNull();
     expect(validateNvidiaApiKeyValue("sk-openai-xyz", "OPENAI_API_KEY")).toBeNull();
     expect(validateNvidiaApiKeyValue("AIza-gemini", "GEMINI_API_KEY")).toBeNull();
diff --git a/src/lib/validation.ts b/src/lib/validation.ts
index bd181f3261..47b8147fa4 100644
--- a/src/lib/validation.ts
+++ b/src/lib/validation.ts
@@ -193,12 +193,13 @@ export function classifyGatewayStartFailure(output = ""): GatewayStartFailure {
 
 export function validateNvidiaApiKeyValue(
   key: string,
-  credentialEnv: string = "NVIDIA_API_KEY",
+  credentialEnv: string = "NVIDIA_INFERENCE_API_KEY",
 ): string | null {
   // The nvapi- prefix check is specific to NVIDIA keys; skip it for keys
   // from other providers (e.g. ANTHROPIC_API_KEY, OPENAI_API_KEY) so that
   // a valid Anthropic key is not rejected with an NVIDIA-specific error.
-  const isNvidia = credentialEnv === "NVIDIA_API_KEY";
+  const isNvidia =
+    credentialEnv === "NVIDIA_INFERENCE_API_KEY" || credentialEnv === "NVIDIA_API_KEY";
   if (!key) {
     return isNvidia ? "  NVIDIA API Key is required." : "  API Key is required.";
   }
@@ -215,7 +216,7 @@ export function isSafeModelId(value: string): boolean {
 /**
  * Detect NVIDIA Cloud Functions "Function not found for account" errors.
  *
- * NVIDIA Build (integrate.api.nvidia.com) returns this when a model is in the
+ * NVIDIA Build (inference-api.nvidia.com) returns this when a model is in the
  * public catalog but is not deployed for the caller's account/org. The raw
  * body looks like:
  *
diff --git a/test/canonical-credential-resolution.test.ts b/test/canonical-credential-resolution.test.ts
index 9b417b5b14..137dcefd58 100644
--- a/test/canonical-credential-resolution.test.ts
+++ b/test/canonical-credential-resolution.test.ts
@@ -22,6 +22,20 @@ afterEach(() => {
   vi.restoreAllMocks();
   vi.resetModules();
   vi.unstubAllEnvs();
+  for (const key of [
+    "NVIDIA_INFERENCE_API_KEY",
+    "NVIDIA_API_KEY",
+    "OPENAI_API_KEY",
+    "ANTHROPIC_API_KEY",
+    "GEMINI_API_KEY",
+    "COMPATIBLE_API_KEY",
+    "COMPATIBLE_ANTHROPIC_API_KEY",
+    "TEST_RESOLVE_KEY",
+    "TEST_BOTH_KEY",
+    "NONEXISTENT_KEY",
+  ]) {
+    delete process.env[key];
+  }
   for (const dir of tmpFixtures.splice(0)) {
     try {
       fs.rmSync(dir, { recursive: true, force: true });
@@ -65,7 +79,11 @@ describe("resolveProviderCredential — canonical credential resolution (#2306)"
 
   // Parametric: all 6 remote providers
   const providers = [
-    { name: "NVIDIA Endpoints", credentialEnv: "NVIDIA_API_KEY", value: "nvapi-test-resolve" },
+    {
+      name: "NVIDIA Endpoints",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
+      value: "nvapi-test-resolve",
+    },
     { name: "OpenAI", credentialEnv: "OPENAI_API_KEY", value: "sk-test-resolve" },
     { name: "Anthropic", credentialEnv: "ANTHROPIC_API_KEY", value: "sk-ant-test-resolve" },
     { name: "Google Gemini", credentialEnv: "GEMINI_API_KEY", value: "gemini-test-resolve" },
@@ -118,20 +136,48 @@ describe("resolveProviderCredential — canonical credential resolution (#2306)"
   });
 
   it("stages legacy credentials through the resolver without deleting the legacy file", async () => {
-    const tmpDir = createFixtureHome("NVIDIA_API_KEY", "nvapi-staged-only");
+    const tmpDir = createFixtureHome("NVIDIA_INFERENCE_API_KEY", "nvapi-staged-only");
     const legacyFile = path.join(tmpDir, ".nemoclaw", "credentials.json");
-    delete process.env["NVIDIA_API_KEY"];
+    delete process.env["NVIDIA_INFERENCE_API_KEY"];
 
     const credentials = await importCredentialsModule(tmpDir);
-    const result = credentials.resolveProviderCredential("NVIDIA_API_KEY");
+    const result = credentials.resolveProviderCredential("NVIDIA_INFERENCE_API_KEY");
 
     expect(result).toBe("nvapi-staged-only");
-    expect(process.env["NVIDIA_API_KEY"]).toBe("nvapi-staged-only");
+    expect(process.env["NVIDIA_INFERENCE_API_KEY"]).toBe("nvapi-staged-only");
     // Generic lookup cannot prove every legacy value reached the gateway.
     // Only onboard's verified migration gate may remove this plaintext file.
     expect(fs.existsSync(legacyFile)).toBe(true);
   });
 
+  it("maps legacy NVIDIA_API_KEY env to NVIDIA_INFERENCE_API_KEY", async () => {
+    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-2306-env-alias-"));
+    tmpFixtures.push(tmpDir);
+    delete process.env["NVIDIA_INFERENCE_API_KEY"];
+    vi.stubEnv("NVIDIA_API_KEY", "nvapi-legacy-env");
+
+    const credentials = await importCredentialsModule(tmpDir);
+    const result = credentials.resolveProviderCredential("NVIDIA_INFERENCE_API_KEY");
+
+    expect(result).toBe("nvapi-legacy-env");
+    expect(process.env["NVIDIA_INFERENCE_API_KEY"]).toBe("nvapi-legacy-env");
+  });
+
+  it("maps legacy NVIDIA_API_KEY credentials.json entries to NVIDIA_INFERENCE_API_KEY", async () => {
+    const tmpDir = createFixtureHome("NVIDIA_API_KEY", "nvapi-legacy-file");
+    const legacyFile = path.join(tmpDir, ".nemoclaw", "credentials.json");
+    delete process.env["NVIDIA_INFERENCE_API_KEY"];
+    delete process.env["NVIDIA_API_KEY"];
+
+    const credentials = await importCredentialsModule(tmpDir);
+    const result = credentials.resolveProviderCredential("NVIDIA_INFERENCE_API_KEY");
+
+    expect(result).toBe("nvapi-legacy-file");
+    expect(process.env["NVIDIA_INFERENCE_API_KEY"]).toBe("nvapi-legacy-file");
+    expect(process.env["NVIDIA_API_KEY"]).toBe("nvapi-legacy-file");
+    expect(fs.existsSync(legacyFile)).toBe(true);
+  });
+
   it("returns null when credential exists nowhere", async () => {
     const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-2306-missing-"));
     tmpFixtures.push(tmpDir);
@@ -145,21 +191,21 @@ describe("resolveProviderCredential — canonical credential resolution (#2306)"
   });
 
   it("normalizes whitespace and carriage returns", async () => {
-    // Uses an allowlisted env-key (`NVIDIA_API_KEY`) so the value can
+    // Uses an allowlisted env-key (`NVIDIA_INFERENCE_API_KEY`) so the value can
     // actually be staged from the legacy file. The post-#2554 staging
     // helper rejects entries that aren't in `KNOWN_CREDENTIAL_ENV_KEYS`,
     // which is the security guard that prevents a tampered
     // credentials.json from injecting unrelated env vars (e.g. `PATH`,
     // `NODE_OPTIONS`); the original test fixture used a fake
     // `TEST_WHITESPACE_KEY` that is correctly filtered out.
-    const tmpDir = createFixtureHome("NVIDIA_API_KEY", "  nvapi-whitespace-test \r\n");
+    const tmpDir = createFixtureHome("NVIDIA_INFERENCE_API_KEY", "  nvapi-whitespace-test \r\n");
 
     const credentials = await importCredentialsModule(tmpDir);
-    delete process.env["NVIDIA_API_KEY"];
-    const result = credentials.resolveProviderCredential("NVIDIA_API_KEY");
+    delete process.env["NVIDIA_INFERENCE_API_KEY"];
+    const result = credentials.resolveProviderCredential("NVIDIA_INFERENCE_API_KEY");
 
     expect(result).toBe("nvapi-whitespace-test");
-    expect(process.env["NVIDIA_API_KEY"]).toBe("nvapi-whitespace-test");
+    expect(process.env["NVIDIA_INFERENCE_API_KEY"]).toBe("nvapi-whitespace-test");
   });
 
   it("does not pollute process.env on null resolve", async () => {
diff --git a/test/check-env-var-docs.test.ts b/test/check-env-var-docs.test.ts
index ff587abab6..6e41bcef92 100644
--- a/test/check-env-var-docs.test.ts
+++ b/test/check-env-var-docs.test.ts
@@ -49,7 +49,7 @@ describe("findEnvVarReads", () => {
   it.each([
     "const x = process.env.PATH;",
     "const x = process.env.HOME;",
-    "const x = process.env.NVIDIA_API_KEY;",
+    "const x = process.env.NVIDIA_INFERENCE_API_KEY;",
     "const x = process.env.BRAVE_API_KEY;",
     "const x = process.env.TELEGRAM_BOT_TOKEN;",
   ])("ignores non-NEMOCLAW var %s", (code) => {
diff --git a/test/cli/dispatch-basics.test.ts b/test/cli/dispatch-basics.test.ts
index 2447efc770..301666df8f 100644
--- a/test/cli/dispatch-basics.test.ts
+++ b/test/cli/dispatch-basics.test.ts
@@ -66,7 +66,7 @@ describe("CLI dispatch", () => {
   });
 
   it(
-    "start does not prompt for NVIDIA_API_KEY before launching local services",
+    "start does not prompt for NVIDIA_INFERENCE_API_KEY before launching local services",
     testTimeoutOptions(35_000),
     () => {
       const home = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-cli-start-no-key-"));
@@ -107,7 +107,7 @@ describe("CLI dispatch", () => {
         {
           HOME: home,
           PATH: `${localBin}:${process.env.PATH || ""}`,
-          NVIDIA_API_KEY: "",
+          NVIDIA_INFERENCE_API_KEY: "",
           TELEGRAM_BOT_TOKEN: "",
         },
         30000,
diff --git a/test/config-set-nested-ssrf.test.ts b/test/config-set-nested-ssrf.test.ts
index ab809ebe0d..dcbd581e6e 100644
--- a/test/config-set-nested-ssrf.test.ts
+++ b/test/config-set-nested-ssrf.test.ts
@@ -577,7 +577,7 @@ describe("config set nested URL SSRF enforcement", () => {
       exports: {
         loadSession: () => ({
           sandboxName: "rotate-test",
-          credentialEnv: "NVIDIA_API_KEY",
+          credentialEnv: "NVIDIA_INFERENCE_API_KEY",
           provider: "nvidia-prod",
           providerType: "openai",
         }),
@@ -611,7 +611,10 @@ describe("config set nested URL SSRF enforcement", () => {
       ).resolves.toBeUndefined();
 
       expect(errorSpy).not.toHaveBeenCalled();
-      expect(saveCredential).toHaveBeenCalledWith("NVIDIA_API_KEY", "nvapi-rotated-value");
+      expect(saveCredential).toHaveBeenCalledWith(
+        "NVIDIA_INFERENCE_API_KEY",
+        "nvapi-rotated-value",
+      );
       // Credential rotation is not a shields operation; its audit entry must
       // use the rotate_token action so it does not inflate shields_down counts
       // in the forensics log.
@@ -619,7 +622,7 @@ describe("config set nested URL SSRF enforcement", () => {
         expect.objectContaining({
           action: "rotate_token",
           sandbox: "rotate-test",
-          reason: "rotate-token openclaw:NVIDIA_API_KEY",
+          reason: "rotate-token openclaw:NVIDIA_INFERENCE_API_KEY",
         }),
       );
       expect(appendAuditEntry).not.toHaveBeenCalledWith(
diff --git a/test/credential-exposure.test.ts b/test/credential-exposure.test.ts
index d260e78557..6f05c71eeb 100644
--- a/test/credential-exposure.test.ts
+++ b/test/credential-exposure.test.ts
@@ -3,7 +3,7 @@
 //
 // Security regression test: credential values must never appear in --credential
 // CLI arguments. OpenShell reads credential values from the environment when
-// only the env-var name is passed (e.g. --credential "NVIDIA_API_KEY"), so
+// only the env-var name is passed (e.g. --credential "NVIDIA_INFERENCE_API_KEY"), so
 // there is no reason to pass the secret itself on the command line where it
 // would be visible in `ps aux` output.
 
@@ -34,13 +34,13 @@ describe("credential exposure in process arguments", () => {
       "create",
       "inference",
       "openai",
-      "NVIDIA_API_KEY",
+      "NVIDIA_INFERENCE_API_KEY",
       "https://api.example.test/v1",
     );
 
     expect(args).toContain("--credential");
-    expect(args).toContain("NVIDIA_API_KEY");
-    expect(args.join(" ")).not.toContain("NVIDIA_API_KEY=");
+    expect(args).toContain("NVIDIA_INFERENCE_API_KEY");
+    expect(args.join(" ")).not.toContain("NVIDIA_INFERENCE_API_KEY=");
     expect(args.join(" ")).not.toContain("nvapi-");
   });
 
@@ -123,20 +123,20 @@ describe("credential exposure in process arguments", () => {
 
   it("subprocess env builder does not spread full process.env into subprocesses", () => {
     const previous = {
-      NVIDIA_API_KEY: process.env.NVIDIA_API_KEY,
+      NVIDIA_INFERENCE_API_KEY: process.env.NVIDIA_INFERENCE_API_KEY,
       PATH: process.env.PATH,
     };
     try {
-      process.env.NVIDIA_API_KEY = "nvapi-secret-should-not-leak";
+      process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-secret-should-not-leak";
       process.env.PATH = `/tmp/nemoclaw-fake-bin:${process.env.PATH || ""}`;
       const env = buildCliSubprocessEnv();
-      expect(env.NVIDIA_API_KEY).toBeUndefined();
+      expect(env.NVIDIA_INFERENCE_API_KEY).toBeUndefined();
       expect(env.PATH).toContain("/tmp/nemoclaw-fake-bin");
     } finally {
-      if (previous.NVIDIA_API_KEY === undefined) {
-        delete process.env.NVIDIA_API_KEY;
+      if (previous.NVIDIA_INFERENCE_API_KEY === undefined) {
+        delete process.env.NVIDIA_INFERENCE_API_KEY;
       } else {
-        process.env.NVIDIA_API_KEY = previous.NVIDIA_API_KEY;
+        process.env.NVIDIA_INFERENCE_API_KEY = previous.NVIDIA_INFERENCE_API_KEY;
       }
       if (previous.PATH === undefined) {
         delete process.env.PATH;
diff --git a/test/credentials-cli-command.test.ts b/test/credentials-cli-command.test.ts
index ad38844fd8..50e40f7e19 100644
--- a/test/credentials-cli-command.test.ts
+++ b/test/credentials-cli-command.test.ts
@@ -254,10 +254,10 @@ describe("credentials oclif commands", () => {
     const { CredentialsResetCommand } = loadCommands();
 
     const output = await captureOutput(() =>
-      expectExitCode(() => CredentialsResetCommand.run(["NVIDIA_API_KEY", "--yes"]), 1),
+      expectExitCode(() => CredentialsResetCommand.run(["NVIDIA_INFERENCE_API_KEY", "--yes"]), 1),
     );
 
-    expect(output.stderr).toContain("Could not remove provider 'NVIDIA_API_KEY'.");
+    expect(output.stderr).toContain("Could not remove provider 'NVIDIA_INFERENCE_API_KEY'.");
     expect(output.stderr).toContain("looks like a credential env variable name");
     expect(output.stderr).toContain("provider not found");
   });
diff --git a/test/credentials-shim.test.ts b/test/credentials-shim.test.ts
index 1709699528..131c622971 100644
--- a/test/credentials-shim.test.ts
+++ b/test/credentials-shim.test.ts
@@ -50,13 +50,13 @@ describe("credentials shim", () => {
   });
 
   it("stages and lists allowlisted credentials from process.env only", () => {
-    credentials.saveCredential("NVIDIA_API_KEY", "  nvapi-js-shim \r\n");
+    credentials.saveCredential("NVIDIA_INFERENCE_API_KEY", "  nvapi-js-shim \r\n");
     credentials.saveCredential("TEST_KEY", "fixture-only");
 
-    expect(credentials.getCredential("NVIDIA_API_KEY")).toBe("nvapi-js-shim");
+    expect(credentials.getCredential("NVIDIA_INFERENCE_API_KEY")).toBe("nvapi-js-shim");
     expect(credentials.getCredential("TEST_KEY")).toBe("fixture-only");
-    expect(credentials.loadCredentials()).toEqual({ NVIDIA_API_KEY: "nvapi-js-shim" });
-    expect(credentials.listCredentialKeys()).toEqual(["NVIDIA_API_KEY"]);
+    expect(credentials.loadCredentials()).toEqual({ NVIDIA_INFERENCE_API_KEY: "nvapi-js-shim" });
+    expect(credentials.listCredentialKeys()).toEqual(["NVIDIA_INFERENCE_API_KEY"]);
   });
 
   it("clears blank values instead of persisting them", () => {
@@ -72,15 +72,17 @@ describe("credentials shim", () => {
     const dir = path.join(tmpDir, ".nemoclaw");
     const file = path.join(dir, "credentials.json");
     fs.mkdirSync(dir, { recursive: true });
-    fs.writeFileSync(file, JSON.stringify({ NVIDIA_API_KEY: "nvapi-from-disk" }), { mode: 0o600 });
+    fs.writeFileSync(file, JSON.stringify({ NVIDIA_INFERENCE_API_KEY: "nvapi-from-disk" }), {
+      mode: 0o600,
+    });
 
-    expect(credentials.getCredential("NVIDIA_API_KEY")).toBe(null);
+    expect(credentials.getCredential("NVIDIA_INFERENCE_API_KEY")).toBe(null);
     expect(credentials.loadCredentials()).toEqual({});
 
-    credentials.saveCredential("NVIDIA_API_KEY", "nvapi-from-env");
+    credentials.saveCredential("NVIDIA_INFERENCE_API_KEY", "nvapi-from-env");
     expect(fs.readFileSync(file, "utf8")).toBe(
-      JSON.stringify({ NVIDIA_API_KEY: "nvapi-from-disk" }),
+      JSON.stringify({ NVIDIA_INFERENCE_API_KEY: "nvapi-from-disk" }),
     );
-    expect(credentials.getCredential("NVIDIA_API_KEY")).toBe("nvapi-from-env");
+    expect(credentials.getCredential("NVIDIA_INFERENCE_API_KEY")).toBe("nvapi-from-env");
   });
 });
diff --git a/test/credentials.test.ts b/test/credentials.test.ts
index d9a5b28510..3ab41cb1bd 100644
--- a/test/credentials.test.ts
+++ b/test/credentials.test.ts
@@ -54,7 +54,7 @@ async function importCredentialsModule(home: string): Promise<CredentialsModule>
 }
 
 beforeEach(() => {
-  // The user's shell may export NVIDIA_API_KEY etc.; the credentials module
+  // The user's shell may export NVIDIA_INFERENCE_API_KEY etc.; the credentials module
   // now reads exclusively from process.env, so any inherited value would
   // contaminate every test. Start each case from a clean process env.
   clearTrackedEnv();
@@ -94,16 +94,16 @@ describe("host-side credential staging", () => {
 
     expect(credentials.loadCredentials()).toEqual({});
 
-    credentials.saveCredential("NVIDIA_API_KEY", "  nvapi-saved-key \r\n");
+    credentials.saveCredential("NVIDIA_INFERENCE_API_KEY", "  nvapi-saved-key \r\n");
 
     // No plaintext credentials.json — the gateway is the system of record.
     const legacyFile = path.join(home, ".nemoclaw", "credentials.json");
     expect(fs.existsSync(legacyFile)).toBe(false);
 
-    expect(process.env.NVIDIA_API_KEY).toBe("nvapi-saved-key");
-    expect(credentials.getCredential("NVIDIA_API_KEY")).toBe("nvapi-saved-key");
-    expect(credentials.loadCredentials()).toEqual({ NVIDIA_API_KEY: "nvapi-saved-key" });
-    expect(credentials.listCredentialKeys()).toEqual(["NVIDIA_API_KEY"]);
+    expect(process.env.NVIDIA_INFERENCE_API_KEY).toBe("nvapi-saved-key");
+    expect(credentials.getCredential("NVIDIA_INFERENCE_API_KEY")).toBe("nvapi-saved-key");
+    expect(credentials.loadCredentials()).toEqual({ NVIDIA_INFERENCE_API_KEY: "nvapi-saved-key" });
+    expect(credentials.listCredentialKeys()).toEqual(["NVIDIA_INFERENCE_API_KEY"]);
   });
 
   it("getCredential reads only from process.env", async () => {
@@ -114,15 +114,15 @@ describe("host-side credential staging", () => {
     fs.mkdirSync(path.join(home, ".nemoclaw"), { recursive: true });
     fs.writeFileSync(
       path.join(home, ".nemoclaw", "credentials.json"),
-      JSON.stringify({ NVIDIA_API_KEY: "nvapi-from-disk" }),
+      JSON.stringify({ NVIDIA_INFERENCE_API_KEY: "nvapi-from-disk" }),
       { mode: 0o600 },
     );
 
     const credentials = await importCredentialsModule(home);
-    expect(credentials.getCredential("NVIDIA_API_KEY")).toBe(null);
+    expect(credentials.getCredential("NVIDIA_INFERENCE_API_KEY")).toBe(null);
 
-    vi.stubEnv("NVIDIA_API_KEY", "  nvapi-from-env \n");
-    expect(credentials.getCredential("NVIDIA_API_KEY")).toBe("nvapi-from-env");
+    vi.stubEnv("NVIDIA_INFERENCE_API_KEY", "  nvapi-from-env \n");
+    expect(credentials.getCredential("NVIDIA_INFERENCE_API_KEY")).toBe("nvapi-from-env");
   });
 
   it("returns null for missing or blank credential values", async () => {
@@ -131,26 +131,29 @@ describe("host-side credential staging", () => {
 
     credentials.saveCredential("EMPTY_VALUE", " \r\n ");
     expect(credentials.getCredential("EMPTY_VALUE")).toBe(null);
-    expect(credentials.getCredential("NVIDIA_API_KEY")).toBe(null);
+    expect(credentials.getCredential("NVIDIA_INFERENCE_API_KEY")).toBe(null);
   });
 
   it("deleteCredential clears the staged value without touching disk", async () => {
     const home = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-creds-"));
     const credentials = await importCredentialsModule(home);
 
-    credentials.saveCredential("NVIDIA_API_KEY", "nvapi-bad-key");
+    credentials.saveCredential("NVIDIA_INFERENCE_API_KEY", "nvapi-bad-key");
     credentials.saveCredential("OPENAI_API_KEY", "sk-other");
 
-    expect(credentials.listCredentialKeys()).toEqual(["NVIDIA_API_KEY", "OPENAI_API_KEY"]);
+    expect(credentials.listCredentialKeys()).toEqual([
+      "NVIDIA_INFERENCE_API_KEY",
+      "OPENAI_API_KEY",
+    ]);
     expect(fs.existsSync(path.join(home, ".nemoclaw", "credentials.json"))).toBe(false);
 
-    expect(credentials.deleteCredential("NVIDIA_API_KEY")).toBe(true);
-    expect(credentials.getCredential("NVIDIA_API_KEY")).toBe(null);
+    expect(credentials.deleteCredential("NVIDIA_INFERENCE_API_KEY")).toBe(true);
+    expect(credentials.getCredential("NVIDIA_INFERENCE_API_KEY")).toBe(null);
     expect(credentials.listCredentialKeys()).toEqual(["OPENAI_API_KEY"]);
     expect(credentials.getCredential("OPENAI_API_KEY")).toBe("sk-other");
 
     // Idempotent.
-    expect(credentials.deleteCredential("NVIDIA_API_KEY")).toBe(false);
+    expect(credentials.deleteCredential("NVIDIA_INFERENCE_API_KEY")).toBe(false);
   });
 
   it("deleteCredential returns false when nothing is staged", async () => {
@@ -179,7 +182,7 @@ describe("legacy credentials.json migration (two-phase: stage then remove)", ()
     fs.writeFileSync(
       legacyFile,
       JSON.stringify({
-        NVIDIA_API_KEY: "nvapi-legacy",
+        NVIDIA_INFERENCE_API_KEY: "nvapi-legacy",
         TELEGRAM_BOT_TOKEN: "tg-legacy",
         IGNORED_NON_STRING: 42 as unknown as string,
       }),
@@ -189,8 +192,8 @@ describe("legacy credentials.json migration (two-phase: stage then remove)", ()
     const credentials = await importCredentialsModule(home);
     const staged = credentials.stageLegacyCredentialsToEnv();
 
-    expect(staged).toEqual(["NVIDIA_API_KEY", "TELEGRAM_BOT_TOKEN"]);
-    expect(process.env.NVIDIA_API_KEY).toBe("nvapi-legacy");
+    expect(staged).toEqual(["NVIDIA_INFERENCE_API_KEY", "TELEGRAM_BOT_TOKEN"]);
+    expect(process.env.NVIDIA_INFERENCE_API_KEY).toBe("nvapi-legacy");
     expect(process.env.TELEGRAM_BOT_TOKEN).toBe("tg-legacy");
 
     // The file MUST still exist after staging — it is removed only after a
@@ -215,7 +218,7 @@ describe("legacy credentials.json migration (two-phase: stage then remove)", ()
         PATH: "/attacker/bin:/usr/bin",
         NODE_OPTIONS: "--require=/tmp/evil.js",
         OPENSHELL_GATEWAY: "evil-gw",
-        NVIDIA_API_KEY: "nvapi-legitimate",
+        NVIDIA_INFERENCE_API_KEY: "nvapi-legitimate",
       }),
       { mode: 0o600 },
     );
@@ -223,8 +226,8 @@ describe("legacy credentials.json migration (two-phase: stage then remove)", ()
     const credentials = await importCredentialsModule(home);
     const staged = credentials.stageLegacyCredentialsToEnv();
 
-    expect(staged).toEqual(["NVIDIA_API_KEY"]);
-    expect(process.env.NVIDIA_API_KEY).toBe("nvapi-legitimate");
+    expect(staged).toEqual(["NVIDIA_INFERENCE_API_KEY"]);
+    expect(process.env.NVIDIA_INFERENCE_API_KEY).toBe("nvapi-legitimate");
     expect(process.env.PATH).toBe(originalPath);
     expect(process.env.NODE_OPTIONS).toBe(originalNodeOptions);
     expect(process.env.OPENSHELL_GATEWAY).toBe(originalOpenshellGateway);
@@ -242,15 +245,15 @@ describe("legacy credentials.json migration (two-phase: stage then remove)", ()
     fs.mkdirSync(credsDir, { recursive: true });
     fs.writeFileSync(
       path.join(credsDir, "credentials.json"),
-      JSON.stringify({ NVIDIA_API_KEY: "nvapi-from-disk" }),
+      JSON.stringify({ NVIDIA_INFERENCE_API_KEY: "nvapi-from-disk" }),
       { mode: 0o600 },
     );
 
-    vi.stubEnv("NVIDIA_API_KEY", "nvapi-from-env");
+    vi.stubEnv("NVIDIA_INFERENCE_API_KEY", "nvapi-from-env");
     const credentials = await importCredentialsModule(home);
     const staged = credentials.stageLegacyCredentialsToEnv();
 
-    expect(process.env.NVIDIA_API_KEY).toBe("nvapi-from-env");
+    expect(process.env.NVIDIA_INFERENCE_API_KEY).toBe("nvapi-from-env");
     // The legacy value was skipped, so it must NOT be reported as staged.
     // Onboard uses the staged length to decide whether to delete the file;
     // a false-positive entry here would unlink credentials we never
@@ -265,7 +268,7 @@ describe("legacy credentials.json migration (two-phase: stage then remove)", ()
     const home = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-creds-"));
     const credentials = await importCredentialsModule(home);
     expect(credentials.stageLegacyCredentialsToEnv()).toEqual([]);
-    expect(process.env.NVIDIA_API_KEY).toBeUndefined();
+    expect(process.env.NVIDIA_INFERENCE_API_KEY).toBeUndefined();
   });
 
   it("treats a blank/whitespace env entry as unset and stages the legacy value", async () => {
@@ -274,7 +277,7 @@ describe("legacy credentials.json migration (two-phase: stage then remove)", ()
     fs.mkdirSync(credsDir, { recursive: true });
     fs.writeFileSync(
       path.join(credsDir, "credentials.json"),
-      JSON.stringify({ NVIDIA_API_KEY: "nvapi-from-disk" }),
+      JSON.stringify({ NVIDIA_INFERENCE_API_KEY: "nvapi-from-disk" }),
       { mode: 0o600 },
     );
 
@@ -282,12 +285,12 @@ describe("legacy credentials.json migration (two-phase: stage then remove)", ()
     // an empty value — must not block staging the legacy file value, or
     // rebuild/onboard preflight will fail with a credential the user
     // demonstrably has on disk.
-    vi.stubEnv("NVIDIA_API_KEY", "   ");
+    vi.stubEnv("NVIDIA_INFERENCE_API_KEY", "   ");
     const credentials = await importCredentialsModule(home);
     const staged = credentials.stageLegacyCredentialsToEnv();
 
-    expect(staged).toEqual(["NVIDIA_API_KEY"]);
-    expect(process.env.NVIDIA_API_KEY).toBe("nvapi-from-disk");
+    expect(staged).toEqual(["NVIDIA_INFERENCE_API_KEY"]);
+    expect(process.env.NVIDIA_INFERENCE_API_KEY).toBe("nvapi-from-disk");
   });
 
   it("stages nothing from a corrupt legacy file and leaves it untouched", async () => {
@@ -301,7 +304,7 @@ describe("legacy credentials.json migration (two-phase: stage then remove)", ()
     expect(credentials.stageLegacyCredentialsToEnv()).toEqual([]);
     // Corrupt input must not silently disappear — leave it for inspection.
     expect(fs.existsSync(legacyFile)).toBe(true);
-    expect(process.env.NVIDIA_API_KEY).toBeUndefined();
+    expect(process.env.NVIDIA_INFERENCE_API_KEY).toBeUndefined();
   });
 
   it("refuses to migrate an oversized legacy file (DoS guard)", async () => {
@@ -311,7 +314,7 @@ describe("legacy credentials.json migration (two-phase: stage then remove)", ()
     fs.mkdirSync(credsDir, { recursive: true });
     // Two megabytes of valid JSON, well above the 1 MiB sanity cap.
     const filler = "x".repeat(2 * 1024 * 1024);
-    fs.writeFileSync(legacyFile, JSON.stringify({ NVIDIA_API_KEY: `nvapi-${filler}` }), {
+    fs.writeFileSync(legacyFile, JSON.stringify({ NVIDIA_INFERENCE_API_KEY: `nvapi-${filler}` }), {
       mode: 0o600,
     });
 
@@ -320,7 +323,7 @@ describe("legacy credentials.json migration (two-phase: stage then remove)", ()
 
     try {
       expect(credentials.stageLegacyCredentialsToEnv()).toEqual([]);
-      expect(process.env.NVIDIA_API_KEY).toBeUndefined();
+      expect(process.env.NVIDIA_INFERENCE_API_KEY).toBeUndefined();
       // File is left in place so the user can inspect or delete it.
       expect(fs.existsSync(legacyFile)).toBe(true);
       // The user gets a diagnostic on stderr explaining the refusal.
@@ -340,12 +343,15 @@ describe("legacy credentials.json migration (two-phase: stage then remove)", ()
     // A real credentials file at an unrelated path; the attacker plants a
     // symlink at credentials.json that points at it.
     const realFile = path.join(home, "real-creds.json");
-    fs.writeFileSync(realFile, JSON.stringify({ NVIDIA_API_KEY: "nvapi-attacker-controlled" }));
+    fs.writeFileSync(
+      realFile,
+      JSON.stringify({ NVIDIA_INFERENCE_API_KEY: "nvapi-attacker-controlled" }),
+    );
     fs.symlinkSync(realFile, legacyFile);
 
     const credentials = await importCredentialsModule(home);
     expect(credentials.stageLegacyCredentialsToEnv()).toEqual([]);
-    expect(process.env.NVIDIA_API_KEY).toBeUndefined();
+    expect(process.env.NVIDIA_INFERENCE_API_KEY).toBeUndefined();
     // The pointee is intact; we never read or modified it.
     expect(fs.existsSync(realFile)).toBe(true);
   });
@@ -359,29 +365,33 @@ describe("legacy credentials.json migration (two-phase: stage then remove)", ()
     const credsDir = path.join(home, ".nemoclaw");
     const legacyFile = path.join(credsDir, "credentials.json");
     fs.mkdirSync(credsDir, { recursive: true });
-    fs.writeFileSync(legacyFile, JSON.stringify({ NVIDIA_API_KEY: "nvapi-survives-crash" }), {
-      mode: 0o600,
-    });
+    fs.writeFileSync(
+      legacyFile,
+      JSON.stringify({ NVIDIA_INFERENCE_API_KEY: "nvapi-survives-crash" }),
+      {
+        mode: 0o600,
+      },
+    );
 
     // --- Process A: stage, then "crash" (we just abandon the env). ---
     {
       const credentials = await importCredentialsModule(home);
       const stagedA = credentials.stageLegacyCredentialsToEnv();
-      expect(stagedA).toEqual(["NVIDIA_API_KEY"]);
-      expect(process.env.NVIDIA_API_KEY).toBe("nvapi-survives-crash");
+      expect(stagedA).toEqual(["NVIDIA_INFERENCE_API_KEY"]);
+      expect(process.env.NVIDIA_INFERENCE_API_KEY).toBe("nvapi-survives-crash");
       // Mid-onboard crash — file MUST still exist.
       expect(fs.existsSync(legacyFile)).toBe(true);
     }
 
     // Wipe env so nothing carries over from "process A" into "process B".
-    delete process.env.NVIDIA_API_KEY;
+    delete process.env.NVIDIA_INFERENCE_API_KEY;
 
     // --- Process B: fresh start, re-stage idempotently, then succeed. ---
     {
       const credentials = await importCredentialsModule(home);
       const stagedB = credentials.stageLegacyCredentialsToEnv();
-      expect(stagedB).toEqual(["NVIDIA_API_KEY"]);
-      expect(process.env.NVIDIA_API_KEY).toBe("nvapi-survives-crash");
+      expect(stagedB).toEqual(["NVIDIA_INFERENCE_API_KEY"]);
+      expect(process.env.NVIDIA_INFERENCE_API_KEY).toBe("nvapi-survives-crash");
       credentials.removeLegacyCredentialsFile();
       expect(fs.existsSync(legacyFile)).toBe(false);
     }
@@ -392,7 +402,7 @@ describe("legacy credentials.json migration (two-phase: stage then remove)", ()
     const credsDir = path.join(home, ".nemoclaw");
     const legacyFile = path.join(credsDir, "credentials.json");
     fs.mkdirSync(credsDir, { recursive: true });
-    const cleartext = JSON.stringify({ NVIDIA_API_KEY: "nvapi-secret-payload" });
+    const cleartext = JSON.stringify({ NVIDIA_INFERENCE_API_KEY: "nvapi-secret-payload" });
     fs.writeFileSync(legacyFile, cleartext, { mode: 0o600 });
 
     // Capture the pre-unlink content via a wrapper that intercepts the unlink
@@ -487,7 +497,7 @@ describe("removeLegacyCredentialsFileIfEmpty (post-upgrade cleanup, #3105)", ()
     fs.mkdirSync(credsDir, { recursive: true });
     fs.writeFileSync(
       legacyFile,
-      JSON.stringify({ NVIDIA_API_KEY: "", OPENAI_API_KEY: "   \r\n\t  " }),
+      JSON.stringify({ NVIDIA_INFERENCE_API_KEY: "", OPENAI_API_KEY: "   \r\n\t  " }),
       { mode: 0o600 },
     );
 
@@ -501,7 +511,7 @@ describe("removeLegacyCredentialsFileIfEmpty (post-upgrade cleanup, #3105)", ()
     const credsDir = path.join(home, ".nemoclaw");
     const legacyFile = path.join(credsDir, "credentials.json");
     fs.mkdirSync(credsDir, { recursive: true });
-    const payload = JSON.stringify({ NVIDIA_API_KEY: "nvapi-real-secret", FOO: "bar" });
+    const payload = JSON.stringify({ NVIDIA_INFERENCE_API_KEY: "nvapi-real-secret", FOO: "bar" });
     fs.writeFileSync(legacyFile, payload, { mode: 0o600 });
 
     const credentials = await importCredentialsModule(home);
@@ -766,9 +776,9 @@ createCredentialPromptHelpers(() => { throw new Error("unexpected exit"); }).rea
 
     const script = `
 const { ensureApiKey } = require(${JSON.stringify(path.join(import.meta.dirname, "..", "dist", "lib", "credentials", "store.js"))});
-delete process.env.NVIDIA_API_KEY;
+delete process.env.NVIDIA_INFERENCE_API_KEY;
 ensureApiKey()
-  .then(() => console.log('STAGED=' + process.env.NVIDIA_API_KEY))
+  .then(() => console.log('STAGED=' + process.env.NVIDIA_INFERENCE_API_KEY))
   .catch((err) => { console.error(err && err.stack ? err.stack : String(err)); process.exit(1); });
 `;
     const scriptFile = path.join(os.tmpdir(), `nemoclaw-ensure-api-key-${process.pid}.js`);
@@ -785,7 +795,7 @@ ${JSON.stringify(process.execPath)} ${JSON.stringify(scriptFile)} < "$pipe"
     try {
       result = spawnSync("bash", ["--noprofile", "--norc"], {
         encoding: "utf-8",
-        env: { ...process.env, NVIDIA_API_KEY: "" },
+        env: { ...process.env, NVIDIA_INFERENCE_API_KEY: "" },
         input: bash,
         timeout: 5000,
       });
@@ -806,15 +816,15 @@ ${JSON.stringify(process.execPath)} ${JSON.stringify(scriptFile)} < "$pipe"
   it("returns navigation from the NVIDIA API key prompt without staging it", () => {
     const script = `
 const { ensureApiKey } = require(${JSON.stringify(path.join(import.meta.dirname, "..", "dist", "lib", "credentials", "store.js"))});
-delete process.env.NVIDIA_API_KEY;
+delete process.env.NVIDIA_INFERENCE_API_KEY;
 ensureApiKey()
-  .then((result) => console.log(JSON.stringify({ result, key: process.env.NVIDIA_API_KEY || null })))
+  .then((result) => console.log(JSON.stringify({ result, key: process.env.NVIDIA_INFERENCE_API_KEY || null })))
   .catch((err) => { console.error(err && err.stack ? err.stack : String(err)); process.exit(1); });
 `;
     const result = spawnSync(process.execPath, ["-e", script], {
       encoding: "utf-8",
       input: "back\n",
-      env: { ...process.env, NVIDIA_API_KEY: "" },
+      env: { ...process.env, NVIDIA_INFERENCE_API_KEY: "" },
       timeout: 5000,
     });
 
@@ -826,15 +836,15 @@ ensureApiKey()
   it("returns exit from the NVIDIA API key prompt without staging it", () => {
     const script = `
 const { ensureApiKey } = require(${JSON.stringify(path.join(import.meta.dirname, "..", "dist", "lib", "credentials", "store.js"))});
-delete process.env.NVIDIA_API_KEY;
+delete process.env.NVIDIA_INFERENCE_API_KEY;
 ensureApiKey()
-  .then((result) => console.log(JSON.stringify({ result, key: process.env.NVIDIA_API_KEY || null })))
+  .then((result) => console.log(JSON.stringify({ result, key: process.env.NVIDIA_INFERENCE_API_KEY || null })))
   .catch((err) => { console.error(err && err.stack ? err.stack : String(err)); process.exit(1); });
 `;
     const result = spawnSync(process.execPath, ["-e", script], {
       encoding: "utf-8",
       input: "exit\n",
-      env: { ...process.env, NVIDIA_API_KEY: "" },
+      env: { ...process.env, NVIDIA_INFERENCE_API_KEY: "" },
       timeout: 5000,
     });
 
diff --git a/test/e2e-runtime/4851-ultra-toolless-validation.md b/test/e2e-runtime/4851-ultra-toolless-validation.md
index c6444f04b6..42cb97fa82 100644
--- a/test/e2e-runtime/4851-ultra-toolless-validation.md
+++ b/test/e2e-runtime/4851-ultra-toolless-validation.md
@@ -9,7 +9,7 @@ Repository-verifiable acceptance evidence for [PR #5085](https://github.com/NVID
 
 The unit tests in `test/nemotron-inference-fix.test.ts` prove request mutation, Content-Length refresh, and the 12 inject/skip branches via stubbed http + real fetch/undici. They do not prove the upstream model-output behavior the issue's expected result asks for. That requires a live call to NVIDIA Endpoints, which can't run in unit CI without API-key secret infrastructure.
 
-This runbook is the maintained runtime-validation path. Anyone reviewing #4851 acceptance can run it directly against `integrate.api.nvidia.com` and confirm the model returns `content` with both file-creation code and the run command after the preload's system message is injected.
+This runbook is the maintained runtime-validation path. Anyone reviewing #4851 acceptance can run it directly against `inference-api.nvidia.com` and confirm the model returns `content` with both file-creation code and the run command after the preload's system message is injected.
 
 ## When to run
 
@@ -25,7 +25,7 @@ This runbook is the maintained runtime-validation path. Anyone reviewing #4851 a
 Export the key once for the session:
 
 ```bash
-export NVIDIA_API_KEY="nvapi-..."
+export NVIDIA_INFERENCE_API_KEY="nvapi-..."
 ```
 
 ## Scenario A — baseline (no preload, no system message, no tools)
@@ -33,8 +33,8 @@ export NVIDIA_API_KEY="nvapi-..."
 Demonstrates the bug as filed in the issue body.
 
 ```bash
-curl -sS -X POST https://integrate.api.nvidia.com/v1/chat/completions \
-  -H "Authorization: Bearer ${NVIDIA_API_KEY}" \
+curl -sS -X POST https://inference-api.nvidia.com/v1/chat/completions \
+  -H "Authorization: Bearer ${NVIDIA_INFERENCE_API_KEY}" \
   -H "Content-Type: application/json" \
   -d '{
     "model": "nvidia/nemotron-3-ultra-550b-a55b",
@@ -61,8 +61,8 @@ Expected result with `nemotron-3-ultra-550b-a55b` (matches issue body):
 Demonstrates that the existing Nemotron-family kwarg doesn't fix #4851 by itself.
 
 ```bash
-curl -sS -X POST https://integrate.api.nvidia.com/v1/chat/completions \
-  -H "Authorization: Bearer ${NVIDIA_API_KEY}" \
+curl -sS -X POST https://inference-api.nvidia.com/v1/chat/completions \
+  -H "Authorization: Bearer ${NVIDIA_INFERENCE_API_KEY}" \
   -H "Content-Type: application/json" \
   -d '{
     "model": "nvidia/nemotron-3-ultra-550b-a55b",
@@ -80,8 +80,8 @@ Expected: still ≈ 0–60 chars. The kwarg doesn't change the failure mode.
 Demonstrates the fix shipped in this PR.
 
 ```bash
-curl -sS -X POST https://integrate.api.nvidia.com/v1/chat/completions \
-  -H "Authorization: Bearer ${NVIDIA_API_KEY}" \
+curl -sS -X POST https://inference-api.nvidia.com/v1/chat/completions \
+  -H "Authorization: Bearer ${NVIDIA_INFERENCE_API_KEY}" \
   -H "Content-Type: application/json" \
   -d '{
     "model": "nvidia/nemotron-3-ultra-550b-a55b",
@@ -109,7 +109,7 @@ This satisfies the issue's "Expected Result, Option A" (`Model explains it lacks
 
 ## Sanitized acceptance transcript
 
-The transcript below was captured by @cjagwani on 2026-06-09 against `integrate.api.nvidia.com` from a GCP Brev box. Reproduces the bug behavior in Scenarios A/B and the fix behavior in Scenario C. Use this as the durable acceptance baseline; new runs that differ structurally should update this section (and the dated entry below) rather than the unit tests.
+The transcript below was captured by @cjagwani on 2026-06-09 against `integrate.api.nvidia.com` (the endpoint in use at that time, before the switch to `inference-api.nvidia.com`) from a GCP Brev box. It reproduces the bug behavior in Scenarios A/B and the fix behavior in Scenario C. Use this as the durable acceptance baseline; new runs that differ structurally should update this section (and the dated entry below) rather than the unit tests.
 
 ### Scenario A (baseline) — 2026-06-09
 
@@ -200,6 +200,6 @@ This satisfies the issue's Option A acceptance condition: model explains it lack
 
 ## Live verification log
 
-- 2026-06-09 — verified by @cjagwani on a GCP Brev box against `integrate.api.nvidia.com`. Numbers and content above match this run.
+- 2026-06-09 — verified by @cjagwani on a GCP Brev box against `integrate.api.nvidia.com` (the endpoint in use at that time, before the switch to `inference-api.nvidia.com`). Numbers and content above match this run.
 
 When you re-run this runbook, add a dated entry here so the next reviewer can see how recently the upstream behavior was last confirmed. If the response shape differs materially from the sanitized transcript above, update both this log and the transcript.
diff --git a/test/e2e-scenario/fixtures/phases/onboarding.ts b/test/e2e-scenario/fixtures/phases/onboarding.ts
index 00cbd0f545..865d817587 100644
--- a/test/e2e-scenario/fixtures/phases/onboarding.ts
+++ b/test/e2e-scenario/fixtures/phases/onboarding.ts
@@ -175,11 +175,11 @@ export class OnboardingPhaseFixture {
       throw new Error("cloud-openclaw onboarding requires an available Docker runtime.");
     }
     const sandboxName = sandboxNameFromOptions(environment.onboarding, options);
-    const apiKey = this.secrets.required("NVIDIA_API_KEY");
+    const apiKey = this.secrets.required("NVIDIA_INFERENCE_API_KEY");
     this.registerSandboxCleanup(sandboxName);
     const result = await this.host.nemoclaw(ONBOARD_ARGS, {
       artifactName: "onboard-cloud-openclaw",
-      env: commandEnv(sandboxName, { NVIDIA_API_KEY: apiKey }),
+      env: commandEnv(sandboxName, { NVIDIA_INFERENCE_API_KEY: apiKey }),
       redactionValues: [apiKey],
       timeoutMs: options.timeoutMs ?? DEFAULT_TIMEOUT_MS,
     });
@@ -205,14 +205,14 @@ export class OnboardingPhaseFixture {
       );
     }
     const sandboxName = sandboxNameFromOptions(environment.onboarding, options);
-    const apiKey = this.secrets.required("NVIDIA_API_KEY");
+    const apiKey = this.secrets.required("NVIDIA_INFERENCE_API_KEY");
     this.registerSandboxCleanup(sandboxName);
     const shimDir = await mkdtemp(join(tmpdir(), "e2e-no-docker-"));
     const shimPath = join(shimDir, "docker");
     try {
       await writeFile(shimPath, noDockerShim(), "utf8");
       await chmod(shimPath, 0o700);
-      const env = commandEnv(sandboxName, { NVIDIA_API_KEY: apiKey });
+      const env = commandEnv(sandboxName, { NVIDIA_INFERENCE_API_KEY: apiKey });
       env.PATH = prependPath(shimDir, env.PATH);
       const result = await this.host.nemoclaw(ONBOARD_ARGS, {
         artifactName: "onboard-cloud-openclaw-no-docker",
diff --git a/test/e2e-scenario/live/credential-migration.test.ts b/test/e2e-scenario/live/credential-migration.test.ts
index a020a5a113..3629bc339e 100644
--- a/test/e2e-scenario/live/credential-migration.test.ts
+++ b/test/e2e-scenario/live/credential-migration.test.ts
@@ -18,7 +18,7 @@ import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
 // gateway, the plaintext file is removed after success, credentials list reads
 // from the gateway, and secure unlink removes a planted symlink without touching
 // its target. The live onboard intentionally follows the legacy default NVIDIA
-// Endpoints path: NVIDIA_API_KEY is present only in the legacy file, absent from
+// Endpoints path: NVIDIA_INFERENCE_API_KEY is present only in the legacy file, absent from
 // the onboard child env, and must migrate into the nvidia-prod gateway provider.
 // No registry, migration ledger, or shared helper is introduced.
 
@@ -107,7 +107,7 @@ async function cleanupCredentialMigrationState(host: HostCliClient, home: string
     host.command("node", [CLI_ENTRYPOINT, SANDBOX_NAME, "destroy", "--yes"], {
       artifactName: "cleanup-nemoclaw-destroy",
       env,
-      redactionValues: [process.env.NVIDIA_API_KEY ?? ""],
+      redactionValues: [process.env.NVIDIA_INFERENCE_API_KEY ?? ""],
       timeoutMs: 120_000,
     }),
   );
@@ -139,13 +139,13 @@ runCredentialMigrationTest(
   { timeout: ONBOARD_TIMEOUT_MS + INSTALL_TIMEOUT_MS + 5 * 60_000 },
   async ({ artifacts, cleanup, host, secrets, skip }) => {
     // Use the existing nightly secret as the legacy NVIDIA credential. The
-    // onboard child env below deliberately does not receive NVIDIA_API_KEY, so
+    // onboard child env below deliberately does not receive NVIDIA_INFERENCE_API_KEY, so
     // the only source is ~/.nemoclaw/credentials.json — matching the retired
     // shell lane's migration contract.
-    const migratedCredentialValue = secrets.required("NVIDIA_API_KEY");
+    const migratedCredentialValue = secrets.required("NVIDIA_INFERENCE_API_KEY");
     expect(
       migratedCredentialValue.startsWith("nvapi-"),
-      "NVIDIA_API_KEY must start with nvapi-",
+      "NVIDIA_INFERENCE_API_KEY must start with nvapi-",
     ).toBe(true);
     expect(fs.existsSync(CLI_ENTRYPOINT), "bin/nemoclaw.js missing").toBe(true);
     expect(
@@ -201,7 +201,7 @@ runCredentialMigrationTest(
       legacyFile,
       JSON.stringify(
         {
-          NVIDIA_API_KEY: migratedCredentialValue,
+          NVIDIA_INFERENCE_API_KEY: migratedCredentialValue,
           OPENSHELL_GATEWAY: "evil-gw-from-tampered-file",
           NODE_OPTIONS: "--require=/tmp/evil.js",
         },
diff --git a/test/e2e-scenario/live/credential-sanitization.test.ts b/test/e2e-scenario/live/credential-sanitization.test.ts
index 0c9bd56f75..e12e57b2a1 100644
--- a/test/e2e-scenario/live/credential-sanitization.test.ts
+++ b/test/e2e-scenario/live/credential-sanitization.test.ts
@@ -199,7 +199,7 @@ function assertCredentialFieldDetectionContract(): void {
     displayName: "should-be-preserved",
     sortKey: "should-also-be-preserved",
     modelName: "nvidia/nemotron-3-super-120b-a12b",
-    keyRef: { source: "env", id: "NVIDIA_API_KEY" },
+    keyRef: { source: "env", id: "NVIDIA_INFERENCE_API_KEY" },
     description: "A secret garden (but not a real secret)",
     tokenizer: "sentencepiece",
     endpoint: "https://api.nvidia.com/v1",
@@ -318,8 +318,10 @@ runCredentialSanitizationTest(
     assertCredentialFieldDetectionContract();
     assertBlueprintDigestContract();
 
-    const apiKey = secrets.required("NVIDIA_API_KEY");
-    expect(apiKey.startsWith("nvapi-"), "NVIDIA_API_KEY must start with nvapi-").toBe(true);
+    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
+    expect(apiKey.startsWith("nvapi-"), "NVIDIA_INFERENCE_API_KEY must start with nvapi-").toBe(
+      true,
+    );
 
     const docker = await host.command("docker", ["info"], {
       artifactName: "prereq-docker-info-credential-sanitization",
@@ -350,7 +352,7 @@ runCredentialSanitizationTest(
         artifactName: "install-and-onboard-credential-sanitization",
         cwd: REPO_ROOT,
         env: testEnv(home, {
-          NVIDIA_API_KEY: apiKey,
+          NVIDIA_INFERENCE_API_KEY: apiKey,
           NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
           NEMOCLAW_RECREATE_SANDBOX: "1",
         }),
diff --git a/test/e2e-scenario/live/gateway-guard-recovery.test.ts b/test/e2e-scenario/live/gateway-guard-recovery.test.ts
index 3a26756765..4cb08907d1 100644
--- a/test/e2e-scenario/live/gateway-guard-recovery.test.ts
+++ b/test/e2e-scenario/live/gateway-guard-recovery.test.ts
@@ -72,7 +72,7 @@ test("gateway recovery restores /tmp guard chain after pod-recreate wipe (#2701)
   secrets,
   cleanup,
 }) => {
-  secrets.required("NVIDIA_API_KEY");
+  secrets.required("NVIDIA_INFERENCE_API_KEY");
 
   await artifacts.writeJson("scenario.json", {
     id: "gateway-guard-recovery",
diff --git a/test/e2e-scenario/live/hermes-e2e.test.ts b/test/e2e-scenario/live/hermes-e2e.test.ts
index e086f52fc4..bd7fed651c 100644
--- a/test/e2e-scenario/live/hermes-e2e.test.ts
+++ b/test/e2e-scenario/live/hermes-e2e.test.ts
@@ -75,7 +75,7 @@ function commandEnv(apiKey?: string): NodeJS.ProcessEnv {
     NEMOCLAW_ONBOARD_VALIDATION_TIMEOUT_SECONDS: ONBOARD_VALIDATION_TIMEOUT_SECONDS,
     NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
   };
-  if (apiKey) env.NVIDIA_API_KEY = apiKey;
+  if (apiKey) env.NVIDIA_INFERENCE_API_KEY = apiKey;
   if (process.env.NEMOCLAW_E2E_HERMES_DASHBOARD) {
     env.NEMOCLAW_E2E_HERMES_DASHBOARD = process.env.NEMOCLAW_E2E_HERMES_DASHBOARD;
   }
@@ -219,8 +219,10 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
   "hermes-e2e: install.sh onboards Hermes and proves health plus live inference",
   { timeout: LIVE_TIMEOUT_MS },
   async ({ artifacts, cleanup, host, provider, sandbox, secrets }) => {
-    const apiKey = secrets.required("NVIDIA_API_KEY");
-    expect(apiKey.startsWith("nvapi-"), "NVIDIA_API_KEY must start with nvapi-").toBe(true);
+    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
+    expect(apiKey.startsWith("nvapi-"), "NVIDIA_INFERENCE_API_KEY must start with nvapi-").toBe(
+      true,
+    );
 
     await artifacts.writeJson("scenario.json", {
       id: "hermes-e2e",
@@ -277,11 +279,11 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
     expect(fs.existsSync(path.join(REPO_ROOT, "agents", "hermes", "manifest.yaml"))).toBe(true);
 
     const providerModels = await provider.requestJson(
-      trustedProviderEndpoint("https://integrate.api.nvidia.com/v1/models", {
-        allowedHosts: ["integrate.api.nvidia.com"],
+      trustedProviderEndpoint("https://inference-api.nvidia.com/v1/models", {
+        allowedHosts: ["inference-api.nvidia.com"],
       }),
       {
-        artifactName: "phase-1-integrate-models",
+        artifactName: "phase-1-inference-models",
         curlMaxTimeSeconds: 15,
         headers: [`Authorization: Bearer ${apiKey}`],
         env: buildAvailabilityProbeEnv(),
@@ -484,8 +486,8 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
       "direct NVIDIA Endpoints chat",
       async (attempt) => {
         const response = await provider.requestJson(
-          trustedProviderEndpoint("https://integrate.api.nvidia.com/v1/chat/completions", {
-            allowedHosts: ["integrate.api.nvidia.com"],
+          trustedProviderEndpoint("https://inference-api.nvidia.com/v1/chat/completions", {
+            allowedHosts: ["inference-api.nvidia.com"],
           }),
           {
             artifactName: `phase-5-direct-nvidia-chat-attempt-${attempt}`,
diff --git a/test/e2e-scenario/live/inference-routing.test.ts b/test/e2e-scenario/live/inference-routing.test.ts
index ff958867fa..83007af27a 100644
--- a/test/e2e-scenario/live/inference-routing.test.ts
+++ b/test/e2e-scenario/live/inference-routing.test.ts
@@ -563,7 +563,7 @@ liveTest(
     const result = await onboardSandbox(
       artifacts,
       sandboxName,
-      { NVIDIA_API_KEY: invalidKey },
+      { NVIDIA_INFERENCE_API_KEY: invalidKey },
       [invalidKey],
       "tc-inf-06-onboard-invalid-api-key",
       120_000,
@@ -612,7 +612,7 @@ liveTest(
         NEMOCLAW_ENDPOINT_URL: "https://nemoclaw-e2e.invalid/v1",
         NEMOCLAW_MODEL: "test-model",
         NEMOCLAW_PROVIDER: "custom",
-        NVIDIA_API_KEY: nvidiaKey,
+        NVIDIA_INFERENCE_API_KEY: nvidiaKey,
       },
       [nvidiaKey, compatibleKey],
       "tc-inf-07-onboard-unreachable-endpoint",
@@ -633,8 +633,8 @@ liveTest(
   { timeout: 15 * 60_000 },
   async ({ artifacts, cleanup, host, sandbox, secrets, skip }) => {
     const apiKey =
-      secrets.optional("NVIDIA_API_KEY") ??
-      skipLive(skip, "NVIDIA_API_KEY not set — cannot test credential isolation");
+      secrets.optional("NVIDIA_INFERENCE_API_KEY") ??
+      skipLive(skip, "NVIDIA_INFERENCE_API_KEY not set — cannot test credential isolation");
     await requireLivePrerequisites(host, skip);
     const sandboxName = inferenceSandboxName("e2e-inf-cred");
     cleanup.add(
@@ -648,17 +648,17 @@ liveTest(
       runner: "vitest",
       migratedFrom: "test/e2e/test-inference-routing.sh",
       contract: [
-        "real NVIDIA_API_KEY does not appear in sandbox environment",
-        "real NVIDIA_API_KEY does not appear in sandbox process list when ps is available",
-        "real NVIDIA_API_KEY does not appear in sampled sandbox filesystem",
-        "sandbox NVIDIA_API_KEY, when present, is a placeholder rather than the real key",
+        "real NVIDIA_INFERENCE_API_KEY does not appear in sandbox environment",
+        "real NVIDIA_INFERENCE_API_KEY does not appear in sandbox process list when ps is available",
+        "real NVIDIA_INFERENCE_API_KEY does not appear in sampled sandbox filesystem",
+        "sandbox NVIDIA_INFERENCE_API_KEY, when present, is a placeholder rather than the real key",
       ],
     });
 
     const onboard = await onboardSandbox(
       artifacts,
       sandboxName,
-      { NVIDIA_API_KEY: apiKey },
+      { NVIDIA_INFERENCE_API_KEY: apiKey },
       [apiKey],
       "tc-inf-05-onboard-credential-isolation",
     );
@@ -787,7 +787,7 @@ liveTest(
 
     const placeholder = await sandbox.execShell(
       sandboxName,
-      trustedSandboxShellScript("printenv NVIDIA_API_KEY 2>/dev/null || true"),
+      trustedSandboxShellScript("printenv NVIDIA_INFERENCE_API_KEY 2>/dev/null || true"),
       {
         artifactName: "tc-inf-05-sandbox-placeholder",
         env: buildAvailabilityProbeEnv(),
@@ -798,7 +798,8 @@ liveTest(
     const placeholderValue = placeholder.stdout.trim();
     if (!placeholderValue) {
       await artifacts.writeJson("tc-inf-05-placeholder-skipped.json", {
-        reason: "NVIDIA_API_KEY not set in sandbox; placeholder injection may not be active",
+        reason:
+          "NVIDIA_INFERENCE_API_KEY not set in sandbox; placeholder injection may not be active",
       });
     } else {
       expect(placeholderValue, "sandbox has the real key, not a placeholder").not.toBe(apiKey);
diff --git a/test/e2e-scenario/live/issue-4434-tui-unreachable-inference.test.ts b/test/e2e-scenario/live/issue-4434-tui-unreachable-inference.test.ts
index dc1d50b9ac..b9160de08d 100644
--- a/test/e2e-scenario/live/issue-4434-tui-unreachable-inference.test.ts
+++ b/test/e2e-scenario/live/issue-4434-tui-unreachable-inference.test.ts
@@ -25,7 +25,7 @@ const ENVIRONMENT = ubuntuRepoDocker("cloud-openclaw");
 const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? "e2e-issue-4434-tui-unreachable";
 validateSandboxName(SANDBOX_NAME);
 
-const INTEGRATE_MODELS_URL = "https://integrate.api.nvidia.com/v1/models";
+const INFERENCE_MODELS_URL = "https://inference-api.nvidia.com/v1/models";
 const BLOCKED_IPS = ["75.2.113.119", "99.83.136.103"];
 const DEFAULT_TUI_TIMEOUT_SEC = 180;
 const MAX_TUI_TIMEOUT_SEC = 3600;
@@ -138,8 +138,10 @@ runIssue4434LiveTest(
       skip("Linux host required for DOCKER-USER iptables repro");
     }
 
-    const apiKey = secrets.required("NVIDIA_API_KEY");
-    expect(apiKey.startsWith("nvapi-"), "NVIDIA_API_KEY must start with nvapi-").toBe(true);
+    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
+    expect(apiKey.startsWith("nvapi-"), "NVIDIA_INFERENCE_API_KEY must start with nvapi-").toBe(
+      true,
+    );
 
     await artifacts.writeJson("scenario.json", {
       id: "issue-4434-tui-unreachable-inference",
@@ -243,7 +245,7 @@ runIssue4434LiveTest(
     const blockedEndpointProbe = await sandbox.execShell(
       instance.sandboxName,
       trustedSandboxShellScript(
-        `command -v curl >/dev/null && curl -sk --connect-timeout 5 --max-time 12 ${shellSingleQuote(INTEGRATE_MODELS_URL)} >/tmp/issue4434-models.blocked.out 2>&1`,
+        `command -v curl >/dev/null && curl -sk --connect-timeout 5 --max-time 12 ${shellSingleQuote(INFERENCE_MODELS_URL)} >/tmp/issue4434-models.blocked.out 2>&1`,
       ),
       {
         artifactName: "issue4434-endpoint-probe-after-block",
@@ -253,7 +255,7 @@ runIssue4434LiveTest(
     );
     expect(
       blockedEndpointProbe.exitCode,
-      `integrate.api.nvidia.com remained reachable from inside the sandbox after firewall block\n${resultText(blockedEndpointProbe)}`,
+      `inference-api.nvidia.com remained reachable from inside the sandbox after firewall block\n${resultText(blockedEndpointProbe)}`,
     ).not.toBe(0);
 
     const captureFile = artifacts.pathFor("openclaw-tui-capture.log");
diff --git a/test/e2e-scenario/live/launchable-smoke.test.ts b/test/e2e-scenario/live/launchable-smoke.test.ts
index 466ac355a6..b966095094 100644
--- a/test/e2e-scenario/live/launchable-smoke.test.ts
+++ b/test/e2e-scenario/live/launchable-smoke.test.ts
@@ -234,8 +234,10 @@ runLaunchableSmokeTest(
       workflowRetirement: "deferred to #5098 Phase 11",
     });
 
-    const apiKey = secrets.required("NVIDIA_API_KEY");
-    expect(apiKey.startsWith("nvapi-"), "NVIDIA_API_KEY must start with nvapi-").toBe(true);
+    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
+    expect(apiKey.startsWith("nvapi-"), "NVIDIA_INFERENCE_API_KEY must start with nvapi-").toBe(
+      true,
+    );
 
     expect(fs.existsSync(LAUNCHABLE_SCRIPT), `${LAUNCHABLE_SCRIPT} missing`).toBe(true);
 
@@ -255,10 +257,10 @@ runLaunchableSmokeTest(
 
     const network = await host.command(
       "curl",
-      ["-sf", "--max-time", "10", "https://integrate.api.nvidia.com/v1/models"],
-      { artifactName: "prereq-integrate-api-models", env: runEnv(), timeoutMs: 30_000 },
+      ["-sf", "--max-time", "10", "https://inference-api.nvidia.com/v1/models"],
+      { artifactName: "prereq-inference-api-models", env: runEnv(), timeoutMs: 30_000 },
     );
-    expectExitZero(network, "integrate.api.nvidia.com reachable");
+    expectExitZero(network, "inference-api.nvidia.com reachable");
 
     const cloneDir = path.join(os.tmpdir(), `NemoClaw-launchable-vitest-${randomUUID()}`);
     cleanup.add(`remove launchable clone ${cloneDir}`, async () =>
@@ -333,7 +335,7 @@ runLaunchableSmokeTest(
         cwd: cloneDir,
         env: runEnv({
           PATH: `/usr/local/bin:${process.env.PATH ?? ""}`,
-          NVIDIA_API_KEY: apiKey,
+          NVIDIA_INFERENCE_API_KEY: apiKey,
           NEMOCLAW_MODEL: MODEL,
           NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
           NEMOCLAW_RECREATE_SANDBOX: "1",
@@ -412,7 +414,7 @@ runLaunchableSmokeTest(
         "30",
         "-X",
         "POST",
-        "https://integrate.api.nvidia.com/v1/chat/completions",
+        "https://inference-api.nvidia.com/v1/chat/completions",
         "-H",
         "Content-Type: application/json",
         "-H",
diff --git a/test/e2e-scenario/live/model-router-provider-routed-inference.test.ts b/test/e2e-scenario/live/model-router-provider-routed-inference.test.ts
index 3cbd7d187e..79aec67462 100644
--- a/test/e2e-scenario/live/model-router-provider-routed-inference.test.ts
+++ b/test/e2e-scenario/live/model-router-provider-routed-inference.test.ts
@@ -75,7 +75,7 @@ function routedPongReason(raw: string): "ok" | string {
 function withProviderRoutedEnv(apiKey: string): NodeJS.ProcessEnv {
   return {
     ...buildAvailabilityProbeEnv(),
-    NVIDIA_API_KEY: apiKey,
+    NVIDIA_INFERENCE_API_KEY: apiKey,
     NEMOCLAW_PROVIDER_KEY: apiKey,
     NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
     NEMOCLAW_NON_INTERACTIVE: "1",
@@ -107,8 +107,10 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
       skip("Docker is required for provider-routed Model Router onboarding");
     }
 
-    const apiKey = secrets.required("NVIDIA_API_KEY");
-    expect(apiKey.startsWith("nvapi-"), "NVIDIA_API_KEY must start with nvapi-").toBe(true);
+    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
+    expect(apiKey.startsWith("nvapi-"), "NVIDIA_INFERENCE_API_KEY must start with nvapi-").toBe(
+      true,
+    );
 
     await artifacts.writeJson("scenario.json", {
       id: "model-router-provider-routed-inference",
@@ -117,7 +119,7 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
       legacySource: "test/e2e/test-model-router-provider-routed-inference.sh",
       contract: [
         "Docker is available before onboarding",
-        "NVIDIA_API_KEY is present and nvapi-prefixed",
+        "NVIDIA_INFERENCE_API_KEY is present and nvapi-prefixed",
         "nemoclaw onboard --fresh completes with NEMOCLAW_PROVIDER=routed",
         "host model-router health reports at least one healthy endpoint",
         "sandbox inference.local returns model nvidia-routed with PONG content",
diff --git a/test/e2e-scenario/live/network-policy.test.ts b/test/e2e-scenario/live/network-policy.test.ts
index f968bd0bda..07980242e6 100644
--- a/test/e2e-scenario/live/network-policy.test.ts
+++ b/test/e2e-scenario/live/network-policy.test.ts
@@ -393,8 +393,10 @@ RUN_NETWORK_POLICY_TEST(
     });
     expect(openshellVersion.exitCode, text(openshellVersion)).toBe(0);
 
-    const apiKey = secrets.required("NVIDIA_API_KEY");
-    expect(apiKey.startsWith("nvapi-"), "NVIDIA_API_KEY must start with nvapi-").toBe(true);
+    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
+    expect(apiKey.startsWith("nvapi-"), "NVIDIA_INFERENCE_API_KEY must start with nvapi-").toBe(
+      true,
+    );
 
     cleanup.add(`destroy network-policy sandbox ${SANDBOX_NAME}`, async () => {
       await runNemoclaw(host, [SANDBOX_NAME, "destroy", "--yes"], {
@@ -434,7 +436,7 @@ RUN_NETWORK_POLICY_TEST(
               ? "onboard-restricted-network-policy"
               : `onboard-restricted-network-policy-attempt-${attempt}`,
           env: baseEnv({
-            NVIDIA_API_KEY: apiKey,
+            NVIDIA_INFERENCE_API_KEY: apiKey,
             NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
             NEMOCLAW_RECREATE_SANDBOX: "1",
             NEMOCLAW_POLICY_TIER: "restricted",
@@ -679,7 +681,7 @@ printf '\n'
     expect(inferenceContent.trim().length).toBeGreaterThan(0);
     const directProvider = await fetchStatus(
       sandbox,
-      "https://integrate.api.nvidia.com/v1/models",
+      "https://inference-api.nvidia.com/v1/models",
       "tc-net-07-direct-provider-blocked",
     );
     expect(directProvider).toMatch(/STATUS_403|ERROR_/);
diff --git a/test/e2e-scenario/live/onboard-negative-paths.test.ts b/test/e2e-scenario/live/onboard-negative-paths.test.ts
index dc957e2ff5..074c798c96 100644
--- a/test/e2e-scenario/live/onboard-negative-paths.test.ts
+++ b/test/e2e-scenario/live/onboard-negative-paths.test.ts
@@ -16,7 +16,7 @@ import type { HostCliClient } from "../fixtures/clients/host.ts";
 const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
 const CLI_DIST_ENTRYPOINT = path.join(REPO_ROOT, "dist", "nemoclaw.js");
 const SESSION_FILE = path.join(process.env.HOME ?? "/tmp", ".nemoclaw", "onboard-session.json");
-const INVALID_NVIDIA_API_KEY = "not-a-nvidia-key";
+const INVALID_NVIDIA_INFERENCE_API_KEY = "not-a-nvidia-key";
 const STACK_TRACE_PATTERNS = [/(^|\s)(TypeError|ReferenceError|SyntaxError):/m, /^\s+at /m];
 
 process.env.NEMOCLAW_CLI_BIN ??= path.join(REPO_ROOT, "bin", "nemoclaw.js");
@@ -123,9 +123,9 @@ liveTest(
           NEMOCLAW_RECREATE_SANDBOX: "1",
           NEMOCLAW_PROVIDER: "cloud",
           NEMOCLAW_POLICY_MODE: "skip",
-          NVIDIA_API_KEY: INVALID_NVIDIA_API_KEY,
+          NVIDIA_INFERENCE_API_KEY: INVALID_NVIDIA_INFERENCE_API_KEY,
         }),
-        redactionValues: [INVALID_NVIDIA_API_KEY],
+        redactionValues: [INVALID_NVIDIA_INFERENCE_API_KEY],
         timeoutMs: 5 * 60_000,
       },
     );
diff --git a/test/e2e-scenario/live/onboard-resume.test.ts b/test/e2e-scenario/live/onboard-resume.test.ts
index e411f75962..f5b9c5847e 100644
--- a/test/e2e-scenario/live/onboard-resume.test.ts
+++ b/test/e2e-scenario/live/onboard-resume.test.ts
@@ -16,7 +16,7 @@ import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
 // Shape: drive the real `nemoclaw onboard` CLI through the deterministic E2E
 // failure-injection hook (NEMOCLAW_E2E_FAILURE_INJECTION +
 // NEMOCLAW_E2E_FORCE_FAIL_AT_STEP), then invoke
-// `nemoclaw onboard --resume --non-interactive` with NVIDIA_API_KEY stripped
+// `nemoclaw onboard --resume --non-interactive` with NVIDIA_INFERENCE_API_KEY stripped
 // from the environment to prove the credential is hydrated from the onboard
 // session file.
 //
@@ -93,7 +93,7 @@ function containsExactJsonToken(value: unknown, token: string): boolean {
 }
 
 // Gate the test on NEMOCLAW_RUN_E2E_SCENARIOS=1 so accidental cli-test-shard
-// discovery does not run it without real `openshell`, Docker, or NVIDIA_API_KEY.
+// discovery does not run it without real `openshell`, Docker, or NVIDIA_INFERENCE_API_KEY.
 // Live-only tests opt in to the same gate used by the `e2e-scenarios-live`
 // project include glob in vitest.config.ts.
 test.skipIf(!shouldRunLiveE2EScenarios())(
@@ -113,7 +113,7 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
     // env (includes PATH, HOME, etc.) so spawn can locate `docker`.
     // The shell-probe boundary defaults to no env inheritance; fixture spawns
     // must opt in via buildAvailabilityProbeEnv() to keep secret-passthrough
-    // explicit (NVIDIA_API_KEY is NOT in the allowlist; we layer it explicitly
+    // explicit (NVIDIA_INFERENCE_API_KEY is NOT in the allowlist; we layer it explicitly
     // in Phase 2 below).
     const dockerInfo = await host.command("docker", ["info"], {
       artifactName: "prereq-docker-info",
@@ -132,10 +132,12 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
     expect(openshellVersion.exitCode, openshellVersion.stderr).toBe(0);
 
     // Assertion: nvidia-api-key-present — secrets.required(...) skips the test
-    // if NVIDIA_API_KEY is unset (correct behavior under workflow_dispatch
+    // if NVIDIA_INFERENCE_API_KEY is unset (correct behavior under workflow_dispatch
     // without the secret wired in).
-    const apiKey = secrets.required("NVIDIA_API_KEY");
-    expect(apiKey.startsWith("nvapi-"), "NVIDIA_API_KEY must start with nvapi-").toBe(true);
+    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
+    expect(apiKey.startsWith("nvapi-"), "NVIDIA_INFERENCE_API_KEY must start with nvapi-").toBe(
+      true,
+    );
 
     // ──────────────────────────────────────────────────────────────────
     // Phase 0 (deferred): pre-cleanup of leftover sandbox/session state.
@@ -210,7 +212,7 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
       artifactName: "phase-2-onboard-interrupted",
       env: {
         ...buildAvailabilityProbeEnv(),
-        NVIDIA_API_KEY: apiKey,
+        NVIDIA_INFERENCE_API_KEY: apiKey,
         NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
         NEMOCLAW_RECREATE_SANDBOX: "1",
         NEMOCLAW_POLICY_MODE: "suggested",
@@ -256,7 +258,7 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
     expect(interrupted.failure?.step).toBe("policies");
 
     // ──────────────────────────────────────────────────────────────────
-    // Phase 3: resume — NVIDIA_API_KEY removed from env so the resume run
+    // Phase 3: resume — NVIDIA_INFERENCE_API_KEY removed from env so the resume run
     // must hydrate the credential from the session file.
     // ──────────────────────────────────────────────────────────────────
     const resumeRun = await host.command(
@@ -264,10 +266,10 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
       [CLI_ENTRYPOINT, "onboard", "--resume", "--non-interactive"],
       {
         artifactName: "phase-3-onboard-resume",
-        // buildAvailabilityProbeEnv() does NOT pass NVIDIA_API_KEY through —
+        // buildAvailabilityProbeEnv() does NOT pass NVIDIA_INFERENCE_API_KEY through —
         // it's outside the fixture env allowlist. Resume must hydrate the
         // credential from the session file. This is exactly the bash test's
-        // `env -u NVIDIA_API_KEY` invariant, expressed via explicit
+        // `env -u NVIDIA_INFERENCE_API_KEY` invariant, expressed via explicit
         // secret-passthrough.
         env: {
           ...buildAvailabilityProbeEnv(),
diff --git a/test/e2e-scenario/live/openclaw-tui-chat-correlation.test.ts b/test/e2e-scenario/live/openclaw-tui-chat-correlation.test.ts
index 9494507269..9759f12c26 100644
--- a/test/e2e-scenario/live/openclaw-tui-chat-correlation.test.ts
+++ b/test/e2e-scenario/live/openclaw-tui-chat-correlation.test.ts
@@ -485,7 +485,7 @@ async function runLiveIssue2603ReproWithEventCaptureRetry(
 test(
   "openclaw-tui-chat-correlation: rapid TUI/webchat sends stay correlated on a real OpenClaw sandbox (#2603 + #3145)",
   async ({ artifacts, environment, onboard, sandbox, secrets }) => {
-    secrets.required("NVIDIA_API_KEY");
+    secrets.required("NVIDIA_INFERENCE_API_KEY");
 
     await artifacts.writeJson("scenario.json", {
       id: "openclaw-tui-chat-correlation",
diff --git a/test/e2e-scenario/live/rebuild-openclaw.test.ts b/test/e2e-scenario/live/rebuild-openclaw.test.ts
index b5f91a3157..0c3418121e 100644
--- a/test/e2e-scenario/live/rebuild-openclaw.test.ts
+++ b/test/e2e-scenario/live/rebuild-openclaw.test.ts
@@ -136,7 +136,7 @@ function dockerContextEnv(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv {
 
 function cliEnv(apiKey: string, extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv {
   return dockerContextEnv({
-    NVIDIA_API_KEY: apiKey,
+    NVIDIA_INFERENCE_API_KEY: apiKey,
     NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
     ...extra,
   });
@@ -205,9 +205,9 @@ async function configureGatewayInferenceRoute(
       [
         "set -euo pipefail",
         "if openshell provider get nvidia-prod >/dev/null 2>&1; then",
-        "  openshell provider update nvidia-prod --credential NVIDIA_API_KEY",
+        "  openshell provider update nvidia-prod --credential NVIDIA_INFERENCE_API_KEY",
         "else",
-        "  openshell provider create --name nvidia-prod --type nvidia --credential NVIDIA_API_KEY",
+        "  openshell provider create --name nvidia-prod --type nvidia --credential NVIDIA_INFERENCE_API_KEY",
         "fi",
         `openshell inference set --no-verify --provider nvidia-prod --model ${model}`,
       ].join("\n"),
@@ -260,7 +260,7 @@ function seedRegistryAndSession(): void {
     failure: null,
     provider: "nvidia-prod",
     model: DEFAULT_MODEL,
-    credentialEnv: "NVIDIA_API_KEY",
+    credentialEnv: "NVIDIA_INFERENCE_API_KEY",
     agent: null,
     steps: {
       preflight: complete,
@@ -346,8 +346,10 @@ function backupCredentialLeakPaths(backupDir: string, oldGatewayToken: string):
 test.skipIf(!shouldRunLiveE2EScenarios())(
   "rebuild-openclaw: old OpenClaw sandbox rebuild preserves state and rotates gateway token",
   async ({ artifacts, cleanup, host, sandbox, secrets, skip }) => {
-    const apiKey = secrets.required("NVIDIA_API_KEY");
-    expect(apiKey.startsWith("nvapi-"), "NVIDIA_API_KEY must start with nvapi-").toBe(true);
+    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
+    expect(apiKey.startsWith("nvapi-"), "NVIDIA_INFERENCE_API_KEY must start with nvapi-").toBe(
+      true,
+    );
 
     expect(
       fs.existsSync(CLI_ENTRYPOINT),
diff --git a/test/e2e-scenario/live/sandbox-operations.test.ts b/test/e2e-scenario/live/sandbox-operations.test.ts
index 81f1cef506..acaf07de8f 100644
--- a/test/e2e-scenario/live/sandbox-operations.test.ts
+++ b/test/e2e-scenario/live/sandbox-operations.test.ts
@@ -98,9 +98,9 @@ async function onboardSandbox(
         NEMOCLAW_PROVIDER: "cloud",
         NEMOCLAW_SANDBOX_NAME: sandboxName,
         NEMOCLAW_RECREATE_SANDBOX: "1",
-        NVIDIA_API_KEY: process.env.NVIDIA_API_KEY ?? "",
+        NVIDIA_INFERENCE_API_KEY: process.env.NVIDIA_INFERENCE_API_KEY ?? "",
       },
-      redactionValues: [process.env.NVIDIA_API_KEY ?? ""],
+      redactionValues: [process.env.NVIDIA_INFERENCE_API_KEY ?? ""],
       timeoutMs: 20 * 60_000,
     },
   );
@@ -528,7 +528,7 @@ async function assertGatewayRecovery(host: HostCliClient, sandboxName: string):
 liveTest(
   "sandbox operations preserve list/status/logs/recovery/multi-sandbox contracts",
   async ({ artifacts, cleanup, environment, host, sandbox, secrets, skip }) => {
-    secrets.required("NVIDIA_API_KEY");
+    secrets.required("NVIDIA_INFERENCE_API_KEY");
 
     await artifacts.writeJson("scenario.json", {
       id: "sandbox-operations",
diff --git a/test/e2e-scenario/live/sandbox-rebuild.test.ts b/test/e2e-scenario/live/sandbox-rebuild.test.ts
index 8acb4c7d74..86003625cd 100644
--- a/test/e2e-scenario/live/sandbox-rebuild.test.ts
+++ b/test/e2e-scenario/live/sandbox-rebuild.test.ts
@@ -48,7 +48,7 @@ function sandboxRebuildEnv(apiKey: string, extra: NodeJS.ProcessEnv = {}): NodeJ
   return {
     ...buildAvailabilityProbeEnv(),
     ...extra,
-    NVIDIA_API_KEY: apiKey,
+    NVIDIA_INFERENCE_API_KEY: apiKey,
     NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
     NEMOCLAW_NON_INTERACTIVE: "1",
     NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
@@ -86,8 +86,10 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
     stateValidation,
   }) => {
     assertTestOwnedSandboxName();
-    const apiKey = secrets.required("NVIDIA_API_KEY");
-    expect(apiKey.startsWith("nvapi-"), "NVIDIA_API_KEY must start with nvapi-").toBe(true);
+    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
+    expect(apiKey.startsWith("nvapi-"), "NVIDIA_INFERENCE_API_KEY must start with nvapi-").toBe(
+      true,
+    );
 
     const dockerInfo = await host.command("docker", ["info"], {
       artifactName: "prereq-docker-info",
diff --git a/test/e2e-scenario/live/sandbox-survival.test.ts b/test/e2e-scenario/live/sandbox-survival.test.ts
index 6fe50badc5..52e415d9df 100644
--- a/test/e2e-scenario/live/sandbox-survival.test.ts
+++ b/test/e2e-scenario/live/sandbox-survival.test.ts
@@ -48,7 +48,7 @@ function extractSemver(raw: string): string | undefined {
 function installEnv(apiKey: string): NodeJS.ProcessEnv {
   return {
     ...buildAvailabilityProbeEnv(),
-    NVIDIA_API_KEY: apiKey,
+    NVIDIA_INFERENCE_API_KEY: apiKey,
     NEMOCLAW_NON_INTERACTIVE: "1",
     NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
     NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
@@ -84,8 +84,10 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
     skip,
     stateValidation,
   }) => {
-    const apiKey = secrets.required("NVIDIA_API_KEY");
-    expect(apiKey.startsWith("nvapi-"), "NVIDIA_API_KEY must start with nvapi-").toBe(true);
+    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
+    expect(apiKey.startsWith("nvapi-"), "NVIDIA_INFERENCE_API_KEY must start with nvapi-").toBe(
+      true,
+    );
 
     await artifacts.writeJson("scenario.json", {
       id: "sandbox-survival",
@@ -117,9 +119,9 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
 
     const modelsReachable = await host.command(
       "curl",
-      ["-sf", "--max-time", "10", "https://integrate.api.nvidia.com/v1/models"],
+      ["-sf", "--max-time", "10", "https://inference-api.nvidia.com/v1/models"],
       {
-        artifactName: "prereq-integrate-api-models",
+        artifactName: "prereq-inference-api-models",
         env: buildAvailabilityProbeEnv(),
         redactionValues: [apiKey],
         timeoutMs: 15_000,
diff --git a/test/e2e-scenario/live/shields-config.test.ts b/test/e2e-scenario/live/shields-config.test.ts
index a8816d54c9..567e66c3b4 100644
--- a/test/e2e-scenario/live/shields-config.test.ts
+++ b/test/e2e-scenario/live/shields-config.test.ts
@@ -245,8 +245,10 @@ RUN_SHIELDS_TEST(
       skip("Docker is required for shields-config live E2E");
     }
 
-    const apiKey = secrets.required("NVIDIA_API_KEY");
-    expect(apiKey.startsWith("nvapi-"), "NVIDIA_API_KEY must start with nvapi-").toBe(true);
+    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
+    expect(apiKey.startsWith("nvapi-"), "NVIDIA_INFERENCE_API_KEY must start with nvapi-").toBe(
+      true,
+    );
 
     await cleanupSandbox(host, sandbox, "pre-cleanup");
     cleanup.add(`destroy shields-config sandbox ${SANDBOX_NAME}`, async () => {
@@ -259,7 +261,7 @@ RUN_SHIELDS_TEST(
       {
         artifactName: "phase-1-install-shields-config",
         env: commandEnv({
-          NVIDIA_API_KEY: apiKey,
+          NVIDIA_INFERENCE_API_KEY: apiKey,
           NEMOCLAW_RECREATE_SANDBOX: "1",
         }),
         redactionValues: [apiKey],
diff --git a/test/e2e-scenario/live/skill-agent.test.ts b/test/e2e-scenario/live/skill-agent.test.ts
index 6ef02c350b..82692c6831 100644
--- a/test/e2e-scenario/live/skill-agent.test.ts
+++ b/test/e2e-scenario/live/skill-agent.test.ts
@@ -220,8 +220,10 @@ runSkillAgentTest(
       skip("Docker is required for skill-agent E2E");
     }
 
-    const apiKey = secrets.required("NVIDIA_API_KEY");
-    expect(apiKey.startsWith("nvapi-"), "NVIDIA_API_KEY must start with nvapi-").toBe(true);
+    const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
+    expect(apiKey.startsWith("nvapi-"), "NVIDIA_INFERENCE_API_KEY must start with nvapi-").toBe(
+      true,
+    );
 
     await artifacts.writeJson("scenario.json", {
       id: "skill-agent",
@@ -230,7 +232,7 @@ runSkillAgentTest(
       legacySource: "test/e2e/test-skill-agent-e2e.sh",
       contract: [
         "Docker is available before onboarding",
-        "NVIDIA_API_KEY is present and nvapi-prefixed",
+        "NVIDIA_INFERENCE_API_KEY is present and nvapi-prefixed",
         "nemoclaw onboard creates/recreates a real OpenClaw sandbox",
         "skill-smoke-fixture is injected into sandbox and home skill roots",
         "openclaw agent reads SKILL.md and returns SKILL_SMOKE_VERIFY_K9X2",
@@ -296,7 +298,7 @@ runSkillAgentTest(
         artifactName: "onboard-skill-agent",
         env: {
           ...buildAvailabilityProbeEnv(),
-          NVIDIA_API_KEY: apiKey,
+          NVIDIA_INFERENCE_API_KEY: apiKey,
           NEMOCLAW_AGENT: "openclaw",
           NEMOCLAW_PROVIDER: "cloud",
           NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
@@ -351,7 +353,7 @@ runSkillAgentTest(
         cwd: REPO_ROOT,
         env: {
           ...buildAvailabilityProbeEnv(),
-          NVIDIA_API_KEY: apiKey,
+          NVIDIA_INFERENCE_API_KEY: apiKey,
           SANDBOX_NAME,
           SKILL_ID,
           VERIFY_TOKEN: VERIFY_PHRASE,
diff --git a/test/e2e-scenario/live/token-rotation.test.ts b/test/e2e-scenario/live/token-rotation.test.ts
index de2aa75c6b..40a938082f 100644
--- a/test/e2e-scenario/live/token-rotation.test.ts
+++ b/test/e2e-scenario/live/token-rotation.test.ts
@@ -255,7 +255,7 @@ function assertTokenPairsDiffer(): void {
 function redactionValues(): string[] {
   return [
     "token-rotation-compatible-e2e",
-    process.env.NVIDIA_API_KEY,
+    process.env.NVIDIA_INFERENCE_API_KEY,
     process.env.GITHUB_TOKEN,
     ...Object.values(TOKEN_A),
     ...Object.values(TOKEN_B),
@@ -399,7 +399,7 @@ liveTest(
         resources: [
           "Docker",
           "install.sh/OpenShell",
-          "NVIDIA_API_KEY or fake OpenAI-compatible endpoint",
+          "NVIDIA_INFERENCE_API_KEY or fake OpenAI-compatible endpoint",
           "fake messaging tokens",
         ],
       },
diff --git a/test/e2e-scenario/live/whatsapp-qr-compact.test.ts b/test/e2e-scenario/live/whatsapp-qr-compact.test.ts
index 0944560237..1272b9a519 100644
--- a/test/e2e-scenario/live/whatsapp-qr-compact.test.ts
+++ b/test/e2e-scenario/live/whatsapp-qr-compact.test.ts
@@ -15,7 +15,7 @@ import { testTimeoutOptions } from "../../helpers/timeouts";
 // @openclaw/whatsapp versions bundled by Dockerfile.base and measures the real
 // upstream terminal QR renderer with and without the NemoClaw compact preload.
 // It intentionally does not require a WhatsApp account, phone scan, sandbox,
-// Docker, or NVIDIA_API_KEY: the legacy contract is the renderer boundary.
+// Docker, or NVIDIA_INFERENCE_API_KEY: the legacy contract is the renderer boundary.
 
 const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
 const DOCKERFILE_BASE = path.join(REPO_ROOT, "Dockerfile.base");
diff --git a/test/e2e-scenario/manifests/hermes-nvidia-discord.yaml b/test/e2e-scenario/manifests/hermes-nvidia-discord.yaml
index 535506ae40..d40d290668 100644
--- a/test/e2e-scenario/manifests/hermes-nvidia-discord.yaml
+++ b/test/e2e-scenario/manifests/hermes-nvidia-discord.yaml
@@ -22,5 +22,5 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
       - DISCORD_BOT_TOKEN
diff --git a/test/e2e-scenario/manifests/hermes-nvidia-slack.yaml b/test/e2e-scenario/manifests/hermes-nvidia-slack.yaml
index 1715c5e364..584418d003 100644
--- a/test/e2e-scenario/manifests/hermes-nvidia-slack.yaml
+++ b/test/e2e-scenario/manifests/hermes-nvidia-slack.yaml
@@ -22,6 +22,6 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
       - SLACK_BOT_TOKEN
       - SLACK_APP_TOKEN
diff --git a/test/e2e-scenario/manifests/hermes-nvidia.yaml b/test/e2e-scenario/manifests/hermes-nvidia.yaml
index caee7a3308..6fa4ed0af7 100644
--- a/test/e2e-scenario/manifests/hermes-nvidia.yaml
+++ b/test/e2e-scenario/manifests/hermes-nvidia.yaml
@@ -21,4 +21,4 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-brave.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-brave.yaml
index f6fb1151a3..baf9527187 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-brave.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-brave.yaml
@@ -23,5 +23,5 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
       - BRAVE_API_KEY
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-brev-launchable.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-brev-launchable.yaml
index 9f3da8e72f..9420693034 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-brev-launchable.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-brev-launchable.yaml
@@ -23,4 +23,4 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-custom-policies.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-custom-policies.yaml
index 091f76884b..c44f0a5c47 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-custom-policies.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-custom-policies.yaml
@@ -26,4 +26,4 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-discord.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-discord.yaml
index f5ec7d45f2..c90896c3b4 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-discord.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-discord.yaml
@@ -22,5 +22,5 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
       - DISCORD_BOT_TOKEN
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-double-provider-switch.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-double-provider-switch.yaml
index 687a2608d8..f2b7a705ac 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-double-provider-switch.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-double-provider-switch.yaml
@@ -22,4 +22,4 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-double-same-provider.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-double-same-provider.yaml
index fa951a0d7d..2488fcc181 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-double-same-provider.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-double-same-provider.yaml
@@ -22,4 +22,4 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-gateway-port-conflict.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-gateway-port-conflict.yaml
index c86e5c963d..b11018987a 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-gateway-port-conflict.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-gateway-port-conflict.yaml
@@ -24,4 +24,4 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-invalid-key.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-invalid-key.yaml
index 7c881c8edf..c8423359e7 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-invalid-key.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-invalid-key.yaml
@@ -22,4 +22,4 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-macos.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-macos.yaml
index 06068fb633..14471421e1 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-macos.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-macos.yaml
@@ -21,4 +21,4 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-no-docker-negative.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-no-docker-negative.yaml
index cc26672a36..0b8c20c05b 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-no-docker-negative.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-no-docker-negative.yaml
@@ -22,4 +22,4 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-post-reboot-recovery.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-post-reboot-recovery.yaml
index 9e4d6f664c..8c264d7612 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-post-reboot-recovery.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-post-reboot-recovery.yaml
@@ -40,4 +40,4 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-rebuild.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-rebuild.yaml
index 8cf00c198f..7261bc157c 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-rebuild.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-rebuild.yaml
@@ -24,4 +24,4 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-repair.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-repair.yaml
index e783edd65a..c1b53f9110 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-repair.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-repair.yaml
@@ -22,4 +22,4 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-resume.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-resume.yaml
index 3ba269666c..bd1d31e011 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-resume.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-resume.yaml
@@ -22,4 +22,4 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-slack.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-slack.yaml
index 100ea3e337..e82e9483f7 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-slack.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-slack.yaml
@@ -22,5 +22,5 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
       - SLACK_BOT_TOKEN
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-telegram.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-telegram.yaml
index 59c5676239..7484f11190 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-telegram.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-telegram.yaml
@@ -22,5 +22,5 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
       - TELEGRAM_BOT_TOKEN
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-token-rotation.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-token-rotation.yaml
index bc9d6d6e40..34c1789ed2 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-token-rotation.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-token-rotation.yaml
@@ -22,4 +22,4 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-wsl.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-wsl.yaml
index 74b7563a80..035a811c7f 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia-wsl.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-wsl.yaml
@@ -21,4 +21,4 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia.yaml b/test/e2e-scenario/manifests/openclaw-nvidia.yaml
index 30080e9db3..f865485aaa 100644
--- a/test/e2e-scenario/manifests/openclaw-nvidia.yaml
+++ b/test/e2e-scenario/manifests/openclaw-nvidia.yaml
@@ -21,4 +21,4 @@ spec:
   state:
     workspaceRef: default
     credentialRefs:
-      - NVIDIA_API_KEY
+      - NVIDIA_INFERENCE_API_KEY
diff --git a/test/e2e-scenario/scenarios/scenarios/baseline.ts b/test/e2e-scenario/scenarios/scenarios/baseline.ts
index f4b3c4209c..da0e11de40 100644
--- a/test/e2e-scenario/scenarios/scenarios/baseline.ts
+++ b/test/e2e-scenario/scenarios/scenarios/baseline.ts
@@ -68,7 +68,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     expectedStateId: "cloud-openclaw-ready",
     suiteIds: ["smoke", "inference", "credentials"],
     description: "Ubuntu repo checkout with Docker and cloud OpenClaw onboarding.",
-    requiredSecrets: ["NVIDIA_API_KEY"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
   },
   {
     id: "ubuntu-repo-cloud-hermes",
@@ -76,7 +76,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     environment: ubuntuRepoDocker("cloud-hermes"),
     expectedStateId: "cloud-hermes-ready",
     suiteIds: ["smoke", "inference", "hermes-specific"],
-    requiredSecrets: ["NVIDIA_API_KEY"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
   },
   {
     id: "gpu-repo-local-ollama-openclaw",
@@ -94,7 +94,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     onboardingAssertionIds: ["base-installed"],
     suiteIds: ["platform-macos"],
     runnerRequirements: ["macos-latest"],
-    requiredSecrets: ["NVIDIA_API_KEY"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
     skippedCapabilities: macosDockerSkipped,
   },
   {
@@ -104,7 +104,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     expectedStateId: "cloud-openclaw-ready",
     suiteIds: ["smoke", "platform-wsl"],
     runnerRequirements: ["windows-latest", "wsl2"],
-    requiredSecrets: ["NVIDIA_API_KEY"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
   },
   {
     id: "brev-launchable-cloud-openclaw",
@@ -113,7 +113,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     expectedStateId: "cloud-openclaw-ready",
     suiteIds: ["smoke", "inference"],
     runnerRequirements: ["ubuntu-latest", "brev-api-token", "launchable-image"],
-    requiredSecrets: ["NVIDIA_API_KEY"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
   },
   {
     id: "ubuntu-no-docker-preflight-negative",
@@ -122,7 +122,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     expectedStateId: "preflight-failure-no-sandbox",
     onboardingAssertionIds: ["base-installed", "preflight-expected-failed"],
     suiteIds: [],
-    requiredSecrets: ["NVIDIA_API_KEY"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
     expectedFailure: {
       phase: "preflight",
       errorClass: "docker-missing",
@@ -144,7 +144,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     environment: ubuntuRepoDockerLifecycle("cloud-openclaw", "rebuild-current-version"),
     expectedStateId: "cloud-openclaw-ready",
     suiteIds: ["smoke", "rebuild", "upgrade"],
-    requiredSecrets: ["NVIDIA_API_KEY"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
   },
   {
     // Failing-test-first regression scaffold for #4423. After
@@ -178,7 +178,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     environment: ubuntuRepoDockerLifecycle("cloud-openclaw", "post-reboot-recovery"),
     expectedStateId: "post-reboot-recovery-ready",
     suiteIds: ["smoke"],
-    requiredSecrets: ["NVIDIA_API_KEY"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
     description:
       "Failing-test-first guard for #4423: post-reboot recovery must preserve " +
       "the local registry entry and restart the labeled Docker container.",
@@ -197,7 +197,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     environment: ubuntuRepoDocker("cloud-nvidia-openclaw-brave"),
     expectedStateId: "cloud-openclaw-ready",
     suiteIds: ["smoke"],
-    requiredSecrets: ["NVIDIA_API_KEY", "BRAVE_API_KEY"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY", "BRAVE_API_KEY"],
   },
   {
     id: "ubuntu-repo-cloud-openclaw-telegram",
@@ -205,7 +205,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     environment: ubuntuRepoDocker("cloud-nvidia-openclaw-telegram"),
     expectedStateId: "cloud-openclaw-ready",
     suiteIds: ["smoke", "messaging-telegram"],
-    requiredSecrets: ["NVIDIA_API_KEY", "TELEGRAM_BOT_TOKEN"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY", "TELEGRAM_BOT_TOKEN"],
   },
   {
     id: "ubuntu-repo-cloud-openclaw-discord",
@@ -213,7 +213,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     environment: ubuntuRepoDocker("cloud-nvidia-openclaw-discord"),
     expectedStateId: "cloud-openclaw-ready",
     suiteIds: ["smoke", "messaging-discord"],
-    requiredSecrets: ["NVIDIA_API_KEY", "DISCORD_BOT_TOKEN"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY", "DISCORD_BOT_TOKEN"],
   },
   {
     id: "ubuntu-repo-cloud-openclaw-slack",
@@ -221,7 +221,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     environment: ubuntuRepoDocker("cloud-nvidia-openclaw-slack"),
     expectedStateId: "cloud-openclaw-ready",
     suiteIds: ["smoke", "messaging-slack"],
-    requiredSecrets: ["NVIDIA_API_KEY", "SLACK_BOT_TOKEN"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY", "SLACK_BOT_TOKEN"],
   },
   {
     id: "ubuntu-repo-cloud-hermes-discord",
@@ -229,7 +229,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     environment: ubuntuRepoDocker("cloud-nvidia-hermes-discord"),
     expectedStateId: "cloud-hermes-ready",
     suiteIds: ["smoke"],
-    requiredSecrets: ["NVIDIA_API_KEY", "DISCORD_BOT_TOKEN"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY", "DISCORD_BOT_TOKEN"],
   },
   {
     id: "ubuntu-repo-cloud-hermes-slack",
@@ -237,7 +237,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     environment: ubuntuRepoDocker("cloud-nvidia-hermes-slack"),
     expectedStateId: "cloud-hermes-ready",
     suiteIds: ["smoke"],
-    requiredSecrets: ["NVIDIA_API_KEY", "SLACK_BOT_TOKEN", "SLACK_APP_TOKEN"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY", "SLACK_BOT_TOKEN", "SLACK_APP_TOKEN"],
   },
   {
     id: "ubuntu-repo-cloud-openclaw-resume",
@@ -245,7 +245,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     environment: ubuntuRepoDocker("cloud-nvidia-openclaw-resume-after-interrupt"),
     expectedStateId: "cloud-openclaw-ready",
     suiteIds: ["smoke"],
-    requiredSecrets: ["NVIDIA_API_KEY"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
   },
   {
     id: "ubuntu-repo-cloud-openclaw-repair",
@@ -253,7 +253,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     environment: ubuntuRepoDocker("cloud-nvidia-openclaw-repair-existing-config"),
     expectedStateId: "cloud-openclaw-ready",
     suiteIds: ["smoke"],
-    requiredSecrets: ["NVIDIA_API_KEY"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
   },
   {
     id: "ubuntu-repo-cloud-openclaw-double-same-provider",
@@ -261,7 +261,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     environment: ubuntuRepoDocker("cloud-nvidia-openclaw-double-same-provider"),
     expectedStateId: "cloud-openclaw-ready",
     suiteIds: ["smoke"],
-    requiredSecrets: ["NVIDIA_API_KEY"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
   },
   {
     id: "ubuntu-repo-cloud-openclaw-double-provider-switch",
@@ -269,7 +269,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     environment: ubuntuRepoDocker("cloud-nvidia-openclaw-double-provider-switch"),
     expectedStateId: "cloud-openclaw-ready",
     suiteIds: ["smoke"],
-    requiredSecrets: ["NVIDIA_API_KEY"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
   },
   {
     id: "ubuntu-repo-cloud-openclaw-token-rotation",
@@ -277,7 +277,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     environment: ubuntuRepoDocker("cloud-nvidia-openclaw-token-rotation"),
     expectedStateId: "cloud-openclaw-ready",
     suiteIds: ["smoke", "messaging-token-rotation"],
-    requiredSecrets: ["NVIDIA_API_KEY"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
   },
   {
     id: "ubuntu-repo-cloud-openclaw-custom-policies",
@@ -293,7 +293,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
       "model-router",
       "snapshot-lifecycle",
     ],
-    requiredSecrets: ["NVIDIA_API_KEY"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
   },
   {
     id: "ubuntu-invalid-nvidia-key-negative",
@@ -302,7 +302,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     expectedStateId: "onboarding-failure-invalid-nvidia-key",
     onboardingAssertionIds: ["base-installed"],
     suiteIds: [],
-    requiredSecrets: ["NVIDIA_API_KEY"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
     expectedFailure: {
       phase: "onboarding",
       errorClass: "invalid-nvidia-api-key",
@@ -316,7 +316,7 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
     expectedStateId: "onboarding-failure-gateway-port-conflict",
     onboardingAssertionIds: ["base-installed"],
     suiteIds: [],
-    requiredSecrets: ["NVIDIA_API_KEY"],
+    requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
     expectedFailure: {
       phase: "onboarding",
       errorClass: "gateway-port-conflict",
diff --git a/test/e2e-scenario/scenarios/types.ts b/test/e2e-scenario/scenarios/types.ts
index a95f928f97..a72361031f 100644
--- a/test/e2e-scenario/scenarios/types.ts
+++ b/test/e2e-scenario/scenarios/types.ts
@@ -239,7 +239,7 @@ export interface PhaseAction {
   // spawn. See fixtures/redaction.ts. Each entry must match the
   // secret-key shape; the fixture layer rejects non-secret names so the
   // allowlist-vs-declared-secret boundary stays honest. Cloud install
-  // declares ["NVIDIA_API_KEY"]; slack onboarding declares the slack
+  // declares ["NVIDIA_INFERENCE_API_KEY"]; slack onboarding declares the slack
   // tokens it actually needs; etc.
   secretEnv?: readonly string[];
 }
diff --git a/test/e2e-scenario/support-tests/docker-probe.test.ts b/test/e2e-scenario/support-tests/docker-probe.test.ts
index 309893138e..128c994b79 100644
--- a/test/e2e-scenario/support-tests/docker-probe.test.ts
+++ b/test/e2e-scenario/support-tests/docker-probe.test.ts
@@ -28,7 +28,7 @@ describe("DockerProbe secret hygiene", () => {
         DOCKER_HOST: "unix:///tmp/docker.sock",
         DOCKER_CONTEXT: "desktop-linux",
         DOCKERHUB_TOKEN: "dockerhub-secret-token",
-        NVIDIA_API_KEY: "nvapi-secret-value",
+        NVIDIA_INFERENCE_API_KEY: "nvapi-secret-value",
         RANDOM_SECRET: "other-secret-value",
       },
       "/tmp/docker-config",
@@ -42,19 +42,19 @@ describe("DockerProbe secret hygiene", () => {
       DOCKER_CONFIG: "/tmp/docker-config",
     });
     expect(env).not.toHaveProperty("DOCKERHUB_TOKEN");
-    expect(env).not.toHaveProperty("NVIDIA_API_KEY");
+    expect(env).not.toHaveProperty("NVIDIA_INFERENCE_API_KEY");
     expect(env).not.toHaveProperty("RANDOM_SECRET");
   });
 
   it("redacts secret-shaped Docker diagnostics before artifacts are written", () => {
     const secret = "nvapi-supersecret-token";
-    const secrets = new SecretStore({ NVIDIA_API_KEY: secret }, (message) => {
+    const secrets = new SecretStore({ NVIDIA_INFERENCE_API_KEY: secret }, (message) => {
       throw new Error(message ?? "unexpected skip");
     });
 
     const result = redactDockerProbeResult(
       {
-        command: ["docker", "run", "--env", `NVIDIA_API_KEY=${secret}`],
+        command: ["docker", "run", "--env", `NVIDIA_INFERENCE_API_KEY=${secret}`],
         exitCode: 1,
         signal: null,
         stdout: `stdout ${secret}`,
diff --git a/test/e2e-scenario/support-tests/e2e-fixture-context.test.ts b/test/e2e-scenario/support-tests/e2e-fixture-context.test.ts
index 69db753193..294ab80d78 100644
--- a/test/e2e-scenario/support-tests/e2e-fixture-context.test.ts
+++ b/test/e2e-scenario/support-tests/e2e-fixture-context.test.ts
@@ -160,7 +160,7 @@ describe("E2E fixture primitives", () => {
   it("secret store redacts sensitive env values and skips missing required secrets", () => {
     const canonicalToken = `${"nv"}${"api"}-${"a".repeat(24)}`;
     const store = new SecretStore(
-      { NVIDIA_API_KEY: "nv-secret", PLAIN_VALUE: "visible" },
+      { NVIDIA_INFERENCE_API_KEY: "nv-secret", PLAIN_VALUE: "visible" },
       (note?: string): never => {
         throw new Error(note ?? "skipped");
       },
diff --git a/test/e2e-scenario/support-tests/e2e-manifests.test.ts b/test/e2e-scenario/support-tests/e2e-manifests.test.ts
index 3b51eccd94..ed3e57aaa3 100644
--- a/test/e2e-scenario/support-tests/e2e-manifests.test.ts
+++ b/test/e2e-scenario/support-tests/e2e-manifests.test.ts
@@ -47,7 +47,7 @@ describe("NemoClawInstance manifests", () => {
       spec: {
         setup: { install: { source: "repo-current" } },
         onboarding: { agent: "openclaw", provider: "nvidia", apiKey: "nvapi-literal-secret" },
-        state: { credentialRefs: ["NVIDIA_API_KEY"] },
+        state: { credentialRefs: ["NVIDIA_INFERENCE_API_KEY"] },
       },
     };
 
diff --git a/test/e2e-scenario/support-tests/e2e-phase-environment.test.ts b/test/e2e-scenario/support-tests/e2e-phase-environment.test.ts
index acf00a359d..576ab316a1 100644
--- a/test/e2e-scenario/support-tests/e2e-phase-environment.test.ts
+++ b/test/e2e-scenario/support-tests/e2e-phase-environment.test.ts
@@ -209,11 +209,11 @@ describe("environment phase fixture", () => {
   });
 
   it("scopes availability probe env instead of inheriting unrelated secrets", async () => {
-    const previousSecret = process.env.NVIDIA_API_KEY;
+    const previousSecret = process.env.NVIDIA_INFERENCE_API_KEY;
     const previousDockerHost = process.env.DOCKER_HOST;
     const previousHome = process.env.HOME;
     const previousPath = process.env.PATH;
-    process.env.NVIDIA_API_KEY = "must-not-leak";
+    process.env.NVIDIA_INFERENCE_API_KEY = "must-not-leak";
     process.env.DOCKER_HOST = "unix:///tmp/e2e-docker.sock";
     process.env.HOME = "/tmp/e2e-home";
     process.env.PATH = "/usr/bin";
@@ -231,15 +231,15 @@ describe("environment phase fixture", () => {
       expect(dockerEnv).toMatchObject({ DOCKER_HOST: "unix:///tmp/e2e-docker.sock" });
       expect(cliEnv?.PATH).toBe("/tmp/e2e-home/.local/bin:/usr/bin");
       expect(dockerEnv?.PATH).toBe("/tmp/e2e-home/.local/bin:/usr/bin");
-      expect(cliEnv).not.toHaveProperty("NVIDIA_API_KEY");
-      expect(dockerEnv).not.toHaveProperty("NVIDIA_API_KEY");
+      expect(cliEnv).not.toHaveProperty("NVIDIA_INFERENCE_API_KEY");
+      expect(dockerEnv).not.toHaveProperty("NVIDIA_INFERENCE_API_KEY");
       expect(runner.calls[0]?.options?.inheritEnv).toBeUndefined();
       expect(runner.calls[1]?.options?.inheritEnv).toBeUndefined();
     } finally {
       if (previousSecret === undefined) {
-        delete process.env.NVIDIA_API_KEY;
+        delete process.env.NVIDIA_INFERENCE_API_KEY;
       } else {
-        process.env.NVIDIA_API_KEY = previousSecret;
+        process.env.NVIDIA_INFERENCE_API_KEY = previousSecret;
       }
       if (previousDockerHost === undefined) {
         delete process.env.DOCKER_HOST;
diff --git a/test/e2e-scenario/support-tests/e2e-phase-onboarding.test.ts b/test/e2e-scenario/support-tests/e2e-phase-onboarding.test.ts
index 7475fd0a9f..bb472b6f0d 100644
--- a/test/e2e-scenario/support-tests/e2e-phase-onboarding.test.ts
+++ b/test/e2e-scenario/support-tests/e2e-phase-onboarding.test.ts
@@ -120,7 +120,7 @@ describe("onboarding phase fixture", () => {
   it("runs cloud OpenClaw onboarding with explicit non-interactive inputs", async () => {
     const runner = new FakeRunner();
     runner.enqueue(shellResult(0, "onboarded\n"));
-    const secrets = new FakeSecrets({ NVIDIA_API_KEY: "secret-token" });
+    const secrets = new FakeSecrets({ NVIDIA_INFERENCE_API_KEY: "secret-token" });
     const onboard = new OnboardingPhaseFixture(new HostCliClient(runner), secrets);
 
     const instance = await onboard.from(ready(), { sandboxName: "e2e-ubuntu-repo-cloud-openclaw" });
@@ -133,7 +133,7 @@ describe("onboarding phase fixture", () => {
       providerEnv: "cloud",
       gatewayUrl: "http://127.0.0.1:18789",
     });
-    expect(secrets.requiredCalls).toEqual(["NVIDIA_API_KEY"]);
+    expect(secrets.requiredCalls).toEqual(["NVIDIA_INFERENCE_API_KEY"]);
     expect(runner.calls).toEqual([
       {
         command: "nemoclaw",
@@ -144,7 +144,7 @@ describe("onboarding phase fixture", () => {
             NEMOCLAW_AGENT: "openclaw",
             NEMOCLAW_PROVIDER: "cloud",
             NEMOCLAW_SANDBOX_NAME: "e2e-ubuntu-repo-cloud-openclaw",
-            NVIDIA_API_KEY: "secret-token",
+            NVIDIA_INFERENCE_API_KEY: "secret-token",
             PATH: expect.any(String),
           }),
           redactionValues: ["secret-token"],
@@ -159,7 +159,7 @@ describe("onboarding phase fixture", () => {
     runner.enqueue(shellResult(42, "provider rejected credential"));
     const onboard = new OnboardingPhaseFixture(
       new HostCliClient(runner),
-      new FakeSecrets({ NVIDIA_API_KEY: "secret" }),
+      new FakeSecrets({ NVIDIA_INFERENCE_API_KEY: "secret" }),
     );
 
     await expect(onboard.from(ready())).rejects.toThrow(
@@ -173,7 +173,7 @@ describe("onboarding phase fixture", () => {
     const cleanup = new FakeCleanup();
     const onboard = new OnboardingPhaseFixture(
       new HostCliClient(runner),
-      new FakeSecrets({ NVIDIA_API_KEY: "secret" }),
+      new FakeSecrets({ NVIDIA_INFERENCE_API_KEY: "secret" }),
       cleanup,
     );
 
@@ -200,7 +200,7 @@ describe("onboarding phase fixture", () => {
     const onboard = new OnboardingPhaseFixture(new HostCliClient(runner), new FakeSecrets());
 
     await expect(onboard.from(ready())).rejects.toThrow(
-      /missing required E2E secret: NVIDIA_API_KEY/,
+      /missing required E2E secret: NVIDIA_INFERENCE_API_KEY/,
     );
     expect(runner.calls).toEqual([]);
   });
@@ -208,7 +208,7 @@ describe("onboarding phase fixture", () => {
   it("requires Docker for cloud OpenClaw onboarding", async () => {
     const onboard = new OnboardingPhaseFixture(
       new HostCliClient(new FakeRunner()),
-      new FakeSecrets({ NVIDIA_API_KEY: "secret" }),
+      new FakeSecrets({ NVIDIA_INFERENCE_API_KEY: "secret" }),
     );
 
     await expect(
@@ -223,7 +223,7 @@ describe("onboarding phase fixture", () => {
   it("rejects invalid sandbox names before cloud OpenClaw side effects", async () => {
     const runner = new FakeRunner();
     const cleanup = new FakeCleanup();
-    const secrets = new FakeSecrets({ NVIDIA_API_KEY: "secret" });
+    const secrets = new FakeSecrets({ NVIDIA_INFERENCE_API_KEY: "secret" });
     const onboard = new OnboardingPhaseFixture(new HostCliClient(runner), secrets, cleanup);
 
     await expect(onboard.from(ready(), { sandboxName: "bad name" })).rejects.toThrow(
@@ -241,7 +241,7 @@ describe("onboarding phase fixture", () => {
     const cleanup = new FakeCleanup();
     const onboard = new OnboardingPhaseFixture(
       new HostCliClient(runner),
-      new FakeSecrets({ NVIDIA_API_KEY: "secret-token" }),
+      new FakeSecrets({ NVIDIA_INFERENCE_API_KEY: "secret-token" }),
       cleanup,
     );
 
@@ -267,7 +267,7 @@ describe("onboarding phase fixture", () => {
   it("runs the no-Docker negative path with a failing Docker shim", async () => {
     const runner = new FakeRunner();
     runner.enqueue(shellResult(7, "Cannot connect to the Docker daemon"));
-    const secrets = new FakeSecrets({ NVIDIA_API_KEY: "secret-token" });
+    const secrets = new FakeSecrets({ NVIDIA_INFERENCE_API_KEY: "secret-token" });
     const cleanup = new FakeCleanup();
     const onboard = new OnboardingPhaseFixture(new HostCliClient(runner), secrets, cleanup);
 
@@ -302,10 +302,10 @@ describe("onboarding phase fixture", () => {
       NEMOCLAW_AGENT: "openclaw",
       NEMOCLAW_PROVIDER: "cloud",
       NEMOCLAW_SANDBOX_NAME: "e2e-no-docker",
-      NVIDIA_API_KEY: "secret-token",
+      NVIDIA_INFERENCE_API_KEY: "secret-token",
     });
     expect(runner.calls[0]?.options?.env?.PATH).toContain("e2e-no-docker-");
-    expect(secrets.requiredCalls).toEqual(["NVIDIA_API_KEY"]);
+    expect(secrets.requiredCalls).toEqual(["NVIDIA_INFERENCE_API_KEY"]);
     expect(cleanup.calls).toHaveLength(1);
     expect(cleanup.calls[0]?.name).toBe("destroy NemoClaw sandbox e2e-no-docker");
   });
@@ -319,7 +319,7 @@ describe("onboarding phase fixture", () => {
       runner.enqueue(shellResult(7, "Docker is required before onboarding with secret-token"));
       const onboard = new OnboardingPhaseFixture(
         new HostCliClient(runner),
-        new FakeSecrets({ NVIDIA_API_KEY: "secret-token" }),
+        new FakeSecrets({ NVIDIA_INFERENCE_API_KEY: "secret-token" }),
       );
 
       await onboard.from(
@@ -349,7 +349,7 @@ describe("onboarding phase fixture", () => {
     runner.enqueue(shellResult(7, "Docker is not reachable. Please fix Docker and try again."));
     const onboard = new OnboardingPhaseFixture(
       new HostCliClient(runner),
-      new FakeSecrets({ NVIDIA_API_KEY: "secret" }),
+      new FakeSecrets({ NVIDIA_INFERENCE_API_KEY: "secret" }),
     );
 
     const instance = await onboard.from(
@@ -370,7 +370,7 @@ describe("onboarding phase fixture", () => {
     const runner = new FakeRunner();
     const onboard = new OnboardingPhaseFixture(
       new HostCliClient(runner),
-      new FakeSecrets({ NVIDIA_API_KEY: "secret" }),
+      new FakeSecrets({ NVIDIA_INFERENCE_API_KEY: "secret" }),
     );
 
     await expect(onboard.from(ready({ onboarding: "cloud-openclaw-no-docker" }))).rejects.toThrow(
@@ -389,7 +389,7 @@ describe("onboarding phase fixture", () => {
       runner.enqueue(shellResult(7, "Docker is required before onboarding"));
       const onboard = new OnboardingPhaseFixture(
         new HostCliClient(runner),
-        new FakeSecrets({ NVIDIA_API_KEY: "secret-token" }),
+        new FakeSecrets({ NVIDIA_INFERENCE_API_KEY: "secret-token" }),
       );
 
       await onboard.from(
@@ -423,7 +423,7 @@ describe("onboarding phase fixture", () => {
     const cleanup = new FakeCleanup();
     const onboard = new OnboardingPhaseFixture(
       new HostCliClient(runner),
-      new FakeSecrets({ NVIDIA_API_KEY: "secret" }),
+      new FakeSecrets({ NVIDIA_INFERENCE_API_KEY: "secret" }),
       cleanup,
     );
 
@@ -457,7 +457,7 @@ describe("onboarding phase fixture", () => {
     runner.enqueue(shellResult(9, "provider rejected credential"));
     const onboard = new OnboardingPhaseFixture(
       new HostCliClient(runner),
-      new FakeSecrets({ NVIDIA_API_KEY: "secret" }),
+      new FakeSecrets({ NVIDIA_INFERENCE_API_KEY: "secret" }),
     );
 
     await expect(
@@ -489,7 +489,7 @@ describe("onboarding phase fixture", () => {
       runner.enqueue(shellResult(0, "onboarded\n"));
       const onboard = new OnboardingPhaseFixture(
         new HostCliClient(runner),
-        new FakeSecrets({ NVIDIA_API_KEY: "secret-token" }),
+        new FakeSecrets({ NVIDIA_INFERENCE_API_KEY: "secret-token" }),
         undefined,
         new ArtifactSink(tmp),
       );
@@ -515,7 +515,7 @@ describe("onboarding phase fixture", () => {
     try {
       const onboard = new OnboardingPhaseFixture(
         new HostCliClient(new FakeRunner()),
-        new FakeSecrets({ NVIDIA_API_KEY: "secret" }),
+        new FakeSecrets({ NVIDIA_INFERENCE_API_KEY: "secret" }),
         undefined,
         new ArtifactSink(tmp),
       );
diff --git a/test/e2e-scenario/support-tests/e2e-phase-state-validation.test.ts b/test/e2e-scenario/support-tests/e2e-phase-state-validation.test.ts
index 1501991dad..8d7a50d280 100644
--- a/test/e2e-scenario/support-tests/e2e-phase-state-validation.test.ts
+++ b/test/e2e-scenario/support-tests/e2e-phase-state-validation.test.ts
@@ -432,8 +432,8 @@ describe("state-validation phase fixture", () => {
   });
 
   it("does not pass unrelated secret environment values to status probes", async () => {
-    const original = process.env.NVIDIA_API_KEY;
-    process.env.NVIDIA_API_KEY = "nvapi-test-secret-value";
+    const original = process.env.NVIDIA_INFERENCE_API_KEY;
+    process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-test-secret-value";
     try {
       const runner = new FakeRunner();
       runner.enqueue(shellResult(0, "nemoclaw v0.0.0\n"));
@@ -447,13 +447,13 @@ describe("state-validation phase fixture", () => {
       for (const call of runner.calls.slice(1)) {
         expect(call.options).not.toHaveProperty("inheritEnv");
         expect(call.options?.env).toEqual(expect.objectContaining({ PATH: expect.any(String) }));
-        expect(call.options?.env).not.toHaveProperty("NVIDIA_API_KEY");
+        expect(call.options?.env).not.toHaveProperty("NVIDIA_INFERENCE_API_KEY");
       }
     } finally {
       if (original === undefined) {
-        delete process.env.NVIDIA_API_KEY;
+        delete process.env.NVIDIA_INFERENCE_API_KEY;
       } else {
-        process.env.NVIDIA_API_KEY = original;
+        process.env.NVIDIA_INFERENCE_API_KEY = original;
       }
     }
   });
diff --git a/test/e2e-scenario/support-tests/e2e-scenario-matrix.test.ts b/test/e2e-scenario/support-tests/e2e-scenario-matrix.test.ts
index b46c1e88d3..e9d3d1aaf7 100644
--- a/test/e2e-scenario/support-tests/e2e-scenario-matrix.test.ts
+++ b/test/e2e-scenario/support-tests/e2e-scenario-matrix.test.ts
@@ -88,7 +88,7 @@ describe("live Vitest scenario matrix", () => {
       runtime: "docker-running",
       onboarding: "cloud-openclaw",
       expectedStateId: "cloud-openclaw-ready",
-      requiredSecrets: ["NVIDIA_API_KEY"],
+      requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
       supported: true,
       supportReasons: [],
       pendingRuntimeSuites: ["smoke", "inference", "credentials"],
@@ -105,7 +105,7 @@ describe("live Vitest scenario matrix", () => {
       runtime: "docker-running",
       onboarding: "cloud-openclaw",
       expectedStateId: "post-reboot-recovery-ready",
-      requiredSecrets: ["NVIDIA_API_KEY"],
+      requiredSecrets: ["NVIDIA_INFERENCE_API_KEY"],
       supported: true,
       supportReasons: [],
     });
diff --git a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
index 354712ecde..95d47ed9ce 100644
--- a/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
+++ b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts
@@ -413,7 +413,7 @@ jobs:
     env:
       E2E_ARTIFACT_DIR: \${{ github.workspace }}/.e2e/vitest
       NEMOCLAW_RUN_E2E_SCENARIOS: "1"
-      NVIDIA_API_KEY: \${{ secrets.NVIDIA_API_KEY }}
+      NVIDIA_INFERENCE_API_KEY: \${{ secrets.NVIDIA_INFERENCE_API_KEY }}
     steps:
       - uses: actions/checkout@v4
         with:
@@ -421,7 +421,7 @@ jobs:
       - name: Set up Node
         uses: actions/setup-node@v4
         env:
-          NVIDIA_API_KEY: \${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: \${{ secrets.NVIDIA_INFERENCE_API_KEY }}
       - name: Run Vitest live E2E scenarios
         env:
           TEST_FILTER: \${{ inputs.test_filter }}
@@ -442,7 +442,7 @@ jobs:
     env:
       E2E_ARTIFACT_DIR: \${{ github.workspace }}/.e2e/openshell-version-pin
       NEMOCLAW_RUN_E2E_SCENARIOS: "0"
-      NVIDIA_API_KEY: \${{ secrets.NVIDIA_API_KEY }}
+      NVIDIA_INFERENCE_API_KEY: \${{ secrets.NVIDIA_INFERENCE_API_KEY }}
     steps:
       - uses: actions/checkout@v4
         with:
@@ -450,12 +450,12 @@ jobs:
       - name: Set up Node
         uses: actions/setup-node@v4
         env:
-          NVIDIA_API_KEY: \${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: \${{ secrets.NVIDIA_INFERENCE_API_KEY }}
       - name: Install root dependencies
         run: npm install
       - name: Run OpenShell version-pin live test
         env:
-          NVIDIA_API_KEY: \${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: \${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: npx vitest run --project e2e-scenarios-live "\${{ inputs.test_filter }}"
       - name: Upload OpenShell version-pin artifacts
         uses: actions/upload-artifact@v4
@@ -471,7 +471,7 @@ jobs:
     env:
       E2E_ARTIFACT_DIR: \${{ github.workspace }}/.e2e/onboard-negative-paths
       NEMOCLAW_RUN_E2E_SCENARIOS: "0"
-      NVIDIA_API_KEY: \${{ secrets.NVIDIA_API_KEY }}
+      NVIDIA_INFERENCE_API_KEY: \${{ secrets.NVIDIA_INFERENCE_API_KEY }}
     steps:
       - uses: actions/checkout@v4
         with:
@@ -479,12 +479,12 @@ jobs:
       - name: Set up Node
         uses: actions/setup-node@v4
         env:
-          NVIDIA_API_KEY: \${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: \${{ secrets.NVIDIA_INFERENCE_API_KEY }}
       - name: Install root dependencies
         run: npm install
       - name: Run onboard negative-paths live test
         env:
-          NVIDIA_API_KEY: \${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: \${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: npx vitest run --project e2e-scenarios-live "\${{ inputs.test_filter }}"
       - name: Upload onboard negative-paths artifacts
         uses: actions/upload-artifact@v4
@@ -501,7 +501,7 @@ jobs:
       E2E_ARTIFACT_DIR: \${{ github.workspace }}/.e2e/network-policy
       NEMOCLAW_CLI_BIN: bin/not-nemoclaw.js
       NEMOCLAW_RUN_E2E_SCENARIOS: "0"
-      NVIDIA_API_KEY: \${{ secrets.NVIDIA_API_KEY }}
+      NVIDIA_INFERENCE_API_KEY: \${{ secrets.NVIDIA_INFERENCE_API_KEY }}
       DOCKERHUB_USERNAME: \${{ secrets.DOCKERHUB_USERNAME }}
       DOCKERHUB_TOKEN: \${{ secrets.DOCKERHUB_TOKEN }}
       GITHUB_TOKEN: \${{ github.token }}
@@ -516,7 +516,7 @@ jobs:
       - name: Set up Node
         uses: actions/setup-node@v4
         env:
-          NVIDIA_API_KEY: \${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: \${{ secrets.NVIDIA_INFERENCE_API_KEY }}
       - name: Install root dependencies
         run: npm install
       - name: Build CLI
@@ -527,7 +527,7 @@ jobs:
         run: echo install
       - name: Run network-policy live test
         env:
-          NVIDIA_API_KEY: \${{ secrets.NVIDIA_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: \${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: npx vitest run --project e2e-scenarios-live "\${{ inputs.test_filter }}"
       - name: Upload network-policy artifacts
         uses: actions/upload-artifact@v4
@@ -545,7 +545,7 @@ jobs:
       E2E_ARTIFACT_DIR: \${{ github.workspace }}/.e2e/double-onboard
       NEMOCLAW_CLI_BIN: ./bad-cli.js
       NEMOCLAW_RUN_E2E_SCENARIOS: "0"
-      NVIDIA_API_KEY: \${{ secrets.NVIDIA_API_KEY }}
+      NVIDIA_INFERENCE_API_KEY: \${{ secrets.NVIDIA_INFERENCE_API_KEY }}
       DOCKERHUB_TOKEN: \${{ secrets.DOCKERHUB_TOKEN }}
     steps:
       - uses: actions/checkout@v4
@@ -588,13 +588,13 @@ jobs:
           "workflow_dispatch must not expose legacy test_filter input",
           "workflow missing generate-matrix job",
           "live-scenarios job must run on the matrix runner",
-          "live-scenarios job env must not include NVIDIA_API_KEY",
+          "live-scenarios job env must not include NVIDIA_INFERENCE_API_KEY",
           "step 'Run Vitest live E2E scenarios' run script must not interpolate dispatch inputs directly",
-          "Vitest step must receive NVIDIA_API_KEY from secrets",
+          "Vitest step must receive NVIDIA_INFERENCE_API_KEY from secrets",
           "artifact upload must set include-hidden-files: false",
           "upload-artifact action must be pinned to a full commit SHA",
           "openshell-version-pin-vitest job must use the shared jobs selector condition",
-          "network-policy-vitest job env must not include NVIDIA_API_KEY",
+          "network-policy-vitest job env must not include NVIDIA_INFERENCE_API_KEY",
           "network-policy-vitest step 'Install OpenShell' env must not include GITHUB_TOKEN",
           "double-onboard-vitest job env must not include DOCKERHUB_TOKEN",
           "step 'Run double-onboard live Vitest test' run script must not interpolate dispatch inputs directly",
diff --git a/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts b/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts
index 5b5138224a..22e71f35e9 100644
--- a/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts
+++ b/test/e2e-scenario/support-tests/network-policy-transient-provider.test.ts
@@ -23,7 +23,7 @@ describe("network-policy transient provider validation classifier", () => {
 
     expect(
       isTransientProviderValidationFailure(
-        probeOutput("endpoint validation failed: invalid NVIDIA_API_KEY credential"),
+        probeOutput("endpoint validation failed: invalid NVIDIA_INFERENCE_API_KEY credential"),
       ),
     ).toBe(false);
     expect(
diff --git a/test/e2e-script-workflow.test.ts b/test/e2e-script-workflow.test.ts
index db0571e95e..a7fcc2a6d2 100644
--- a/test/e2e-script-workflow.test.ts
+++ b/test/e2e-script-workflow.test.ts
@@ -266,7 +266,7 @@ describe("E2E reusable workflow contract", () => {
   it("passes only named secrets to reusable nightly jobs", () => {
     const reusableJobs = reusableNightlyJobs(nightlyWorkflow);
     const defaultSecrets = {
-      NVIDIA_API_KEY: "${{ secrets.NVIDIA_API_KEY }}",
+      NVIDIA_INFERENCE_API_KEY: "${{ secrets.NVIDIA_INFERENCE_API_KEY }}",
       BRAVE_API_KEY: "${{ secrets.BRAVE_API_KEY }}",
       DOCKERHUB_USERNAME:
         "${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.DOCKERHUB_USERNAME || '' }}",
@@ -415,7 +415,7 @@ describe("E2E reusable workflow contract", () => {
     expect(runStep?.run).toContain("npx vitest run --project e2e-scenarios-live");
     expect(runStep?.run).toContain("test/e2e-scenario/live/credential-migration.test.ts");
     expect(runStep?.run).not.toContain("test/e2e/test-credential-migration.sh");
-    expect(runStep?.env?.NVIDIA_API_KEY).toBe("${{ secrets.NVIDIA_API_KEY }}");
+    expect(runStep?.env?.NVIDIA_INFERENCE_API_KEY).toBe("${{ secrets.NVIDIA_INFERENCE_API_KEY }}");
     expect(runStep?.env?.GITHUB_TOKEN).toBeUndefined();
     expect(runStep?.env?.NEMOCLAW_RUN_E2E_SCENARIOS).toBe("1");
     expect(runStep?.env?.NEMOCLAW_SANDBOX_NAME).toBe("e2e-cred-migration");
diff --git a/test/e2e/brev-e2e.test.ts b/test/e2e/brev-e2e.test.ts
index 938dcd5ec4..f02d0670b4 100644
--- a/test/e2e/brev-e2e.test.ts
+++ b/test/e2e/brev-e2e.test.ts
@@ -18,7 +18,7 @@
  *   npx vitest run --project e2e-branch-validation
  *
  * Required env vars:
- *   NVIDIA_API_KEY   — passed to VM for inference config during onboarding
+ *   NVIDIA_INFERENCE_API_KEY   — passed to VM for inference config during onboarding
  *   GITHUB_TOKEN     — passed to VM for OpenShell binary download
  *   INSTANCE_NAME    — Brev instance name (e.g. pr-156-test)
  *
@@ -264,7 +264,7 @@ function sshEnv(
 ): string {
   const gpuE2eModel = process.env.NEMOCLAW_GPU_E2E_MODEL || "qwen3.5:9b";
   const envParts = [
-    `export NVIDIA_API_KEY='${shellEscape(process.env.NVIDIA_API_KEY)}'`,
+    `export NVIDIA_INFERENCE_API_KEY='${shellEscape(process.env.NVIDIA_INFERENCE_API_KEY)}'`,
     `export GITHUB_TOKEN='${shellEscape(process.env.GITHUB_TOKEN)}'`,
     `export NEMOCLAW_NON_INTERACTIVE=1`,
     `export NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1`,
@@ -1028,7 +1028,7 @@ function writeManualRegistry(elapsed: () => string): void {
 
 // --- suite ------------------------------------------------------------------
 
-const REQUIRED_VARS = ["NVIDIA_API_KEY", "GITHUB_TOKEN", "INSTANCE_NAME"];
+const REQUIRED_VARS = ["NVIDIA_INFERENCE_API_KEY", "GITHUB_TOKEN", "INSTANCE_NAME"];
 const hasRequiredVars = REQUIRED_VARS.every((key) => process.env[key]);
 const hasAuthenticatedBrev = (() => {
   try {
diff --git a/test/e2e/e2e-cloud-experimental/expect-interactive-install.sh b/test/e2e/e2e-cloud-experimental/expect-interactive-install.sh
index 3c556208a2..c7cc96eaac 100755
--- a/test/e2e/e2e-cloud-experimental/expect-interactive-install.sh
+++ b/test/e2e/e2e-cloud-experimental/expect-interactive-install.sh
@@ -4,10 +4,10 @@
 #
 # Thin wrapper: real logic lives in test/e2e/test-e2e-cloud-experimental.sh (Phase 3 expect branch).
 #
-# Prereq: repo checkout at cwd or run from repo; NVIDIA_API_KEY for cloud onboard unless creds on disk.
+# Prereq: repo checkout at cwd or run from repo; NVIDIA_INFERENCE_API_KEY for cloud onboard unless creds on disk.
 #
 # Usage (full suite; Phase 3 is interactive by default in test-e2e-cloud-experimental.sh — this wrapper is optional):
-#   NVIDIA_API_KEY=nvapi-... bash test/e2e/e2e-cloud-experimental/expect-interactive-install.sh
+#   NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/e2e-cloud-experimental/expect-interactive-install.sh
 #
 # Offline expect-only smoke:
 #   DEMO_FAKE_ONLY=1 bash test/e2e/e2e-cloud-experimental/expect-interactive-install.sh
diff --git a/test/e2e/e2e-cloud-experimental/features/skill/add-sandbox-skill.sh b/test/e2e/e2e-cloud-experimental/features/skill/add-sandbox-skill.sh
index ef7f52d247..bdc50e726d 100755
--- a/test/e2e/e2e-cloud-experimental/features/skill/add-sandbox-skill.sh
+++ b/test/e2e/e2e-cloud-experimental/features/skill/add-sandbox-skill.sh
@@ -20,7 +20,7 @@
 #   test/e2e/e2e-cloud-experimental/fixtures/skill-smoke-template.SKILL.md
 #
 # After deploy, optional: run one agent turn to prove the skill is used:
-#   NVIDIA_API_KEY=nvapi-... SANDBOX_NAME=... SKILL_ID=... bash test/e2e/e2e-cloud-experimental/features/skill/verify-sandbox-skill-via-agent.sh
+#   NVIDIA_INFERENCE_API_KEY=nvapi-... SANDBOX_NAME=... SKILL_ID=... bash test/e2e/e2e-cloud-experimental/features/skill/verify-sandbox-skill-via-agent.sh
 #
 # Exit code:
 #   0 = add + query succeeded
diff --git a/test/e2e/e2e-cloud-experimental/features/skill/verify-sandbox-skill-via-agent.sh b/test/e2e/e2e-cloud-experimental/features/skill/verify-sandbox-skill-via-agent.sh
index a88e31bea6..fa90daab2b 100755
--- a/test/e2e/e2e-cloud-experimental/features/skill/verify-sandbox-skill-via-agent.sh
+++ b/test/e2e/e2e-cloud-experimental/features/skill/verify-sandbox-skill-via-agent.sh
@@ -9,7 +9,7 @@
 # (includes SKILL_SMOKE_VERIFY_K9X2). Re-run add-sandbox-skill.sh after template updates.
 #
 # Usage (from repo root):
-#   NVIDIA_API_KEY=nvapi-... SANDBOX_NAME=test01 SKILL_ID=skill-smoke-fixture \
+#   NVIDIA_INFERENCE_API_KEY=nvapi-... SANDBOX_NAME=test01 SKILL_ID=skill-smoke-fixture \
 #     bash test/e2e/e2e-cloud-experimental/features/skill/verify-sandbox-skill-via-agent.sh
 #
 # Optional:
@@ -37,7 +37,7 @@ ok() { printf '%s\n' "verify-sandbox-skill-via-agent: OK: $*"; }
 info() { printf '%s\n' "verify-sandbox-skill-via-agent: INFO: $*"; }
 
 [ -n "$SANDBOX_NAME" ] || die "set SANDBOX_NAME (or NEMOCLAW_SANDBOX_NAME)"
-[ -n "${NVIDIA_API_KEY:-}" ] || die "set NVIDIA_API_KEY (needed for inference inside sandbox)"
+[ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] || die "set NVIDIA_INFERENCE_API_KEY (needed for inference inside sandbox)"
 
 # Do NOT include ${VERIFY_TOKEN} in the prompt itself. The token must come
 # from the agent reading the skill's SKILL.md — that is the entire point of
@@ -58,7 +58,7 @@ command -v openshell >/dev/null 2>&1 || die "openshell not on PATH"
 command -v base64 >/dev/null 2>&1 || die "base64 not on PATH"
 
 prompt_b64=$(printf '%s' "$PROMPT" | base64 | tr -d '\n')
-nv_b64=$(printf '%s' "$NVIDIA_API_KEY" | base64 | tr -d '\n')
+nv_b64=$(printf '%s' "$NVIDIA_INFERENCE_API_KEY" | base64 | tr -d '\n')
 
 ssh_config="$(mktemp)"
 trap 'rm -f "$ssh_config"' EXIT
@@ -75,7 +75,7 @@ _lock_rm=""
 if [ "${SKILL_VERIFY_NO_CLEAR_LOCK:-0}" != "1" ]; then
   _lock_rm="rm -f '/sandbox/.openclaw/agents/main/sessions/${SESSION_ID}.jsonl.lock' 2>/dev/null || true; "
 fi
-remote_cmd="pm=\$(printf '%s' '${prompt_b64}' | base64 -d) || exit 1; nv=\$(printf '%s' '${nv_b64}' | base64 -d) || exit 1; export NVIDIA_API_KEY=\"\$nv\"; ${_lock_rm}${AGENT_LAUNCHER}openclaw agent --agent main --local -m \"\$pm\" --session-id '${SESSION_ID}'"
+remote_cmd="pm=\$(printf '%s' '${prompt_b64}' | base64 -d) || exit 1; nv=\$(printf '%s' '${nv_b64}' | base64 -d) || exit 1; export NVIDIA_INFERENCE_API_KEY=\"\$nv\"; ${_lock_rm}${AGENT_LAUNCHER}openclaw agent --agent main --local -m \"\$pm\" --session-id '${SESSION_ID}'"
 
 info "Running openclaw agent in sandbox '${SANDBOX_NAME}' (session ${SESSION_ID})..."
 
diff --git a/test/e2e/e2e-cloud-experimental/test-port8080-conflict.sh b/test/e2e/e2e-cloud-experimental/test-port8080-conflict.sh
index 67dd4a5f69..03aa92e82f 100755
--- a/test/e2e/e2e-cloud-experimental/test-port8080-conflict.sh
+++ b/test/e2e/e2e-cloud-experimental/test-port8080-conflict.sh
@@ -21,10 +21,10 @@
 #   NEMOCLAW_SANDBOX_NAME                    — default: e2e-cloud-experimental
 #   NEMOCLAW_NON_INTERACTIVE                — should be 1 (onboard non-interactive)
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1  — required for non-interactive onboard/re-onboard
-#   NVIDIA_API_KEY                          — required if onboard reaches cloud inference (restore path)
+#   NVIDIA_INFERENCE_API_KEY                — required if onboard reaches cloud inference (restore path)
 #
 # Usage:
-#   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 NVIDIA_API_KEY=nvapi-... bash test/e2e/e2e-cloud-experimental/test-port8080-conflict.sh
+#   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/e2e-cloud-experimental/test-port8080-conflict.sh
 
 set -uo pipefail
 
diff --git a/test/e2e/test-agent-turn-latency-e2e.sh b/test/e2e/test-agent-turn-latency-e2e.sh
index ff729d4294..8172bfc030 100755
--- a/test/e2e/test-agent-turn-latency-e2e.sh
+++ b/test/e2e/test-agent-turn-latency-e2e.sh
@@ -10,7 +10,7 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
 #   - NEMOCLAW_NON_INTERACTIVE=1
 #   - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 #
@@ -584,10 +584,10 @@ else
   finish
 fi
 
-if [ -n "${NVIDIA_API_KEY:-}" ] && [[ "${NVIDIA_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_API_KEY is set"
+if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
+  pass "NVIDIA_INFERENCE_API_KEY is set"
 else
-  fail "NVIDIA_API_KEY not set or invalid"
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
   finish
 fi
 
diff --git a/test/e2e/test-bedrock-runtime-compatible-anthropic.sh b/test/e2e/test-bedrock-runtime-compatible-anthropic.sh
index 2f792f906f..e1cfcdd1ee 100755
--- a/test/e2e/test-bedrock-runtime-compatible-anthropic.sh
+++ b/test/e2e/test-bedrock-runtime-compatible-anthropic.sh
@@ -432,7 +432,7 @@ run_bedrock_onboard() {
   unset AWS_WEB_IDENTITY_TOKEN_FILE AWS_CONTAINER_CREDENTIALS_RELATIVE_URI
   unset AWS_CONTAINER_CREDENTIALS_FULL_URI AWS_BEARER_TOKEN_BEDROCK
   unset AWS_REGION AWS_DEFAULT_REGION
-  unset NVIDIA_API_KEY OPENAI_API_KEY ANTHROPIC_API_KEY GEMINI_API_KEY COMPATIBLE_API_KEY
+  unset NVIDIA_INFERENCE_API_KEY OPENAI_API_KEY ANTHROPIC_API_KEY GEMINI_API_KEY COMPATIBLE_API_KEY
   unset TELEGRAM_BOT_TOKEN DISCORD_BOT_TOKEN SLACK_BOT_TOKEN SLACK_APP_TOKEN
 
   destroy_sandbox_best_effort
diff --git a/test/e2e/test-brave-search-e2e.sh b/test/e2e/test-brave-search-e2e.sh
index 678fc9b209..9de1c06aa0 100755
--- a/test/e2e/test-brave-search-e2e.sh
+++ b/test/e2e/test-brave-search-e2e.sh
@@ -16,7 +16,7 @@
 #
 # Required env (CI injects from secrets):
 #   BRAVE_API_KEY    real Brave Search subscription token (skip-suite gate)
-#   NVIDIA_API_KEY   drives the agent inference turn in B4a
+#   NVIDIA_INFERENCE_API_KEY   drives the agent inference turn in B4a
 #
 # Secret hygiene: BRAVE_API_KEY is never echoed raw. All output that may
 # contain it pipes through redact_stream; GitHub Actions auto-mask is the
@@ -24,7 +24,7 @@
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     BRAVE_API_KEY=... NVIDIA_API_KEY=... \
+#     BRAVE_API_KEY=... NVIDIA_INFERENCE_API_KEY=... \
 #     bash test/e2e/test-brave-search-e2e.sh
 
 set -uo pipefail
diff --git a/test/e2e/test-channels-add-remove.sh b/test/e2e/test-channels-add-remove.sh
index 8d7fff4047..6d887b69f3 100755
--- a/test/e2e/test-channels-add-remove.sh
+++ b/test/e2e/test-channels-add-remove.sh
@@ -16,12 +16,12 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key or fake OpenAI endpoint)
+#   - NVIDIA_INFERENCE_API_KEY set (real key or fake OpenAI endpoint)
 #   - NEMOCLAW_NON_INTERACTIVE=1, NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_API_KEY=nvapi-... bash test/e2e/test-channels-add-remove.sh
+#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-channels-add-remove.sh
 
 set -uo pipefail
 
@@ -311,11 +311,11 @@ telegram_egress_open() {
 # ══════════════════════════════════════════════════════════════════
 section "Phase 0: Prerequisites"
 
-if [ -z "${NVIDIA_API_KEY:-}" ]; then
-  fail "C0: NVIDIA_API_KEY is required"
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
+  fail "C0: NVIDIA_INFERENCE_API_KEY is required"
   print_summary
 fi
-pass "C0: NVIDIA_API_KEY is set"
+pass "C0: NVIDIA_INFERENCE_API_KEY is set"
 
 if [ "${NEMOCLAW_NON_INTERACTIVE:-}" != "1" ]; then
   fail "C0: NEMOCLAW_NON_INTERACTIVE=1 is required"
@@ -451,14 +451,14 @@ export TELEGRAM_REQUIRE_MENTION="$TELEGRAM_REQUIRE_MENTION_VALUE"
 maybe_skip_telegram_reachability_for_fake_token
 
 # Gateway-credential reuse gate. Before the fix, the rebuild preflight
-# aborted with "provider credential not found" when NVIDIA_API_KEY was unset
+# aborted with "provider credential not found" when NVIDIA_INFERENCE_API_KEY was unset
 # in the host env even though the inference provider was already registered
 # in the OpenShell gateway. Drop the key from the env around `channels add`
 # + rebuild so the post-add rebuild has to reuse the gateway-stored
 # credential instead of demanding it back on the host.
-NVIDIA_API_KEY_BACKUP="${NVIDIA_API_KEY:-}"
-unset NVIDIA_API_KEY
-info "NVIDIA_API_KEY unset for gateway-credential-reuse gate; gateway must hold the credential"
+NVIDIA_INFERENCE_API_KEY_BACKUP="${NVIDIA_INFERENCE_API_KEY:-}"
+unset NVIDIA_INFERENCE_API_KEY
+info "NVIDIA_INFERENCE_API_KEY unset for gateway-credential-reuse gate; gateway must hold the credential"
 
 if nemoclaw "$SANDBOX_NAME" channels add telegram >/tmp/nc-add.log 2>&1; then
   add_rc=0
@@ -483,8 +483,8 @@ else
   tail -100 /tmp/nc-rebuild-add.log 2>/dev/null || true
   # Restore env before bailing so later phases (and operators rerunning
   # the script interactively) still see the original key.
-  if [ -n "$NVIDIA_API_KEY_BACKUP" ]; then
-    export NVIDIA_API_KEY="$NVIDIA_API_KEY_BACKUP"
+  if [ -n "$NVIDIA_INFERENCE_API_KEY_BACKUP" ]; then
+    export NVIDIA_INFERENCE_API_KEY="$NVIDIA_INFERENCE_API_KEY_BACKUP"
   fi
   print_summary
 fi
@@ -492,17 +492,17 @@ fi
 # Gateway-credential reuse assertion: the rebuild must not have aborted with
 # the "provider credential not found" error.
 if grep -q "provider credential not found" /tmp/nc-rebuild-add.log; then
-  fail "C3c: REGRESSION — rebuild aborted on missing NVIDIA_API_KEY despite gateway-registered credential"
+  fail "C3c: REGRESSION — rebuild aborted on missing NVIDIA_INFERENCE_API_KEY despite gateway-registered credential"
 else
-  pass "C3c: rebuild reused gateway-stored credential without NVIDIA_API_KEY"
+  pass "C3c: rebuild reused gateway-stored credential without NVIDIA_INFERENCE_API_KEY"
 fi
 
 # Restore for the remaining phases — `channels remove` + rebuild should
 # work in the normal env-present case too.
-if [ -n "$NVIDIA_API_KEY_BACKUP" ]; then
-  export NVIDIA_API_KEY="$NVIDIA_API_KEY_BACKUP"
+if [ -n "$NVIDIA_INFERENCE_API_KEY_BACKUP" ]; then
+  export NVIDIA_INFERENCE_API_KEY="$NVIDIA_INFERENCE_API_KEY_BACKUP"
 fi
-unset NVIDIA_API_KEY_BACKUP
+unset NVIDIA_INFERENCE_API_KEY_BACKUP
 
 # ══════════════════════════════════════════════════════════════════
 # Phase 4: Post-add assertions (Test 2 acceptance, regression #3437)
diff --git a/test/e2e/test-channels-stop-start.sh b/test/e2e/test-channels-stop-start.sh
index 0a51c7d441..11d414b05d 100755
--- a/test/e2e/test-channels-stop-start.sh
+++ b/test/e2e/test-channels-stop-start.sh
@@ -18,13 +18,13 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set
+#   - NVIDIA_INFERENCE_API_KEY set
 #   - NEMOCLAW_NON_INTERACTIVE=1
 #   - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_API_KEY=nvapi-... bash test/e2e/test-channels-stop-start.sh
+#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-channels-stop-start.sh
 
 set -uo pipefail
 
@@ -831,12 +831,12 @@ run_agent_scenario() {
 
 section "Phase 0: Prerequisites"
 
-if [ -z "${NVIDIA_API_KEY:-}" ]; then
-  msg="C0: NVIDIA_API_KEY is required"
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
+  msg="C0: NVIDIA_INFERENCE_API_KEY is required"
   fail_msg "$msg"
   print_summary
 fi
-msg="C0: NVIDIA_API_KEY is set"
+msg="C0: NVIDIA_INFERENCE_API_KEY is set"
 pass_msg "$msg"
 
 if [ "${NEMOCLAW_NON_INTERACTIVE:-}" != "1" ]; then
diff --git a/test/e2e/test-cloud-inference-e2e.sh b/test/e2e/test-cloud-inference-e2e.sh
index 651ff67d77..491df699fd 100755
--- a/test/e2e/test-cloud-inference-e2e.sh
+++ b/test/e2e/test-cloud-inference-e2e.sh
@@ -12,7 +12,7 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
 #   - NEMOCLAW_NON_INTERACTIVE=1, NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 #
 # Environment:
@@ -24,7 +24,7 @@
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_API_KEY=nvapi-... bash test/e2e/test-cloud-inference-e2e.sh
+#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-cloud-inference-e2e.sh
 
 set -uo pipefail
 
@@ -103,11 +103,11 @@ if ! docker info >/dev/null 2>&1; then
 fi
 pass "Docker is running"
 
-if [ -z "${NVIDIA_API_KEY:-}" ] || [[ "${NVIDIA_API_KEY}" != nvapi-* ]]; then
-  fail "NVIDIA_API_KEY not set or invalid"
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] || [[ "${NVIDIA_INFERENCE_API_KEY}" != nvapi-* ]]; then
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
   exit 1
 fi
-pass "NVIDIA_API_KEY is set"
+pass "NVIDIA_INFERENCE_API_KEY is set"
 
 cd "$REPO" || {
   fail "Could not cd to repo root"
diff --git a/test/e2e/test-cloud-onboard-e2e.sh b/test/e2e/test-cloud-onboard-e2e.sh
index 5203a3cbdc..b819966aad 100755
--- a/test/e2e/test-cloud-onboard-e2e.sh
+++ b/test/e2e/test-cloud-onboard-e2e.sh
@@ -13,8 +13,8 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
-#   - Network access to integrate.api.nvidia.com
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
+#   - Network access to inference-api.nvidia.com
 #
 # Environment:
 #   NEMOCLAW_NON_INTERACTIVE=1                         — required for non-interactive install
@@ -32,7 +32,7 @@
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_API_KEY=nvapi-... bash test/e2e/test-cloud-onboard-e2e.sh
+#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-cloud-onboard-e2e.sh
 
 set -uo pipefail
 
@@ -111,17 +111,17 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_API_KEY:-}" ] && [[ "${NVIDIA_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_API_KEY is set (starts with nvapi-)"
+if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
+  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
 else
-  fail "NVIDIA_API_KEY not set or invalid — required for cloud onboard"
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for cloud onboard"
   exit 1
 fi
 
-if curl -sf --max-time 10 https://integrate.api.nvidia.com/v1/models >/dev/null 2>&1; then
-  pass "Network access to integrate.api.nvidia.com"
+if curl -sf --max-time 10 https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then
+  pass "Network access to inference-api.nvidia.com"
 else
-  fail "Cannot reach integrate.api.nvidia.com"
+  fail "Cannot reach inference-api.nvidia.com"
   exit 1
 fi
 
@@ -275,7 +275,7 @@ fi
 # ══════════════════════════════════════════════════════════════════════
 section "Phase 4: Sandbox checks (Landlock, security, inference.local)"
 
-export SANDBOX_NAME CLOUD_EXPERIMENTAL_MODEL="$CLOUD_MODEL" REPO NVIDIA_API_KEY
+export SANDBOX_NAME CLOUD_EXPERIMENTAL_MODEL="$CLOUD_MODEL" REPO NVIDIA_INFERENCE_API_KEY
 export PATH="/usr/local/bin:${HOME}/.local/bin:${PATH}"
 
 shopt -s nullglob
diff --git a/test/e2e/test-common-egress-agent-e2e.sh b/test/e2e/test-common-egress-agent-e2e.sh
index 5bb702b2b8..16e65212a2 100755
--- a/test/e2e/test-common-egress-agent-e2e.sh
+++ b/test/e2e/test-common-egress-agent-e2e.sh
@@ -11,7 +11,7 @@
 #       and the Hermes agent fetches Wikidata through its API-server agent path.
 #
 # Required env:
-#   NVIDIA_API_KEY                         real NVIDIA Endpoints key for inference
+#   NVIDIA_INFERENCE_API_KEY               real NVIDIA Endpoints key for inference
 #   NEMOCLAW_NON_INTERACTIVE=1             required
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 required
 #
@@ -402,11 +402,11 @@ if ! docker info >/dev/null 2>&1; then
 fi
 pass "Docker is running"
 
-if [ -z "${NVIDIA_API_KEY:-}" ] || [[ "${NVIDIA_API_KEY}" != nvapi-* ]]; then
-  fail "NVIDIA_API_KEY not set or invalid"
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] || [[ "${NVIDIA_INFERENCE_API_KEY}" != nvapi-* ]]; then
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
   summary
 fi
-pass "NVIDIA_API_KEY is set"
+pass "NVIDIA_INFERENCE_API_KEY is set"
 
 if [ "${NEMOCLAW_NON_INTERACTIVE:-}" != "1" ]; then
   fail "NEMOCLAW_NON_INTERACTIVE=1 is required"
diff --git a/test/e2e/test-credential-migration.sh b/test/e2e/test-credential-migration.sh
index 64215d1cb4..cd7ddf64c0 100755
--- a/test/e2e/test-credential-migration.sh
+++ b/test/e2e/test-credential-migration.sh
@@ -35,11 +35,11 @@
 # Prerequisites:
 #   - Docker running
 #   - openshell + nemoclaw on PATH
-#   - NVIDIA_API_KEY set (used as the migrated value)
+#   - NVIDIA_INFERENCE_API_KEY set (used as the migrated value)
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_API_KEY=nvapi-... bash test/e2e/test-credential-migration.sh
+#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-credential-migration.sh
 
 set -uo pipefail
 
@@ -93,11 +93,11 @@ register_sandbox_for_teardown "$SANDBOX_NAME"
 # ══════════════════════════════════════════════════════════════════
 section "Phase 0: Prerequisites"
 
-if [ -z "${NVIDIA_API_KEY:-}" ]; then
-  fail "NVIDIA_API_KEY not set"
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
+  fail "NVIDIA_INFERENCE_API_KEY not set"
   exit 1
 fi
-pass "NVIDIA_API_KEY is set"
+pass "NVIDIA_INFERENCE_API_KEY is set"
 
 if ! command -v openshell >/dev/null 2>&1 || ! command -v nemoclaw >/dev/null 2>&1; then
   info "openshell or nemoclaw not found; running install"
@@ -120,7 +120,7 @@ command -v nemoclaw >/dev/null 2>&1 || {
 }
 pass "openshell + nemoclaw on PATH"
 
-REAL_API_KEY="$NVIDIA_API_KEY"
+REAL_API_KEY="$NVIDIA_INFERENCE_API_KEY"
 NEMOCLAW_DIR="$HOME/.nemoclaw"
 LEGACY_FILE="$NEMOCLAW_DIR/credentials.json"
 
@@ -137,7 +137,7 @@ chmod 700 "$NEMOCLAW_DIR"
 # Tampered fixture: includes an unrelated key the migrator must ignore.
 cat >"$LEGACY_FILE" <<EOF
 {
-  "NVIDIA_API_KEY": "$REAL_API_KEY",
+  "NVIDIA_INFERENCE_API_KEY": "$REAL_API_KEY",
   "OPENSHELL_GATEWAY": "evil-gw-from-tampered-file",
   "NODE_OPTIONS": "--require=/tmp/evil.js"
 }
@@ -147,12 +147,12 @@ chmod 600 "$LEGACY_FILE"
 LEGACY_INODE_BEFORE=$(stat -c '%i' "$LEGACY_FILE" 2>/dev/null || stat -f '%i' "$LEGACY_FILE" 2>/dev/null || echo "")
 [ -n "$LEGACY_INODE_BEFORE" ] && info "Legacy file inode before onboard: $LEGACY_INODE_BEFORE"
 
-# Run onboard WITHOUT NVIDIA_API_KEY in the env. The only place the value
+# Run onboard WITHOUT NVIDIA_INFERENCE_API_KEY in the env. The only place the value
 # can come from is the legacy credentials.json — exactly the migration
 # path we want to exercise.
 ONBOARD_LOG="$(mktemp)"
 (
-  unset NVIDIA_API_KEY
+  unset NVIDIA_INFERENCE_API_KEY
   NEMOCLAW_NON_INTERACTIVE=1 \
     NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
     NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
@@ -200,7 +200,7 @@ fi
 info "Providers in nemoclaw gateway:"
 printf '%s\n' "$PROVIDERS_OUT" | indent
 
-# The legacy NVIDIA_API_KEY should have been registered as one of the
+# The legacy NVIDIA_INFERENCE_API_KEY should have been registered as one of the
 # inference providers (nvidia-prod, nvidia-nim, etc. — the exact name
 # depends on what onboarding chose). Just assert that at least one
 # provider was registered.
diff --git a/test/e2e/test-credential-sanitization.sh b/test/e2e/test-credential-sanitization.sh
index 9ccf806060..e85af483f0 100755
--- a/test/e2e/test-credential-sanitization.sh
+++ b/test/e2e/test-credential-sanitization.sh
@@ -17,15 +17,15 @@
 # Prerequisites:
 #   - Docker running
 #   - NemoClaw installed and sandbox running (test-full-e2e.sh Phase 0-3)
-#   - NVIDIA_API_KEY set
+#   - NVIDIA_INFERENCE_API_KEY set
 #   - openshell on PATH
 #
 # Environment variables:
 #   NEMOCLAW_SANDBOX_NAME  — sandbox name (default: e2e-test)
-#   NVIDIA_API_KEY         — required
+#   NVIDIA_INFERENCE_API_KEY         — required
 #
 # Usage:
-#   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 NVIDIA_API_KEY=nvapi-... bash test/e2e/test-credential-sanitization.sh
+#   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-credential-sanitization.sh
 #
 # See: https://github.com/NVIDIA/NemoClaw/pull/156
 
@@ -110,11 +110,11 @@ sandbox_exec() {
 # ══════════════════════════════════════════════════════════════════
 section "Phase 0: Prerequisites"
 
-if [ -z "${NVIDIA_API_KEY:-}" ]; then
-  fail "NVIDIA_API_KEY not set"
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
+  fail "NVIDIA_INFERENCE_API_KEY not set"
   exit 1
 fi
-pass "NVIDIA_API_KEY is set"
+pass "NVIDIA_INFERENCE_API_KEY is set"
 
 if ! command -v openshell >/dev/null 2>&1; then
   fail "openshell not found on PATH"
@@ -193,7 +193,7 @@ cat >"$AUTH_DIR/auth-profiles.json" <<JSONEOF
   "nvidia:manual": {
     "type": "api_key",
     "provider": "nvidia",
-    "keyRef": { "source": "env", "id": "NVIDIA_API_KEY" },
+    "keyRef": { "source": "env", "id": "NVIDIA_INFERENCE_API_KEY" },
     "resolvedKey": "$FAKE_NVIDIA_KEY",
     "profileId": "nvidia:manual"
   },
@@ -720,7 +720,7 @@ const config = {
   displayName: 'should-be-preserved',
   sortKey: 'should-also-be-preserved',
   modelName: 'nvidia/nemotron-3-super-120b-a12b',
-  keyRef: { source: 'env', id: 'NVIDIA_API_KEY' },
+  keyRef: { source: 'env', id: 'NVIDIA_INFERENCE_API_KEY' },
   description: 'A secret garden (but not a real secret)',
   tokenizer: 'sentencepiece',
   endpoint: 'https://api.nvidia.com/v1',
@@ -754,7 +754,7 @@ for (const [key, expectedVal] of Object.entries(expected)) {
 }
 
 // keyRef is an object — check it's preserved structurally
-if (JSON.stringify(sanitized.keyRef) !== JSON.stringify({ source: 'env', id: 'NVIDIA_API_KEY' })) {
+if (JSON.stringify(sanitized.keyRef) !== JSON.stringify({ source: 'env', id: 'NVIDIA_INFERENCE_API_KEY' })) {
   console.log('CORRUPTED: keyRef');
   allPreserved = false;
 }
diff --git a/test/e2e/test-cron-preflight-inference-local-e2e.sh b/test/e2e/test-cron-preflight-inference-local-e2e.sh
index 21ad66db32..0ee7e92ab0 100755
--- a/test/e2e/test-cron-preflight-inference-local-e2e.sh
+++ b/test/e2e/test-cron-preflight-inference-local-e2e.sh
@@ -19,7 +19,7 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
 #   - NEMOCLAW_NON_INTERACTIVE=1, NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 #
 # Environment:
@@ -30,7 +30,7 @@
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_API_KEY=nvapi-... bash test/e2e/test-cron-preflight-inference-local-e2e.sh
+#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-cron-preflight-inference-local-e2e.sh
 
 set -uo pipefail
 
@@ -99,13 +99,13 @@ if ! command -v jq >/dev/null 2>&1; then
   echo "  Total: $TOTAL  Pass: $PASS  Fail: $FAIL  Skip: $SKIP"
   exit 0
 fi
-if [ -z "${NVIDIA_API_KEY:-}" ]; then
-  skip "NVIDIA_API_KEY not set"
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
+  skip "NVIDIA_INFERENCE_API_KEY not set"
   echo "  Total: $TOTAL  Pass: $PASS  Fail: $FAIL  Skip: $SKIP"
   exit 0
 fi
-if [ "${NVIDIA_API_KEY:0:6}" != "nvapi-" ]; then
-  skip "NVIDIA_API_KEY does not start with nvapi-"
+if [ "${NVIDIA_INFERENCE_API_KEY:0:6}" != "nvapi-" ]; then
+  skip "NVIDIA_INFERENCE_API_KEY does not start with nvapi-"
   echo "  Total: $TOTAL  Pass: $PASS  Fail: $FAIL  Skip: $SKIP"
   exit 0
 fi
diff --git a/test/e2e/test-device-auth-health.sh b/test/e2e/test-device-auth-health.sh
index 23b77768d9..ff7a592223 100755
--- a/test/e2e/test-device-auth-health.sh
+++ b/test/e2e/test-device-auth-health.sh
@@ -21,13 +21,13 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
-#   - Network access to integrate.api.nvidia.com
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
+#   - Network access to inference-api.nvidia.com
 #
 # Environment variables:
 #   NEMOCLAW_NON_INTERACTIVE=1             — required
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 — required
-#   NVIDIA_API_KEY                         — required
+#   NVIDIA_INFERENCE_API_KEY               — required
 #   NEMOCLAW_SANDBOX_NAME                  — sandbox name (default: e2e-health-auth)
 #   NEMOCLAW_E2E_TIMEOUT_SECONDS           — overall timeout (default: 600)
 #   NEMOCLAW_DASHBOARD_PORT                — dashboard port (default: 18789)
@@ -35,7 +35,7 @@
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 \
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#   NVIDIA_API_KEY=nvapi-... \
+#   NVIDIA_INFERENCE_API_KEY=nvapi-... \
 #     bash test/e2e/test-device-auth-health.sh
 # =============================================================================
 
@@ -123,8 +123,8 @@ sandbox_exec() {
 # ══════════════════════════════════════════════════════════════════════════════
 section "Phase 0: Preflight"
 
-if [[ -z "${NVIDIA_API_KEY:-}" ]]; then
-  echo "ERROR: NVIDIA_API_KEY not set" >&2
+if [[ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]]; then
+  echo "ERROR: NVIDIA_INFERENCE_API_KEY not set" >&2
   exit 1
 fi
 
diff --git a/test/e2e/test-diagnostics.sh b/test/e2e/test-diagnostics.sh
index 9109f6d9ab..64f3716b50 100755
--- a/test/e2e/test-diagnostics.sh
+++ b/test/e2e/test-diagnostics.sh
@@ -16,7 +16,7 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set
+#   - NVIDIA_INFERENCE_API_KEY set
 # =============================================================================
 
 set -euo pipefail
@@ -99,7 +99,7 @@ install_nemoclaw() {
   fi
   log "=== Installing NemoClaw via install.sh ==="
   NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
-    NVIDIA_API_KEY="${NVIDIA_API_KEY:-nvapi-DUMMY-FOR-INSTALL}" \
+    NVIDIA_INFERENCE_API_KEY="${NVIDIA_INFERENCE_API_KEY:-nvapi-DUMMY-FOR-INSTALL}" \
     NEMOCLAW_NON_INTERACTIVE=1 \
     NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
     bash "$REPO_ROOT/install.sh" --non-interactive --yes-i-accept-third-party-software \
@@ -120,9 +120,9 @@ preflight() {
   fi
   log "Docker is running"
 
-  local api_key="${NVIDIA_API_KEY:-}"
+  local api_key="${NVIDIA_INFERENCE_API_KEY:-}"
   if [[ -z "$api_key" ]]; then
-    log "ERROR: NVIDIA_API_KEY not set"
+    log "ERROR: NVIDIA_INFERENCE_API_KEY not set"
     exit 1
   fi
 
@@ -265,9 +265,9 @@ test_diag_01_debug_tarball() {
     return
   fi
 
-  local real_key="${NVIDIA_API_KEY:-}"
+  local real_key="${NVIDIA_INFERENCE_API_KEY:-}"
   if [[ -z "$real_key" ]]; then
-    skip "TC-DIAG-01: Credential check" "NVIDIA_API_KEY not set"
+    skip "TC-DIAG-01: Credential check" "NVIDIA_INFERENCE_API_KEY not set"
     rm -rf "$debug_dir"
     return
   fi
@@ -387,7 +387,7 @@ test_diag_05_sandbox_config() {
 test_diag_03_credentials() {
   log "=== TC-DIAG-03: Credentials List and Reset ==="
 
-  local real_key="${NVIDIA_API_KEY:-}"
+  local real_key="${NVIDIA_INFERENCE_API_KEY:-}"
 
   log "  Step 1: Running credentials list..."
   local list_output list_rc=0
@@ -411,7 +411,7 @@ test_diag_03_credentials() {
     return
   fi
 
-  if echo "$list_output" | grep -qiE "NVIDIA_API_KEY\|nvidia.api"; then
+  if echo "$list_output" | grep -qiE "NVIDIA_INFERENCE_API_KEY|nvidia.api"; then
     pass "TC-DIAG-03: credentials list shows key name"
   else
     skip "TC-DIAG-03: Key name" "Expected credential key not found in list"
@@ -424,9 +424,9 @@ test_diag_03_credentials() {
     pass "TC-DIAG-03: credentials list does not expose key values"
   fi
 
-  log "  Step 2: Running credentials reset NVIDIA_API_KEY..."
+  log "  Step 2: Running credentials reset NVIDIA_INFERENCE_API_KEY..."
   local reset_output reset_rc=0
-  reset_output=$(nemoclaw credentials reset NVIDIA_API_KEY --yes 2>&1) || reset_rc=$?
+  reset_output=$(nemoclaw credentials reset NVIDIA_INFERENCE_API_KEY --yes 2>&1) || reset_rc=$?
   log "  Reset output (exit $reset_rc): ${reset_output:0:300}"
 
   if [[ $reset_rc -eq 0 ]]; then
@@ -439,10 +439,10 @@ test_diag_03_credentials() {
   log "  Step 3: Verifying key removed from list..."
   local post_list
   post_list=$(nemoclaw credentials list 2>&1) || true
-  if echo "$post_list" | grep -qiE "NVIDIA_API_KEY"; then
-    fail "TC-DIAG-03: Post-reset" "NVIDIA_API_KEY still in list after reset"
+  if echo "$post_list" | grep -qiE "NVIDIA_INFERENCE_API_KEY"; then
+    fail "TC-DIAG-03: Post-reset" "NVIDIA_INFERENCE_API_KEY still in list after reset"
   else
-    pass "TC-DIAG-03: NVIDIA_API_KEY removed after reset"
+    pass "TC-DIAG-03: NVIDIA_INFERENCE_API_KEY removed after reset"
   fi
 }
 
diff --git a/test/e2e/test-double-onboard.sh b/test/e2e/test-double-onboard.sh
index 535ba95535..5def539b4d 100755
--- a/test/e2e/test-double-onboard.sh
+++ b/test/e2e/test-double-onboard.sh
@@ -10,7 +10,7 @@
 #
 # This script intentionally uses a local fake OpenAI-compatible endpoint so it
 # matches the current onboarding flow. Older versions of this test relied on a
-# missing/invalid NVIDIA_API_KEY causing a late failure after sandbox creation;
+# missing/invalid NVIDIA_INFERENCE_API_KEY causing a late failure after sandbox creation;
 # that no longer reflects current non-interactive onboarding behavior.
 
 # ShellCheck cannot see EXIT trap invocations of cleanup helpers in this E2E script.
diff --git a/test/e2e/test-full-e2e.sh b/test/e2e/test-full-e2e.sh
index f824b1c2e1..6b0814b595 100755
--- a/test/e2e/test-full-e2e.sh
+++ b/test/e2e/test-full-e2e.sh
@@ -10,18 +10,18 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
-#   - Network access to integrate.api.nvidia.com
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
+#   - Network access to inference-api.nvidia.com
 #
 # Environment variables:
 #   NEMOCLAW_NON_INTERACTIVE=1             — required (enables non-interactive install + onboard)
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 — required for non-interactive install/onboard
 #   NEMOCLAW_SANDBOX_NAME                  — sandbox name (default: e2e-nightly)
 #   NEMOCLAW_RECREATE_SANDBOX=1            — recreate sandbox if it exists from a previous run
-#   NVIDIA_API_KEY                         — required for NVIDIA Endpoints inference
+#   NVIDIA_INFERENCE_API_KEY               — required for NVIDIA Endpoints inference
 #
 # Usage:
-#   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 NVIDIA_API_KEY=nvapi-... bash test/e2e/test-full-e2e.sh
+#   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-full-e2e.sh
 #
 # See: https://github.com/NVIDIA/NemoClaw/issues/71
 
@@ -114,17 +114,17 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_API_KEY:-}" ] && [[ "${NVIDIA_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_API_KEY is set (starts with nvapi-)"
+if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
+  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
 else
-  fail "NVIDIA_API_KEY not set or invalid — required for live inference"
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for live inference"
   exit 1
 fi
 
-if curl -sf --max-time 10 https://integrate.api.nvidia.com/v1/models >/dev/null 2>&1; then
-  pass "Network access to integrate.api.nvidia.com"
+if curl -sf --max-time 10 https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then
+  pass "Network access to inference-api.nvidia.com"
 else
-  fail "Cannot reach integrate.api.nvidia.com"
+  fail "Cannot reach inference-api.nvidia.com"
   exit 1
 fi
 
@@ -306,11 +306,11 @@ fi
 section "Phase 4: Live inference"
 
 # ── Test 4a: Direct NVIDIA Endpoints ──
-info "[LIVE] Direct API test → integrate.api.nvidia.com..."
+info "[LIVE] Direct API test → inference-api.nvidia.com..."
 api_response=$(curl -s --max-time 30 \
-  -X POST https://integrate.api.nvidia.com/v1/chat/completions \
+  -X POST https://inference-api.nvidia.com/v1/chat/completions \
   -H "Content-Type: application/json" \
-  -H "Authorization: Bearer $NVIDIA_API_KEY" \
+  -H "Authorization: Bearer $NVIDIA_INFERENCE_API_KEY" \
   -d '{
     "model": "nvidia/nemotron-3-super-120b-a12b",
     "messages": [{"role": "user", "content": "Reply with exactly one word: PONG"}],
diff --git a/test/e2e/test-hermes-discord-e2e.sh b/test/e2e/test-hermes-discord-e2e.sh
index e565f9f2e3..28574713c2 100755
--- a/test/e2e/test-hermes-discord-e2e.sh
+++ b/test/e2e/test-hermes-discord-e2e.sh
@@ -21,7 +21,7 @@
 #   NEMOCLAW_RECREATE_SANDBOX=1            - auto-set
 #   NEMOCLAW_FRESH=1                       - auto-set to discard interrupted onboard sessions
 #   NEMOCLAW_OPENSHELL_BIN                 - optional OpenShell binary under test
-#   NVIDIA_API_KEY                         - required for Hermes onboarding
+#   NVIDIA_INFERENCE_API_KEY               - required for Hermes onboarding
 #   DISCORD_BOT_TOKEN                      - defaults to a fake token
 #   DISCORD_SERVER_IDS                     - defaults to a fake snowflake
 #   DISCORD_ALLOWED_IDS                    - defaults to a fake snowflake
@@ -29,7 +29,7 @@
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_API_KEY=nvapi-... bash test/e2e/test-hermes-discord-e2e.sh
+#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-hermes-discord-e2e.sh
 
 set -uo pipefail
 
@@ -197,10 +197,10 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_API_KEY:-}" ] && [[ "${NVIDIA_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_API_KEY is set (starts with nvapi-)"
+if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
+  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
 else
-  fail "NVIDIA_API_KEY not set or invalid"
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
   exit 1
 fi
 
@@ -584,7 +584,7 @@ fi
 
 section "Phase 8: Gateway-stored credential rebuild"
 
-# Rebuild with NVIDIA_API_KEY unset so the preflight is forced to reuse the
+# Rebuild with NVIDIA_INFERENCE_API_KEY unset so the preflight is forced to reuse the
 # gateway-stored inference credential. Catches the Hermes regression that
 # motivated the gateway-aware credential check in setupNim + rebuild.
 
@@ -601,9 +601,9 @@ if [ -d "$REPO/.tmp" ]; then
   sudo rm -rf "$REPO/.tmp"/fake-discord.* 2>/dev/null || rm -rf "$REPO/.tmp"/fake-discord.* 2>/dev/null || true
 fi
 
-NVIDIA_API_KEY_BACKUP="${NVIDIA_API_KEY:-}"
-unset NVIDIA_API_KEY
-info "NVIDIA_API_KEY unset; gateway must hold the inference credential"
+NVIDIA_INFERENCE_API_KEY_BACKUP="${NVIDIA_INFERENCE_API_KEY:-}"
+unset NVIDIA_INFERENCE_API_KEY
+info "NVIDIA_INFERENCE_API_KEY unset; gateway must hold the inference credential"
 
 HERMES_REBUILD_LOG="/tmp/nc-hermes-rebuild-noenv.log"
 if nemoclaw "$SANDBOX_NAME" rebuild --yes >"$HERMES_REBUILD_LOG" 2>&1; then
@@ -613,18 +613,18 @@ else
 fi
 
 if [ "$rebuild_rc" -ne 0 ]; then
-  fail "Hermes rebuild failed with NVIDIA_API_KEY unset (rc=${rebuild_rc})"
+  fail "Hermes rebuild failed with NVIDIA_INFERENCE_API_KEY unset (rc=${rebuild_rc})"
   tail -80 "$HERMES_REBUILD_LOG" 2>/dev/null || true
 elif grep -q "provider credential not found" "$HERMES_REBUILD_LOG"; then
-  fail "REGRESSION — rebuild aborted on missing NVIDIA_API_KEY despite gateway-registered credential"
+  fail "REGRESSION — rebuild aborted on missing NVIDIA_INFERENCE_API_KEY despite gateway-registered credential"
 else
-  pass "Hermes rebuild reused gateway-stored credential without NVIDIA_API_KEY"
+  pass "Hermes rebuild reused gateway-stored credential without NVIDIA_INFERENCE_API_KEY"
 fi
 
-if [ -n "$NVIDIA_API_KEY_BACKUP" ]; then
-  export NVIDIA_API_KEY="$NVIDIA_API_KEY_BACKUP"
+if [ -n "$NVIDIA_INFERENCE_API_KEY_BACKUP" ]; then
+  export NVIDIA_INFERENCE_API_KEY="$NVIDIA_INFERENCE_API_KEY_BACKUP"
 fi
-unset NVIDIA_API_KEY_BACKUP
+unset NVIDIA_INFERENCE_API_KEY_BACKUP
 
 section "Phase 9: Cleanup"
 
diff --git a/test/e2e/test-hermes-e2e.sh b/test/e2e/test-hermes-e2e.sh
index db087fea95..b10e718755 100755
--- a/test/e2e/test-hermes-e2e.sh
+++ b/test/e2e/test-hermes-e2e.sh
@@ -11,8 +11,8 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
-#   - Network access to integrate.api.nvidia.com
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
+#   - Network access to inference-api.nvidia.com
 #
 # Environment variables:
 #   NEMOCLAW_NON_INTERACTIVE=1             — required (enables non-interactive install + onboard)
@@ -22,10 +22,10 @@
 #   NEMOCLAW_RECREATE_SANDBOX=1            — recreate sandbox if it exists from a previous run
 #   NEMOCLAW_E2E_HERMES_DASHBOARD=1        — validate the built-in Hermes web dashboard end-to-end
 #   NEMOCLAW_HERMES_DASHBOARD_TUI=1        — enable Hermes' optional in-browser TUI tab during onboard
-#   NVIDIA_API_KEY                         — required for NVIDIA Endpoints inference
+#   NVIDIA_INFERENCE_API_KEY               — required for NVIDIA Endpoints inference
 #
 # Usage:
-#   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 NVIDIA_API_KEY=nvapi-... bash test/e2e/test-hermes-e2e.sh
+#   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-hermes-e2e.sh
 
 set -uo pipefail
 
@@ -196,17 +196,17 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_API_KEY:-}" ] && [[ "${NVIDIA_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_API_KEY is set (starts with nvapi-)"
+if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
+  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
 else
-  fail "NVIDIA_API_KEY not set or invalid — required for live inference"
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for live inference"
   exit 1
 fi
 
-if curl -sf --max-time 10 https://integrate.api.nvidia.com/v1/models >/dev/null 2>&1; then
-  pass "Network access to integrate.api.nvidia.com"
+if curl -sf --max-time 10 https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then
+  pass "Network access to inference-api.nvidia.com"
 else
-  fail "Cannot reach integrate.api.nvidia.com"
+  fail "Cannot reach inference-api.nvidia.com"
   exit 1
 fi
 
@@ -599,11 +599,11 @@ rm -f "$ssh_config"
 section "Phase 5: Live inference"
 
 # ── Test 5a: Direct NVIDIA Endpoints ──
-info "[LIVE] Direct API test → integrate.api.nvidia.com..."
+info "[LIVE] Direct API test → inference-api.nvidia.com..."
 api_response=$(curl -s --max-time 30 \
-  -X POST https://integrate.api.nvidia.com/v1/chat/completions \
+  -X POST https://inference-api.nvidia.com/v1/chat/completions \
   -H "Content-Type: application/json" \
-  -H "Authorization: Bearer $NVIDIA_API_KEY" \
+  -H "Authorization: Bearer $NVIDIA_INFERENCE_API_KEY" \
   -d '{
     "model": "nvidia/nemotron-3-super-120b-a12b",
     "messages": [{"role": "user", "content": "Reply with exactly one word: PONG"}],
diff --git a/test/e2e/test-hermes-inference-switch.sh b/test/e2e/test-hermes-inference-switch.sh
index 6ae1543b97..521a18b5ee 100755
--- a/test/e2e/test-hermes-inference-switch.sh
+++ b/test/e2e/test-hermes-inference-switch.sh
@@ -10,7 +10,7 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
 #   - NEMOCLAW_NON_INTERACTIVE=1
 #   - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 
@@ -445,8 +445,11 @@ SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-hermes-inference-switch}"
 SWITCH_PROVIDER="${NEMOCLAW_SWITCH_PROVIDER:-nvidia-prod}"
 SWITCH_MODEL="${NEMOCLAW_SWITCH_MODEL:-z-ai/glm-5.1}"
 SWITCH_INFERENCE_API="${NEMOCLAW_SWITCH_INFERENCE_API:-openai-completions}"
+# shellcheck disable=SC2034 # consumed by sourced anthropic-switch-provider.sh
 SWITCH_ENDPOINT_URL="${NEMOCLAW_SWITCH_ENDPOINT_URL:-}"
+# shellcheck disable=SC2034 # consumed by sourced anthropic-switch-provider.sh
 SWITCH_MOCK_ANTHROPIC="${NEMOCLAW_SWITCH_MOCK_ANTHROPIC:-0}"
+# shellcheck disable=SC2034 # consumed by sourced anthropic-switch-provider.sh
 SWITCH_MOCK_PORT="${NEMOCLAW_SWITCH_MOCK_PORT:-18766}"
 INSTALL_LOG="/tmp/nemoclaw-e2e-hermes-inference-switch-install.log"
 ENV_HASH_BEFORE=""
@@ -480,10 +483,10 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_API_KEY:-}" ] && [[ "${NVIDIA_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_API_KEY is set"
+if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
+  pass "NVIDIA_INFERENCE_API_KEY is set"
 else
-  fail "NVIDIA_API_KEY not set or invalid"
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
   exit 1
 fi
 
diff --git a/test/e2e/test-hermes-slack-e2e.sh b/test/e2e/test-hermes-slack-e2e.sh
index 57ae031885..74f70f6369 100755
--- a/test/e2e/test-hermes-slack-e2e.sh
+++ b/test/e2e/test-hermes-slack-e2e.sh
@@ -17,13 +17,13 @@
 #   NEMOCLAW_POLICY_TIER=open              - auto-set if not already set
 #   NEMOCLAW_SANDBOX_NAME                  - sandbox name (default: e2e-hermes-slack)
 #   NEMOCLAW_RECREATE_SANDBOX=1            - auto-set
-#   NVIDIA_API_KEY                         - required for Hermes onboarding
+#   NVIDIA_INFERENCE_API_KEY               - required for Hermes onboarding
 #   SLACK_BOT_TOKEN                        - defaults to a fake xoxb- token
 #   SLACK_APP_TOKEN                        - defaults to a fake xapp- token
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_API_KEY=nvapi-... bash test/e2e/test-hermes-slack-e2e.sh
+#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-hermes-slack-e2e.sh
 
 set -uo pipefail
 
@@ -184,10 +184,10 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_API_KEY:-}" ] && [[ "${NVIDIA_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_API_KEY is set (starts with nvapi-)"
+if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
+  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
 else
-  fail "NVIDIA_API_KEY not set or invalid"
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
   exit 1
 fi
 
diff --git a/test/e2e/test-inference-routing.sh b/test/e2e/test-inference-routing.sh
index 2e7b1fc9f8..2ceeee0b71 100755
--- a/test/e2e/test-inference-routing.sh
+++ b/test/e2e/test-inference-routing.sh
@@ -12,7 +12,7 @@
 # Covers:
 #   TC-INF-02: OpenAI provider end-to-end inference (requires OPENAI_API_KEY)
 #   TC-INF-03: Anthropic provider end-to-end inference (requires ANTHROPIC_API_KEY)
-#   TC-INF-05: Credential isolation inside sandbox (requires NVIDIA_API_KEY)
+#   TC-INF-05: Credential isolation inside sandbox (requires NVIDIA_INFERENCE_API_KEY)
 #   TC-INF-06: Invalid API key → classified "credential" error (PR-safe)
 #   TC-INF-07: Unreachable endpoint → classified "transport" error (PR-safe)
 #   TC-INF-09: Custom OpenAI-compatible endpoint (requires NEMOCLAW_ENDPOINT_URL + COMPATIBLE_API_KEY)
@@ -103,7 +103,7 @@ install_nemoclaw() {
   # Use a dummy key so install.sh doesn't prompt — the key will fail
   # validation, but install.sh only needs it for the onboard step which
   # we control separately in each test case.
-  NVIDIA_API_KEY="nvapi-DUMMY-FOR-INSTALL" \
+  NVIDIA_INFERENCE_API_KEY="nvapi-DUMMY-FOR-INSTALL" \
     NEMOCLAW_NON_INTERACTIVE=1 \
     NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
     bash "$REPO_ROOT/install.sh" --non-interactive --yes-i-accept-third-party-software \
@@ -187,9 +187,9 @@ test_inf_05_credential_isolation() {
   log "=== TC-INF-05: Credential Isolation ==="
 
   # Determine the real API key to search for
-  local real_key="${NVIDIA_API_KEY:-}"
+  local real_key="${NVIDIA_INFERENCE_API_KEY:-}"
   if [[ -z "$real_key" ]]; then
-    skip "TC-INF-05" "NVIDIA_API_KEY not set — cannot test credential isolation"
+    skip "TC-INF-05" "NVIDIA_INFERENCE_API_KEY not set — cannot test credential isolation"
     return
   fi
 
@@ -279,13 +279,13 @@ try {
 
   # TC-INF-05d: Placeholder token IS present in environment
   local placeholder
-  placeholder=$(sandbox_exec "printenv NVIDIA_API_KEY 2>/dev/null || true") || true
+  placeholder=$(sandbox_exec "printenv NVIDIA_INFERENCE_API_KEY 2>/dev/null || true") || true
   if [[ -n "$placeholder" && "$placeholder" != "$real_key" ]]; then
     pass "TC-INF-05d: Placeholder token present in sandbox (not the real key)"
   elif [[ "$placeholder" == "$real_key" ]]; then
     fail "TC-INF-05d: Placeholder" "Sandbox has the REAL key, not a placeholder"
   else
-    skip "TC-INF-05d: Placeholder" "NVIDIA_API_KEY not set in sandbox (placeholder injection may not be active)"
+    skip "TC-INF-05d: Placeholder" "NVIDIA_INFERENCE_API_KEY not set in sandbox (placeholder injection may not be active)"
   fi
 }
 
@@ -297,8 +297,9 @@ test_inf_06_invalid_api_key() {
 
   rm -f "$HOME/.nemoclaw/onboard.lock" 2>/dev/null || true
 
+  local invalid_api_key="nvapi-INTENTIONALLY-INVALID-KEY-FOR-E2E-TEST" # gitleaks:allow
   local output exit_code=0
-  output=$(NVIDIA_API_KEY="nvapi-INTENTIONALLY-INVALID-KEY-FOR-E2E-TEST" \
+  output=$(NVIDIA_INFERENCE_API_KEY="$invalid_api_key" \
     NEMOCLAW_NON_INTERACTIVE=1 \
     NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
     NEMOCLAW_SANDBOX_NAME="e2e-invalid-key" \
@@ -362,7 +363,7 @@ test_inf_07_unreachable_endpoint() {
 
   # Use an RFC 2606 invalid domain — deterministic DNS failure across runners
   local output exit_code=0
-  output=$(NVIDIA_API_KEY="nvapi-valid-format-but-fake-key-1234567890" \
+  output=$(NVIDIA_INFERENCE_API_KEY="nvapi-valid-format-but-fake-key-1234567890" \
     NEMOCLAW_NON_INTERACTIVE=1 \
     NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
     NEMOCLAW_SANDBOX_NAME="e2e-unreachable" \
diff --git a/test/e2e/test-issue-2478-crash-loop-recovery.sh b/test/e2e/test-issue-2478-crash-loop-recovery.sh
index 1b39f8e5a5..e144cf30bc 100755
--- a/test/e2e/test-issue-2478-crash-loop-recovery.sh
+++ b/test/e2e/test-issue-2478-crash-loop-recovery.sh
@@ -44,13 +44,13 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
-#   - Network access to integrate.api.nvidia.com
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
+#   - Network access to inference-api.nvidia.com
 #
 # Environment variables:
 #   NEMOCLAW_NON_INTERACTIVE=1             — required
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 — required
-#   NVIDIA_API_KEY                         — required for onboard
+#   NVIDIA_INFERENCE_API_KEY               — required for onboard
 #   NEMOCLAW_SANDBOX_NAME                  — sandbox name (default: e2e-2478)
 #   NEMOCLAW_E2E_TIMEOUT_SECONDS           — overall timeout (default: 1500)
 #   NEMOCLAW_E2E_CRASH_CYCLES              — crash-recover cycles (default: 5)
@@ -59,7 +59,7 @@
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 \
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#   NVIDIA_API_KEY=nvapi-... \
+#   NVIDIA_INFERENCE_API_KEY=nvapi-... \
 #     bash test/e2e/test-issue-2478-crash-loop-recovery.sh
 
 set -uo pipefail
@@ -315,11 +315,11 @@ if ! docker info >/dev/null 2>&1; then
 fi
 pass "Docker running"
 
-if [ -z "${NVIDIA_API_KEY:-}" ] || [[ "${NVIDIA_API_KEY}" != nvapi-* ]]; then
-  fail "NVIDIA_API_KEY not set or invalid"
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] || [[ "${NVIDIA_INFERENCE_API_KEY}" != nvapi-* ]]; then
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
   exit 1
 fi
-pass "NVIDIA_API_KEY set"
+pass "NVIDIA_INFERENCE_API_KEY set"
 
 if [ "${NEMOCLAW_NON_INTERACTIVE:-}" != "1" ] || [ "${NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE:-}" != "1" ]; then
   fail "NEMOCLAW_NON_INTERACTIVE=1 and NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 are required"
diff --git a/test/e2e/test-issue-4434-tui-unreachable-inference.sh b/test/e2e/test-issue-4434-tui-unreachable-inference.sh
index 3ea95c341b..69004be5b2 100755
--- a/test/e2e/test-issue-4434-tui-unreachable-inference.sh
+++ b/test/e2e/test-issue-4434-tui-unreachable-inference.sh
@@ -8,7 +8,7 @@
 #
 # This mutates host firewall state. Run only on a Linux Docker host you control:
 #
-#   NEMOCLAW_ISSUE_4434_LIVE=1 NVIDIA_API_KEY=nvapi-... \
+#   NEMOCLAW_ISSUE_4434_LIVE=1 NVIDIA_INFERENCE_API_KEY=nvapi-... \
 #     bash test/e2e/test-issue-4434-tui-unreachable-inference.sh
 
 set -euo pipefail
@@ -71,8 +71,8 @@ for command in docker sudo expect curl timeout perl; do
 done
 docker info >/dev/null 2>&1 || fail "Docker is not running"
 sudo -n true >/dev/null 2>&1 || fail "passwordless sudo is required for non-interactive iptables cleanup"
-if [ -z "${NVIDIA_API_KEY:-}" ] || [[ "${NVIDIA_API_KEY}" != nvapi-* ]]; then
-  fail "NVIDIA_API_KEY must be set and start with nvapi-"
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] || [[ "${NVIDIA_INFERENCE_API_KEY}" != nvapi-* ]]; then
+  fail "NVIDIA_INFERENCE_API_KEY must be set and start with nvapi-"
 fi
 
 mkdir -p "$CAPTURE_DIR"
@@ -127,12 +127,12 @@ done
 block_probe_log="${CAPTURE_DIR}/blocked-endpoint-probe.log"
 set +e
 timeout 25 openshell sandbox exec --name "$SANDBOX_NAME" -- sh -lc \
-  'curl -sk --connect-timeout 5 --max-time 12 https://integrate.api.nvidia.com/v1/models >/tmp/issue4434-models.out 2>&1' \
+  'curl -sk --connect-timeout 5 --max-time 12 https://inference-api.nvidia.com/v1/models >/tmp/issue4434-models.out 2>&1' \
   >"$block_probe_log" 2>&1
 block_probe_rc=$?
 set -e
 if [ "$block_probe_rc" -eq 0 ]; then
-  fail "integrate.api.nvidia.com was still reachable from inside the sandbox after firewall block"
+  fail "inference-api.nvidia.com was still reachable from inside the sandbox after firewall block"
 fi
 info "sandbox endpoint block verified (probe exit ${block_probe_rc})"
 
diff --git a/test/e2e/test-issue-4462-scope-upgrade-approval.sh b/test/e2e/test-issue-4462-scope-upgrade-approval.sh
index 8933626a70..30f7015e5b 100755
--- a/test/e2e/test-issue-4462-scope-upgrade-approval.sh
+++ b/test/e2e/test-issue-4462-scope-upgrade-approval.sh
@@ -20,7 +20,7 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set
+#   - NVIDIA_INFERENCE_API_KEY set
 #   - NEMOCLAW_NON_INTERACTIVE=1
 #   - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 
@@ -717,11 +717,11 @@ tail -20 /tmp/auto-pair.log 2>/dev/null || true
 
 section "Phase 0: Preflight"
 
-if [ -z "${NVIDIA_API_KEY:-}" ]; then
-  fail "NVIDIA_API_KEY not set"
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
+  fail "NVIDIA_INFERENCE_API_KEY not set"
   exit 1
 fi
-pass "NVIDIA_API_KEY is set"
+pass "NVIDIA_INFERENCE_API_KEY is set"
 
 if ! docker info >/dev/null 2>&1; then
   fail "Docker is not running"
diff --git a/test/e2e/test-kimi-inference-compat.sh b/test/e2e/test-kimi-inference-compat.sh
index 3a40132762..f43f89d84d 100755
--- a/test/e2e/test-kimi-inference-compat.sh
+++ b/test/e2e/test-kimi-inference-compat.sh
@@ -394,7 +394,7 @@ run_kimi_onboard() {
   export NEMOCLAW_POLICY_TIER=restricted
   export NEMOCLAW_POLICY_MODE=skip
   export COMPATIBLE_API_KEY="$KIMI_MOCK_API_KEY"
-  unset NVIDIA_API_KEY OPENAI_API_KEY ANTHROPIC_API_KEY GEMINI_API_KEY
+  unset NVIDIA_INFERENCE_API_KEY OPENAI_API_KEY ANTHROPIC_API_KEY GEMINI_API_KEY
   unset TELEGRAM_BOT_TOKEN DISCORD_BOT_TOKEN SLACK_BOT_TOKEN SLACK_APP_TOKEN
 
   prepare_source_cli || prep_exit=$?
diff --git a/test/e2e/test-launchable-smoke.sh b/test/e2e/test-launchable-smoke.sh
index 0511b1565b..b28cc89750 100755
--- a/test/e2e/test-launchable-smoke.sh
+++ b/test/e2e/test-launchable-smoke.sh
@@ -20,7 +20,7 @@
 # What this tests:
 #   1. Run brev-launchable-ci-cpu.sh with NEMOCLAW_REF=current branch
 #   2. Verify installation artifacts (nemoclaw, openshell, Node.js ≥22, Docker, sentinel)
-#   3. nemoclaw onboard --non-interactive with NVIDIA_API_KEY (cloud provider)
+#   3. nemoclaw onboard --non-interactive with NVIDIA_INFERENCE_API_KEY (cloud provider)
 #   4. Sandbox health: nemoclaw list, status, gateway running
 #   5. Live inference through the sandbox (same pattern as test-full-e2e.sh Phase 4)
 #   6. Destroy + cleanup
@@ -28,8 +28,8 @@
 # Prerequisites:
 #   - Ubuntu runner (ubuntu-latest)
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
-#   - Network access to integrate.api.nvidia.com
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
+#   - Network access to inference-api.nvidia.com
 #   - NEMOCLAW_NON_INTERACTIVE=1
 #   - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 #
@@ -37,12 +37,12 @@
 #   NEMOCLAW_REF              — git ref for brev-launchable-ci-cpu.sh (default: current branch)
 #   NEMOCLAW_SANDBOX_NAME     — sandbox name (default: e2e-launchable)
 #   NEMOCLAW_RECREATE_SANDBOX — set to 1 to recreate if exists
-#   NVIDIA_API_KEY            — required for NVIDIA Endpoints inference
+#   NVIDIA_INFERENCE_API_KEY  — required for NVIDIA Endpoints inference
 #   SKIP_DOCKER_PULL          — set to 1 to skip Docker image pre-pulls (speeds up CI)
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_API_KEY=nvapi-... bash test/e2e/test-launchable-smoke.sh
+#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-launchable-smoke.sh
 #
 # See: https://github.com/NVIDIA/NemoClaw/issues/2599
 
@@ -177,17 +177,17 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_API_KEY:-}" ] && [[ "${NVIDIA_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_API_KEY is set (starts with nvapi-)"
+if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
+  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
 else
-  fail "NVIDIA_API_KEY not set or invalid — required for live inference"
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for live inference"
   exit 1
 fi
 
-if curl -sf --max-time 10 https://integrate.api.nvidia.com/v1/models >/dev/null 2>&1; then
-  pass "Network access to integrate.api.nvidia.com"
+if curl -sf --max-time 10 https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then
+  pass "Network access to inference-api.nvidia.com"
 else
-  fail "Cannot reach integrate.api.nvidia.com"
+  fail "Cannot reach inference-api.nvidia.com"
   exit 1
 fi
 
@@ -425,11 +425,11 @@ fi
 section "Phase 6: Live inference"
 
 # ── Test 6a: Direct NVIDIA Endpoints (sanity check) ──
-info "[LIVE] Direct API test → integrate.api.nvidia.com..."
+info "[LIVE] Direct API test → inference-api.nvidia.com..."
 api_response=$(curl -s --max-time 30 \
-  -X POST https://integrate.api.nvidia.com/v1/chat/completions \
+  -X POST https://inference-api.nvidia.com/v1/chat/completions \
   -H "Content-Type: application/json" \
-  -H "Authorization: Bearer $NVIDIA_API_KEY" \
+  -H "Authorization: Bearer $NVIDIA_INFERENCE_API_KEY" \
   -d '{
     "model": "nvidia/nemotron-3-super-120b-a12b",
     "messages": [{"role": "user", "content": "Reply with exactly one word: PONG"}],
diff --git a/test/e2e/test-messaging-providers.sh b/test/e2e/test-messaging-providers.sh
index 9fbcc9cdc0..817e49abfb 100755
--- a/test/e2e/test-messaging-providers.sh
+++ b/test/e2e/test-messaging-providers.sh
@@ -36,11 +36,11 @@
 # Prerequisites:
 #   - Docker running
 #   - NemoClaw installed (install.sh or brev-setup.sh already ran)
-#   - NVIDIA_API_KEY set
+#   - NVIDIA_INFERENCE_API_KEY set
 #   - openshell on PATH
 #
 # Environment variables:
-#   NVIDIA_API_KEY                         — required
+#   NVIDIA_INFERENCE_API_KEY               — required
 #   NEMOCLAW_NON_INTERACTIVE=1             — required
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 — required
 #   NEMOCLAW_SANDBOX_NAME                  — sandbox name (default: e2e-msg-provider)
@@ -80,7 +80,7 @@
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_API_KEY=nvapi-... bash test/e2e/test-messaging-providers.sh
+#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-messaging-providers.sh
 #
 # See: https://github.com/NVIDIA/NemoClaw/pull/1081
 
@@ -633,11 +633,11 @@ openclaw_message_send_exit_code() {
 # ══════════════════════════════════════════════════════════════════
 section "Phase 0: Prerequisites"
 
-if [ -z "${NVIDIA_API_KEY:-}" ]; then
-  fail "NVIDIA_API_KEY not set"
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
+  fail "NVIDIA_INFERENCE_API_KEY not set"
   exit 1
 fi
-pass "NVIDIA_API_KEY is set"
+pass "NVIDIA_INFERENCE_API_KEY is set"
 
 if ! docker info >/dev/null 2>&1; then
   fail "Docker is not running"
diff --git a/test/e2e/test-model-router-provider-routed-inference.sh b/test/e2e/test-model-router-provider-routed-inference.sh
index 4daf611cbc..cac9687626 100755
--- a/test/e2e/test-model-router-provider-routed-inference.sh
+++ b/test/e2e/test-model-router-provider-routed-inference.sh
@@ -68,7 +68,7 @@ redact_file() {
   python3 - "$file" <<'PY'
 import os, sys
 path = sys.argv[1]
-secrets = [os.environ.get("NVIDIA_API_KEY", ""), os.environ.get("NEMOCLAW_PROVIDER_KEY", "")]
+secrets = [os.environ.get("NVIDIA_INFERENCE_API_KEY", ""), os.environ.get("NEMOCLAW_PROVIDER_KEY", "")]
 text = open(path, "r", errors="replace").read()
 for secret in filter(None, secrets):
     text = text.replace(secret, "<REDACTED>")
@@ -97,10 +97,10 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_API_KEY:-}" ] && [[ "${NVIDIA_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_API_KEY is set"
+if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
+  pass "NVIDIA_INFERENCE_API_KEY is set"
 else
-  fail "NVIDIA_API_KEY is required and must start with nvapi-"
+  fail "NVIDIA_INFERENCE_API_KEY is required and must start with nvapi-"
   exit 1
 fi
 
@@ -124,13 +124,13 @@ rm -f "$HOME/.nemoclaw/onboard.lock" 2>/dev/null || true
 nemoclaw "$SANDBOX_NAME" destroy --yes >/dev/null 2>&1 || true
 
 env \
-  NEMOCLAW_PROVIDER_KEY="$NVIDIA_API_KEY" \
+  NEMOCLAW_PROVIDER_KEY="$NVIDIA_INFERENCE_API_KEY" \
   NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
   NEMOCLAW_NON_INTERACTIVE=1 \
   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
   NEMOCLAW_POLICY_TIER="open" \
   NEMOCLAW_PROVIDER="routed" \
-  NVIDIA_API_KEY="$NVIDIA_API_KEY" \
+  NVIDIA_INFERENCE_API_KEY="$NVIDIA_INFERENCE_API_KEY" \
   "$TIMEOUT_CMD" 1500 nemoclaw onboard --fresh --non-interactive --yes-i-accept-third-party-software \
   >"$ONBOARD_LOG" 2>&1
 onboard_rc=$?
diff --git a/test/e2e/test-network-policy.sh b/test/e2e/test-network-policy.sh
index d7d7ca7499..e03afe7803 100755
--- a/test/e2e/test-network-policy.sh
+++ b/test/e2e/test-network-policy.sh
@@ -23,7 +23,7 @@
 # Prerequisites:
 #   - Docker running
 #   - NemoClaw installed (or install.sh available)
-#   - NVIDIA_API_KEY for sandbox onboard
+#   - NVIDIA_INFERENCE_API_KEY for sandbox onboard
 # =============================================================================
 
 set -euo pipefail
@@ -92,7 +92,7 @@ install_nemoclaw() {
   fi
   log "=== Installing NemoClaw via install.sh ==="
   NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
-    NVIDIA_API_KEY="${NVIDIA_API_KEY:-nvapi-DUMMY-FOR-INSTALL}" \
+    NVIDIA_INFERENCE_API_KEY="${NVIDIA_INFERENCE_API_KEY:-nvapi-DUMMY-FOR-INSTALL}" \
     NEMOCLAW_NON_INTERACTIVE=1 \
     NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
     NEMOCLAW_POLICY_TIER="restricted" \
@@ -242,9 +242,9 @@ wait_for_e2e_http_port() {
 
 # ── Onboard sandbox ─────────────────────────────────────────────────────────
 setup_sandbox() {
-  local api_key="${NVIDIA_API_KEY:-}"
+  local api_key="${NVIDIA_INFERENCE_API_KEY:-}"
   if [[ -z "$api_key" ]]; then
-    log "ERROR: NVIDIA_API_KEY not set"
+    log "ERROR: NVIDIA_INFERENCE_API_KEY not set"
     exit 1
   fi
 
@@ -681,7 +681,7 @@ test_net_07_inference_exemption() {
   log "  Step 2: Attempt direct connection to provider (should be blocked)..."
   local direct_response
   direct_response=$(sandbox_exec "node -e \"
-fetch('https://integrate.api.nvidia.com/v1/models', {signal: AbortSignal.timeout(15000)})
+fetch('https://inference-api.nvidia.com/v1/models', {signal: AbortSignal.timeout(15000)})
   .then(r => console.log('STATUS_' + r.status))
   .catch(e => console.log('ERROR_' + (e.cause?.code || e.code || e.message)))
 \"" 2>&1) || true
diff --git a/test/e2e/test-onboard-negative-paths.sh b/test/e2e/test-onboard-negative-paths.sh
index 83d2f5d1ee..be7b094621 100755
--- a/test/e2e/test-onboard-negative-paths.sh
+++ b/test/e2e/test-onboard-negative-paths.sh
@@ -77,7 +77,7 @@ CLOUD_MODEL="${NEMOCLAW_ONBOARD_NEGATIVE_MODEL:-nvidia/nemotron-3-super-120b-a12
 PORT_CONFLICT_PORT="${NEMOCLAW_ONBOARD_NEGATIVE_CONFLICT_PORT:-18080}"
 SESSION_FILE="$HOME/.nemoclaw/onboard-session.json"
 REGISTRY_FILE="$HOME/.nemoclaw/sandboxes.json"
-RESTORE_API_KEY="${NVIDIA_API_KEY:-}"
+RESTORE_API_KEY="${NVIDIA_INFERENCE_API_KEY:-}"
 
 # shellcheck source=test/e2e/lib/sandbox-teardown.sh
 . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh"
@@ -217,7 +217,7 @@ const path = require("node:path");
 const repo = process.argv[2];
 const { validateNvidiaApiKeyValue } = require(path.join(repo, "dist", "lib", "validation.js"));
 
-const nvidiaError = validateNvidiaApiKeyValue("not-a-nvidia-key", "NVIDIA_API_KEY");
+const nvidiaError = validateNvidiaApiKeyValue("not-a-nvidia-key", "NVIDIA_INFERENCE_API_KEY");
 if (!nvidiaError || !nvidiaError.includes("Must start with nvapi-")) {
   throw new Error(`expected NVIDIA key prefix rejection, got: ${nvidiaError}`);
 }
@@ -295,9 +295,9 @@ else
 fi
 
 if [[ -n "$RESTORE_API_KEY" && "$RESTORE_API_KEY" == nvapi-* ]]; then
-  pass "NVIDIA_API_KEY is set"
+  pass "NVIDIA_INFERENCE_API_KEY is set"
 else
-  fail "NVIDIA_API_KEY not set or invalid; required for live onboard scenarios"
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid; required for live onboard scenarios"
   print_summary
   exit 1
 fi
@@ -337,7 +337,7 @@ env -u NEMOCLAW_SANDBOX_NAME \
   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
   NEMOCLAW_PROVIDER=cloud \
   NEMOCLAW_POLICY_MODE=skip \
-  NVIDIA_API_KEY="$RESTORE_API_KEY" \
+  NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY" \
   node "$REPO/bin/nemoclaw.js" onboard --non-interactive --from "$REPO/Dockerfile" \
   >"$FROM_GUARD_LOG" 2>&1
 from_guard_exit=$?
@@ -370,7 +370,7 @@ env \
   NEMOCLAW_SANDBOX_NAME="bad name" \
   NEMOCLAW_PROVIDER=cloud \
   NEMOCLAW_POLICY_MODE=skip \
-  NVIDIA_API_KEY="$RESTORE_API_KEY" \
+  NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY" \
   node "$REPO/bin/nemoclaw.js" onboard --non-interactive --from "$REPO/Dockerfile" \
   >"$FROM_ENV_NAME_LOG" 2>&1
 from_env_name_exit=$?
@@ -405,7 +405,7 @@ NEMOCLAW_NON_INTERACTIVE=1 \
   NEMOCLAW_RECREATE_SANDBOX=1 \
   NEMOCLAW_PROVIDER=cloud \
   NEMOCLAW_POLICY_MODE=skip \
-  NVIDIA_API_KEY=not-a-nvidia-key \
+  NVIDIA_INFERENCE_API_KEY=not-a-nvidia-key \
   node "$REPO/bin/nemoclaw.js" onboard --non-interactive >"$INVALID_KEY_LOG" 2>&1
 invalid_key_exit=$?
 invalid_key_output="$(cat "$INVALID_KEY_LOG")"
@@ -453,7 +453,7 @@ NEMOCLAW_NON_INTERACTIVE=1 \
   NEMOCLAW_GATEWAY_PORT="$PORT_CONFLICT_PORT" \
   NEMOCLAW_PROVIDER=cloud \
   NEMOCLAW_POLICY_MODE=skip \
-  NVIDIA_API_KEY="$RESTORE_API_KEY" \
+  NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY" \
   node "$REPO/bin/nemoclaw.js" onboard --non-interactive >"$PORT_CONFLICT_LOG" 2>&1
 port_conflict_exit=$?
 port_conflict_output="$(cat "$PORT_CONFLICT_LOG")"
@@ -495,7 +495,7 @@ NEMOCLAW_NON_INTERACTIVE=1 \
   NEMOCLAW_MODEL="$CLOUD_MODEL" \
   NEMOCLAW_POLICY_MODE=custom \
   NEMOCLAW_POLICY_PRESETS=npm,pypi \
-  NVIDIA_API_KEY="$RESTORE_API_KEY" \
+  NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY" \
   node "$REPO/bin/nemoclaw.js" onboard --non-interactive >"$LIVE_LOG" 2>&1
 live_exit=$?
 live_output="$(cat "$LIVE_LOG")"
diff --git a/test/e2e/test-onboard-repair.sh b/test/e2e/test-onboard-repair.sh
index 8351b74878..039d5ebf07 100755
--- a/test/e2e/test-onboard-repair.sh
+++ b/test/e2e/test-onboard-repair.sh
@@ -14,10 +14,10 @@
 #   - Docker running
 #   - openshell CLI installed
 #   - Node.js available
-#   - NVIDIA_API_KEY set to a valid nvapi-* key before starting the test
+#   - NVIDIA_INFERENCE_API_KEY set to a valid nvapi-* key before starting the test
 #
 # Usage:
-#   NVIDIA_API_KEY=nvapi-... bash test/e2e/test-onboard-repair.sh
+#   NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-onboard-repair.sh
 
 set -uo pipefail
 
@@ -80,7 +80,7 @@ if [ -n "$INSTALL_SANDBOX_NAME" ]; then
 fi
 
 SESSION_FILE="$HOME/.nemoclaw/onboard-session.json"
-RESTORE_API_KEY="${NVIDIA_API_KEY:-}"
+RESTORE_API_KEY="${NVIDIA_INFERENCE_API_KEY:-}"
 
 wait_openshell_sandbox_absent() {
   local sandbox_name="$1"
@@ -149,14 +149,14 @@ else
 fi
 
 if [[ -n "$RESTORE_API_KEY" && "$RESTORE_API_KEY" == nvapi-* ]]; then
-  pass "NVIDIA_API_KEY is set (starts with nvapi-)"
+  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
 else
-  fail "NVIDIA_API_KEY not set or invalid — required for resume completion"
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for resume completion"
   exit 1
 fi
 
-export NVIDIA_API_KEY="$RESTORE_API_KEY"
-pass "Exported NVIDIA_API_KEY for the repair run (host writes nothing to disk; OpenShell gateway is the system of record)"
+export NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY"
+pass "Exported NVIDIA_INFERENCE_API_KEY for the repair run (host writes nothing to disk; OpenShell gateway is the system of record)"
 
 # ══════════════════════════════════════════════════════════════════
 # Phase 2: Create interrupted resumable state
@@ -222,7 +222,7 @@ else
 fi
 
 REPAIR_LOG="$(mktemp)"
-env -u NVIDIA_API_KEY \
+env -u NVIDIA_INFERENCE_API_KEY \
   NEMOCLAW_NON_INTERACTIVE=1 \
   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
   NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
@@ -295,7 +295,7 @@ pass "Re-created interrupted session for conflict tests"
 info "Attempting resume with a different sandbox name..."
 
 SANDBOX_CONFLICT_LOG="$(mktemp)"
-env -u NVIDIA_API_KEY \
+env -u NVIDIA_INFERENCE_API_KEY \
   NEMOCLAW_NON_INTERACTIVE=1 \
   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
   NEMOCLAW_SANDBOX_NAME="$OTHER_SANDBOX_NAME" \
@@ -324,7 +324,7 @@ section "Phase 5: Reject conflicting provider and model"
 info "Attempting resume with conflicting provider/model inputs..."
 
 PROVIDER_CONFLICT_LOG="$(mktemp)"
-env -u NVIDIA_API_KEY \
+env -u NVIDIA_INFERENCE_API_KEY \
   NEMOCLAW_NON_INTERACTIVE=1 \
   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
   NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
diff --git a/test/e2e/test-onboard-resume.sh b/test/e2e/test-onboard-resume.sh
index 129f121e53..0810354968 100755
--- a/test/e2e/test-onboard-resume.sh
+++ b/test/e2e/test-onboard-resume.sh
@@ -15,10 +15,10 @@
 #   - Docker running
 #   - openshell CLI installed
 #   - Node.js available
-#   - NVIDIA_API_KEY set to a valid nvapi-* key before starting the test
+#   - NVIDIA_INFERENCE_API_KEY set to a valid nvapi-* key before starting the test
 #
 # Usage:
-#   NVIDIA_API_KEY=nvapi-... bash test/e2e/test-onboard-resume.sh
+#   NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-onboard-resume.sh
 
 set -uo pipefail
 
@@ -81,7 +81,7 @@ register_sandbox_for_teardown "$SANDBOX_NAME"
 
 SESSION_FILE="$HOME/.nemoclaw/onboard-session.json"
 REGISTRY="$HOME/.nemoclaw/sandboxes.json"
-RESTORE_API_KEY="${NVIDIA_API_KEY:-}"
+RESTORE_API_KEY="${NVIDIA_INFERENCE_API_KEY:-}"
 
 # ══════════════════════════════════════════════════════════════════
 # Phase 0: Pre-cleanup
@@ -122,21 +122,21 @@ else
 fi
 
 if [[ -n "$RESTORE_API_KEY" && "$RESTORE_API_KEY" == nvapi-* ]]; then
-  pass "NVIDIA_API_KEY is set (starts with nvapi-)"
+  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
 else
-  fail "NVIDIA_API_KEY not set or invalid — required for resume completion"
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for resume completion"
   exit 1
 fi
 
-if curl -sf --max-time 10 https://integrate.api.nvidia.com/v1/models >/dev/null 2>&1; then
-  pass "Network access to integrate.api.nvidia.com"
+if curl -sf --max-time 10 https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then
+  pass "Network access to inference-api.nvidia.com"
 else
-  fail "Cannot reach integrate.api.nvidia.com"
+  fail "Cannot reach inference-api.nvidia.com"
   exit 1
 fi
 
-export NVIDIA_API_KEY="$RESTORE_API_KEY"
-pass "Exported NVIDIA_API_KEY for the resume run (host writes nothing to disk; OpenShell gateway is the system of record)"
+export NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY"
+pass "Exported NVIDIA_INFERENCE_API_KEY for the resume run (host writes nothing to disk; OpenShell gateway is the system of record)"
 
 # ══════════════════════════════════════════════════════════════════
 # Phase 2: First onboard (forced failure after sandbox creation)
@@ -209,10 +209,10 @@ esac
 # Phase 3: Resume and complete
 # ══════════════════════════════════════════════════════════════════
 section "Phase 3: Resume"
-info "Running onboard --resume with NVIDIA_API_KEY removed from env..."
+info "Running onboard --resume with NVIDIA_INFERENCE_API_KEY removed from env..."
 
 RESUME_LOG="$(mktemp)"
-env -u NVIDIA_API_KEY \
+env -u NVIDIA_INFERENCE_API_KEY \
   NEMOCLAW_NON_INTERACTIVE=1 \
   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
   NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
diff --git a/test/e2e/test-openclaw-discord-pairing.sh b/test/e2e/test-openclaw-discord-pairing.sh
index 506ae18367..29e6f64e17 100755
--- a/test/e2e/test-openclaw-discord-pairing.sh
+++ b/test/e2e/test-openclaw-discord-pairing.sh
@@ -18,13 +18,13 @@
 # Environment variables:
 #   NEMOCLAW_NON_INTERACTIVE=1              - required
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 - required
-#   NVIDIA_API_KEY                         - required for onboarding
+#   NVIDIA_INFERENCE_API_KEY               - required for onboarding
 #   NEMOCLAW_SANDBOX_NAME                  - sandbox name (default: e2e-openclaw-discord-pairing)
 #   DISCORD_BOT_TOKEN                      - defaults to a fake token
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_API_KEY=nvapi-... bash test/e2e/test-openclaw-discord-pairing.sh
+#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-openclaw-discord-pairing.sh
 
 # shellcheck disable=SC2016,SC2329
 # SC2016: Single-quoted strings are intentional for commands evaluated inside
@@ -211,11 +211,11 @@ NODE
 
 section "Phase 0: Prerequisites"
 
-if [ -z "${NVIDIA_API_KEY:-}" ]; then
-  fail "NVIDIA_API_KEY not set"
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
+  fail "NVIDIA_INFERENCE_API_KEY not set"
   exit 1
 fi
-pass "NVIDIA_API_KEY is set"
+pass "NVIDIA_INFERENCE_API_KEY is set"
 
 if ! docker info >/dev/null 2>&1; then
   fail "Docker is not running"
diff --git a/test/e2e/test-openclaw-inference-switch.sh b/test/e2e/test-openclaw-inference-switch.sh
index 937ab09b87..276dbf2e9c 100755
--- a/test/e2e/test-openclaw-inference-switch.sh
+++ b/test/e2e/test-openclaw-inference-switch.sh
@@ -10,7 +10,7 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
 #   - NEMOCLAW_NON_INTERACTIVE=1
 #   - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 
@@ -390,10 +390,10 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_API_KEY:-}" ] && [[ "${NVIDIA_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_API_KEY is set"
+if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
+  pass "NVIDIA_INFERENCE_API_KEY is set"
 else
-  fail "NVIDIA_API_KEY not set or invalid"
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
   exit 1
 fi
 
diff --git a/test/e2e/test-openclaw-plugin-runtime-exdev.sh b/test/e2e/test-openclaw-plugin-runtime-exdev.sh
index 5555450149..5381787862 100755
--- a/test/e2e/test-openclaw-plugin-runtime-exdev.sh
+++ b/test/e2e/test-openclaw-plugin-runtime-exdev.sh
@@ -48,7 +48,7 @@ redact_file() {
   python3 - "$file" <<'PY'
 import os, sys
 path = sys.argv[1]
-secrets = [os.environ.get("NVIDIA_API_KEY", ""), os.environ.get("NEMOCLAW_PROVIDER_KEY", "")]
+secrets = [os.environ.get("NVIDIA_INFERENCE_API_KEY", ""), os.environ.get("NEMOCLAW_PROVIDER_KEY", "")]
 text = open(path, "r", errors="replace").read()
 for secret in filter(None, secrets):
     text = text.replace(secret, "<REDACTED>")
@@ -71,10 +71,10 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_API_KEY:-}" ] && [[ "${NVIDIA_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_API_KEY is set"
+if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
+  pass "NVIDIA_INFERENCE_API_KEY is set"
 else
-  fail "NVIDIA_API_KEY is required and must start with nvapi-"
+  fail "NVIDIA_INFERENCE_API_KEY is required and must start with nvapi-"
   exit 1
 fi
 
@@ -119,13 +119,13 @@ for path in policy_paths:
         path.write_text(text.replace(needle, needle + additions, 1))
 PY
 env \
-  NEMOCLAW_PROVIDER_KEY="$NVIDIA_API_KEY" \
+  NEMOCLAW_PROVIDER_KEY="$NVIDIA_INFERENCE_API_KEY" \
   NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
   NEMOCLAW_NON_INTERACTIVE=1 \
   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
   NEMOCLAW_POLICY_MODE="skip" \
   NEMOCLAW_PROVIDER="build" \
-  NVIDIA_API_KEY="$NVIDIA_API_KEY" \
+  NVIDIA_INFERENCE_API_KEY="$NVIDIA_INFERENCE_API_KEY" \
   "$TIMEOUT_CMD" 1500 nemoclaw onboard --fresh --non-interactive --yes-i-accept-third-party-software --agent openclaw --from "$REPO/Dockerfile" \
   >"$ONBOARD_LOG" 2>&1
 onboard_rc=$?
diff --git a/test/e2e/test-openclaw-skill-cli-e2e.sh b/test/e2e/test-openclaw-skill-cli-e2e.sh
index 77fae6d982..2eb6084d69 100755
--- a/test/e2e/test-openclaw-skill-cli-e2e.sh
+++ b/test/e2e/test-openclaw-skill-cli-e2e.sh
@@ -16,7 +16,7 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (needed to onboard the sandbox)
+#   - NVIDIA_INFERENCE_API_KEY set (needed to onboard the sandbox)
 #   - NEMOCLAW_NON_INTERACTIVE=1, NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 #
 # Environment:
@@ -25,7 +25,7 @@
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_API_KEY=nvapi-... bash test/e2e/test-openclaw-skill-cli-e2e.sh
+#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-openclaw-skill-cli-e2e.sh
 
 # shellcheck disable=SC2317
 set -uo pipefail
@@ -84,11 +84,11 @@ if ! docker info >/dev/null 2>&1; then
 fi
 pass "Docker is running"
 
-if [ -z "${NVIDIA_API_KEY:-}" ] || [[ "${NVIDIA_API_KEY}" != nvapi-* ]]; then
-  fail "NVIDIA_API_KEY not set or invalid"
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] || [[ "${NVIDIA_INFERENCE_API_KEY}" != nvapi-* ]]; then
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
   exit 1
 fi
-pass "NVIDIA_API_KEY is set"
+pass "NVIDIA_INFERENCE_API_KEY is set"
 
 cd "$REPO" || {
   fail "Could not cd to repo root"
diff --git a/test/e2e/test-openclaw-slack-pairing.sh b/test/e2e/test-openclaw-slack-pairing.sh
index eb183d42be..2da00c60c5 100755
--- a/test/e2e/test-openclaw-slack-pairing.sh
+++ b/test/e2e/test-openclaw-slack-pairing.sh
@@ -16,14 +16,14 @@
 # Environment variables:
 #   NEMOCLAW_NON_INTERACTIVE=1              - required
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 - required
-#   NVIDIA_API_KEY                         - required for onboarding
+#   NVIDIA_INFERENCE_API_KEY               - required for onboarding
 #   NEMOCLAW_SANDBOX_NAME                  - sandbox name (default: e2e-openclaw-slack-pairing)
 #   SLACK_BOT_TOKEN                        - defaults to a fake xoxb- token
 #   SLACK_APP_TOKEN                        - defaults to a fake xapp- token
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_API_KEY=nvapi-... bash test/e2e/test-openclaw-slack-pairing.sh
+#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-openclaw-slack-pairing.sh
 
 # shellcheck disable=SC2016
 # SC2016: Single-quoted strings are intentional for commands evaluated inside
@@ -208,11 +208,11 @@ NODE
 
 section "Phase 0: Prerequisites"
 
-if [ -z "${NVIDIA_API_KEY:-}" ]; then
-  fail "NVIDIA_API_KEY not set"
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
+  fail "NVIDIA_INFERENCE_API_KEY not set"
   exit 1
 fi
-pass "NVIDIA_API_KEY is set"
+pass "NVIDIA_INFERENCE_API_KEY is set"
 
 if ! docker info >/dev/null 2>&1; then
   fail "Docker is not running"
diff --git a/test/e2e/test-overlayfs-autofix.sh b/test/e2e/test-overlayfs-autofix.sh
index 2a5df82f70..5d9143a024 100755
--- a/test/e2e/test-overlayfs-autofix.sh
+++ b/test/e2e/test-overlayfs-autofix.sh
@@ -18,7 +18,7 @@
 # needs entry in the same PR that deletes src/lib/cluster-image-patch.ts.
 #
 # Test phases:
-#   1. Prerequisites — Docker running, NVIDIA_API_KEY, sudo, etc.
+#   1. Prerequisites — Docker running, NVIDIA_INFERENCE_API_KEY, sudo, etc.
 #   2. Setup — flip /etc/docker/daemon.json to enable containerd-snapshotter,
 #      restart Docker, verify the conflict config is active. Auto-skip on
 #      runners whose Docker does not support the feature flag.
@@ -46,12 +46,12 @@
 #   - Docker installed (any version that supports `features.containerd-snapshotter`,
 #     i.e. Docker 23+; the test skips cleanly on older versions)
 #   - Passwordless sudo (for editing /etc/docker/daemon.json + restarting Docker)
-#   - NVIDIA_API_KEY set (real key; required by install.sh)
+#   - NVIDIA_INFERENCE_API_KEY set (real key; required by install.sh)
 #
 # Environment variables:
 #   NEMOCLAW_NON_INTERACTIVE=1                — required
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1    — required
-#   NVIDIA_API_KEY                            — required
+#   NVIDIA_INFERENCE_API_KEY                  — required
 #   NEMOCLAW_SANDBOX_NAME                     — sandbox name (default: e2e-overlayfs)
 #   NEMOCLAW_E2E_TIMEOUT_SECONDS              — overall timeout (default: 1500)
 #   NEMOCLAW_OVERLAYFS_E2E_NEGATIVE_TIMEOUT   — negative-phase k3s wait (default: 300)
@@ -59,7 +59,7 @@
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 \
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#   NVIDIA_API_KEY=nvapi-... \
+#   NVIDIA_INFERENCE_API_KEY=nvapi-... \
 #     bash test/e2e/test-overlayfs-autofix.sh
 
 # ShellCheck cannot see EXIT trap invocations of cleanup helpers in this E2E script.
@@ -172,10 +172,10 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_API_KEY:-}" ] && [[ "${NVIDIA_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_API_KEY is set"
+if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
+  pass "NVIDIA_INFERENCE_API_KEY is set"
 else
-  fail "NVIDIA_API_KEY not set or invalid"
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
   exit 1
 fi
 
diff --git a/test/e2e/test-rebuild-hermes.sh b/test/e2e/test-rebuild-hermes.sh
index cbc2bdbea3..f9de6f7e04 100755
--- a/test/e2e/test-rebuild-hermes.sh
+++ b/test/e2e/test-rebuild-hermes.sh
@@ -19,12 +19,12 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
 #
 # Environment variables:
 #   NEMOCLAW_NON_INTERACTIVE=1             — required
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 — required
-#   NVIDIA_API_KEY                         — required
+#   NVIDIA_INFERENCE_API_KEY               — required
 
 set -euo pipefail
 
@@ -93,7 +93,7 @@ dump_hermes_sandbox_logs() {
 export NEMOCLAW_REBUILD_VERBOSE=1
 
 # ── Preflight ───────────────────────────────────────────────────────
-[ -n "${NVIDIA_API_KEY:-}" ] || fail "NVIDIA_API_KEY is required"
+[ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] || fail "NVIDIA_INFERENCE_API_KEY is required"
 [ "${NEMOCLAW_NON_INTERACTIVE:-}" = "1" ] || fail "NEMOCLAW_NON_INTERACTIVE=1 is required"
 
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
diff --git a/test/e2e/test-rebuild-openclaw.sh b/test/e2e/test-rebuild-openclaw.sh
index 6146db2536..6794649fff 100755
--- a/test/e2e/test-rebuild-openclaw.sh
+++ b/test/e2e/test-rebuild-openclaw.sh
@@ -19,12 +19,12 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
 #
 # Environment variables:
 #   NEMOCLAW_NON_INTERACTIVE=1             — required
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 — required
-#   NVIDIA_API_KEY                         — required
+#   NVIDIA_INFERENCE_API_KEY               — required
 
 set -euo pipefail
 
@@ -80,7 +80,7 @@ read_sandbox_config_hash() {
 export NEMOCLAW_REBUILD_VERBOSE=1
 
 # ── Preflight ───────────────────────────────────────────────────────
-[ -n "${NVIDIA_API_KEY:-}" ] || fail "NVIDIA_API_KEY is required"
+[ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] || fail "NVIDIA_INFERENCE_API_KEY is required"
 [ "${NEMOCLAW_NON_INTERACTIVE:-}" = "1" ] || fail "NEMOCLAW_NON_INTERACTIVE=1 is required"
 
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
diff --git a/test/e2e/test-sandbox-operations.sh b/test/e2e/test-sandbox-operations.sh
index 0d702a21d6..98d1b01537 100755
--- a/test/e2e/test-sandbox-operations.sh
+++ b/test/e2e/test-sandbox-operations.sh
@@ -282,7 +282,7 @@ preflight() {
   fi
   log "Docker is running"
 
-  if [[ -z "${NVIDIA_API_KEY:-}" && -z "${OPENAI_API_KEY:-}" && -z "${ANTHROPIC_API_KEY:-}" ]]; then
+  if [[ -z "${NVIDIA_INFERENCE_API_KEY:-}" && -z "${OPENAI_API_KEY:-}" && -z "${ANTHROPIC_API_KEY:-}" ]]; then
     echo -e "${YELLOW}WARNING: No API key detected.${NC}"
   fi
 
diff --git a/test/e2e/test-sandbox-rebuild.sh b/test/e2e/test-sandbox-rebuild.sh
index f066e4ff6a..df68ad1afa 100755
--- a/test/e2e/test-sandbox-rebuild.sh
+++ b/test/e2e/test-sandbox-rebuild.sh
@@ -14,20 +14,20 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
-#   - Network access to integrate.api.nvidia.com
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
+#   - Network access to inference-api.nvidia.com
 #
 # Environment variables:
 #   NEMOCLAW_NON_INTERACTIVE=1             — required
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 — required
-#   NVIDIA_API_KEY                         — required
+#   NVIDIA_INFERENCE_API_KEY               — required
 #   NEMOCLAW_SANDBOX_NAME                  — sandbox name (default: e2e-rebuild)
 #   NEMOCLAW_E2E_TIMEOUT_SECONDS           — overall timeout (default: 1200)
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 \
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#   NVIDIA_API_KEY=nvapi-... \
+#   NVIDIA_INFERENCE_API_KEY=nvapi-... \
 #     bash test/e2e/test-sandbox-rebuild.sh
 
 set -euo pipefail
@@ -57,7 +57,7 @@ fail() {
 info() { echo -e "${YELLOW}[INFO]${NC} $1"; }
 
 # ── Preflight ───────────────────────────────────────────────────────
-[ -n "${NVIDIA_API_KEY:-}" ] || fail "NVIDIA_API_KEY is required"
+[ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] || fail "NVIDIA_INFERENCE_API_KEY is required"
 [ "${NEMOCLAW_NON_INTERACTIVE:-}" = "1" ] || fail "NEMOCLAW_NON_INTERACTIVE=1 is required"
 
 info "Starting rebuild E2E test (sandbox: ${SANDBOX_NAME}, timeout: ${TIMEOUT}s)"
diff --git a/test/e2e/test-sandbox-survival.sh b/test/e2e/test-sandbox-survival.sh
index ca509e611c..bc952247ba 100755
--- a/test/e2e/test-sandbox-survival.sh
+++ b/test/e2e/test-sandbox-survival.sh
@@ -22,20 +22,20 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
-#   - Network access to integrate.api.nvidia.com
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
+#   - Network access to inference-api.nvidia.com
 #
 # Environment variables:
 #   NEMOCLAW_NON_INTERACTIVE=1             — required
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 — required
-#   NVIDIA_API_KEY                         — required for real NVIDIA Endpoints inference
+#   NVIDIA_INFERENCE_API_KEY               — required for real NVIDIA Endpoints inference
 #   NEMOCLAW_SANDBOX_NAME                  — sandbox name (default: e2e-survival)
 #   NEMOCLAW_E2E_TIMEOUT_SECONDS           — overall timeout (default: 900)
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 \
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#   NVIDIA_API_KEY=nvapi-... \
+#   NVIDIA_INFERENCE_API_KEY=nvapi-... \
 #     bash test/e2e/test-sandbox-survival.sh
 
 set -uo pipefail
@@ -207,17 +207,17 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_API_KEY:-}" ] && [[ "${NVIDIA_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_API_KEY is set (starts with nvapi-)"
+if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
+  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
 else
-  fail "NVIDIA_API_KEY not set or invalid — required for live inference"
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for live inference"
   exit 1
 fi
 
-if curl -sf --max-time 10 https://integrate.api.nvidia.com/v1/models >/dev/null 2>&1; then
-  pass "Network access to integrate.api.nvidia.com"
+if curl -sf --max-time 10 https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then
+  pass "Network access to inference-api.nvidia.com"
 else
-  fail "Cannot reach integrate.api.nvidia.com"
+  fail "Cannot reach inference-api.nvidia.com"
   exit 1
 fi
 
diff --git a/test/e2e/test-sessions-agents-cli.sh b/test/e2e/test-sessions-agents-cli.sh
index 8717526184..d9160fba0c 100755
--- a/test/e2e/test-sessions-agents-cli.sh
+++ b/test/e2e/test-sessions-agents-cli.sh
@@ -28,12 +28,12 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key or fake OpenAI endpoint)
+#   - NVIDIA_INFERENCE_API_KEY set (real key or fake OpenAI endpoint)
 #   - NEMOCLAW_NON_INTERACTIVE=1, NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_API_KEY=nvapi-... bash test/e2e/test-sessions-agents-cli.sh
+#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-sessions-agents-cli.sh
 # =============================================================================
 
 set -uo pipefail
@@ -156,12 +156,12 @@ preflight() {
     print_summary
     exit 1
   fi
-  if [ -z "${NVIDIA_API_KEY:-}" ]; then
-    skip "preflight: NVIDIA_API_KEY not set; sessions/agents E2E requires a working onboard credential"
+  if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
+    skip "preflight: NVIDIA_INFERENCE_API_KEY not set; sessions/agents E2E requires a working onboard credential"
     print_summary
     exit 0
   fi
-  pass "preflight: docker + NVIDIA_API_KEY available"
+  pass "preflight: docker + NVIDIA_INFERENCE_API_KEY available"
 }
 
 onboard_sandbox() {
diff --git a/test/e2e/test-shields-config.sh b/test/e2e/test-shields-config.sh
index 7ed586e7c9..a806bbdc2b 100755
--- a/test/e2e/test-shields-config.sh
+++ b/test/e2e/test-shields-config.sh
@@ -19,12 +19,12 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
 #
 # Environment variables:
 #   NEMOCLAW_NON_INTERACTIVE=1             — required
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 — required
-#   NVIDIA_API_KEY                         — required
+#   NVIDIA_INFERENCE_API_KEY               — required
 #   NEMOCLAW_SANDBOX_NAME                  — sandbox name (default: e2e-shields)
 #   NEMOCLAW_E2E_TIMEOUT_SECONDS           — overall timeout (default: 900)
 
@@ -79,10 +79,10 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_API_KEY:-}" ] && [[ "${NVIDIA_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_API_KEY is set"
+if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
+  pass "NVIDIA_INFERENCE_API_KEY is set"
 else
-  fail "NVIDIA_API_KEY not set or invalid"
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
   exit 1
 fi
 
diff --git a/test/e2e/test-skill-agent-e2e.sh b/test/e2e/test-skill-agent-e2e.sh
index f08e0ccc53..ff93b92a3b 100755
--- a/test/e2e/test-skill-agent-e2e.sh
+++ b/test/e2e/test-skill-agent-e2e.sh
@@ -13,7 +13,7 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
 #   - NEMOCLAW_NON_INTERACTIVE=1, NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 #
 # Environment:
@@ -24,7 +24,7 @@
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_API_KEY=nvapi-... bash test/e2e/test-skill-agent-e2e.sh
+#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-skill-agent-e2e.sh
 
 # ShellCheck cannot see EXIT trap invocations of cleanup helpers in this E2E script.
 # shellcheck disable=SC2317
@@ -111,11 +111,11 @@ if ! docker info >/dev/null 2>&1; then
 fi
 pass "Docker is running"
 
-if [ -z "${NVIDIA_API_KEY:-}" ] || [[ "${NVIDIA_API_KEY}" != nvapi-* ]]; then
-  fail "NVIDIA_API_KEY not set or invalid"
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] || [[ "${NVIDIA_INFERENCE_API_KEY}" != nvapi-* ]]; then
+  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
   exit 1
 fi
-pass "NVIDIA_API_KEY is set"
+pass "NVIDIA_INFERENCE_API_KEY is set"
 
 cd "$REPO" || {
   fail "Could not cd to repo root"
@@ -193,7 +193,7 @@ while [ "$attempt" -le "$MAX_ATTEMPTS" ]; do
 
   set +e
   agent_out=$(
-    NVIDIA_API_KEY="$NVIDIA_API_KEY" \
+    NVIDIA_INFERENCE_API_KEY="$NVIDIA_INFERENCE_API_KEY" \
       SANDBOX_NAME="$SANDBOX_NAME" \
       SKILL_ID="$SKILL_ID" \
       VERIFY_TOKEN="$VERIFY_PHRASE" \
diff --git a/test/e2e/test-snapshot-commands.sh b/test/e2e/test-snapshot-commands.sh
index e70d495349..2aaff425d6 100755
--- a/test/e2e/test-snapshot-commands.sh
+++ b/test/e2e/test-snapshot-commands.sh
@@ -15,12 +15,12 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
 #
 # Environment variables:
 #   NEMOCLAW_NON_INTERACTIVE=1             — required
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 — required
-#   NVIDIA_API_KEY                         — required
+#   NVIDIA_INFERENCE_API_KEY               — required
 
 set -euo pipefail
 
@@ -80,7 +80,7 @@ run_capture() {
 }
 
 # ── Preflight ───────────────────────────────────────────────────────
-[ -n "${NVIDIA_API_KEY:-}" ] || fail "NVIDIA_API_KEY is required"
+[ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] || fail "NVIDIA_INFERENCE_API_KEY is required"
 [ "${NEMOCLAW_NON_INTERACTIVE:-}" = "1" ] || fail "NEMOCLAW_NON_INTERACTIVE=1 is required"
 
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
diff --git a/test/e2e/test-state-backup-restore.sh b/test/e2e/test-state-backup-restore.sh
index 1763943fec..7e8637106b 100755
--- a/test/e2e/test-state-backup-restore.sh
+++ b/test/e2e/test-state-backup-restore.sh
@@ -11,8 +11,8 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set
-#   - Network access to integrate.api.nvidia.com
+#   - NVIDIA_INFERENCE_API_KEY set
+#   - Network access to inference-api.nvidia.com
 # =============================================================================
 
 set -euo pipefail
@@ -81,7 +81,7 @@ install_nemoclaw() {
   fi
   log "=== Installing NemoClaw via install.sh ==="
   NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
-    NVIDIA_API_KEY="${NVIDIA_API_KEY:-nvapi-DUMMY-FOR-INSTALL}" \
+    NVIDIA_INFERENCE_API_KEY="${NVIDIA_INFERENCE_API_KEY:-nvapi-DUMMY-FOR-INSTALL}" \
     NEMOCLAW_NON_INTERACTIVE=1 \
     NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
     bash "$REPO_ROOT/install.sh" --non-interactive --yes-i-accept-third-party-software \
@@ -102,9 +102,9 @@ preflight() {
   fi
   log "Docker is running"
 
-  local api_key="${NVIDIA_API_KEY:-}"
+  local api_key="${NVIDIA_INFERENCE_API_KEY:-}"
   if [[ -z "$api_key" ]]; then
-    log "ERROR: NVIDIA_API_KEY not set"
+    log "ERROR: NVIDIA_INFERENCE_API_KEY not set"
     exit 1
   fi
 
diff --git a/test/e2e/test-telegram-injection.sh b/test/e2e/test-telegram-injection.sh
index de90ddec76..aa6834b08f 100755
--- a/test/e2e/test-telegram-injection.sh
+++ b/test/e2e/test-telegram-injection.sh
@@ -25,15 +25,15 @@
 # Prerequisites:
 #   - Docker running
 #   - NemoClaw installed and sandbox running (test-full-e2e.sh Phase 0-3)
-#   - NVIDIA_API_KEY set
+#   - NVIDIA_INFERENCE_API_KEY set
 #   - openshell on PATH
 #
 # Environment variables:
 #   NEMOCLAW_SANDBOX_NAME  — sandbox name (default: e2e-test)
-#   NVIDIA_API_KEY         — required
+#   NVIDIA_INFERENCE_API_KEY  — required
 #
 # Usage:
-#   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 NVIDIA_API_KEY=nvapi-... bash test/e2e/test-telegram-injection.sh
+#   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-telegram-injection.sh
 #
 # See: https://github.com/NVIDIA/NemoClaw/issues/118
 #      https://github.com/NVIDIA/NemoClaw/pull/119
@@ -145,11 +145,11 @@ sandbox_exec() {
 # ══════════════════════════════════════════════════════════════════
 section "Phase 0: Prerequisites"
 
-if [ -z "${NVIDIA_API_KEY:-}" ]; then
-  fail "NVIDIA_API_KEY not set"
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
+  fail "NVIDIA_INFERENCE_API_KEY not set"
   exit 1
 fi
-pass "NVIDIA_API_KEY is set"
+pass "NVIDIA_INFERENCE_API_KEY is set"
 
 if ! command -v openshell >/dev/null 2>&1; then
   fail "openshell not found on PATH"
@@ -271,12 +271,12 @@ fi
 # ══════════════════════════════════════════════════════════════════
 section "Phase 3: Parameter Expansion"
 
-# T4: ${NVIDIA_API_KEY} must not expand to the actual key value
-info "T4: Testing \${NVIDIA_API_KEY} expansion..."
+# T4: ${NVIDIA_INFERENCE_API_KEY} must not expand to the actual key value
+info "T4: Testing \${NVIDIA_INFERENCE_API_KEY} expansion..."
 
 ssh_config_t4="$(mktemp)"
 openshell sandbox ssh-config "$SANDBOX_NAME" >"$ssh_config_t4" 2>/dev/null
-PAYLOAD_ENV='${NVIDIA_API_KEY}'
+PAYLOAD_ENV='${NVIDIA_INFERENCE_API_KEY}'
 
 t4_result=$(timeout 30 ssh -F "$ssh_config_t4" \
   -o StrictHostKeyChecking=no \
@@ -287,14 +287,14 @@ t4_result=$(timeout 30 ssh -F "$ssh_config_t4" \
   <<<"$PAYLOAD_ENV" 2>&1) || true
 rm -f "$ssh_config_t4"
 
-# The result should contain the literal string ${NVIDIA_API_KEY}, not a nvapi- value
+# The result should contain the literal string ${NVIDIA_INFERENCE_API_KEY}, not a nvapi- value
 if echo "$t4_result" | grep -q "nvapi-"; then
-  fail "T4: \${NVIDIA_API_KEY} expanded to actual key value — secret leaked!"
-elif echo "$t4_result" | grep -qF '${NVIDIA_API_KEY}'; then
-  pass "T4: \${NVIDIA_API_KEY} treated as literal string (not expanded)"
+  fail "T4: \${NVIDIA_INFERENCE_API_KEY} expanded to actual key value — secret leaked!"
+elif echo "$t4_result" | grep -qF '${NVIDIA_INFERENCE_API_KEY}'; then
+  pass "T4: \${NVIDIA_INFERENCE_API_KEY} treated as literal string (not expanded)"
 else
   # Empty or other result — still safe as long as key not leaked
-  pass "T4: \${NVIDIA_API_KEY} did not expand to key value (result: ${t4_result:0:100})"
+  pass "T4: \${NVIDIA_INFERENCE_API_KEY} did not expand to key value (result: ${t4_result:0:100})"
 fi
 
 # ══════════════════════════════════════════════════════════════════
@@ -302,11 +302,11 @@ fi
 # ══════════════════════════════════════════════════════════════════
 section "Phase 4: Process Table Leak Check"
 
-# T5: NVIDIA_API_KEY must not appear in ps aux output
+# T5: NVIDIA_INFERENCE_API_KEY must not appear in ps aux output
 info "T5: Checking process table for API key leaks..."
 
 # Get truncated key for a safe comparison (first 15 chars of key value)
-API_KEY_PREFIX="${NVIDIA_API_KEY:0:15}"
+API_KEY_PREFIX="${NVIDIA_INFERENCE_API_KEY:0:15}"
 
 # Check both the Brev host and inside the sandbox
 host_ps=$(ps aux 2>/dev/null || true)
@@ -331,9 +331,9 @@ if echo "$sandbox_ps" | grep -qF "$API_KEY_PREFIX"; then
 fi
 
 if [ "$HOST_LEAK" = true ]; then
-  fail "T5: NVIDIA_API_KEY found in HOST process table"
+  fail "T5: NVIDIA_INFERENCE_API_KEY found in HOST process table"
 elif [ "$SANDBOX_LEAK" = true ]; then
-  fail "T5: NVIDIA_API_KEY found in SANDBOX process table"
+  fail "T5: NVIDIA_INFERENCE_API_KEY found in SANDBOX process table"
 else
   pass "T5: API key not visible in process tables (host or sandbox)"
 fi
diff --git a/test/e2e/test-token-rotation.sh b/test/e2e/test-token-rotation.sh
index 8d3f48ad50..f459be49c0 100755
--- a/test/e2e/test-token-rotation.sh
+++ b/test/e2e/test-token-rotation.sh
@@ -16,7 +16,7 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (or fake OpenAI endpoint)
+#   - NVIDIA_INFERENCE_API_KEY set (or fake OpenAI endpoint)
 #   - TELEGRAM_BOT_TOKEN_A and TELEGRAM_BOT_TOKEN_B set (can be fake)
 #   - DISCORD_BOT_TOKEN_A and DISCORD_BOT_TOKEN_B set (can be fake)
 #   - SLACK_BOT_TOKEN_A and SLACK_BOT_TOKEN_B set (can be fake; xoxb- prefix)
@@ -24,7 +24,7 @@
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_API_KEY=nvapi-... \
+#     NVIDIA_INFERENCE_API_KEY=nvapi-... \
 #     TELEGRAM_BOT_TOKEN_A=fake-a TELEGRAM_BOT_TOKEN_B=fake-b \
 #     DISCORD_BOT_TOKEN_A=fake-c DISCORD_BOT_TOKEN_B=fake-d \
 #     SLACK_BOT_TOKEN_A=xoxb-fake-a SLACK_BOT_TOKEN_B=xoxb-fake-b \
diff --git a/test/e2e/test-tunnel-lifecycle.sh b/test/e2e/test-tunnel-lifecycle.sh
index 7b2ec9ee4a..9c7260f0b3 100755
--- a/test/e2e/test-tunnel-lifecycle.sh
+++ b/test/e2e/test-tunnel-lifecycle.sh
@@ -13,8 +13,8 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set
-#   - Network access to integrate.api.nvidia.com
+#   - NVIDIA_INFERENCE_API_KEY set
+#   - Network access to inference-api.nvidia.com
 # =============================================================================
 
 set -euo pipefail
@@ -86,7 +86,7 @@ install_nemoclaw() {
   fi
   log "=== Installing NemoClaw via install.sh ==="
   NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
-    NVIDIA_API_KEY="${NVIDIA_API_KEY:-nvapi-DUMMY-FOR-INSTALL}" \
+    NVIDIA_INFERENCE_API_KEY="${NVIDIA_INFERENCE_API_KEY:-nvapi-DUMMY-FOR-INSTALL}" \
     NEMOCLAW_NON_INTERACTIVE=1 \
     NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
     bash "$REPO_ROOT/install.sh" --non-interactive --yes-i-accept-third-party-software \
@@ -107,9 +107,9 @@ preflight() {
   fi
   log "Docker is running"
 
-  local api_key="${NVIDIA_API_KEY:-}"
+  local api_key="${NVIDIA_INFERENCE_API_KEY:-}"
   if [[ -z "$api_key" ]]; then
-    log "ERROR: NVIDIA_API_KEY not set"
+    log "ERROR: NVIDIA_INFERENCE_API_KEY not set"
     exit 1
   fi
 
diff --git a/test/e2e/test-upgrade-stale-sandbox.sh b/test/e2e/test-upgrade-stale-sandbox.sh
index b2bad3dbe3..3b3790ae7d 100755
--- a/test/e2e/test-upgrade-stale-sandbox.sh
+++ b/test/e2e/test-upgrade-stale-sandbox.sh
@@ -18,7 +18,7 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
 
 set -euo pipefail
 
@@ -51,7 +51,7 @@ info() { echo -e "${YELLOW}[INFO]${NC} $1"; }
 diag() { echo -e "${YELLOW}[DIAG]${NC} $1"; }
 
 # ── Preflight ───────────────────────────────────────────────────────
-[ -n "${NVIDIA_API_KEY:-}" ] || fail "NVIDIA_API_KEY is required"
+[ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] || fail "NVIDIA_INFERENCE_API_KEY is required"
 [ "${NEMOCLAW_NON_INTERACTIVE:-}" = "1" ] || fail "NEMOCLAW_NON_INTERACTIVE=1 is required"
 
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
diff --git a/test/gateway-state-reconcile-2276.test.ts b/test/gateway-state-reconcile-2276.test.ts
index eb894ce0bc..11ff694808 100644
--- a/test/gateway-state-reconcile-2276.test.ts
+++ b/test/gateway-state-reconcile-2276.test.ts
@@ -756,7 +756,7 @@ describe("Scenario 14 (#4497): connect preserves registry so rebuild can recover
           // HOME — fine: the assertions below target the recovery markers that
           // are emitted BEFORE the recreate, proving rebuild crossed the
           // backup gate that previously blocked it.
-          NVIDIA_API_KEY: "",
+          NVIDIA_INFERENCE_API_KEY: "",
           NEMOCLAW_PROVIDER_KEY: "",
         },
       },
diff --git a/test/generate-openclaw-config.test.ts b/test/generate-openclaw-config.test.ts
index 02826f6f80..844da6f279 100644
--- a/test/generate-openclaw-config.test.ts
+++ b/test/generate-openclaw-config.test.ts
@@ -1357,7 +1357,7 @@ describe("generate-openclaw-config.mts: config generation", () => {
       { NEMOCLAW_MODEL: "deepseek-ai/DeepSeek-V4-Flash" },
       { NEMOCLAW_PROVIDER_KEY: "openai" },
       { NEMOCLAW_INFERENCE_API: "responses" },
-      { NEMOCLAW_INFERENCE_BASE_URL: "https://integrate.api.nvidia.com/v1" },
+      { NEMOCLAW_INFERENCE_BASE_URL: "https://inference-api.nvidia.com/v1" },
     ];
 
     for (const envCase of cases) {
@@ -1405,7 +1405,7 @@ describe("generate-openclaw-config.mts: config generation", () => {
       { NEMOCLAW_MODEL: "nvidia/nemotron-3-nano:30b" },
       { NEMOCLAW_PROVIDER_KEY: "nvidia" },
       { NEMOCLAW_INFERENCE_API: "responses" },
-      { NEMOCLAW_INFERENCE_BASE_URL: "https://integrate.api.nvidia.com/v1" },
+      { NEMOCLAW_INFERENCE_BASE_URL: "https://inference-api.nvidia.com/v1" },
     ];
 
     for (const envCase of cases) {
diff --git a/test/helpers/onboard-final-flow-phases.ts b/test/helpers/onboard-final-flow-phases.ts
index 7c3f996ec9..5a58014941 100644
--- a/test/helpers/onboard-final-flow-phases.ts
+++ b/test/helpers/onboard-final-flow-phases.ts
@@ -147,7 +147,7 @@ export function context(
     model: "nvidia/test",
     provider: "nim",
     endpointUrl: "https://example.test/v1",
-    credentialEnv: "NVIDIA_API_KEY",
+    credentialEnv: "NVIDIA_INFERENCE_API_KEY",
     hermesAuthMethod: null,
     hermesToolGateways: ["local"],
     preferredInferenceApi: "chat",
diff --git a/test/host-artifact-cleanup.test.ts b/test/host-artifact-cleanup.test.ts
index e1e8e1aa32..6aea1af635 100644
--- a/test/host-artifact-cleanup.test.ts
+++ b/test/host-artifact-cleanup.test.ts
@@ -68,7 +68,7 @@ describe("cleanupStaleHostFiles (post-upgrade sweep, #3105)", () => {
     const credsDir = path.join(home, ".nemoclaw");
     const legacyFile = path.join(credsDir, "credentials.json");
     fs.mkdirSync(credsDir, { recursive: true });
-    const payload = JSON.stringify({ NVIDIA_API_KEY: "nvapi-real" });
+    const payload = JSON.stringify({ NVIDIA_INFERENCE_API_KEY: "nvapi-real" });
     fs.writeFileSync(legacyFile, payload, { mode: 0o600 });
     vi.stubEnv("HOME", home);
 
diff --git a/test/nemoclaw-start.test.ts b/test/nemoclaw-start.test.ts
index cbb1abae9f..1024430483 100644
--- a/test/nemoclaw-start.test.ts
+++ b/test/nemoclaw-start.test.ts
@@ -4166,7 +4166,7 @@ describe("write_auth_profile (#1332)", () => {
 
   it("writes profile under the provider key from NEMOCLAW_PROVIDER_KEY", () => {
     const { home, authPath, status, stderr } = runWriteAuthProfile({
-      NVIDIA_API_KEY: "secret",
+      NVIDIA_INFERENCE_API_KEY: "secret",
       NEMOCLAW_PROVIDER_KEY: "openai",
     });
     try {
@@ -4176,7 +4176,7 @@ describe("write_auth_profile (#1332)", () => {
         "openai:manual": {
           type: "api_key",
           provider: "openai",
-          keyRef: { source: "env", id: "NVIDIA_API_KEY" },
+          keyRef: { source: "env", id: "NVIDIA_INFERENCE_API_KEY" },
           profileId: "openai:manual",
         },
       });
@@ -4187,7 +4187,7 @@ describe("write_auth_profile (#1332)", () => {
 
   it("falls back to 'inference' when NEMOCLAW_PROVIDER_KEY is unset", () => {
     const { home, authPath, status, stderr } = runWriteAuthProfile({
-      NVIDIA_API_KEY: "secret",
+      NVIDIA_INFERENCE_API_KEY: "secret",
     });
     try {
       expect(status, stderr).toBe(0);
@@ -4202,7 +4202,7 @@ describe("write_auth_profile (#1332)", () => {
 
   it("does not use 'nvidia' as the default provider key", () => {
     const { home, authPath, status } = runWriteAuthProfile({
-      NVIDIA_API_KEY: "secret",
+      NVIDIA_INFERENCE_API_KEY: "secret",
     });
     try {
       expect(status).toBe(0);
@@ -4219,7 +4219,7 @@ describe("write_auth_profile (#1332)", () => {
     // If the provider_key were interpolated into the heredoc instead of
     // passed as argv, $(...) inside the value would execute and replace it.
     const { home, authPath, status, stderr } = runWriteAuthProfile({
-      NVIDIA_API_KEY: "secret",
+      NVIDIA_INFERENCE_API_KEY: "secret",
       NEMOCLAW_PROVIDER_KEY: "$(echo pwned)",
     });
     try {
@@ -4233,7 +4233,7 @@ describe("write_auth_profile (#1332)", () => {
     }
   });
 
-  it("is a no-op when NVIDIA_API_KEY is unset", () => {
+  it("is a no-op when NVIDIA_INFERENCE_API_KEY is unset", () => {
     const { home, authPath, status } = runWriteAuthProfile({});
     try {
       expect(status).toBe(0);
@@ -4245,7 +4245,7 @@ describe("write_auth_profile (#1332)", () => {
 
   it("writes the auth profile with 0600 permissions", () => {
     const { home, authPath, status } = runWriteAuthProfile({
-      NVIDIA_API_KEY: "secret",
+      NVIDIA_INFERENCE_API_KEY: "secret",
       NEMOCLAW_PROVIDER_KEY: "openai",
     });
     try {
diff --git a/test/nemotron-inference-fix.test.ts b/test/nemotron-inference-fix.test.ts
index aa606d8fc9..7091eb273c 100644
--- a/test/nemotron-inference-fix.test.ts
+++ b/test/nemotron-inference-fix.test.ts
@@ -578,7 +578,7 @@ send('inference.local', JSON.stringify({
   model: 'nvidia/nemotron-3-ultra-550b-a55b',
   messages: [{ role: 'user', content: 'hi' }],
 }));
-send('integrate.api.nvidia.com', JSON.stringify({
+send('inference-api.nvidia.com', JSON.stringify({
   model: 'nvidia/nemotron-3-ultra-550b-a55b',
   messages: [{ role: 'user', content: 'hi' }],
 }));
diff --git a/test/no-direct-credential-env.test.ts b/test/no-direct-credential-env.test.ts
index eea90f46ee..cceda10431 100644
--- a/test/no-direct-credential-env.test.ts
+++ b/test/no-direct-credential-env.test.ts
@@ -19,12 +19,12 @@ import { findDirectCredentialEnvReads } from "../scripts/checks/direct-credentia
 describe("direct credential env guard", () => {
   it.each([
     // Assignments (write context) — allowed
-    'process.env.NVIDIA_API_KEY = "test";',
+    'process.env.NVIDIA_INFERENCE_API_KEY = "test";',
     "process.env.OPENAI_API_KEY = value;",
     "process.env[credentialEnv] = providerKey;",
 
     // Deletions (write context) — allowed
-    "delete process.env.NVIDIA_API_KEY;",
+    "delete process.env.NVIDIA_INFERENCE_API_KEY;",
     "delete process.env.ANTHROPIC_API_KEY;",
 
     // Non-credential env vars — allowed
@@ -36,26 +36,26 @@ describe("direct credential env guard", () => {
     "const x = process.env.NEMOCLAW_PROVIDER_KEY;",
 
     // Correct patterns — allowed
-    'const key = getCredential("NVIDIA_API_KEY");',
-    'const key = resolveProviderCredential("NVIDIA_API_KEY");',
+    'const key = getCredential("NVIDIA_INFERENCE_API_KEY");',
+    'const key = resolveProviderCredential("NVIDIA_INFERENCE_API_KEY");',
 
     // Bracketed string-literal assignments — allowed
-    'process.env["NVIDIA_API_KEY"] = "test";',
+    'process.env["NVIDIA_INFERENCE_API_KEY"] = "test";',
 
     // Dynamic access with non-credential variable name — allowed
     "const x = process.env[someKey];",
     "const x = process.env[envName];",
 
     // Explicitly suppressed raw-env reads — allowed
-    "// check-direct-credential-env-ignore -- raw env check required\nconst key = process.env.NVIDIA_API_KEY;",
-    "// no-direct-credential-env -- backward-compatible suppression\nconst key = process.env.NVIDIA_API_KEY;",
+    "// check-direct-credential-env-ignore -- raw env check required\nconst key = process.env.NVIDIA_INFERENCE_API_KEY;",
+    "// no-direct-credential-env -- backward-compatible suppression\nconst key = process.env.NVIDIA_INFERENCE_API_KEY;",
   ])("allows %s", (code) => {
     expect(findDirectCredentialEnvReads(code)).toEqual([]);
   });
 
   it.each([
     // Static reads of known credential keys
-    ["const key = process.env.NVIDIA_API_KEY;", "NVIDIA_API_KEY"],
+    ["const key = process.env.NVIDIA_INFERENCE_API_KEY;", "NVIDIA_INFERENCE_API_KEY"],
     ["const key = process.env.OPENAI_API_KEY;", "OPENAI_API_KEY"],
     ["const key = process.env.ANTHROPIC_API_KEY;", "ANTHROPIC_API_KEY"],
     ["const key = process.env.GEMINI_API_KEY;", "GEMINI_API_KEY"],
@@ -63,10 +63,10 @@ describe("direct credential env guard", () => {
     ["const key = process.env.COMPATIBLE_ANTHROPIC_API_KEY;", "COMPATIBLE_ANTHROPIC_API_KEY"],
 
     // Conditional check (read context)
-    ["if (!process.env.NVIDIA_API_KEY) {}", "NVIDIA_API_KEY"],
+    ["if (!process.env.NVIDIA_INFERENCE_API_KEY) {}", "NVIDIA_INFERENCE_API_KEY"],
 
     // Bracketed string-literal reads
-    ['const key = process.env["NVIDIA_API_KEY"];', "NVIDIA_API_KEY"],
+    ['const key = process.env["NVIDIA_INFERENCE_API_KEY"];', "NVIDIA_INFERENCE_API_KEY"],
     ['if (!process.env["OPENAI_API_KEY"]) {}', "OPENAI_API_KEY"],
 
     // Dynamic read with credential-containing variable name
@@ -75,8 +75,8 @@ describe("direct credential env guard", () => {
 
     // Suppression token inside non-comment text must not suppress.
     [
-      "const marker = 'no-direct-credential-env';\nconst key = process.env.NVIDIA_API_KEY;",
-      "NVIDIA_API_KEY",
+      "const marker = 'no-direct-credential-env';\nconst key = process.env.NVIDIA_INFERENCE_API_KEY;",
+      "NVIDIA_INFERENCE_API_KEY",
     ],
   ])("flags %s", (code, key) => {
     expect(findDirectCredentialEnvReads(code)).toMatchObject([{ key }]);
diff --git a/test/ollama-proxy-recovery.test.ts b/test/ollama-proxy-recovery.test.ts
index c28f2dd19a..86764567db 100644
--- a/test/ollama-proxy-recovery.test.ts
+++ b/test/ollama-proxy-recovery.test.ts
@@ -167,7 +167,7 @@ console.log(JSON.stringify({ proxySpawns, curlEnv }));
         ...process.env,
         HTTP_PROXY: "http://proxy.invalid:8888",
         HOME: tmpDir,
-        NVIDIA_API_KEY: "must-not-leak",
+        NVIDIA_INFERENCE_API_KEY: "must-not-leak",
         NO_PROXY: "",
       },
     });
@@ -178,7 +178,7 @@ console.log(JSON.stringify({ proxySpawns, curlEnv }));
       proxySpawns: object[];
     }>(result.stdout);
     assert.equal(payload.proxySpawns.length, 0);
-    assert.equal(payload.curlEnv.NVIDIA_API_KEY, undefined);
+    assert.equal(payload.curlEnv.NVIDIA_INFERENCE_API_KEY, undefined);
     assert.equal(payload.curlEnv.HTTP_PROXY, "http://proxy.invalid:8888");
     assert.match(payload.curlEnv.NO_PROXY, /(^|,)127\.0\.0\.1(,|$)/);
     assert.match(payload.curlEnv.NO_PROXY, /(^|,)localhost(,|$)/);
diff --git a/test/onboard-messaging.test.ts b/test/onboard-messaging.test.ts
index 28ce5120bb..c008c1b85d 100644
--- a/test/onboard-messaging.test.ts
+++ b/test/onboard-messaging.test.ts
@@ -263,9 +263,9 @@ const { createSandbox, setupMessagingChannels } = require(${onboardPath});
       "TELEGRAM_BOT_TOKEN must not be in sandbox env",
     );
     assert.equal(
-      createCommand.env.NVIDIA_API_KEY,
+      createCommand.env.NVIDIA_INFERENCE_API_KEY,
       undefined,
-      "NVIDIA_API_KEY must not be in sandbox env",
+      "NVIDIA_INFERENCE_API_KEY must not be in sandbox env",
     );
     assert.equal(createCommand.env.KUBECONFIG, undefined, "KUBECONFIG must not be in sandbox env");
     assert.equal(
diff --git a/test/onboard-model-router.test.ts b/test/onboard-model-router.test.ts
index 75c5eee4ba..0702e011f3 100644
--- a/test/onboard-model-router.test.ts
+++ b/test/onboard-model-router.test.ts
@@ -163,7 +163,7 @@ runner.runCapture = (command) => {
 };
 registry.updateSandbox = () => true;
 
-process.env.NVIDIA_API_KEY = "nvapi-router-secret";
+process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-router-secret";
 
 const { setupInference, getSandboxInferenceConfig } = require(${onboardPath});
 
@@ -173,7 +173,7 @@ const { setupInference, getSandboxInferenceConfig } = require(${onboardPath});
     "nvidia-routed",
     "nvidia-router",
     "http://host.openshell.internal:" + routerPort + "/v1",
-    "NVIDIA_API_KEY",
+    "NVIDIA_INFERENCE_API_KEY",
   );
   console.log(JSON.stringify({
     commands,
@@ -207,13 +207,13 @@ const { setupInference, getSandboxInferenceConfig } = require(${onboardPath});
       );
       assert.ok(providerCommand, JSON.stringify(payload.commands));
       assert.match(providerCommand.command, /--name nvidia-router/);
-      assert.match(providerCommand.command, /--credential NVIDIA_API_KEY/);
+      assert.match(providerCommand.command, /--credential NVIDIA_INFERENCE_API_KEY/);
       assert.match(
         providerCommand.command,
         new RegExp(`OPENAI_BASE_URL=http:\\/\\/host\\.openshell\\.internal:${routerPort}\\/v1`),
       );
       assert.doesNotMatch(providerCommand.command, /nvapi-router-secret/);
-      assert.equal(providerCommand.env?.NVIDIA_API_KEY, "nvapi-router-secret");
+      assert.equal(providerCommand.env?.NVIDIA_INFERENCE_API_KEY, "nvapi-router-secret");
 
       const inferenceCommand = payload.commands.find((entry) =>
         /inference set/.test(entry.command),
@@ -409,7 +409,7 @@ runner.runCapture = (command) => {
 };
 registry.updateSandbox = () => true;
 
-process.env.NVIDIA_API_KEY = "nvapi-router-secret";
+process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-router-secret";
 
 const { setupInference } = require(${onboardPath});
 
@@ -419,7 +419,7 @@ const { setupInference } = require(${onboardPath});
     "nvidia-routed",
     "nvidia-router",
     "http://host.openshell.internal:" + routerPort + "/v1",
-    "NVIDIA_API_KEY",
+    "NVIDIA_INFERENCE_API_KEY",
   );
   console.log(JSON.stringify({ commands }));
 })().catch((error) => {
@@ -599,7 +599,7 @@ runner.runCapture = (command) => {
 };
 registry.updateSandbox = () => true;
 
-process.env.NVIDIA_API_KEY = "nvapi-router-secret";
+process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-router-secret";
 
 const { setupInference } = require(${onboardPath});
 
@@ -609,7 +609,7 @@ const { setupInference } = require(${onboardPath});
     "nvidia-routed",
     "nvidia-router",
     "http://host.openshell.internal:" + routerPort + "/v1",
-    "NVIDIA_API_KEY",
+    "NVIDIA_INFERENCE_API_KEY",
   );
   console.log(JSON.stringify({ commands }));
 })().catch((error) => {
@@ -825,7 +825,7 @@ runner.runCapture = (command) => {
 };
 registry.updateSandbox = () => true;
 
-process.env.NVIDIA_API_KEY = "nvapi-router-secret";
+process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-router-secret";
 
 const { setupInference } = require(${onboardPath});
 
@@ -835,7 +835,7 @@ const { setupInference } = require(${onboardPath});
     "nvidia-routed",
     "nvidia-router",
     "http://host.openshell.internal:" + routerPort + "/v1",
-    "NVIDIA_API_KEY",
+    "NVIDIA_INFERENCE_API_KEY",
   );
   console.log(JSON.stringify({ commands }));
 })().catch((error) => {
@@ -1030,7 +1030,7 @@ runner.runCapture = (command) => {
 };
 registry.updateSandbox = () => true;
 
-process.env.NVIDIA_API_KEY = "nvapi-router-secret";
+process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-router-secret";
 
 const { setupInference } = require(${onboardPath});
 
@@ -1040,7 +1040,7 @@ const { setupInference } = require(${onboardPath});
     "nvidia-routed",
     "nvidia-router",
     "http://host.openshell.internal:" + routerPort + "/v1",
-    "NVIDIA_API_KEY",
+    "NVIDIA_INFERENCE_API_KEY",
   );
   const fpPath = path.join(${JSON.stringify(venvDir)}, ${JSON.stringify(MODEL_ROUTER_FINGERPRINT_FILE)});
   const fpExists = fs.existsSync(fpPath);
diff --git a/test/onboard-selection-vllm.test.ts b/test/onboard-selection-vllm.test.ts
index 51ea68c273..3aacb62b3d 100644
--- a/test/onboard-selection-vllm.test.ts
+++ b/test/onboard-selection-vllm.test.ts
@@ -399,7 +399,7 @@ process.env.NEMOCLAW_PROVIDER = "";
 process.env.NEMOCLAW_MODEL = "";
 
 credentials.ensureApiKey = async () => {
-  process.env.NVIDIA_API_KEY = "nvapi-good";
+  process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-good";
 };
 runner.runCapture = (command) => {
   const cmd = Array.isArray(command) ? command.join(" ") : command;
@@ -423,7 +423,7 @@ async function runScenario(scenario) {
   };
   process.env.NEMOCLAW_PROVIDER = "";
   process.env.NEMOCLAW_MODEL = "";
-  process.env.NVIDIA_API_KEY = "";
+  process.env.NVIDIA_INFERENCE_API_KEY = "";
   delete require.cache[require.resolve(${onboardPath})];
   const { setupNim } = require(${onboardPath});
   const originalLog = console.log;
diff --git a/test/onboard-selection.test.ts b/test/onboard-selection.test.ts
index f23fd1ff4c..86ea7d0aa0 100644
--- a/test/onboard-selection.test.ts
+++ b/test/onboard-selection.test.ts
@@ -183,7 +183,7 @@ const clearCredentialEnv = [
   "COMPATIBLE_API_KEY",
   "COMPATIBLE_ANTHROPIC_API_KEY",
   "NOUS_API_KEY",
-  "NVIDIA_API_KEY",
+  "NVIDIA_INFERENCE_API_KEY",
   "NGC_API_KEY",
   "NEMOCLAW_PROVIDER_KEY",
 ];
@@ -560,7 +560,7 @@ credentials.prompt = async (message) => {
   messages.push(message);
   return answers.shift() || "";
 };
-credentials.ensureApiKey = async () => { process.env.NVIDIA_API_KEY = "nvapi-test"; };
+credentials.ensureApiKey = async () => { process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-test"; };
 runner.runCapture = (command) => {
   const cmd = Array.isArray(command) ? command.join(" ") : command;
   if (cmd.includes("command -v ollama")) return "";
@@ -663,7 +663,7 @@ credentials.prompt = async (message) => {
   messages.push(message);
   return answers.shift() || "";
 };
-credentials.ensureApiKey = async () => { process.env.NVIDIA_API_KEY = "nvapi-test"; };
+credentials.ensureApiKey = async () => { process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-test"; };
 runner.runCapture = (command) => {
   // Normalize: onboard.ts still sends strings, local-inference.ts sends arrays.
   // Once onboard.ts is migrated to argv (#1889), these mocks can assert Array.isArray.
@@ -761,7 +761,7 @@ credentials.prompt = async (message) => {
   messages.push(message);
   return answers.shift() || "";
 };
-credentials.ensureApiKey = async () => { process.env.NVIDIA_API_KEY = "nvapi-test"; };
+credentials.ensureApiKey = async () => { process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-test"; };
 runner.runCapture = (command) => {
   // Normalize: onboard.ts still sends strings, local-inference.ts sends arrays.
   // Once onboard.ts is migrated to argv (#1889), these mocks can assert Array.isArray.
@@ -1986,7 +1986,7 @@ credentials.prompt = async (message) => {
   messages.push(message);
   return answers.shift() || "";
 };
-credentials.ensureApiKey = async () => { process.env.NVIDIA_API_KEY = "nvapi-good"; };
+credentials.ensureApiKey = async () => { process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-good"; };
 runner.run = () => ({ status: 0 });
 runner.runCapture = (command) => {
   // Normalize: onboard.ts still sends strings, local-inference.ts sends arrays.
@@ -3471,7 +3471,7 @@ credentials.prompt = async (message) => {
   messages.push(message);
   return answers.shift() || "";
 };
-credentials.ensureApiKey = async () => { process.env.NVIDIA_API_KEY = "nvapi-good"; };
+credentials.ensureApiKey = async () => { process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-good"; };
 runner.runCapture = () => "";
 
 const { setupNim } = require(${onboardPath});
@@ -3543,7 +3543,7 @@ const clearCredentialEnv = [
   "COMPATIBLE_API_KEY",
   "COMPATIBLE_ANTHROPIC_API_KEY",
   "NOUS_API_KEY",
-  "NVIDIA_API_KEY",
+  "NVIDIA_INFERENCE_API_KEY",
   "NGC_API_KEY",
   "NEMOCLAW_PROVIDER_KEY",
 ];
@@ -3678,7 +3678,7 @@ const { setupNim } = require(${onboardPath});
       name: "Model Router",
       answers: ["back", ""],
       menuSelections: ["Model Router", "NVIDIA Endpoints"],
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
       promptPattern: /Model Router API key: /,
     },
     {
@@ -3759,7 +3759,7 @@ credentials.prompt = async (message) => {
   messages.push(message);
   return answers.shift() || "";
 };
-credentials.ensureApiKey = async () => { process.env.NVIDIA_API_KEY = "nvapi-good"; };
+credentials.ensureApiKey = async () => { process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-good"; };
 runner.runCapture = () => "";
 
 const { setupNim } = require(${onboardPath});
@@ -3849,10 +3849,10 @@ if echo "$url" | grep -q 'generativelanguage.googleapis.com' && echo "$url" | gr
 elif echo "$url" | grep -q 'generativelanguage.googleapis.com' && echo "$url" | grep -q '/chat/completions$'; then
   body='{"id":"chatcmpl-123","choices":[{"message":{"content":"OK"}}]}'
   status="200"
-elif echo "$url" | grep -q 'integrate.api.nvidia.com' && echo "$url" | grep -q '/responses$'; then
+elif echo "$url" | grep -q 'inference-api.nvidia.com' && echo "$url" | grep -q '/responses$'; then
   body='{"id":"resp_123"}'
   status="200"
-elif echo "$url" | grep -q 'integrate.api.nvidia.com' && echo "$url" | grep -q '/chat/completions$'; then
+elif echo "$url" | grep -q 'inference-api.nvidia.com' && echo "$url" | grep -q '/chat/completions$'; then
   body='{"id":"chatcmpl-123","choices":[{"message":{"content":"OK"}}]}'
   status="200"
 fi
@@ -3873,7 +3873,7 @@ credentials.prompt = async (message) => {
   messages.push(message);
   return answers.shift() || "";
 };
-credentials.ensureApiKey = async () => { process.env.NVIDIA_API_KEY = "nvapi-good"; };
+credentials.ensureApiKey = async () => { process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-good"; };
 runner.runCapture = () => "";
 
 const { setupNim } = require(${onboardPath});
@@ -3923,7 +3923,7 @@ const { setupNim } = require(${onboardPath});
     assert.equal(payload.messages.filter((message: string) => /Choose \[/.test(message)).length, 2);
   });
 
-  it("fails early in non-interactive mode when NVIDIA_API_KEY is not an nvapi- key", () => {
+  it("fails early in non-interactive mode when NVIDIA_INFERENCE_API_KEY is not an nvapi- key", () => {
     const repoRoot = path.join(import.meta.dirname, "..");
     const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-build-noninteractive-"));
     const fakeBin = path.join(tmpDir, "bin");
@@ -3964,7 +3964,7 @@ onboardModule._compile(injected, onboardFile);
 const { setupNim, __setNonInteractive } = onboardModule.exports;
 
 (async () => {
-  process.env.NVIDIA_API_KEY = "sk-test";
+  process.env.NVIDIA_INFERENCE_API_KEY = "sk-test";
   __setNonInteractive(true);
   const originalLog = console.log;
   const originalError = console.error;
@@ -4029,7 +4029,7 @@ const { setupNim, __setNonInteractive } = onboardModule.exports;
     );
   });
 
-  it("fails early in non-interactive mode with copy-paste recovery hints when no NVIDIA_API_KEY is set", () => {
+  it("fails early in non-interactive mode with copy-paste recovery hints when no NVIDIA_INFERENCE_API_KEY is set", () => {
     const repoRoot = path.join(import.meta.dirname, "..");
     const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-build-missingkey-"));
     const fakeBin = path.join(tmpDir, "bin");
@@ -4076,7 +4076,7 @@ onboardModule._compile(injected, onboardFile);
 const { setupNim, __setNonInteractive } = onboardModule.exports;
 
 (async () => {
-  delete process.env.NVIDIA_API_KEY;
+  delete process.env.NVIDIA_INFERENCE_API_KEY;
   delete process.env.NEMOCLAW_PROVIDER_KEY;
   __setNonInteractive(true);
   const originalLog = console.log;
@@ -4122,7 +4122,7 @@ const { setupNim, __setNonInteractive } = onboardModule.exports;
         ...process.env,
         HOME: tmpDir,
         PATH: `${fakeBin}:${process.env.PATH || ""}`,
-        NVIDIA_API_KEY: "",
+        NVIDIA_INFERENCE_API_KEY: "",
         NEMOCLAW_PROVIDER_KEY: "",
       },
     });
@@ -4135,7 +4135,7 @@ const { setupNim, __setNonInteractive } = onboardModule.exports;
     assert.ok(
       payload.lines.some((line: string) =>
         line.includes(
-          "NVIDIA_API_KEY (or NEMOCLAW_PROVIDER_KEY) is required for NVIDIA Endpoints in non-interactive mode.",
+          "NVIDIA_INFERENCE_API_KEY (or NEMOCLAW_PROVIDER_KEY) is required for NVIDIA Endpoints in non-interactive mode.",
         ),
       ),
     );
@@ -4143,7 +4143,7 @@ const { setupNim, __setNonInteractive } = onboardModule.exports;
     assert.ok(setWithIndex >= 0, "expected a standalone 'Set with:' line");
     assert.equal(
       payload.lines[setWithIndex + 1].trim(),
-      "export NVIDIA_API_KEY=nvapi-...",
+      "export NVIDIA_INFERENCE_API_KEY=nvapi-...",
       "expected the export command on its own line so it can be copy-pasted",
     );
     assert.ok(
@@ -4216,7 +4216,7 @@ runner.runCapture = () => "";
 const { setupNim } = require(${onboardPath});
 
 (async () => {
-  process.env.NVIDIA_API_KEY = "nvapi-bad";
+  process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-bad";
   const originalLog = console.log;
   const originalError = console.error;
   const lines = [];
@@ -4224,7 +4224,7 @@ const { setupNim } = require(${onboardPath});
   console.error = (...args) => lines.push(args.join(" "));
   try {
     const result = await setupNim(null);
-    originalLog(JSON.stringify({ result, messages, prompts, lines, key: process.env.NVIDIA_API_KEY }));
+    originalLog(JSON.stringify({ result, messages, prompts, lines, key: process.env.NVIDIA_INFERENCE_API_KEY }));
   } finally {
     console.log = originalLog;
     console.error = originalError;
@@ -4302,7 +4302,7 @@ runner.runCapture = () => "";
 const { setupNim } = require(${onboardPath});
 
 (async () => {
-  process.env.NVIDIA_API_KEY = "nvapi-bad";
+  process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-bad";
   const originalLog = console.log;
   const originalError = console.error;
   const lines = [];
@@ -4310,7 +4310,7 @@ const { setupNim } = require(${onboardPath});
   console.error = (...args) => lines.push(args.join(" "));
   try {
     const result = await setupNim(null);
-    originalLog(JSON.stringify({ result, messages, lines, key: process.env.NVIDIA_API_KEY }));
+    originalLog(JSON.stringify({ result, messages, lines, key: process.env.NVIDIA_INFERENCE_API_KEY }));
   } finally {
     console.log = originalLog;
     console.error = originalError;
diff --git a/test/onboard.test.ts b/test/onboard.test.ts
index eb82f5f791..f42d487329 100644
--- a/test/onboard.test.ts
+++ b/test/onboard.test.ts
@@ -741,13 +741,13 @@ runner.runCapture = (command) => {
 };
 registry.updateSandbox = () => true;
 
-process.env.NVIDIA_API_KEY = "nvapi-secret-value";
+process.env.NVIDIA_INFERENCE_API_KEY = "nvapi-secret-value";
 
 const { setupInference } = require(${onboardPath});
 
 (async () => {
   await setupInference("test-box", "nvidia/nemotron-3-super-120b-a12b", "nvidia-nim");
-  console.log(JSON.stringify({ commands, nvidiaApiKey: process.env.NVIDIA_API_KEY || null }));
+  console.log(JSON.stringify({ commands, nvidiaApiKey: process.env.NVIDIA_INFERENCE_API_KEY || null }));
 })().catch((error) => {
   console.error(error);
   process.exit(1);
@@ -773,7 +773,7 @@ const { setupInference } = require(${onboardPath});
     assert.equal(commands.length, 4);
     assert.match(commands[0].command, /gateway select nemoclaw/);
     assert.match(commands[1].command, /provider get/);
-    assert.match(commands[2].command, /--credential NVIDIA_API_KEY/);
+    assert.match(commands[2].command, /--credential NVIDIA_INFERENCE_API_KEY/);
     assert.doesNotMatch(commands[2].command, /nvapi-secret-value/);
     assert.match(commands[2].command, /provider update/);
     assert.match(commands[3].command, /inference set/);
@@ -2550,7 +2550,7 @@ const { createSandbox } = require(${onboardPath});
     assert.match(createCommand.command, /nemoclaw-start/);
     assert.doesNotMatch(createCommand.command, /--upload/);
     assert.doesNotMatch(createCommand.command, /OPENCLAW_CONFIG_PATH/);
-    assert.doesNotMatch(createCommand.command, /NVIDIA_API_KEY=/);
+    assert.doesNotMatch(createCommand.command, /NVIDIA_INFERENCE_API_KEY=/);
     assert.doesNotMatch(createCommand.command, /DISCORD_BOT_TOKEN=/);
     assert.doesNotMatch(createCommand.command, /SLACK_BOT_TOKEN=/);
     assert.ok(
diff --git a/test/openclaw-config-snapshot.test.ts b/test/openclaw-config-snapshot.test.ts
index 8a7698ce47..e43f7e115d 100644
--- a/test/openclaw-config-snapshot.test.ts
+++ b/test/openclaw-config-snapshot.test.ts
@@ -70,7 +70,7 @@ describe("OpenClaw durable config file (#5027)", () => {
           mode: "merge",
           providers: {
             nvidia: {
-              baseUrl: "https://integrate.api.nvidia.com/v1",
+              baseUrl: "https://inference-api.nvidia.com/v1",
               apiKey: "unused",
               models: [{ id: "moonshotai/kimi-k2" }],
             },
diff --git a/test/rebuild-credential-hydration.test.ts b/test/rebuild-credential-hydration.test.ts
index bb552e93fd..ce82510880 100644
--- a/test/rebuild-credential-hydration.test.ts
+++ b/test/rebuild-credential-hydration.test.ts
@@ -108,7 +108,11 @@ process.stdout.write(JSON.stringify(payload));
 describe("Issue #2273 Layer 1: credential hydration from legacy storage", () => {
   // Test each provider's credential env to ensure parametric coverage
   const providers = [
-    { name: "NVIDIA Endpoints", credentialEnv: "NVIDIA_API_KEY", value: "nvapi-test-hydrate" },
+    {
+      name: "NVIDIA Endpoints",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
+      value: "nvapi-test-hydrate",
+    },
     { name: "OpenAI", credentialEnv: "OPENAI_API_KEY", value: "sk-test-hydrate" },
     { name: "Anthropic", credentialEnv: "ANTHROPIC_API_KEY", value: "sk-ant-test-hydrate" },
     { name: "Google Gemini", credentialEnv: "GEMINI_API_KEY", value: "gemini-test-hydrate" },
diff --git a/test/rebuild-credential-preflight.test.ts b/test/rebuild-credential-preflight.test.ts
index fff0e1c9c4..e5f97dbfec 100644
--- a/test/rebuild-credential-preflight.test.ts
+++ b/test/rebuild-credential-preflight.test.ts
@@ -84,7 +84,7 @@ function createFixture(opts: {
   const {
     sandboxName = "my-assistant",
     provider = "nvidia-prod",
-    credentialEnv = "NVIDIA_API_KEY",
+    credentialEnv = "NVIDIA_INFERENCE_API_KEY",
     savedCredential,
     providerSelectionStatus = "complete",
     agent = null,
@@ -340,7 +340,7 @@ describe("Issue #2273: atomic rebuild", () => {
       // No credential in env or credentials.json AND no gateway-registered
       // provider — preflight must still abort so the sandbox is preserved.
       const f = createFixture({
-        credentialEnv: "NVIDIA_API_KEY",
+        credentialEnv: "NVIDIA_INFERENCE_API_KEY",
         providerRegistered: false,
         // no savedCredential
       });
@@ -350,7 +350,7 @@ describe("Issue #2273: atomic rebuild", () => {
 
       // Should mention preflight failure
       expect(output).toContain("preflight failed");
-      expect(output).toContain("NVIDIA_API_KEY");
+      expect(output).toContain("NVIDIA_INFERENCE_API_KEY");
       // Should say sandbox is untouched
       expect(output).toContain("untouched");
       // Sandbox should still be in the registry (not destroyed)
@@ -362,9 +362,9 @@ describe("Issue #2273: atomic rebuild", () => {
     }, () => {
       // Credential saved in credentials.json but NOT in process.env
       const f = createFixture({
-        credentialEnv: "NVIDIA_API_KEY",
+        credentialEnv: "NVIDIA_INFERENCE_API_KEY",
         savedCredential: {
-          key: "NVIDIA_API_KEY",
+          key: "NVIDIA_INFERENCE_API_KEY",
           value: "nvapi-test-key-for-rebuild",
         },
       });
@@ -384,9 +384,9 @@ describe("Issue #2273: atomic rebuild", () => {
       const f = createFixture({
         agent: "hermes",
         messagingPlanChannels: ["discord"],
-        credentialEnv: "NVIDIA_API_KEY",
+        credentialEnv: "NVIDIA_INFERENCE_API_KEY",
         savedCredential: {
-          key: "NVIDIA_API_KEY",
+          key: "NVIDIA_INFERENCE_API_KEY",
           value: "nvapi-test-key-for-rebuild",
         },
       });
@@ -409,9 +409,9 @@ describe("Issue #2273: atomic rebuild", () => {
     }, () => {
       const f = createFixture({
         agent: "hermes",
-        credentialEnv: "NVIDIA_API_KEY",
+        credentialEnv: "NVIDIA_INFERENCE_API_KEY",
         savedCredential: {
-          key: "NVIDIA_API_KEY",
+          key: "NVIDIA_INFERENCE_API_KEY",
           value: "nvapi-test-key-for-rebuild",
         },
         dockerBuildExitCode: 23,
@@ -541,24 +541,24 @@ describe("Issue #2273: atomic rebuild", () => {
       expect(output).toContain("Backing up sandbox state");
     });
 
-    it("uses the registered nvidia-prod provider in OpenShell instead of requiring NVIDIA_API_KEY", {
+    it("uses the registered nvidia-prod provider in OpenShell instead of requiring NVIDIA_INFERENCE_API_KEY", {
       timeout: 60_000,
     }, () => {
       // After `nemohermes channels add wechat` the rebuild preflight used to
-      // abort because NVIDIA_API_KEY was not set in the environment, even
+      // abort because NVIDIA_INFERENCE_API_KEY was not set in the environment, even
       // though `nvidia-prod` was already registered in the OpenShell
       // gateway. Reuse the gateway-stored credential instead.
       const f = createFixture({
         provider: "nvidia-prod",
-        credentialEnv: "NVIDIA_API_KEY",
+        credentialEnv: "NVIDIA_INFERENCE_API_KEY",
         providerRegistered: true,
-        // no savedCredential — host env has no NVIDIA_API_KEY
+        // no savedCredential — host env has no NVIDIA_INFERENCE_API_KEY
       });
 
       const result = runRebuild(f);
       const output = (result.stderr || "") + (result.stdout || "");
 
-      expect(output).not.toContain("Missing credential: NVIDIA_API_KEY");
+      expect(output).not.toContain("Missing credential: NVIDIA_INFERENCE_API_KEY");
       expect(output).not.toContain("provider credential not found");
       expect(output).toContain("Backing up sandbox state");
     });
@@ -571,7 +571,7 @@ describe("Issue #2273: atomic rebuild", () => {
       // empty, the preflight must still bail so the sandbox is preserved.
       const f = createFixture({
         provider: "nvidia-prod",
-        credentialEnv: "NVIDIA_API_KEY",
+        credentialEnv: "NVIDIA_INFERENCE_API_KEY",
         providerRegistered: false,
       });
 
@@ -580,7 +580,7 @@ describe("Issue #2273: atomic rebuild", () => {
 
       expect(result.status).not.toBe(0);
       expect(output).toContain("preflight failed");
-      expect(output).toContain("NVIDIA_API_KEY");
+      expect(output).toContain("NVIDIA_INFERENCE_API_KEY");
       expect(output).toContain("untouched");
       expect(registryHasSandbox(f)).toBe(true);
     });
@@ -617,9 +617,9 @@ describe("Issue #2273: atomic rebuild", () => {
       // The key thing: rebuild should catch the failure and print
       // recovery instructions instead of silently exiting.
       const f = createFixture({
-        credentialEnv: "NVIDIA_API_KEY",
+        credentialEnv: "NVIDIA_INFERENCE_API_KEY",
         savedCredential: {
-          key: "NVIDIA_API_KEY",
+          key: "NVIDIA_INFERENCE_API_KEY",
           value: "nvapi-test-key-for-rebuild",
         },
         // Force provider_selection to re-run (not resume) so onboard
@@ -649,7 +649,7 @@ describe("Issue #2273: atomic rebuild", () => {
       // observable CLI behavior — the preflight check fails and bail()
       // calls process.exit with a non-zero code.
       const f = createFixture({
-        credentialEnv: "NVIDIA_API_KEY",
+        credentialEnv: "NVIDIA_INFERENCE_API_KEY",
         providerRegistered: false,
         // No credential — preflight will fail and exit non-zero
       });
diff --git a/test/rebuild-shields-auto-unlock.test.ts b/test/rebuild-shields-auto-unlock.test.ts
index 13d68ec242..cb510657aa 100644
--- a/test/rebuild-shields-auto-unlock.test.ts
+++ b/test/rebuild-shields-auto-unlock.test.ts
@@ -98,7 +98,7 @@ function createFixture(opts: { shieldsLocked: boolean }) {
 
   fs.writeFileSync(
     path.join(nemoclawDir, "credentials.json"),
-    JSON.stringify({ NVIDIA_API_KEY: "nvapi-test" }),
+    JSON.stringify({ NVIDIA_INFERENCE_API_KEY: "nvapi-test" }),
     { mode: 0o600 },
   );
 
@@ -120,7 +120,7 @@ function createFixture(opts: { shieldsLocked: boolean }) {
       provider: "nvidia-prod",
       model: "meta/llama-3.3-70b-instruct",
       endpointUrl: null,
-      credentialEnv: "NVIDIA_API_KEY",
+      credentialEnv: "NVIDIA_INFERENCE_API_KEY",
       preferredInferenceApi: null,
       nimContainer: null,
       webSearchConfig: null,
diff --git a/test/rebuild-stale-recovery.test.ts b/test/rebuild-stale-recovery.test.ts
index 0de4bbed97..945d6528db 100644
--- a/test/rebuild-stale-recovery.test.ts
+++ b/test/rebuild-stale-recovery.test.ts
@@ -62,7 +62,7 @@ function createStaleFixture(
   } = opts;
   const sandboxName = "my-assistant";
   const provider = "nvidia-prod";
-  const credentialEnv = "NVIDIA_API_KEY";
+  const credentialEnv = "NVIDIA_INFERENCE_API_KEY";
 
   const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-4497-"));
   tmpFixtures.push(tmpDir);
diff --git a/test/regression-e2e-workflow.test.ts b/test/regression-e2e-workflow.test.ts
index 2019378cfb..87b8233402 100644
--- a/test/regression-e2e-workflow.test.ts
+++ b/test/regression-e2e-workflow.test.ts
@@ -81,7 +81,10 @@ describe("Regression E2E workflow contract", () => {
     expect(setupNodeStep?.uses).toMatch(FULL_SHA_ACTION);
     expect(runVitestStep?.env?.NEMOCLAW_RUN_E2E_SCENARIOS).toBe("1");
     for (const step of steps) {
-      expect(step.env?.NVIDIA_API_KEY, step.name ?? step.uses ?? "<unnamed>").toBeUndefined();
+      expect(
+        step.env?.NVIDIA_INFERENCE_API_KEY,
+        step.name ?? step.uses ?? "<unnamed>",
+      ).toBeUndefined();
     }
 
     expect(runText).toContain("test/e2e-scenario/live/openclaw-plugin-runtime-exdev.test.ts");
diff --git a/test/runner.test.ts b/test/runner.test.ts
index 3d699e2e30..0c6888594e 100644
--- a/test/runner.test.ts
+++ b/test/runner.test.ts
@@ -366,8 +366,10 @@ describe("redact", () => {
 
   it("masks key assignments in commands", () => {
     const { redact } = require(runnerPath);
-    expect(redact("export NVIDIA_API_KEY=nvapi-realkey12345")).toContain("nvap");
-    expect(redact("export NVIDIA_API_KEY=nvapi-realkey12345")).not.toContain("realkey12345");
+    expect(redact("export NVIDIA_INFERENCE_API_KEY=nvapi-realkey12345")).toContain("nvap");
+    expect(redact("export NVIDIA_INFERENCE_API_KEY=nvapi-realkey12345")).not.toContain(
+      "realkey12345",
+    );
   });
 
   it("masks variables ending in _KEY", () => {
@@ -632,7 +634,7 @@ describe("regression guards", () => {
   });
 
   describe("credential exposure guards (#429)", () => {
-    it("walkthrough.sh does not embed NVIDIA_API_KEY in tmux or sandbox commands", () => {
+    it("walkthrough.sh does not embed NVIDIA_INFERENCE_API_KEY in tmux or sandbox commands", () => {
       const fs = require("fs");
       const src = fs.readFileSync(
         path.join(import.meta.dirname, "..", "scripts", "walkthrough.sh"),
@@ -648,7 +650,7 @@ describe("regression guards", () => {
             (l.includes("tmux") || l.includes("openshell sandbox connect")),
         );
       for (const line of cmdLines) {
-        expect(line.includes("NVIDIA_API_KEY")).toBe(false);
+        expect(line.includes("NVIDIA_INFERENCE_API_KEY")).toBe(false);
       }
     });
 
diff --git a/test/secret-redaction.test.ts b/test/secret-redaction.test.ts
index 54994db83f..775fd1f355 100644
--- a/test/secret-redaction.test.ts
+++ b/test/secret-redaction.test.ts
@@ -60,7 +60,7 @@ describe("secret redaction consistency (#1736)", () => {
 
   describe("redactor consistency (#2381)", () => {
     it("runner and debug redactors both mask shared token patterns", () => {
-      const text = "provider failed with NVIDIA_API_KEY=nvapi-" + "a".repeat(30);
+      const text = "provider failed with NVIDIA_INFERENCE_API_KEY=nvapi-" + "a".repeat(30);
       expect(runnerRedact(text)).not.toContain("nvapi-");
       expect(debugRedact(text)).not.toContain("nvapi-");
     });
@@ -73,7 +73,7 @@ describe("secret redaction consistency (#1736)", () => {
       mkdirSync(fakeBin);
       writeFileSync(
         join(fakeBin, "date"),
-        "#!/bin/sh\necho NVIDIA_API_KEY=nvapi-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n",
+        "#!/bin/sh\necho NVIDIA_INFERENCE_API_KEY=nvapi-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n",
         { mode: 0o755 },
       );
       try {
@@ -92,7 +92,7 @@ describe("secret redaction consistency (#1736)", () => {
           },
         );
         expect(result.status).toBe(0);
-        expect(result.stdout).toContain("NVIDIA_API_KEY=<REDACTED>");
+        expect(result.stdout).toContain("NVIDIA_INFERENCE_API_KEY=<REDACTED>");
         expect(result.stdout).not.toContain("nvapi-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
       } finally {
         rmSync(tmp, { recursive: true, force: true });
diff --git a/test/smoke-macos-install.test.ts b/test/smoke-macos-install.test.ts
index 0236c08055..0a38a9ff29 100644
--- a/test/smoke-macos-install.test.ts
+++ b/test/smoke-macos-install.test.ts
@@ -18,22 +18,22 @@ describe.skip("macOS smoke install script guardrails", () => {
     expect(result.stdout).toMatch(/Usage: \.\/scripts\/smoke-macos-install\.sh/);
   });
 
-  it("requires NVIDIA_API_KEY", () => {
+  it("requires NVIDIA_INFERENCE_API_KEY", () => {
     const result = spawnSync("bash", [SMOKE_SCRIPT], {
       cwd: path.join(import.meta.dirname, ".."),
       encoding: "utf-8",
-      env: { ...process.env, NVIDIA_API_KEY: "" },
+      env: { ...process.env, NVIDIA_INFERENCE_API_KEY: "" },
     });
 
     expect(result.status).not.toBe(0);
-    expect(`${result.stdout}${result.stderr}`).toMatch(/NVIDIA_API_KEY must be set/);
+    expect(`${result.stdout}${result.stderr}`).toMatch(/NVIDIA_INFERENCE_API_KEY must be set/);
   });
 
   it("rejects invalid sandbox names", () => {
     const result = spawnSync("bash", [SMOKE_SCRIPT, "--sandbox-name", "Bad Name"], {
       cwd: path.join(import.meta.dirname, ".."),
       encoding: "utf-8",
-      env: { ...process.env, NVIDIA_API_KEY: "nvapi-test" },
+      env: { ...process.env, NVIDIA_INFERENCE_API_KEY: "nvapi-test" },
     });
 
     expect(result.status).not.toBe(0);
@@ -44,7 +44,7 @@ describe.skip("macOS smoke install script guardrails", () => {
     const result = spawnSync("bash", [SMOKE_SCRIPT, "--runtime", "lxc"], {
       cwd: path.join(import.meta.dirname, ".."),
       encoding: "utf-8",
-      env: { ...process.env, NVIDIA_API_KEY: "nvapi-test" },
+      env: { ...process.env, NVIDIA_INFERENCE_API_KEY: "nvapi-test" },
     });
 
     expect(result.status).not.toBe(0);
@@ -57,7 +57,7 @@ describe.skip("macOS smoke install script guardrails", () => {
       encoding: "utf-8",
       env: {
         ...process.env,
-        NVIDIA_API_KEY: "nvapi-test",
+        NVIDIA_INFERENCE_API_KEY: "nvapi-test",
         HOME: "/tmp/nemoclaw-smoke-no-runtime",
       },
     });
@@ -72,7 +72,7 @@ describe.skip("macOS smoke install script guardrails", () => {
       encoding: "utf-8",
       env: {
         ...process.env,
-        NVIDIA_API_KEY: "nvapi-test",
+        NVIDIA_INFERENCE_API_KEY: "nvapi-test",
         HOME: "/tmp/nemoclaw-smoke-no-runtime",
       },
     });
@@ -105,7 +105,7 @@ describe.skip("macOS smoke install script guardrails", () => {
     const result = spawnSync("bash", ["--noprofile", "--norc", "-c", script], {
       cwd: path.join(import.meta.dirname, ".."),
       encoding: "utf-8",
-      env: { ...process.env, NVIDIA_API_KEY: "nvapi-test" },
+      env: { ...process.env, NVIDIA_INFERENCE_API_KEY: "nvapi-test" },
       timeout: 10_000,
     });
 
diff --git a/test/validate-blueprint.test.ts b/test/validate-blueprint.test.ts
index d2b94405ff..e4d9f1a49b 100644
--- a/test/validate-blueprint.test.ts
+++ b/test/validate-blueprint.test.ts
@@ -205,9 +205,9 @@ describe("blueprint.yaml", () => {
 describe("Model Router pool config", () => {
   const pool = loadYaml<RouterPoolConfig>(ROUTER_POOL_CONFIG_PATH);
 
-  it("regression #3255: routes NVIDIA API keys to the public NVIDIA Build endpoint", () => {
+  it("regression #3255: routes NVIDIA API keys to the public NVIDIA inference endpoint", () => {
     const apiBases = new Set((pool.models ?? []).map((model) => model.api_base));
-    expect(apiBases).toEqual(new Set(["https://integrate.api.nvidia.com/v1"]));
+    expect(apiBases).toEqual(new Set(["https://inference-api.nvidia.com/v1"]));
   });
 
   it("regression #3255: uses valid LiteLLM NVIDIA model identifiers", () => {
@@ -280,18 +280,17 @@ describe("base sandbox policy", () => {
     expect(violations).toEqual([]);
   });
 
-  it("allows NVIDIA embeddings on both NVIDIA inference hosts", () => {
+  it("allows NVIDIA embeddings on the NVIDIA inference host", () => {
     const np = policy.network_policies ?? {};
     const endpoints = np.nvidia?.endpoints;
     const missingHosts: string[] = [];
-    for (const host of ["integrate.api.nvidia.com", "inference-api.nvidia.com"]) {
-      const endpoint = endpoints?.find((entry) => entry.host === host);
-      const hasEmbeddingsRule = endpoint?.rules?.some(
-        (rule) => rule.allow?.method === "POST" && rule.allow?.path === "/v1/embeddings",
-      );
-      if (!hasEmbeddingsRule) {
-        missingHosts.push(host);
-      }
+    const host = "inference-api.nvidia.com";
+    const endpoint = endpoints?.find((entry) => entry.host === host);
+    const hasEmbeddingsRule = endpoint?.rules?.some(
+      (rule) => rule.allow?.method === "POST" && rule.allow?.path === "/v1/embeddings",
+    );
+    if (!hasEmbeddingsRule) {
+      missingHosts.push(host);
     }
     expect(missingHosts).toEqual([]);
   });
diff --git a/test/validate-config-schemas.test.ts b/test/validate-config-schemas.test.ts
index 85ee98dcbc..de805df182 100644
--- a/test/validate-config-schemas.test.ts
+++ b/test/validate-config-schemas.test.ts
@@ -241,7 +241,7 @@ describe("router-pool-config.schema.json", () => {
     const firstModel = asRecord(Array.isArray(root.models) ? root.models[0] : undefined);
     const bad = {
       ...root,
-      models: [{ ...firstModel, api_base: "http://integrate.api.nvidia.com/v1" }],
+      models: [{ ...firstModel, api_base: "http://inference-api.nvidia.com/v1" }],
     };
     expect(validate(bad)).toBe(false);
   });
diff --git a/test/validate-configs-dangerous-hosts.test.ts b/test/validate-configs-dangerous-hosts.test.ts
index 42d1e38887..2c47577f78 100644
--- a/test/validate-configs-dangerous-hosts.test.ts
+++ b/test/validate-configs-dangerous-hosts.test.ts
@@ -59,14 +59,14 @@ describe("findDangerousRouterApiBases", () => {
   it("allows the public NVIDIA Build endpoint", () => {
     expect(
       findDangerousRouterApiBases({
-        models: [{ api_base: "https://integrate.api.nvidia.com/v1" }],
+        models: [{ api_base: "https://inference-api.nvidia.com/v1" }],
       }),
     ).toEqual([]);
-    expect(ROUTER_API_BASE_HOST_ALLOWLIST.has("integrate.api.nvidia.com")).toBe(true);
+    expect(ROUTER_API_BASE_HOST_ALLOWLIST.has("inference-api.nvidia.com")).toBe(true);
   });
 
   it.each([
-    "http://integrate.api.nvidia.com/v1",
+    "http://inference-api.nvidia.com/v1",
     "https://localhost/v1",
     "https://127.0.0.1/v1",
     "https://10.0.0.5/v1",
diff --git a/tools/e2e-scenarios/workflow-boundary.mts b/tools/e2e-scenarios/workflow-boundary.mts
index 9e55fc3ff3..49aed9cea3 100644
--- a/tools/e2e-scenarios/workflow-boundary.mts
+++ b/tools/e2e-scenarios/workflow-boundary.mts
@@ -420,7 +420,7 @@ function validateOpenShellVersionPinVitestJob(errors: string[], jobs: WorkflowRe
       "openshell-version-pin-vitest job must write artifacts under e2e-artifacts/vitest/openshell-version-pin",
     );
   }
-  requireEnvDoesNotExposeSecret(errors, "openshell-version-pin-vitest job", jobEnv, "NVIDIA_API_KEY");
+  requireEnvDoesNotExposeSecret(errors, "openshell-version-pin-vitest job", jobEnv, "NVIDIA_INFERENCE_API_KEY");
 
   const steps = asSteps(job.steps);
   requireNoDispatchInputInterpolation(errors, steps);
@@ -429,7 +429,7 @@ function validateOpenShellVersionPinVitestJob(errors: string[], jobs: WorkflowRe
       errors,
       `openshell-version-pin-vitest step '${step.name ?? step.uses ?? "<unnamed>"}'`,
       asRecord(step.env),
-      "NVIDIA_API_KEY",
+      "NVIDIA_INFERENCE_API_KEY",
     );
   }
 
@@ -499,7 +499,7 @@ function validateSkillAgentVitestJob(errors: string[], jobs: WorkflowRecord): vo
   if (!stringValue(jobEnv.NEMOCLAW_CLI_BIN).includes("bin/nemoclaw.js")) {
     errors.push("skill-agent-vitest job must point NEMOCLAW_CLI_BIN at the repo CLI");
   }
-  requireEnvDoesNotExposeSecret(errors, "skill-agent-vitest job", jobEnv, "NVIDIA_API_KEY");
+  requireEnvDoesNotExposeSecret(errors, "skill-agent-vitest job", jobEnv, "NVIDIA_INFERENCE_API_KEY");
 
   const steps = asSteps(job.steps);
   requireNoDispatchInputInterpolation(errors, steps);
@@ -509,7 +509,7 @@ function validateSkillAgentVitestJob(errors: string[], jobs: WorkflowRecord): vo
         errors,
         `skill-agent-vitest step '${step.name ?? step.uses ?? "<unnamed>"}'`,
         asRecord(step.env),
-        "NVIDIA_API_KEY",
+        "NVIDIA_INFERENCE_API_KEY",
       );
     }
   }
@@ -536,8 +536,8 @@ function validateSkillAgentVitestJob(errors: string[], jobs: WorkflowRecord): vo
 
   const runVitest = requireJobStep(errors, jobName, steps, "Run skill-agent live test");
   const runEnv = asRecord(runVitest?.env);
-  if (runEnv.NVIDIA_API_KEY !== "${{ secrets.NVIDIA_API_KEY }}") {
-    errors.push("skill-agent-vitest run step must receive NVIDIA_API_KEY from secrets");
+  if (runEnv.NVIDIA_INFERENCE_API_KEY !== "${{ secrets.NVIDIA_INFERENCE_API_KEY }}") {
+    errors.push("skill-agent-vitest run step must receive NVIDIA_INFERENCE_API_KEY from secrets");
   }
   requireRunContains(errors, runVitest, 'export PATH="$HOME/.local/bin:$HOME/.npm-global/bin:$PATH"');
   requireRunContains(errors, runVitest, 'OPENSHELL_BIN="$(command -v openshell)"');
@@ -612,7 +612,7 @@ function validateNetworkPolicyVitestJob(errors: string[], jobs: WorkflowRecord):
   if (jobEnv.OPENSHELL_GATEWAY !== "nemoclaw") {
     errors.push("network-policy-vitest job must force OPENSHELL_GATEWAY=nemoclaw");
   }
-  for (const secret of ["NVIDIA_API_KEY", "DOCKERHUB_USERNAME", "DOCKERHUB_TOKEN", "GITHUB_TOKEN"]) {
+  for (const secret of ["NVIDIA_INFERENCE_API_KEY", "DOCKERHUB_USERNAME", "DOCKERHUB_TOKEN", "GITHUB_TOKEN"]) {
     requireEnvDoesNotExposeSecret(errors, "network-policy-vitest job", jobEnv, secret);
   }
 
@@ -626,7 +626,7 @@ function validateNetworkPolicyVitestJob(errors: string[], jobs: WorkflowRecord):
         errors,
         `network-policy-vitest step '${stepName}'`,
         stepEnv,
-        "NVIDIA_API_KEY",
+        "NVIDIA_INFERENCE_API_KEY",
       );
     }
     if (step.name !== "Authenticate to Docker Hub") {
@@ -682,13 +682,13 @@ function validateNetworkPolicyVitestJob(errors: string[], jobs: WorkflowRecord):
   requireRunContains(errors, installOpenShell, "env -u DOCKER_CONFIG");
   requireRunContains(errors, installOpenShell, "-u DOCKERHUB_USERNAME");
   requireRunContains(errors, installOpenShell, "-u DOCKERHUB_TOKEN");
-  requireRunContains(errors, installOpenShell, "-u NVIDIA_API_KEY");
+  requireRunContains(errors, installOpenShell, "-u NVIDIA_INFERENCE_API_KEY");
   requireRunContains(errors, installOpenShell, "-u GITHUB_TOKEN");
 
   const runVitest = requireJobStep(errors, jobName, steps, "Run network-policy live test");
   const runVitestEnv = asRecord(runVitest?.env);
-  if (runVitestEnv.NVIDIA_API_KEY !== "${{ secrets.NVIDIA_API_KEY }}") {
-    errors.push("network-policy-vitest Vitest step must receive NVIDIA_API_KEY from secrets");
+  if (runVitestEnv.NVIDIA_INFERENCE_API_KEY !== "${{ secrets.NVIDIA_INFERENCE_API_KEY }}") {
+    errors.push("network-policy-vitest Vitest step must receive NVIDIA_INFERENCE_API_KEY from secrets");
   }
   requireRunContains(errors, runVitest, "npx vitest run --project e2e-scenarios-live");
   requireRunContains(errors, runVitest, "test/e2e-scenario/live/network-policy.test.ts");
@@ -747,7 +747,7 @@ function validateShieldsConfigVitestJob(errors: string[], jobs: WorkflowRecord):
   if (jobEnv.NEMOCLAW_SANDBOX_NAME !== "e2e-shields") {
     errors.push("shields-config-vitest job must set NEMOCLAW_SANDBOX_NAME=e2e-shields");
   }
-  requireEnvDoesNotExposeSecret(errors, "shields-config-vitest job", jobEnv, "NVIDIA_API_KEY");
+  requireEnvDoesNotExposeSecret(errors, "shields-config-vitest job", jobEnv, "NVIDIA_INFERENCE_API_KEY");
   requireEnvDoesNotExposeSecret(errors, "shields-config-vitest job", jobEnv, "DOCKERHUB_USERNAME");
   requireEnvDoesNotExposeSecret(errors, "shields-config-vitest job", jobEnv, "DOCKERHUB_TOKEN");
 
@@ -761,7 +761,7 @@ function validateShieldsConfigVitestJob(errors: string[], jobs: WorkflowRecord):
         errors,
         `shields-config-vitest step '${stepName}'`,
         stepEnv,
-        "NVIDIA_API_KEY",
+        "NVIDIA_INFERENCE_API_KEY",
       );
     }
     if (step.name !== "Authenticate to Docker Hub") {
@@ -806,8 +806,8 @@ function validateShieldsConfigVitestJob(errors: string[], jobs: WorkflowRecord):
 
   const runVitest = requireJobStep(errors, jobName, steps, "Run shields-config live test");
   const runVitestEnv = asRecord(runVitest?.env);
-  if (runVitestEnv.NVIDIA_API_KEY !== "${{ secrets.NVIDIA_API_KEY }}") {
-    errors.push("shields-config-vitest step must receive NVIDIA_API_KEY from secrets");
+  if (runVitestEnv.NVIDIA_INFERENCE_API_KEY !== "${{ secrets.NVIDIA_INFERENCE_API_KEY }}") {
+    errors.push("shields-config-vitest step must receive NVIDIA_INFERENCE_API_KEY from secrets");
   }
   requireRunContains(errors, runVitest, "npx vitest run --project e2e-scenarios-live");
   requireRunContains(errors, runVitest, "test/e2e-scenario/live/shields-config.test.ts");
@@ -857,7 +857,7 @@ function validateRebuildOpenClawVitestJob(errors: string[], jobs: WorkflowRecord
   if (!stringValue(jobEnv.NEMOCLAW_CLI_BIN).includes("bin/nemoclaw.js")) {
     errors.push("rebuild-openclaw-vitest job must point NEMOCLAW_CLI_BIN at the repo CLI");
   }
-  requireEnvDoesNotExposeSecret(errors, "rebuild-openclaw-vitest job", jobEnv, "NVIDIA_API_KEY");
+  requireEnvDoesNotExposeSecret(errors, "rebuild-openclaw-vitest job", jobEnv, "NVIDIA_INFERENCE_API_KEY");
 
   const steps = asSteps(job.steps);
   requireNoDispatchInputInterpolation(errors, steps);
@@ -867,7 +867,7 @@ function validateRebuildOpenClawVitestJob(errors: string[], jobs: WorkflowRecord
         errors,
         `rebuild-openclaw-vitest step '${step.name ?? step.uses ?? "<unnamed>"}'`,
         asRecord(step.env),
-        "NVIDIA_API_KEY",
+        "NVIDIA_INFERENCE_API_KEY",
       );
     }
   }
@@ -905,13 +905,13 @@ function validateRebuildOpenClawVitestJob(errors: string[], jobs: WorkflowRecord
   requireRunContains(errors, installOpenShell, "env -u DOCKER_CONFIG");
   requireRunContains(errors, installOpenShell, "-u DOCKERHUB_USERNAME");
   requireRunContains(errors, installOpenShell, "-u DOCKERHUB_TOKEN");
-  requireRunContains(errors, installOpenShell, "-u NVIDIA_API_KEY");
+  requireRunContains(errors, installOpenShell, "-u NVIDIA_INFERENCE_API_KEY");
   requireRunContains(errors, installOpenShell, "-u GITHUB_TOKEN");
 
   const runVitest = requireJobStep(errors, jobName, steps, "Run OpenClaw rebuild live test");
   const runVitestEnv = asRecord(runVitest?.env);
-  if (runVitestEnv.NVIDIA_API_KEY !== "${{ secrets.NVIDIA_API_KEY }}") {
-    errors.push("rebuild-openclaw-vitest step must receive NVIDIA_API_KEY from secrets");
+  if (runVitestEnv.NVIDIA_INFERENCE_API_KEY !== "${{ secrets.NVIDIA_INFERENCE_API_KEY }}") {
+    errors.push("rebuild-openclaw-vitest step must receive NVIDIA_INFERENCE_API_KEY from secrets");
   }
   requireRunContains(errors, runVitest, "OPENSHELL_BIN");
   requireRunContains(errors, runVitest, "npx vitest run --project e2e-scenarios-live");
@@ -965,7 +965,7 @@ function validateSandboxRebuildVitestJob(errors: string[], jobs: WorkflowRecord)
   if (jobEnv.OPENSHELL_GATEWAY !== "nemoclaw") {
     errors.push("sandbox-rebuild-vitest job must force OPENSHELL_GATEWAY=nemoclaw");
   }
-  for (const secret of ["NVIDIA_API_KEY", "DOCKERHUB_USERNAME", "DOCKERHUB_TOKEN", "GITHUB_TOKEN"]) {
+  for (const secret of ["NVIDIA_INFERENCE_API_KEY", "DOCKERHUB_USERNAME", "DOCKERHUB_TOKEN", "GITHUB_TOKEN"]) {
     requireEnvDoesNotExposeSecret(errors, "sandbox-rebuild-vitest job", jobEnv, secret);
   }
 
@@ -975,7 +975,7 @@ function validateSandboxRebuildVitestJob(errors: string[], jobs: WorkflowRecord)
     const stepName = `sandbox-rebuild-vitest step '${step.name ?? step.uses ?? "<unnamed>"}'`;
     const stepEnv = asRecord(step.env);
     if (step.name !== "Run sandbox rebuild live test") {
-      requireEnvDoesNotExposeSecret(errors, stepName, stepEnv, "NVIDIA_API_KEY");
+      requireEnvDoesNotExposeSecret(errors, stepName, stepEnv, "NVIDIA_INFERENCE_API_KEY");
     }
     if (step.name !== "Authenticate to Docker Hub") {
       requireEnvDoesNotExposeSecret(errors, stepName, stepEnv, "DOCKERHUB_USERNAME");
@@ -1018,13 +1018,13 @@ function validateSandboxRebuildVitestJob(errors: string[], jobs: WorkflowRecord)
   requireRunContains(errors, installOpenShell, "env -u DOCKER_CONFIG");
   requireRunContains(errors, installOpenShell, "-u DOCKERHUB_USERNAME");
   requireRunContains(errors, installOpenShell, "-u DOCKERHUB_TOKEN");
-  requireRunContains(errors, installOpenShell, "-u NVIDIA_API_KEY");
+  requireRunContains(errors, installOpenShell, "-u NVIDIA_INFERENCE_API_KEY");
   requireRunContains(errors, installOpenShell, "-u GITHUB_TOKEN");
 
   const runVitest = requireJobStep(errors, jobName, steps, "Run sandbox rebuild live test");
   const runVitestEnv = asRecord(runVitest?.env);
-  if (runVitestEnv.NVIDIA_API_KEY !== "${{ secrets.NVIDIA_API_KEY }}") {
-    errors.push("sandbox-rebuild-vitest step must receive NVIDIA_API_KEY from secrets");
+  if (runVitestEnv.NVIDIA_INFERENCE_API_KEY !== "${{ secrets.NVIDIA_INFERENCE_API_KEY }}") {
+    errors.push("sandbox-rebuild-vitest step must receive NVIDIA_INFERENCE_API_KEY from secrets");
   }
   requireRunContains(errors, runVitest, "OPENSHELL_BIN");
   requireRunContains(errors, runVitest, "npx vitest run --project e2e-scenarios-live");
@@ -1074,7 +1074,7 @@ function validateTokenRotationVitestJob(errors: string[], jobs: WorkflowRecord):
   if (!stringValue(jobEnv.NEMOCLAW_CLI_BIN).includes("bin/nemoclaw.js")) {
     errors.push("token-rotation-vitest job must point NEMOCLAW_CLI_BIN at the repo CLI");
   }
-  requireEnvDoesNotExposeSecret(errors, "token-rotation-vitest job", jobEnv, "NVIDIA_API_KEY");
+  requireEnvDoesNotExposeSecret(errors, "token-rotation-vitest job", jobEnv, "NVIDIA_INFERENCE_API_KEY");
 
   const steps = asSteps(job.steps);
   requireNoDispatchInputInterpolation(errors, steps);
@@ -1084,7 +1084,7 @@ function validateTokenRotationVitestJob(errors: string[], jobs: WorkflowRecord):
         errors,
         `token-rotation-vitest step '${step.name ?? step.uses ?? "<unnamed>"}'`,
         asRecord(step.env),
-        "NVIDIA_API_KEY",
+        "NVIDIA_INFERENCE_API_KEY",
       );
     }
   }
@@ -1122,7 +1122,7 @@ function validateTokenRotationVitestJob(errors: string[], jobs: WorkflowRecord):
     errors,
     "token-rotation-vitest step",
     runVitestEnv,
-    "NVIDIA_API_KEY",
+    "NVIDIA_INFERENCE_API_KEY",
   );
   if (runVitestEnv.GITHUB_TOKEN !== "${{ github.token }}") {
     errors.push("token-rotation-vitest step must receive GITHUB_TOKEN from github.token");
@@ -1193,7 +1193,7 @@ function validateOnboardNegativePathsVitestJob(errors: string[], jobs: WorkflowR
       "onboard-negative-paths-vitest job must write artifacts under e2e-artifacts/vitest/onboard-negative-paths",
     );
   }
-  requireEnvDoesNotExposeSecret(errors, "onboard-negative-paths-vitest job", jobEnv, "NVIDIA_API_KEY");
+  requireEnvDoesNotExposeSecret(errors, "onboard-negative-paths-vitest job", jobEnv, "NVIDIA_INFERENCE_API_KEY");
 
   const steps = asSteps(job.steps);
   requireNoDispatchInputInterpolation(errors, steps);
@@ -1202,7 +1202,7 @@ function validateOnboardNegativePathsVitestJob(errors: string[], jobs: WorkflowR
       errors,
       `onboard-negative-paths-vitest step '${step.name ?? step.uses ?? "<unnamed>"}'`,
       asRecord(step.env),
-      "NVIDIA_API_KEY",
+      "NVIDIA_INFERENCE_API_KEY",
     );
   }
 
@@ -1292,7 +1292,7 @@ function validateDoubleOnboardVitestJob(errors: string[], jobs: WorkflowRecord):
       "double-onboard-vitest job must write artifacts under e2e-artifacts/vitest/double-onboard",
     );
   }
-  requireEnvDoesNotExposeSecret(errors, "double-onboard-vitest job", jobEnv, "NVIDIA_API_KEY");
+  requireEnvDoesNotExposeSecret(errors, "double-onboard-vitest job", jobEnv, "NVIDIA_INFERENCE_API_KEY");
   requireEnvDoesNotExposeSecret(errors, "double-onboard-vitest job", jobEnv, "DOCKERHUB_TOKEN");
 
   const steps = asSteps(job.steps);
@@ -1310,7 +1310,7 @@ function validateDoubleOnboardVitestJob(errors: string[], jobs: WorkflowRecord):
       errors,
       `double-onboard-vitest step '${step.name ?? step.uses ?? "<unnamed>"}'`,
       asRecord(step.env),
-      "NVIDIA_API_KEY",
+      "NVIDIA_INFERENCE_API_KEY",
     );
   }
 
@@ -1392,7 +1392,7 @@ function validateDoubleOnboardVitestJob(errors: string[], jobs: WorkflowRecord):
   if (jobEnv.E2E_ARTIFACT_DIR !== "${{ github.workspace }}/e2e-artifacts/vitest/runtime-overrides") {
     errors.push("runtime-overrides-vitest job must write artifacts under e2e-artifacts/vitest/runtime-overrides");
   }
-  requireEnvDoesNotExposeSecret(errors, "runtime-overrides-vitest job", jobEnv, "NVIDIA_API_KEY");
+  requireEnvDoesNotExposeSecret(errors, "runtime-overrides-vitest job", jobEnv, "NVIDIA_INFERENCE_API_KEY");
   requireEnvDoesNotExposeSecret(errors, "runtime-overrides-vitest job", jobEnv, "DOCKERHUB_USERNAME");
   requireEnvDoesNotExposeSecret(errors, "runtime-overrides-vitest job", jobEnv, "DOCKERHUB_TOKEN");
 
@@ -1401,7 +1401,7 @@ function validateDoubleOnboardVitestJob(errors: string[], jobs: WorkflowRecord):
   for (const step of steps) {
     const stepName = `runtime-overrides-vitest step '${step.name ?? step.uses ?? "<unnamed>"}'`;
     const stepEnv = asRecord(step.env);
-    requireEnvDoesNotExposeSecret(errors, stepName, stepEnv, "NVIDIA_API_KEY");
+    requireEnvDoesNotExposeSecret(errors, stepName, stepEnv, "NVIDIA_INFERENCE_API_KEY");
     requireEnvDoesNotExposeSecret(errors, stepName, stepEnv, "DOCKERHUB_USERNAME");
     requireEnvDoesNotExposeSecret(errors, stepName, stepEnv, "DOCKERHUB_TOKEN");
     requireNoDockerHubAuthInRun(errors, stepName, stringValue(step.run));
@@ -1484,7 +1484,7 @@ function validateHermesE2EVitestJob(errors: string[], jobs: WorkflowRecord): voi
   if (jobEnv.NEMOCLAW_ONBOARD_VALIDATION_TIMEOUT_SECONDS !== "60") {
     errors.push("hermes-e2e-vitest job must give hosted endpoint validation a CI-safe timeout");
   }
-  requireEnvDoesNotExposeSecret(errors, "hermes-e2e-vitest job", jobEnv, "NVIDIA_API_KEY");
+  requireEnvDoesNotExposeSecret(errors, "hermes-e2e-vitest job", jobEnv, "NVIDIA_INFERENCE_API_KEY");
 
   const steps = asSteps(job.steps);
   requireNoDispatchInputInterpolation(errors, steps);
@@ -1494,7 +1494,7 @@ function validateHermesE2EVitestJob(errors: string[], jobs: WorkflowRecord): voi
         errors,
         `hermes-e2e-vitest step '${step.name ?? step.uses ?? "<unnamed>"}'`,
         asRecord(step.env),
-        "NVIDIA_API_KEY",
+        "NVIDIA_INFERENCE_API_KEY",
       );
     }
   }
@@ -1518,8 +1518,8 @@ function validateHermesE2EVitestJob(errors: string[], jobs: WorkflowRecord): voi
 
   const runVitest = requireJobStep(errors, jobName, steps, "Run Hermes live Vitest test");
   const runVitestEnv = asRecord(runVitest?.env);
-  if (runVitestEnv.NVIDIA_API_KEY !== "${{ secrets.NVIDIA_API_KEY }}") {
-    errors.push("hermes-e2e-vitest Vitest step must receive NVIDIA_API_KEY from secrets");
+  if (runVitestEnv.NVIDIA_INFERENCE_API_KEY !== "${{ secrets.NVIDIA_INFERENCE_API_KEY }}") {
+    errors.push("hermes-e2e-vitest Vitest step must receive NVIDIA_INFERENCE_API_KEY from secrets");
   }
   requireRunContains(errors, runVitest, "npx vitest run --project e2e-scenarios-live");
   requireRunContains(errors, runVitest, "test/e2e-scenario/live/hermes-e2e.test.ts");
@@ -1588,7 +1588,7 @@ function validateHermesRootEntrypointSmokeVitestJob(
     errors,
     "hermes-root-entrypoint-smoke-vitest job",
     jobEnv,
-    "NVIDIA_API_KEY",
+    "NVIDIA_INFERENCE_API_KEY",
   );
   requireEnvDoesNotExposeSecret(
     errors,
@@ -1612,7 +1612,7 @@ function validateHermesRootEntrypointSmokeVitestJob(
       errors,
       `hermes-root-entrypoint-smoke-vitest step '${stepName}'`,
       stepEnv,
-      "NVIDIA_API_KEY",
+      "NVIDIA_INFERENCE_API_KEY",
     );
     requireEnvDoesNotExposeSecret(
       errors,
@@ -1751,7 +1751,7 @@ function validateModelRouterProviderRoutedInferenceVitestJob(
       "model-router-provider-routed-inference-vitest job must force OPENSHELL_GATEWAY=nemoclaw",
     );
   }
-  for (const secret of ["NVIDIA_API_KEY", "DOCKERHUB_USERNAME", "DOCKERHUB_TOKEN", "GITHUB_TOKEN"]) {
+  for (const secret of ["NVIDIA_INFERENCE_API_KEY", "DOCKERHUB_USERNAME", "DOCKERHUB_TOKEN", "GITHUB_TOKEN"]) {
     requireEnvDoesNotExposeSecret(
       errors,
       "model-router-provider-routed-inference-vitest job",
@@ -1766,7 +1766,7 @@ function validateModelRouterProviderRoutedInferenceVitestJob(
     const stepName = `model-router-provider-routed-inference-vitest step '${step.name ?? step.uses ?? "<unnamed>"}'`;
     const stepEnv = asRecord(step.env);
     if (step.name !== "Run Model Router provider-routed inference live test") {
-      requireEnvDoesNotExposeSecret(errors, stepName, stepEnv, "NVIDIA_API_KEY");
+      requireEnvDoesNotExposeSecret(errors, stepName, stepEnv, "NVIDIA_INFERENCE_API_KEY");
     }
     if (step.name !== "Authenticate to Docker Hub") {
       requireEnvDoesNotExposeSecret(errors, stepName, stepEnv, "DOCKERHUB_USERNAME");
@@ -1842,9 +1842,9 @@ function validateModelRouterProviderRoutedInferenceVitestJob(
     "Run Model Router provider-routed inference live test",
   );
   const runVitestEnv = asRecord(runVitest?.env);
-  if (runVitestEnv.NVIDIA_API_KEY !== "${{ secrets.NVIDIA_API_KEY }}") {
+  if (runVitestEnv.NVIDIA_INFERENCE_API_KEY !== "${{ secrets.NVIDIA_INFERENCE_API_KEY }}") {
     errors.push(
-      "model-router-provider-routed-inference-vitest Vitest step must receive NVIDIA_API_KEY from secrets",
+      "model-router-provider-routed-inference-vitest Vitest step must receive NVIDIA_INFERENCE_API_KEY from secrets",
     );
   }
   requireRunContains(errors, runVitest, "npx vitest run --project e2e-scenarios-live");
@@ -2004,7 +2004,7 @@ export function validateE2eVitestScenariosWorkflowBoundary(
   if (!stringValue(jobEnv.NEMOCLAW_CLI_BIN).includes("bin/nemoclaw.js")) {
     errors.push("live-scenarios job must point NEMOCLAW_CLI_BIN at the repo CLI");
   }
-  requireEnvDoesNotExposeSecret(errors, "live-scenarios job", jobEnv, "NVIDIA_API_KEY");
+  requireEnvDoesNotExposeSecret(errors, "live-scenarios job", jobEnv, "NVIDIA_INFERENCE_API_KEY");
 
   const steps = asSteps(liveScenarios.steps);
   requireNoDispatchInputInterpolation(errors, steps);
@@ -2014,7 +2014,7 @@ export function validateE2eVitestScenariosWorkflowBoundary(
         errors,
         `step '${step.name ?? step.uses ?? "<unnamed>"}'`,
         asRecord(step.env),
-        "NVIDIA_API_KEY",
+        "NVIDIA_INFERENCE_API_KEY",
       );
     }
   }
@@ -2038,8 +2038,8 @@ export function validateE2eVitestScenariosWorkflowBoundary(
   if (runVitestEnv.SCENARIO_ID !== "${{ matrix.id }}") {
     errors.push("Vitest step must pass matrix.id through SCENARIO_ID env");
   }
-  if (runVitestEnv.NVIDIA_API_KEY !== "${{ secrets.NVIDIA_API_KEY }}") {
-    errors.push("Vitest step must receive NVIDIA_API_KEY from secrets");
+  if (runVitestEnv.NVIDIA_INFERENCE_API_KEY !== "${{ secrets.NVIDIA_INFERENCE_API_KEY }}") {
+    errors.push("Vitest step must receive NVIDIA_INFERENCE_API_KEY from secrets");
   }
   requireRunContains(errors, runVitest, "npx vitest run --project e2e-scenarios-live");
   requireRunContains(errors, runVitest, "test/e2e-scenario/live/registry-scenarios.test.ts");

From 81512ef40519b179624483d15b983e92b470a952 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 14:29:55 -0700
Subject: [PATCH 02/11] chore(inference): keep onboard entrypoint net neutral

Signed-off-by: Carlos Villela <cvillela@nvidia.com>
---
 src/lib/onboard.ts | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts
index 9ec40237d9..e89849e329 100644
--- a/src/lib/onboard.ts
+++ b/src/lib/onboard.ts
@@ -3646,9 +3646,8 @@ async function setupNim(
         hydrateCredentialEnv(credentialEnv);
 
         if (selected.key === "build") {
-          // Allow NEMOCLAW_PROVIDER_KEY as a fallback for NVIDIA_INFERENCE_API_KEY.
-          // Check raw process.env first — NEMOCLAW_PROVIDER_KEY is a user-facing
-          // override that should take precedence before resolving from credentials.json.
+          // Let NEMOCLAW_PROVIDER_KEY fill the canonical NVIDIA key without
+          // overriding an explicit env or migrated legacy credential.
           const _nvProviderKey = (process.env.NEMOCLAW_PROVIDER_KEY || "").trim();
           const existingNvidiaKey =
             getCredential("NVIDIA_INFERENCE_API_KEY") || getCredential("NVIDIA_API_KEY") || "";

From b1372992e1af14ac0fc583dfa35d95b7a11b6c9d Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 14:48:43 -0700
Subject: [PATCH 03/11] fix(inference): keep public NVIDIA endpoint

---
 agents/hermes/policy-additions.yaml           |  2 +-
 agents/hermes/policy-permissive.yaml          |  2 +-
 agents/openclaw/policy-permissive.yaml        |  2 +-
 docs/inference/inference-options.mdx          |  6 +--
 docs/reference/network-policies.mdx           |  2 +-
 docs/reference/troubleshooting.mdx            |  4 +-
 docs/security/best-practices.mdx              |  2 +-
 nemoclaw-blueprint/blueprint.yaml             |  2 +-
 .../policies/openclaw-sandbox-permissive.yaml |  2 +-
 .../policies/openclaw-sandbox.yaml            |  2 +-
 nemoclaw-blueprint/router/pool-config.yaml    |  4 +-
 .../scripts/nemotron-inference-fix.js         |  2 +-
 nemoclaw/src/banner.test.ts                   |  2 +-
 nemoclaw/src/blueprint/runner.test.ts         | 10 ++---
 nemoclaw/src/blueprint/ssrf.test.ts           |  2 +-
 nemoclaw/src/commands/config-show.test.ts     | 12 ++---
 scripts/validate-configs.ts                   |  2 +-
 src/lib/actions/sandbox/status.test.ts        |  4 +-
 src/lib/diagnostics/debug.ts                  |  4 +-
 src/lib/inference/model-prompts.test.ts       |  4 +-
 src/lib/inference/onboard-probes.test.ts      | 14 +++---
 src/lib/inference/provider-models.ts          |  2 +-
 src/lib/onboard/bridge-dns-preflight.ts       |  4 +-
 src/lib/onboard/docker-gpu-patch.test.ts      |  6 +--
 src/lib/onboard/host-dns-preflight.test.ts    | 44 +++++++++----------
 src/lib/onboard/initial-policy.test.ts        |  2 +-
 .../handlers/provider-inference.test.ts       |  4 +-
 src/lib/onboard/preflight.ts                  |  4 +-
 src/lib/onboard/providers.test.ts             |  8 ++--
 src/lib/onboard/providers.ts                  |  2 +-
 src/lib/security/credential-filter.test.ts    |  2 +-
 src/lib/validation.ts                         |  2 +-
 test/generate-openclaw-config.test.ts         |  4 +-
 test/nemotron-inference-fix.test.ts           |  2 +-
 test/onboard-selection.test.ts                |  4 +-
 test/openclaw-config-snapshot.test.ts         |  2 +-
 test/validate-blueprint.test.ts               |  4 +-
 test/validate-config-schemas.test.ts          |  2 +-
 test/validate-configs-dangerous-hosts.test.ts |  6 +--
 tools/advisors/session.mts                    |  2 +-
 40 files changed, 96 insertions(+), 96 deletions(-)

diff --git a/agents/hermes/policy-additions.yaml b/agents/hermes/policy-additions.yaml
index 38b24e6805..0386ddef63 100644
--- a/agents/hermes/policy-additions.yaml
+++ b/agents/hermes/policy-additions.yaml
@@ -65,7 +65,7 @@ network_policies:
   nvidia:
     name: nvidia
     endpoints:
-      - host: inference-api.nvidia.com
+      - host: integrate.api.nvidia.com
         port: 443
         protocol: rest
         enforcement: enforce
diff --git a/agents/hermes/policy-permissive.yaml b/agents/hermes/policy-permissive.yaml
index ce57a8b466..5502935633 100644
--- a/agents/hermes/policy-permissive.yaml
+++ b/agents/hermes/policy-permissive.yaml
@@ -43,7 +43,7 @@ network_policies:
   nvidia:
     name: nvidia
     endpoints:
-      - host: inference-api.nvidia.com
+      - host: integrate.api.nvidia.com
         port: 443
         protocol: rest
         enforcement: enforce
diff --git a/agents/openclaw/policy-permissive.yaml b/agents/openclaw/policy-permissive.yaml
index d7cdf6c972..18c4f7f1f5 100644
--- a/agents/openclaw/policy-permissive.yaml
+++ b/agents/openclaw/policy-permissive.yaml
@@ -43,7 +43,7 @@ network_policies:
   nvidia:
     name: nvidia
     endpoints:
-      - host: inference-api.nvidia.com
+      - host: integrate.api.nvidia.com
         port: 443
         protocol: rest
         enforcement: enforce
diff --git a/docs/inference/inference-options.mdx b/docs/inference/inference-options.mdx
index 7ad7caa7f2..9168de037e 100644
--- a/docs/inference/inference-options.mdx
+++ b/docs/inference/inference-options.mdx
@@ -80,7 +80,7 @@ NVIDIA Nemotron models expose OpenAI-compatible APIs across every supported depl
 
 | Nemotron Host | Onboard Wizard Option | Why |
 |---|---|---|
-| `build.nvidia.com` (NVIDIA-hosted) | **Option 1: NVIDIA Endpoints** | NemoClaw sets the base URL to `https://inference-api.nvidia.com/v1` for you and validates the model against the build catalog. |
+| `build.nvidia.com` (NVIDIA-hosted) | **Option 1: NVIDIA Endpoints** | NemoClaw sets the base URL to `https://integrate.api.nvidia.com/v1` for you and validates the model against the build catalog. |
 | Self-hosted NIM container | **Option 3: Other OpenAI-compatible endpoint** | NIM exposes an OpenAI-compatible `/v1/chat/completions` route. Point the base URL at your NIM service and enter the Nemotron model ID. |
 | Enterprise NVIDIA AI Enterprise gateway | **Option 3: Other OpenAI-compatible endpoint** | Enterprise gateways front Nemotron with the same OpenAI-compatible contract. Use the gateway's base URL and your enterprise token. |
 | vLLM, SGLang, or TRT-LLM serving Nemotron weights | **Option 3: Other OpenAI-compatible endpoint** | Each runtime exposes Nemotron through `/v1/chat/completions`. Use the runtime's base URL and the model ID it reports. |
@@ -109,12 +109,12 @@ models:
   - name: nano
     litellm_model: "openai/nvidia/nvidia/Nemotron-3-Nano-30B-A3B"
     cost_per_m_input_tokens: 0.05
-    api_base: "https://inference-api.nvidia.com"
+    api_base: "https://integrate.api.nvidia.com"
 
   - name: super
     litellm_model: "openai/nvidia/nvidia/nemotron-3-super-v3"
     cost_per_m_input_tokens: 0.10
-    api_base: "https://inference-api.nvidia.com"
+    api_base: "https://integrate.api.nvidia.com"
 ```
 
 The `tolerance` parameter controls the accuracy-cost tradeoff.
diff --git a/docs/reference/network-policies.mdx b/docs/reference/network-policies.mdx
index 000049a8a3..4fb8c04be4 100644
--- a/docs/reference/network-policies.mdx
+++ b/docs/reference/network-policies.mdx
@@ -41,7 +41,7 @@ The following endpoint groups are allowed by default:
 
 | Policy | Endpoints | Binaries | Rules |
 | --- | --- | --- | --- |
-| `nvidia` | `inference-api.nvidia.com:443` | `/usr/local/bin/openclaw` | POST to inference and embedding paths, GET to model listings |
+| `nvidia` | `integrate.api.nvidia.com:443` | `/usr/local/bin/openclaw` | POST to inference and embedding paths, GET to model listings |
 | `clawhub` | `clawhub.ai:443` | `/usr/local/bin/openclaw`, `/usr/local/bin/node` | GET, POST |
 | `openclaw_api` | `openclaw.ai:443` | `/usr/local/bin/openclaw`, `/usr/local/bin/node` | GET, POST |
 | `openclaw_docs` | `docs.openclaw.ai:443` | `/usr/local/bin/openclaw` | GET only |
diff --git a/docs/reference/troubleshooting.mdx b/docs/reference/troubleshooting.mdx
index 4f8bb46baf..6c5f900453 100644
--- a/docs/reference/troubleshooting.mdx
+++ b/docs/reference/troubleshooting.mdx
@@ -182,13 +182,13 @@ When the lookup returns an answer, retry onboarding.
 ### Host DNS resolution is blocked before provider validation
 
 NemoClaw also checks that the host process can resolve the provider host before it starts NVIDIA provider validation.
-A firewall rule that blocks host DNS traffic on port `53` can make later validation fail with `curl: (6) Could not resolve host: inference-api.nvidia.com` even when container DNS probes look healthy.
+A firewall rule that blocks host DNS traffic on port `53` can make later validation fail with `curl: (6) Could not resolve host: integrate.api.nvidia.com` even when container DNS probes look healthy.
 Current onboarding stops earlier with a host DNS diagnostic and remediation hints.
 
 Verify host DNS outside NemoClaw:
 
 ```bash
-node -e 'require("node:dns").resolve4("inference-api.nvidia.com", (err, addrs) => { if (err) { console.error(err); process.exit(1); } console.log(addrs.join(",")); })'
+node -e 'require("node:dns").resolve4("integrate.api.nvidia.com", (err, addrs) => { if (err) { console.error(err); process.exit(1); } console.log(addrs.join(",")); })'
 ```
 
 Fix the host firewall, VPN, or DNS policy so the host can resolve the provider endpoint, then rerun onboarding.
diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx
index 7a9e69a681..bab83df377 100644
--- a/docs/security/best-practices.mdx
+++ b/docs/security/best-practices.mdx
@@ -129,7 +129,7 @@ Endpoint rules restrict allowed HTTP methods and URL paths.
 
 | Aspect | Detail |
 |---|---|
-| Default | Some endpoints allow GET and POST on `/**` (for example, `clawhub.ai`). Others restrict methods and paths to specific API routes (for example, `inference-api.nvidia.com` allows POST only to inference and embedding paths and GET to model listings). Read-only endpoints such as `docs.openclaw.ai`, the `npm_registry` baseline entry, and the `pypi` preset allow GET only (PyPI also allows HEAD). The `npm` preset is an intentional exception: npm/Yarn registry traffic uses L4 pass-through for Node 22 undici CONNECT compatibility. |
+| Default | Some endpoints allow GET and POST on `/**` (for example, `clawhub.ai`). Others restrict methods and paths to specific API routes (for example, `integrate.api.nvidia.com` allows POST only to inference and embedding paths and GET to model listings). Read-only endpoints such as `docs.openclaw.ai`, the `npm_registry` baseline entry, and the `pypi` preset allow GET only (PyPI also allows HEAD). The `npm` preset is an intentional exception: npm/Yarn registry traffic uses L4 pass-through for Node 22 undici CONNECT compatibility. |
 | What you can change | Add methods (PUT, DELETE, PATCH) or restrict paths to specific prefixes. |
 | Risk if relaxed | Allowing all methods on an API endpoint gives the agent write and delete access. For example, allowing DELETE on `api.github.com` lets the agent delete repositories. |
 | Recommendation | Use GET-only rules for endpoints that the agent only reads. Add write methods only for endpoints where the agent must create or modify resources. Restrict paths to specific API routes when possible. |
diff --git a/nemoclaw-blueprint/blueprint.yaml b/nemoclaw-blueprint/blueprint.yaml
index c6336ab1d8..7d2437bee7 100644
--- a/nemoclaw-blueprint/blueprint.yaml
+++ b/nemoclaw-blueprint/blueprint.yaml
@@ -54,7 +54,7 @@ components:
       default:
         provider_type: "nvidia"
         provider_name: "nvidia-inference"
-        endpoint: "https://inference-api.nvidia.com/v1"
+        endpoint: "https://integrate.api.nvidia.com/v1"
         model: "nvidia/nemotron-3-super-120b-a12b"
 
       ncp:
diff --git a/nemoclaw-blueprint/policies/openclaw-sandbox-permissive.yaml b/nemoclaw-blueprint/policies/openclaw-sandbox-permissive.yaml
index 160bfaea30..b0915b2b70 100644
--- a/nemoclaw-blueprint/policies/openclaw-sandbox-permissive.yaml
+++ b/nemoclaw-blueprint/policies/openclaw-sandbox-permissive.yaml
@@ -49,7 +49,7 @@ network_policies:
   nvidia:
     name: nvidia
     endpoints:
-      - host: inference-api.nvidia.com
+      - host: integrate.api.nvidia.com
         port: 443
         protocol: rest
         enforcement: enforce
diff --git a/nemoclaw-blueprint/policies/openclaw-sandbox.yaml b/nemoclaw-blueprint/policies/openclaw-sandbox.yaml
index 5bc856cb5f..49ea154929 100644
--- a/nemoclaw-blueprint/policies/openclaw-sandbox.yaml
+++ b/nemoclaw-blueprint/policies/openclaw-sandbox.yaml
@@ -73,7 +73,7 @@ network_policies:
   nvidia:
     name: nvidia
     endpoints:
-      - host: inference-api.nvidia.com
+      - host: integrate.api.nvidia.com
         port: 443
         protocol: rest
         enforcement: enforce
diff --git a/nemoclaw-blueprint/router/pool-config.yaml b/nemoclaw-blueprint/router/pool-config.yaml
index a457f79d33..ddd9af5300 100644
--- a/nemoclaw-blueprint/router/pool-config.yaml
+++ b/nemoclaw-blueprint/router/pool-config.yaml
@@ -26,11 +26,11 @@ models:
     litellm_model: "openai/nvidia/nemotron-3-nano-30b-a3b"
     cost_per_m_input_tokens: 0.05
     cost_per_m_output_tokens: 0.20
-    api_base: "https://inference-api.nvidia.com/v1"
+    api_base: "https://integrate.api.nvidia.com/v1"
 
   - name: nemotron-3-super
     display_name: "Nemotron 3 Super 120B"
     litellm_model: "openai/nvidia/nemotron-3-super-120b-a12b"
     cost_per_m_input_tokens: 0.10
     cost_per_m_output_tokens: 0.40
-    api_base: "https://inference-api.nvidia.com/v1"
+    api_base: "https://integrate.api.nvidia.com/v1"
diff --git a/nemoclaw-blueprint/scripts/nemotron-inference-fix.js b/nemoclaw-blueprint/scripts/nemotron-inference-fix.js
index efd844eb44..f3871ce99e 100644
--- a/nemoclaw-blueprint/scripts/nemotron-inference-fix.js
+++ b/nemoclaw-blueprint/scripts/nemotron-inference-fix.js
@@ -78,7 +78,7 @@
 //   against a local OpenAI-compatible endpoint, asserting both the injected
 //   system message and the refreshed Content-Length. The runtime model-
 //   output behavior (acceptance criteria from #4851) is validated against
-//   inference-api.nvidia.com via the checked-in runbook at
+//   integrate.api.nvidia.com via the checked-in runbook at
 //   test/e2e-runtime/4851-ultra-toolless-validation.md — anyone reviewing
 //   acceptance can re-run it directly. Re-run when this preload changes
 //   or when OpenClaw bumps a version that may shift Ultra's chat template.
diff --git a/nemoclaw/src/banner.test.ts b/nemoclaw/src/banner.test.ts
index ceb896a40a..512345759e 100644
--- a/nemoclaw/src/banner.test.ts
+++ b/nemoclaw/src/banner.test.ts
@@ -10,7 +10,7 @@ describe("renderBox (plugin)", () => {
       [
         "  NemoClaw registered",
         null,
-        "  Endpoint:  https://inference-api.nvidia.com/v1",
+        "  Endpoint:  https://integrate.api.nvidia.com/v1",
         "  Provider:  NVIDIA Endpoints",
         "  Model:     nvidia/nemotron-3-super-120b-a12b",
         "  Slash:     /nemoclaw",
diff --git a/nemoclaw/src/blueprint/runner.test.ts b/nemoclaw/src/blueprint/runner.test.ts
index 0505b260b7..40e433cb30 100644
--- a/nemoclaw/src/blueprint/runner.test.ts
+++ b/nemoclaw/src/blueprint/runner.test.ts
@@ -664,7 +664,7 @@ describe("runner", () => {
                 name: "nim_service",
                 endpoints: [
                   {
-                    host: "inference-api.nvidia.com",
+                    host: "integrate.api.nvidia.com",
                     port: 443,
                     access: "full",
                   },
@@ -736,7 +736,7 @@ describe("runner", () => {
           name: "nim_service",
           endpoints: [
             {
-              host: "inference-api.nvidia.com",
+              host: "integrate.api.nvidia.com",
               port: 443,
               access: "full",
             },
@@ -759,7 +759,7 @@ describe("runner", () => {
       const bp = blueprintWithPolicyAdditions({
         nim_service: {
           name: "nim_service",
-          endpoints: [{ host: "inference-api.nvidia.com", port: 443, access: "full" }],
+          endpoints: [{ host: "integrate.api.nvidia.com", port: 443, access: "full" }],
         },
       });
       mockCurrentPolicy(
@@ -779,7 +779,7 @@ describe("runner", () => {
       const bp = blueprintWithPolicyAdditions({
         nim_service: {
           name: "nim_service",
-          endpoints: [{ host: "inference-api.nvidia.com", port: 443, access: "full" }],
+          endpoints: [{ host: "integrate.api.nvidia.com", port: 443, access: "full" }],
         },
       });
       mockCurrentPolicy(["Version: 1", "Hash: sha256:test"].join("\n"));
@@ -797,7 +797,7 @@ describe("runner", () => {
       const bp = blueprintWithPolicyAdditions({
         nim_service: {
           name: "nim_service",
-          endpoints: [{ host: "inference-api.nvidia.com", port: 443, access: "full" }],
+          endpoints: [{ host: "integrate.api.nvidia.com", port: 443, access: "full" }],
         },
       });
       mockCurrentPolicy(["Version: 1", "Hash: sha256:test", "---"].join("\n"));
diff --git a/nemoclaw/src/blueprint/ssrf.test.ts b/nemoclaw/src/blueprint/ssrf.test.ts
index 6722feadfb..5d52b3dd26 100644
--- a/nemoclaw/src/blueprint/ssrf.test.ts
+++ b/nemoclaw/src/blueprint/ssrf.test.ts
@@ -218,7 +218,7 @@ describe("validateEndpointUrl", () => {
 
   it("allows NVIDIA API endpoint", async () => {
     mockPublicDns();
-    const url = "https://inference-api.nvidia.com/v1";
+    const url = "https://integrate.api.nvidia.com/v1";
     const result = await validateEndpointUrl(url);
     expect(result.url).toBe(url);
     expect(result.pinnedUrl).toBe("https://93.184.216.34/v1");
diff --git a/nemoclaw/src/commands/config-show.test.ts b/nemoclaw/src/commands/config-show.test.ts
index 07fccd58bb..c54043b7b3 100644
--- a/nemoclaw/src/commands/config-show.test.ts
+++ b/nemoclaw/src/commands/config-show.test.ts
@@ -36,7 +36,7 @@ describe("commands/config-show", () => {
   it("shows config with redacted credentials when config exists", () => {
     const config: NemoClawOnboardConfig = {
       endpointType: "build",
-      endpointUrl: "https://inference-api.nvidia.com/v1",
+      endpointUrl: "https://integrate.api.nvidia.com/v1",
       ncpPartner: null,
       model: "nvidia/nemotron-3-super-120b-a12b",
       profile: "default",
@@ -44,12 +44,12 @@ describe("commands/config-show", () => {
       onboardedAt: "2026-04-10T14:22:00Z",
     };
     mockedLoadOnboardConfig.mockReturnValue(config);
-    mockedDescribeOnboardEndpoint.mockReturnValue("build (https://inference-api.nvidia.com/v1)");
+    mockedDescribeOnboardEndpoint.mockReturnValue("build (https://integrate.api.nvidia.com/v1)");
     mockedDescribeOnboardProvider.mockReturnValue("NVIDIA Endpoint API");
 
     const result = slashConfigShow();
     expect(result.text).toContain("NemoClaw Config");
-    expect(result.text).toContain("build (https://inference-api.nvidia.com/v1)");
+    expect(result.text).toContain("build (https://integrate.api.nvidia.com/v1)");
     expect(result.text).toContain("$NVIDIA_INFERENCE_API_KEY");
     expect(result.text).toContain("NVIDIA Endpoint API");
     expect(result.text).toContain("nvidia/nemotron-3-super-120b-a12b");
@@ -59,7 +59,7 @@ describe("commands/config-show", () => {
   it("does not expose raw credential values", () => {
     const config: NemoClawOnboardConfig = {
       endpointType: "build",
-      endpointUrl: "https://inference-api.nvidia.com/v1",
+      endpointUrl: "https://integrate.api.nvidia.com/v1",
       ncpPartner: null,
       model: "nvidia/nemotron-3-super-120b-a12b",
       profile: "default",
@@ -97,7 +97,7 @@ describe("commands/config-show", () => {
   it("shows not configured when credentialEnv is empty", () => {
     const config: NemoClawOnboardConfig = {
       endpointType: "build",
-      endpointUrl: "https://inference-api.nvidia.com/v1",
+      endpointUrl: "https://integrate.api.nvidia.com/v1",
       ncpPartner: null,
       model: "nvidia/nemotron-3-super-120b-a12b",
       profile: "default",
@@ -115,7 +115,7 @@ describe("commands/config-show", () => {
   it("notes that config is host-only modifiable", () => {
     const config: NemoClawOnboardConfig = {
       endpointType: "build",
-      endpointUrl: "https://inference-api.nvidia.com/v1",
+      endpointUrl: "https://integrate.api.nvidia.com/v1",
       ncpPartner: null,
       model: "nvidia/nemotron-3-super-120b-a12b",
       profile: "default",
diff --git a/scripts/validate-configs.ts b/scripts/validate-configs.ts
index f0b42bb571..cd24310fd4 100755
--- a/scripts/validate-configs.ts
+++ b/scripts/validate-configs.ts
@@ -209,7 +209,7 @@ interface DangerousHostFinding {
   host: string;
 }
 
-const ROUTER_API_BASE_HOST_ALLOWLIST: ReadonlySet<string> = new Set(["inference-api.nvidia.com"]);
+const ROUTER_API_BASE_HOST_ALLOWLIST: ReadonlySet<string> = new Set(["integrate.api.nvidia.com"]);
 
 /**
  * Walk a parsed policy document (full `network_policies` map or a preset
diff --git a/src/lib/actions/sandbox/status.test.ts b/src/lib/actions/sandbox/status.test.ts
index 1a2894cad1..66d3c6f7a0 100644
--- a/src/lib/actions/sandbox/status.test.ts
+++ b/src/lib/actions/sandbox/status.test.ts
@@ -27,7 +27,7 @@ describe("sandbox status inference health", () => {
           ok: true,
           probed: true,
           providerLabel: "NVIDIA Endpoints",
-          endpoint: "https://inference-api.nvidia.com/v1/chat/completions",
+          endpoint: "https://integrate.api.nvidia.com/v1/chat/completions",
           detail: "healthy",
         };
       },
@@ -184,7 +184,7 @@ describe("maybeGetSandboxStatusInferenceHealth", () => {
           ok: true,
           probed: true,
           providerLabel: "NVIDIA Endpoints",
-          endpoint: "https://inference-api.nvidia.com/v1/chat/completions",
+          endpoint: "https://integrate.api.nvidia.com/v1/chat/completions",
           detail: "healthy",
         };
       },
diff --git a/src/lib/diagnostics/debug.ts b/src/lib/diagnostics/debug.ts
index 862c07d3c1..a41e70d98b 100644
--- a/src/lib/diagnostics/debug.ts
+++ b/src/lib/diagnostics/debug.ts
@@ -440,11 +440,11 @@ function collectNetwork(collectDir: string): void {
     collect(collectDir, "ip-route", "ip", ["route"]);
     collectShell(collectDir, "resolv-conf", "cat /etc/resolv.conf");
   }
-  collect(collectDir, "nslookup", "nslookup", ["inference-api.nvidia.com"]);
+  collect(collectDir, "nslookup", "nslookup", ["integrate.api.nvidia.com"]);
   collectShell(
     collectDir,
     "curl-models",
-    'code=$(curl -s -o /dev/null -w "%{http_code}" https://inference-api.nvidia.com/v1/models); echo "HTTP $code"; if [ "$code" -ge 200 ] && [ "$code" -lt 500 ]; then echo "NIM API reachable"; else echo "NIM API unreachable"; exit 1; fi',
+    'code=$(curl -s -o /dev/null -w "%{http_code}" https://integrate.api.nvidia.com/v1/models); echo "HTTP $code"; if [ "$code" -ge 200 ] && [ "$code" -lt 500 ]; then echo "NIM API reachable"; else echo "NIM API unreachable"; exit 1; fi',
   );
   collectShell(collectDir, "lsof-net", "lsof -i -P -n 2>/dev/null | head -50");
   collect(collectDir, "lsof-18789", "lsof", ["-i", `:${DASHBOARD_PORT}`]);
diff --git a/src/lib/inference/model-prompts.test.ts b/src/lib/inference/model-prompts.test.ts
index 9f16f524a6..0d574b4ae7 100644
--- a/src/lib/inference/model-prompts.test.ts
+++ b/src/lib/inference/model-prompts.test.ts
@@ -54,13 +54,13 @@ describe("model prompt helpers", () => {
       getCredentialFn: () => "nvapi-test",
       validateNvidiaEndpointModelFn: (model) => ({
         ok: model === "nemotron-custom",
-        message: `Model '${model}' is not available from NVIDIA Endpoints. Checked https://inference-api.nvidia.com/v1/models.`,
+        message: `Model '${model}' is not available from NVIDIA Endpoints. Checked https://integrate.api.nvidia.com/v1/models.`,
       }),
     });
 
     expect(result).toBe("nemotron-custom");
     expect(errorLine).toHaveBeenCalledWith(
-      "  Model 'bad-model' is not available from NVIDIA Endpoints. Checked https://inference-api.nvidia.com/v1/models.",
+      "  Model 'bad-model' is not available from NVIDIA Endpoints. Checked https://integrate.api.nvidia.com/v1/models.",
     );
   });
 
diff --git a/src/lib/inference/onboard-probes.test.ts b/src/lib/inference/onboard-probes.test.ts
index 808db27181..8fbfacfd63 100644
--- a/src/lib/inference/onboard-probes.test.ts
+++ b/src/lib/inference/onboard-probes.test.ts
@@ -321,7 +321,7 @@ describe("OpenAI-compatible inference probes", () => {
       const args = getChatCompletionsProbeCurlArgs({
         authHeader: ["-H", "Authorization: Bearer nvapi-test"],
         model,
-        url: "https://inference-api.nvidia.com/v1/chat/completions",
+        url: "https://integrate.api.nvidia.com/v1/chat/completions",
         isWsl: false,
       });
       expect(args[args.indexOf("--connect-timeout") + 1]).toBe("10");
@@ -331,7 +331,7 @@ describe("OpenAI-compatible inference probes", () => {
     const wslArgs = getChatCompletionsProbeCurlArgs({
       authHeader: ["-H", "Authorization: Bearer nvapi-test"],
       model: "qwen/qwen3.5-397b-a17b",
-      url: "https://inference-api.nvidia.com/v1/chat/completions",
+      url: "https://integrate.api.nvidia.com/v1/chat/completions",
       isWsl: true,
     });
     expect(wslArgs[wslArgs.indexOf("--connect-timeout") + 1]).toBe("30");
@@ -362,7 +362,7 @@ describe("OpenAI-compatible inference probes", () => {
     const args = getChatCompletionsProbeCurlArgs({
       authHeader: ["-H", "Authorization: Bearer nvapi-test"],
       model: "moonshotai/kimi-k2.6",
-      url: "https://inference-api.nvidia.com/v1/chat/completions",
+      url: "https://integrate.api.nvidia.com/v1/chat/completions",
       isWsl: false,
     });
 
@@ -388,7 +388,7 @@ describe("OpenAI-compatible inference probes", () => {
     const args = getChatCompletionsProbeCurlArgs({
       authHeader: ["-H", "Authorization: Bearer nvapi-test"],
       model: "deepseek-ai/deepseek-v4-pro",
-      url: "https://inference-api.nvidia.com/v1/chat/completions",
+      url: "https://integrate.api.nvidia.com/v1/chat/completions",
       isWsl: false,
     });
 
@@ -504,7 +504,7 @@ exit 0
       console.log = (...args) => lines.push(args.join(" "));
       try {
         const result = probeOpenAiLikeEndpoint(
-          "https://inference-api.nvidia.com/v1",
+          "https://integrate.api.nvidia.com/v1",
           "nvidia/nemotron-3-super-120b-a12b",
           "nvapi-test",
           { skipResponsesProbe: true },
@@ -858,7 +858,7 @@ exit 0
       console.log = (...args) => lines.push(args.join(" "));
       try {
         const result = probeOpenAiLikeEndpoint(
-          "https://inference-api.nvidia.com/v1",
+          "https://integrate.api.nvidia.com/v1",
           "nvidia/nemotron-3-super-120b-a12b",
           "nvapi-test",
           { skipResponsesProbe: true },
@@ -911,7 +911,7 @@ exit 28
     console.log = (...args) => lines.push(args.join(" "));
     try {
       const result = probeOpenAiLikeEndpoint(
-        "https://inference-api.nvidia.com/v1",
+        "https://integrate.api.nvidia.com/v1",
         "deepseek-ai/deepseek-v4-pro",
         "nvapi-test",
         { skipResponsesProbe: true },
diff --git a/src/lib/inference/provider-models.ts b/src/lib/inference/provider-models.ts
index f78e543b8d..2d219599fa 100644
--- a/src/lib/inference/provider-models.ts
+++ b/src/lib/inference/provider-models.ts
@@ -8,7 +8,7 @@ import type { ModelCatalogFetchResult, ModelValidationResult } from "../onboard/
 // credentials.ts still uses CommonJS-style exports.
 const { normalizeCredentialValue } = require("../credentials/store");
 
-export const BUILD_ENDPOINT_URL = "https://inference-api.nvidia.com/v1";
+export const BUILD_ENDPOINT_URL = "https://integrate.api.nvidia.com/v1";
 
 export interface ProviderModelOptions {
   runCurlProbeImpl?: (argv: string[]) => CurlProbeResult;
diff --git a/src/lib/onboard/bridge-dns-preflight.ts b/src/lib/onboard/bridge-dns-preflight.ts
index 9d346c2465..e37cce3f77 100644
--- a/src/lib/onboard/bridge-dns-preflight.ts
+++ b/src/lib/onboard/bridge-dns-preflight.ts
@@ -278,7 +278,7 @@ function hostDnsPreflightSkipped(env: NodeJS.ProcessEnv = process.env): boolean
 }
 
 // `NEMOCLAW_PROVIDER` keys that resolve to NVIDIA-hosted endpoints
-// (inference-api.nvidia.com). Mirrors the aliases in
+// (integrate.api.nvidia.com). Mirrors the aliases in
 // `onboard/providers.ts::getNonInteractiveProvider`. Local/custom and
 // other hosted providers (ollama, vllm, openai, anthropic, nim-local, …)
 // do not need this host, so the NVIDIA host DNS probe must not gate them.
@@ -286,7 +286,7 @@ const NVIDIA_ENDPOINT_PROVIDER_KEYS = new Set(["build", "cloud", "routed"]);
 
 /**
  * Whether onboarding's effective inference provider is NVIDIA Endpoints,
- * so the `inference-api.nvidia.com` host DNS probe is relevant.
+ * so the `integrate.api.nvidia.com` host DNS probe is relevant.
  *
  * `NEMOCLAW_PROVIDER` is honored only in non-interactive mode (mirroring
  * `getRequestedProviderHint`), where an unset value defaults to NVIDIA
diff --git a/src/lib/onboard/docker-gpu-patch.test.ts b/src/lib/onboard/docker-gpu-patch.test.ts
index 29dd4c81b7..5e7146e31e 100644
--- a/src/lib/onboard/docker-gpu-patch.test.ts
+++ b/src/lib/onboard/docker-gpu-patch.test.ts
@@ -765,12 +765,12 @@ describe("docker-gpu-patch sandbox DNS fallback (#3579)", () => {
     );
   });
 
-  it("regression manifest: host.openshell.internal + google.com + gateway.discord.gg + inference-api.nvidia.com (#3579 manager spec)", () => {
+  it("regression manifest: host.openshell.internal + google.com + gateway.discord.gg + integrate.api.nvidia.com (#3579 manager spec)", () => {
     // The four hostnames called out in #3579's manager-provided spec:
     //   host.openshell.internal      → resolved via --add-host (mount namespace)
     //   google.com                   → public DNS via embedded Docker resolver
     //   gateway.discord.gg           → public DNS via embedded Docker resolver
-    //   inference-api.nvidia.com     → public DNS via embedded Docker resolver
+    //   integrate.api.nvidia.com     → public DNS via embedded Docker resolver
     //
     // Unit-testable invariants that together cover all four:
     //   1. --add-host preserves the host.openshell.internal mapping
@@ -788,7 +788,7 @@ describe("docker-gpu-patch sandbox DNS fallback (#3579)", () => {
     expect(args).toEqual(
       expect.arrayContaining(["--add-host", "host.openshell.internal:172.17.0.1"]),
     );
-    // google.com / gateway.discord.gg / inference-api.nvidia.com — covered by
+    // google.com / gateway.discord.gg / integrate.api.nvidia.com — covered by
     // (a) not pinning --network=host and (b) injecting --dns when the host
     // has a loopback-only resolver.
     expect(args).not.toEqual(expect.arrayContaining(["--network", "host"]));
diff --git a/src/lib/onboard/host-dns-preflight.test.ts b/src/lib/onboard/host-dns-preflight.test.ts
index fde6432bb5..c522875e0c 100644
--- a/src/lib/onboard/host-dns-preflight.test.ts
+++ b/src/lib/onboard/host-dns-preflight.test.ts
@@ -4,7 +4,7 @@
 // Host DNS preflight (#4784): the CLI process must be able to resolve the
 // provider endpoint over port 53. A host OUTPUT chain that drops tcp/udp:53
 // lets the container DNS probe pass while later provider validation dies with
-// `curl: (6) Could not resolve host: inference-api.nvidia.com`. These tests
+// `curl: (6) Could not resolve host: integrate.api.nvidia.com`. These tests
 // cover the host-side probe (preflight.ts) plus the gate and remediation that
 // surface it before provider validation (bridge-dns-preflight.ts).
 
@@ -34,7 +34,7 @@ describe("probeHostDns (#4784)", () => {
       runProbeImpl: exec({ stdout: "HOSTDNS_OK 1.2.3.4,5.6.7.8", exitCode: 0 }),
     });
     expect(result.ok).toBe(true);
-    expect(result.hostname).toBe("inference-api.nvidia.com");
+    expect(result.hostname).toBe("integrate.api.nvidia.com");
     expect(result.reason).toBeUndefined();
     expect(isFatalHostDnsProbeFailure(result)).toBe(false);
   });
@@ -144,11 +144,11 @@ describe("printHostDnsRemediation (#4784)", () => {
     vi.spyOn(console, "error").mockImplementation((arg?: unknown) => {
       messages.push(String(arg ?? ""));
     });
-    printHostDnsRemediation({ platform: "linux", isWsl: false }, "inference-api.nvidia.com");
+    printHostDnsRemediation({ platform: "linux", isWsl: false }, "integrate.api.nvidia.com");
     const blob = messages.join("\n");
-    expect(blob).toContain("could not resolve inference-api.nvidia.com");
+    expect(blob).toContain("could not resolve integrate.api.nvidia.com");
     expect(blob).toContain("Container DNS may still look healthy");
-    expect(blob).toContain("curl: (6) Could not resolve host: inference-api.nvidia.com");
+    expect(blob).toContain("curl: (6) Could not resolve host: integrate.api.nvidia.com");
     expect(blob).toContain("--dport 53");
     expect(blob).toContain("NEMOCLAW_SKIP_HOST_DNS_PREFLIGHT=1");
     expect(blob).toContain("#4784");
@@ -187,7 +187,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
       env: {},
       nonInteractive: true,
       exit,
-      probeHostDnsImpl: () => ({ ok: true, hostname: "inference-api.nvidia.com" }),
+      probeHostDnsImpl: () => ({ ok: true, hostname: "integrate.api.nvidia.com" }),
     });
     expect(logs.join("\n")).toContain("✓ Host DNS resolution works");
     expect(exit).not.toHaveBeenCalled();
@@ -205,15 +205,15 @@ describe("assertHostDnsHealthy (#4784)", () => {
       exit,
       probeHostDnsImpl: () => ({
         ok: false,
-        hostname: "inference-api.nvidia.com",
+        hostname: "integrate.api.nvidia.com",
         reason: "servers_unreachable",
-        details: "dns.resolve inference-api.nvidia.com: ECONNREFUSED",
+        details: "dns.resolve integrate.api.nvidia.com: ECONNREFUSED",
       }),
     });
     expect(exit).toHaveBeenCalledWith(1);
     const blob = errors.join("\n");
     expect(blob).toContain("✗ Host DNS resolution failed");
-    expect(blob).toContain("could not resolve inference-api.nvidia.com");
+    expect(blob).toContain("could not resolve integrate.api.nvidia.com");
     expect(blob).toContain("--dport 53");
   });
 
@@ -229,9 +229,9 @@ describe("assertHostDnsHealthy (#4784)", () => {
       exit,
       probeHostDnsImpl: () => ({
         ok: false,
-        hostname: "inference-api.nvidia.com",
+        hostname: "integrate.api.nvidia.com",
         reason: "resolution_failed",
-        details: "dns.resolve inference-api.nvidia.com: ENOTFOUND",
+        details: "dns.resolve integrate.api.nvidia.com: ENOTFOUND",
       }),
     });
     expect(exit).toHaveBeenCalledWith(1);
@@ -248,7 +248,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
       exit,
       probeHostDnsImpl: () => ({
         ok: false,
-        hostname: "inference-api.nvidia.com",
+        hostname: "integrate.api.nvidia.com",
         reason: "error",
         details: "spawn node ENOENT",
       }),
@@ -261,7 +261,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
     const logs: string[] = [];
     vi.spyOn(console, "log").mockImplementation((arg?: unknown) => logs.push(String(arg ?? "")));
     const exit = vi.fn();
-    const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
+    const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
     assertHostDnsHealthy(host, {
       env: { NEMOCLAW_SKIP_HOST_DNS_PREFLIGHT: "1" },
       exit,
@@ -274,7 +274,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
 
   it("skips silently (no probe, no exit) when a non-NVIDIA provider is selected (codex P2)", () => {
     const exit = vi.fn();
-    const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
+    const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
     // A user who picked a local/non-NVIDIA provider must not be blocked by
     // NVIDIA-domain DNS even if their host cannot resolve it — including in
     // non-interactive mode where the choice is explicit.
@@ -292,7 +292,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
 
   it("skips an unset provider in interactive mode (provider not yet chosen — codex P2)", () => {
     const exit = vi.fn();
-    const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
+    const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
     // Fresh interactive onboarding hits preflight before the provider menu;
     // it may end up on Ollama/vLLM, so an NVIDIA-DNS block must not abort here.
     assertHostDnsHealthy(host, {
@@ -308,7 +308,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
   it("runs for an unset provider only in non-interactive mode (NVIDIA Endpoints default)", () => {
     const exit = vi.fn();
     vi.spyOn(console, "log").mockImplementation(() => {});
-    const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
+    const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
     assertHostDnsHealthy(host, {
       env: {},
       nonInteractive: true,
@@ -323,7 +323,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
     const exit = vi.fn();
     vi.spyOn(console, "log").mockImplementation(() => {});
     for (const provider of ["build", "cloud", "routed"]) {
-      const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
+      const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
       assertHostDnsHealthy(host, {
         env: { NEMOCLAW_PROVIDER: provider },
         nonInteractive: true,
@@ -337,7 +337,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
 
   it("ignores NEMOCLAW_PROVIDER in interactive mode (onboard ignores it there too)", () => {
     const exit = vi.fn();
-    const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
+    const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
     // Interactive onboarding ignores NEMOCLAW_PROVIDER and shows the menu, so
     // we must not assume NVIDIA from it before the user has chosen.
     assertHostDnsHealthy(host, {
@@ -352,9 +352,9 @@ describe("assertHostDnsHealthy (#4784)", () => {
 
   it("skips an explicit local NIM provider (nim-local) in non-interactive mode", () => {
     const exit = vi.fn();
-    const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
+    const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
     // `nim-local` runs NIM locally and validates against localhost, not
-    // inference-api.nvidia.com, so the NVIDIA host DNS probe must not gate it.
+    // integrate.api.nvidia.com, so the NVIDIA host DNS probe must not gate it.
     assertHostDnsHealthy(host, {
       env: { NEMOCLAW_PROVIDER: "nim-local" },
       nonInteractive: true,
@@ -369,7 +369,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
     const logs: string[] = [];
     vi.spyOn(console, "log").mockImplementation((arg?: unknown) => logs.push(String(arg ?? "")));
     const exit = vi.fn();
-    const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
+    const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
     assertHostDnsHealthy(host, {
       env: { NEMOCLAW_PROVIDER: "build", HTTPS_PROXY: "http://proxy.corp:3128" },
       nonInteractive: true,
@@ -384,7 +384,7 @@ describe("assertHostDnsHealthy (#4784)", () => {
   it("still runs when NO_PROXY exempts the provider host from the proxy", () => {
     vi.spyOn(console, "log").mockImplementation(() => {});
     const exit = vi.fn();
-    const probe = vi.fn(() => ({ ok: true as const, hostname: "inference-api.nvidia.com" }));
+    const probe = vi.fn(() => ({ ok: true as const, hostname: "integrate.api.nvidia.com" }));
     assertHostDnsHealthy(host, {
       env: {
         NEMOCLAW_PROVIDER: "build",
diff --git a/src/lib/onboard/initial-policy.test.ts b/src/lib/onboard/initial-policy.test.ts
index 72d8971a25..4343b3e8fe 100644
--- a/src/lib/onboard/initial-policy.test.ts
+++ b/src/lib/onboard/initial-policy.test.ts
@@ -116,7 +116,7 @@ network_policies:
   nvidia:
     name: nvidia
     endpoints:
-      - host: inference-api.nvidia.com
+      - host: integrate.api.nvidia.com
         port: 443
 `);
     const gpuDoc = YAML.parse(gpuPolicy);
diff --git a/src/lib/onboard/machine/handlers/provider-inference.test.ts b/src/lib/onboard/machine/handlers/provider-inference.test.ts
index b0b618c527..e6af5f2018 100644
--- a/src/lib/onboard/machine/handlers/provider-inference.test.ts
+++ b/src/lib/onboard/machine/handlers/provider-inference.test.ts
@@ -17,7 +17,7 @@ type Host = { cpus?: number };
 const baseSelection: ProviderSelectionResult = {
   model: "nvidia/test",
   provider: "nvidia-prod",
-  endpointUrl: "https://inference-api.nvidia.com/v1",
+  endpointUrl: "https://integrate.api.nvidia.com/v1",
   credentialEnv: "NVIDIA_INFERENCE_API_KEY",
   hermesAuthMethod: null,
   hermesToolGateways: [],
@@ -158,7 +158,7 @@ describe("handleProviderInferenceState", () => {
       "my-assistant",
       "nvidia/test",
       "nvidia-prod",
-      "https://inference-api.nvidia.com/v1",
+      "https://integrate.api.nvidia.com/v1",
       "NVIDIA_INFERENCE_API_KEY",
       null,
       [],
diff --git a/src/lib/onboard/preflight.ts b/src/lib/onboard/preflight.ts
index 8e260dc165..e2bfaa35de 100644
--- a/src/lib/onboard/preflight.ts
+++ b/src/lib/onboard/preflight.ts
@@ -1926,13 +1926,13 @@ export function isFatalContainerDnsProbeFailure(result: DnsProbeResult): boolean
 // process itself cannot resolve the provider endpoint. That gap let
 // onboarding print "Container DNS resolution works" and then fail much
 // later at NVIDIA Endpoints validation with the cryptic
-// `curl: (6) Could not resolve host: inference-api.nvidia.com`. This
+// `curl: (6) Could not resolve host: integrate.api.nvidia.com`. This
 // probe resolves the provider hostname from the host (CLI) process so
 // the blocked-DNS condition surfaces up front, distinct from the
 // container-DNS path.
 
 /** The NVIDIA Endpoints provider host onboarding validates by default. */
-export const DEFAULT_HOST_DNS_PROBE_HOSTNAME = "inference-api.nvidia.com";
+export const DEFAULT_HOST_DNS_PROBE_HOSTNAME = "integrate.api.nvidia.com";
 
 /**
  * Host DNS probe budget (ms). Shorter than the container probe: there is
diff --git a/src/lib/onboard/providers.test.ts b/src/lib/onboard/providers.test.ts
index edca6afaa5..b5b25e4d5f 100644
--- a/src/lib/onboard/providers.test.ts
+++ b/src/lib/onboard/providers.test.ts
@@ -164,7 +164,7 @@ describe("onboard provider helpers", () => {
       "inference",
       "openai",
       "NVIDIA_INFERENCE_API_KEY",
-      "https://inference-api.nvidia.com/v1",
+      "https://integrate.api.nvidia.com/v1",
       {},
       (command) => {
         commands.push(command.join(" "));
@@ -177,7 +177,7 @@ describe("onboard provider helpers", () => {
     expect(commands[0]).toMatch(/provider get/);
     expect(commands[1]).toMatch(/provider update/);
     expect(commands[1]).toMatch(
-      /--config OPENAI_BASE_URL=https:\/\/inference-api\.nvidia\.com\/v1/,
+      /--config OPENAI_BASE_URL=https:\/\/integrate\.api\.nvidia\.com\/v1/,
     );
   });
 
@@ -187,7 +187,7 @@ describe("onboard provider helpers", () => {
       "nvidia-prod",
       "openai",
       "NVIDIA_INFERENCE_API_KEY",
-      "https://inference-api.nvidia.com/v1",
+      "https://integrate.api.nvidia.com/v1",
       {},
       (command) => {
         commands.push(command.join(" "));
@@ -202,7 +202,7 @@ describe("onboard provider helpers", () => {
     // OpenShell CLI rejects `--credential KEY` when the host env is empty;
     // dropping the flag turns the call into a no-op merge that succeeds.
     expect(commands[1]).not.toMatch(/--credential/);
-    expect(commands[1]).toMatch(/OPENAI_BASE_URL=https:\/\/inference-api\.nvidia\.com\/v1/);
+    expect(commands[1]).toMatch(/OPENAI_BASE_URL=https:\/\/integrate\.api\.nvidia\.com\/v1/);
   });
 
   it("keeps --credential on the create path even when env is empty", () => {
diff --git a/src/lib/onboard/providers.ts b/src/lib/onboard/providers.ts
index 4dc46313ff..edb2d117ef 100644
--- a/src/lib/onboard/providers.ts
+++ b/src/lib/onboard/providers.ts
@@ -17,7 +17,7 @@ const { compactText } = require("../core/url-utils");
 
 // ── Constants ────────────────────────────────────────────────────
 
-const BUILD_ENDPOINT_URL = "https://inference-api.nvidia.com/v1";
+const BUILD_ENDPOINT_URL = "https://integrate.api.nvidia.com/v1";
 const OPENAI_ENDPOINT_URL = "https://api.openai.com/v1";
 const ANTHROPIC_ENDPOINT_URL = "https://api.anthropic.com";
 const GEMINI_ENDPOINT_URL = "https://generativelanguage.googleapis.com/v1beta/openai/";
diff --git a/src/lib/security/credential-filter.test.ts b/src/lib/security/credential-filter.test.ts
index a85806954f..22f64914bf 100644
--- a/src/lib/security/credential-filter.test.ts
+++ b/src/lib/security/credential-filter.test.ts
@@ -102,7 +102,7 @@ describe("valueLooksLikeSecret", () => {
 
   it("does not match benign values", () => {
     expect(valueLooksLikeSecret("npx")).toBe(false);
-    expect(valueLooksLikeSecret("https://inference-api.nvidia.com/v1")).toBe(false);
+    expect(valueLooksLikeSecret("https://integrate.api.nvidia.com/v1")).toBe(false);
     expect(valueLooksLikeSecret("moonshotai/kimi-k2")).toBe(false);
     expect(valueLooksLikeSecret("production")).toBe(false);
   });
diff --git a/src/lib/validation.ts b/src/lib/validation.ts
index 47b8147fa4..03ab1b3b90 100644
--- a/src/lib/validation.ts
+++ b/src/lib/validation.ts
@@ -216,7 +216,7 @@ export function isSafeModelId(value: string): boolean {
 /**
  * Detect NVIDIA Cloud Functions "Function not found for account" errors.
  *
- * NVIDIA Build (inference-api.nvidia.com) returns this when a model is in the
+ * NVIDIA Build (integrate.api.nvidia.com) returns this when a model is in the
  * public catalog but is not deployed for the caller's account/org. The raw
  * body looks like:
  *
diff --git a/test/generate-openclaw-config.test.ts b/test/generate-openclaw-config.test.ts
index 844da6f279..02826f6f80 100644
--- a/test/generate-openclaw-config.test.ts
+++ b/test/generate-openclaw-config.test.ts
@@ -1357,7 +1357,7 @@ describe("generate-openclaw-config.mts: config generation", () => {
       { NEMOCLAW_MODEL: "deepseek-ai/DeepSeek-V4-Flash" },
       { NEMOCLAW_PROVIDER_KEY: "openai" },
       { NEMOCLAW_INFERENCE_API: "responses" },
-      { NEMOCLAW_INFERENCE_BASE_URL: "https://inference-api.nvidia.com/v1" },
+      { NEMOCLAW_INFERENCE_BASE_URL: "https://integrate.api.nvidia.com/v1" },
     ];
 
     for (const envCase of cases) {
@@ -1405,7 +1405,7 @@ describe("generate-openclaw-config.mts: config generation", () => {
       { NEMOCLAW_MODEL: "nvidia/nemotron-3-nano:30b" },
       { NEMOCLAW_PROVIDER_KEY: "nvidia" },
       { NEMOCLAW_INFERENCE_API: "responses" },
-      { NEMOCLAW_INFERENCE_BASE_URL: "https://inference-api.nvidia.com/v1" },
+      { NEMOCLAW_INFERENCE_BASE_URL: "https://integrate.api.nvidia.com/v1" },
     ];
 
     for (const envCase of cases) {
diff --git a/test/nemotron-inference-fix.test.ts b/test/nemotron-inference-fix.test.ts
index 7091eb273c..aa606d8fc9 100644
--- a/test/nemotron-inference-fix.test.ts
+++ b/test/nemotron-inference-fix.test.ts
@@ -578,7 +578,7 @@ send('inference.local', JSON.stringify({
   model: 'nvidia/nemotron-3-ultra-550b-a55b',
   messages: [{ role: 'user', content: 'hi' }],
 }));
-send('inference-api.nvidia.com', JSON.stringify({
+send('integrate.api.nvidia.com', JSON.stringify({
   model: 'nvidia/nemotron-3-ultra-550b-a55b',
   messages: [{ role: 'user', content: 'hi' }],
 }));
diff --git a/test/onboard-selection.test.ts b/test/onboard-selection.test.ts
index 86ea7d0aa0..58dc04c203 100644
--- a/test/onboard-selection.test.ts
+++ b/test/onboard-selection.test.ts
@@ -3849,10 +3849,10 @@ if echo "$url" | grep -q 'generativelanguage.googleapis.com' && echo "$url" | gr
 elif echo "$url" | grep -q 'generativelanguage.googleapis.com' && echo "$url" | grep -q '/chat/completions$'; then
   body='{"id":"chatcmpl-123","choices":[{"message":{"content":"OK"}}]}'
   status="200"
-elif echo "$url" | grep -q 'inference-api.nvidia.com' && echo "$url" | grep -q '/responses$'; then
+elif echo "$url" | grep -q 'integrate.api.nvidia.com' && echo "$url" | grep -q '/responses$'; then
   body='{"id":"resp_123"}'
   status="200"
-elif echo "$url" | grep -q 'inference-api.nvidia.com' && echo "$url" | grep -q '/chat/completions$'; then
+elif echo "$url" | grep -q 'integrate.api.nvidia.com' && echo "$url" | grep -q '/chat/completions$'; then
   body='{"id":"chatcmpl-123","choices":[{"message":{"content":"OK"}}]}'
   status="200"
 fi
diff --git a/test/openclaw-config-snapshot.test.ts b/test/openclaw-config-snapshot.test.ts
index e43f7e115d..8a7698ce47 100644
--- a/test/openclaw-config-snapshot.test.ts
+++ b/test/openclaw-config-snapshot.test.ts
@@ -70,7 +70,7 @@ describe("OpenClaw durable config file (#5027)", () => {
           mode: "merge",
           providers: {
             nvidia: {
-              baseUrl: "https://inference-api.nvidia.com/v1",
+              baseUrl: "https://integrate.api.nvidia.com/v1",
               apiKey: "unused",
               models: [{ id: "moonshotai/kimi-k2" }],
             },
diff --git a/test/validate-blueprint.test.ts b/test/validate-blueprint.test.ts
index e4d9f1a49b..76fbad0e1e 100644
--- a/test/validate-blueprint.test.ts
+++ b/test/validate-blueprint.test.ts
@@ -207,7 +207,7 @@ describe("Model Router pool config", () => {
 
   it("regression #3255: routes NVIDIA API keys to the public NVIDIA inference endpoint", () => {
     const apiBases = new Set((pool.models ?? []).map((model) => model.api_base));
-    expect(apiBases).toEqual(new Set(["https://inference-api.nvidia.com/v1"]));
+    expect(apiBases).toEqual(new Set(["https://integrate.api.nvidia.com/v1"]));
   });
 
   it("regression #3255: uses valid LiteLLM NVIDIA model identifiers", () => {
@@ -284,7 +284,7 @@ describe("base sandbox policy", () => {
     const np = policy.network_policies ?? {};
     const endpoints = np.nvidia?.endpoints;
     const missingHosts: string[] = [];
-    const host = "inference-api.nvidia.com";
+    const host = "integrate.api.nvidia.com";
     const endpoint = endpoints?.find((entry) => entry.host === host);
     const hasEmbeddingsRule = endpoint?.rules?.some(
       (rule) => rule.allow?.method === "POST" && rule.allow?.path === "/v1/embeddings",
diff --git a/test/validate-config-schemas.test.ts b/test/validate-config-schemas.test.ts
index de805df182..85ee98dcbc 100644
--- a/test/validate-config-schemas.test.ts
+++ b/test/validate-config-schemas.test.ts
@@ -241,7 +241,7 @@ describe("router-pool-config.schema.json", () => {
     const firstModel = asRecord(Array.isArray(root.models) ? root.models[0] : undefined);
     const bad = {
       ...root,
-      models: [{ ...firstModel, api_base: "http://inference-api.nvidia.com/v1" }],
+      models: [{ ...firstModel, api_base: "http://integrate.api.nvidia.com/v1" }],
     };
     expect(validate(bad)).toBe(false);
   });
diff --git a/test/validate-configs-dangerous-hosts.test.ts b/test/validate-configs-dangerous-hosts.test.ts
index 2c47577f78..42d1e38887 100644
--- a/test/validate-configs-dangerous-hosts.test.ts
+++ b/test/validate-configs-dangerous-hosts.test.ts
@@ -59,14 +59,14 @@ describe("findDangerousRouterApiBases", () => {
   it("allows the public NVIDIA Build endpoint", () => {
     expect(
       findDangerousRouterApiBases({
-        models: [{ api_base: "https://inference-api.nvidia.com/v1" }],
+        models: [{ api_base: "https://integrate.api.nvidia.com/v1" }],
       }),
     ).toEqual([]);
-    expect(ROUTER_API_BASE_HOST_ALLOWLIST.has("inference-api.nvidia.com")).toBe(true);
+    expect(ROUTER_API_BASE_HOST_ALLOWLIST.has("integrate.api.nvidia.com")).toBe(true);
   });
 
   it.each([
-    "http://inference-api.nvidia.com/v1",
+    "http://integrate.api.nvidia.com/v1",
     "https://localhost/v1",
     "https://127.0.0.1/v1",
     "https://10.0.0.5/v1",
diff --git a/tools/advisors/session.mts b/tools/advisors/session.mts
index ccfb8df9b2..8dd86acf49 100644
--- a/tools/advisors/session.mts
+++ b/tools/advisors/session.mts
@@ -53,7 +53,7 @@ export type RunReadOnlyAdvisorOptions = {
 export function openAiAdvisorProviderConfig(credentialEnv: string): AdvisorProviderConfig {
   return {
     api: "openai-completions",
-    baseUrl: "https://inference-api.nvidia.com/v1",
+    baseUrl: "https://integrate.api.nvidia.com/v1",
     models: [advisorModel(DEFAULT_ADVISOR_MODEL, "GPT-5.5", 256000, 32768, true, ["text", "image"])],
     ["api" + "Key"]: credentialEnv,
   } as AdvisorProviderConfig;

From b27ac36f09d013a8709eb5ceae5ee639fcbc0141 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 16:03:48 -0700
Subject: [PATCH 04/11] ci(e2e): route cloud smoke through compatible endpoint

---
 .github/workflows/e2e-script.yaml       | 27 ++++++++++
 .github/workflows/nightly-e2e.yaml      |  3 ++
 test/e2e-script-workflow.test.ts        | 26 ++++++++++
 test/e2e/lib/ci-compatible-inference.sh | 69 +++++++++++++++++++++++++
 test/e2e/test-cloud-inference-e2e.sh    |  7 +--
 test/e2e/test-cloud-onboard-e2e.sh      | 23 +++++----
 test/e2e/test-full-e2e.sh               | 28 ++++++----
 7 files changed, 160 insertions(+), 23 deletions(-)
 create mode 100755 test/e2e/lib/ci-compatible-inference.sh

diff --git a/.github/workflows/e2e-script.yaml b/.github/workflows/e2e-script.yaml
index 4a721b68fc..f33cc98370 100644
--- a/.github/workflows/e2e-script.yaml
+++ b/.github/workflows/e2e-script.yaml
@@ -47,6 +47,11 @@ on:
         required: false
         type: boolean
         default: false
+      nvidia_secret_as_compatible_api_key:
+        description: Expose NVIDIA_INFERENCE_API_KEY as COMPATIBLE_API_KEY for CI-only OpenAI-compatible inference.
+        required: false
+        type: boolean
+        default: false
       brave_api_key:
         description: Pass the BRAVE_API_KEY secret to the script.
         required: false
@@ -196,6 +201,28 @@ jobs:
             echo "::warning::Docker Hub login failed after 3 attempts; continuing with anonymous pulls."
           fi
 
+      - name: Export CI compatible inference environment
+        if: ${{ inputs.nvidia_secret_as_compatible_api_key }}
+        env:
+          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
+            echo "::error::NVIDIA_INFERENCE_API_KEY secret is required for CI compatible inference." >&2
+            exit 1
+          fi
+
+          {
+            printf 'NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE=1\n'
+            printf 'NEMOCLAW_PROVIDER=custom\n'
+            printf 'NEMOCLAW_ENDPOINT_URL=https://inference-api.nvidia.com/v1\n'
+            printf 'NEMOCLAW_MODEL=nvidia/nemotron-3-super-120b-a12b\n'
+            printf 'NEMOCLAW_COMPAT_MODEL=nvidia/nemotron-3-super-120b-a12b\n'
+            printf 'COMPATIBLE_API_KEY=%s\n' "${NVIDIA_INFERENCE_API_KEY}"
+          } >> "$GITHUB_ENV"
+
       - name: Run E2E script
         uses: ./workflow-actions/.github/actions/run-e2e-script
         with:
diff --git a/.github/workflows/nightly-e2e.yaml b/.github/workflows/nightly-e2e.yaml
index 42c9dab91f..6583a0916d 100644
--- a/.github/workflows/nightly-e2e.yaml
+++ b/.github/workflows/nightly-e2e.yaml
@@ -204,6 +204,7 @@ jobs:
       artifact_path: "/tmp/nemoclaw-e2e-install.log"
       env_json: '{"NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE":"1","NEMOCLAW_NON_INTERACTIVE":"1","NEMOCLAW_RECREATE_SANDBOX":"1","NEMOCLAW_SANDBOX_NAME":"e2e-nightly"}'
       nvidia_api_key: true
+      nvidia_secret_as_compatible_api_key: true
       github_token: true
     secrets: &nightly-e2e-default-secrets
       NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
@@ -224,6 +225,7 @@ jobs:
       env_json: '{"NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE":"1","NEMOCLAW_NON_INTERACTIVE":"1","NEMOCLAW_POLICY_MODE":"custom","NEMOCLAW_POLICY_PRESETS":"npm,pypi","NEMOCLAW_RECREATE_SANDBOX":"1","NEMOCLAW_SANDBOX_NAME":"e2e-cloud-onboard"}'
       checked_out_ref_env: "NEMOCLAW_PUBLIC_INSTALL_REF"
       nvidia_api_key: true
+      nvidia_secret_as_compatible_api_key: true
       github_token: true
     secrets: *nightly-e2e-default-secrets
   cloud-inference-e2e:
@@ -240,6 +242,7 @@ jobs:
       artifact_path: "/tmp/nemoclaw-e2e-cloud-inference-install.log"
       env_json: '{"NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE":"1","NEMOCLAW_NON_INTERACTIVE":"1","NEMOCLAW_RECREATE_SANDBOX":"1","NEMOCLAW_SANDBOX_NAME":"e2e-cloud-inference"}'
       nvidia_api_key: true
+      nvidia_secret_as_compatible_api_key: true
     secrets: *nightly-e2e-default-secrets
   cron-preflight-inference-local-e2e:
     if: >-
diff --git a/test/e2e-script-workflow.test.ts b/test/e2e-script-workflow.test.ts
index a7fcc2a6d2..29553c0f69 100644
--- a/test/e2e-script-workflow.test.ts
+++ b/test/e2e-script-workflow.test.ts
@@ -524,6 +524,32 @@ describe("E2E reusable workflow contract", () => {
     expect(exportStep?.run).toContain('>> "$GITHUB_ENV"');
   });
 
+  it("can route selected reusable jobs through the CI compatible inference endpoint", () => {
+    const exportStep = runnerWorkflow.jobs.run.steps.find(
+      (step) => step.name === "Export CI compatible inference environment",
+    );
+    const expectedJobs = ["cloud-e2e", "cloud-onboard-e2e", "cloud-inference-e2e"];
+    const workflowCall = runnerWorkflow.on?.workflow_call ?? runnerWorkflow.true?.workflow_call;
+
+    expect(workflowCall?.inputs?.nvidia_secret_as_compatible_api_key).toMatchObject({
+      required: false,
+      type: "boolean",
+      default: false,
+    });
+    expect(exportStep?.if).toBe("${{ inputs.nvidia_secret_as_compatible_api_key }}");
+    expect(exportStep?.env?.NVIDIA_INFERENCE_API_KEY).toBe(
+      "${{ secrets.NVIDIA_INFERENCE_API_KEY }}",
+    );
+    expect(exportStep?.run).toContain("NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE=1");
+    expect(exportStep?.run).toContain("NEMOCLAW_PROVIDER=custom");
+    expect(exportStep?.run).toContain("NEMOCLAW_ENDPOINT_URL=https://inference-api.nvidia.com/v1");
+    expect(exportStep?.run).toContain("COMPATIBLE_API_KEY=%s");
+
+    for (const name of expectedJobs) {
+      expect(nightlyWorkflow.jobs[name].with?.nvidia_secret_as_compatible_api_key, name).toBe(true);
+    }
+  });
+
   it("keeps converted jobs dispatchable through the reusable workflow", () => {
     const cloudJob = nightlyWorkflow.jobs["cloud-e2e"];
 
diff --git a/test/e2e/lib/ci-compatible-inference.sh b/test/e2e/lib/ci-compatible-inference.sh
new file mode 100755
index 0000000000..5ff2ae7b46
--- /dev/null
+++ b/test/e2e/lib/ci-compatible-inference.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# CI-only compatibility shim: some live E2E lanes use the repository's
+# NVIDIA_INFERENCE_API_KEY secret against an OpenAI-compatible endpoint instead
+# of the public NVIDIA Endpoints provider. Keep this helper in test/e2e so the
+# product-facing provider/default endpoint remain unchanged.
+
+nemoclaw_e2e_using_compatible_inference() {
+  [ "${NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE:-}" = "1" ]
+}
+
+nemoclaw_e2e_configure_compatible_inference() {
+  if ! nemoclaw_e2e_using_compatible_inference; then
+    return 0
+  fi
+
+  export NEMOCLAW_PROVIDER="${NEMOCLAW_PROVIDER:-custom}"
+  export NEMOCLAW_ENDPOINT_URL="${NEMOCLAW_ENDPOINT_URL:-https://inference-api.nvidia.com/v1}"
+  export NEMOCLAW_MODEL="${NEMOCLAW_MODEL:-${NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL:-nvidia/nemotron-3-super-120b-a12b}}"
+  export NEMOCLAW_COMPAT_MODEL="${NEMOCLAW_COMPAT_MODEL:-$NEMOCLAW_MODEL}"
+
+  if [ -z "${COMPATIBLE_API_KEY:-}" ] && [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
+    export COMPATIBLE_API_KEY="$NVIDIA_INFERENCE_API_KEY"
+  fi
+}
+
+nemoclaw_e2e_hosted_inference_key() {
+  if nemoclaw_e2e_using_compatible_inference; then
+    printf '%s' "${COMPATIBLE_API_KEY:-${NVIDIA_INFERENCE_API_KEY:-}}"
+  else
+    printf '%s' "${NVIDIA_INFERENCE_API_KEY:-}"
+  fi
+}
+
+nemoclaw_e2e_hosted_inference_base_url() {
+  if nemoclaw_e2e_using_compatible_inference; then
+    printf '%s' "${NEMOCLAW_ENDPOINT_URL:-https://inference-api.nvidia.com/v1}"
+  else
+    printf '%s' "https://inference-api.nvidia.com/v1"
+  fi
+}
+
+nemoclaw_e2e_hosted_inference_model() {
+  printf '%s' "${NEMOCLAW_MODEL:-${NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL:-nvidia/nemotron-3-super-120b-a12b}}"
+}
+
+nemoclaw_e2e_require_hosted_inference_key() {
+  local key
+  key="$(nemoclaw_e2e_hosted_inference_key)"
+
+  if nemoclaw_e2e_using_compatible_inference; then
+    if [ -n "$key" ]; then
+      pass "COMPATIBLE_API_KEY is set for CI compatible inference"
+    else
+      fail "COMPATIBLE_API_KEY not set — required for CI compatible inference"
+      return 1
+    fi
+    return 0
+  fi
+
+  if [ -n "$key" ] && [[ "$key" == nvapi-* ]]; then
+    pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
+  else
+    fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for live inference"
+    return 1
+  fi
+}
diff --git a/test/e2e/test-cloud-inference-e2e.sh b/test/e2e/test-cloud-inference-e2e.sh
index 491df699fd..8e1a3c4a0b 100755
--- a/test/e2e/test-cloud-inference-e2e.sh
+++ b/test/e2e/test-cloud-inference-e2e.sh
@@ -90,6 +90,9 @@ CLOUD_MODEL="${NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL:-nvidia/nemotron-3-super-120b-a
 . "${E2E_DIR}/lib/sandbox-teardown.sh"
 # shellcheck source=test/e2e/lib/install-path-refresh.sh
 . "${E2E_DIR}/lib/install-path-refresh.sh"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "${E2E_DIR}/lib/ci-compatible-inference.sh"
+nemoclaw_e2e_configure_compatible_inference
 register_sandbox_for_teardown "$SANDBOX_NAME"
 
 # ══════════════════════════════════════════════════════════════════════
@@ -103,11 +106,9 @@ if ! docker info >/dev/null 2>&1; then
 fi
 pass "Docker is running"
 
-if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] || [[ "${NVIDIA_INFERENCE_API_KEY}" != nvapi-* ]]; then
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
+if ! nemoclaw_e2e_require_hosted_inference_key; then
   exit 1
 fi
-pass "NVIDIA_INFERENCE_API_KEY is set"
 
 cd "$REPO" || {
   fail "Could not cd to repo root"
diff --git a/test/e2e/test-cloud-onboard-e2e.sh b/test/e2e/test-cloud-onboard-e2e.sh
index b819966aad..724fa53ca4 100755
--- a/test/e2e/test-cloud-onboard-e2e.sh
+++ b/test/e2e/test-cloud-onboard-e2e.sh
@@ -88,6 +88,9 @@ PUBLIC_INSTALL_CWD="${NEMOCLAW_PUBLIC_INSTALL_CWD:-}"
 . "${E2E_DIR}/lib/sandbox-teardown.sh"
 # shellcheck source=test/e2e/lib/install-path-refresh.sh
 . "${E2E_DIR}/lib/install-path-refresh.sh"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "${E2E_DIR}/lib/ci-compatible-inference.sh"
+nemoclaw_e2e_configure_compatible_inference
 register_sandbox_for_teardown "$SANDBOX_NAME"
 
 # ══════════════════════════════════════════════════════════════════════
@@ -111,17 +114,19 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
-else
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for cloud onboard"
+if ! nemoclaw_e2e_require_hosted_inference_key; then
   exit 1
 fi
 
-if curl -sf --max-time 10 https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then
-  pass "Network access to inference-api.nvidia.com"
+HOSTED_INFERENCE_BASE_URL="$(nemoclaw_e2e_hosted_inference_base_url)"
+HOSTED_INFERENCE_KEY="$(nemoclaw_e2e_hosted_inference_key)"
+
+if curl -sf --max-time 10 \
+  -H "Authorization: Bearer $HOSTED_INFERENCE_KEY" \
+  "${HOSTED_INFERENCE_BASE_URL}/models" >/dev/null 2>&1; then
+  pass "Network access to ${HOSTED_INFERENCE_BASE_URL}"
 else
-  fail "Cannot reach inference-api.nvidia.com"
+  fail "Cannot reach ${HOSTED_INFERENCE_BASE_URL}"
   exit 1
 fi
 
@@ -152,8 +157,8 @@ section "Phase 3: Install via public URL"
 
 export NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME"
 export NEMOCLAW_EXPERIMENTAL=1
-export NEMOCLAW_PROVIDER=cloud
-export NEMOCLAW_MODEL="$CLOUD_MODEL"
+export NEMOCLAW_PROVIDER="${NEMOCLAW_PROVIDER:-cloud}"
+export NEMOCLAW_MODEL="${NEMOCLAW_MODEL:-$CLOUD_MODEL}"
 export NEMOCLAW_POLICY_MODE="${NEMOCLAW_POLICY_MODE:-custom}"
 export NEMOCLAW_POLICY_PRESETS="${NEMOCLAW_POLICY_PRESETS:-npm,pypi}"
 
diff --git a/test/e2e/test-full-e2e.sh b/test/e2e/test-full-e2e.sh
index 6b0814b595..cb7ab46903 100755
--- a/test/e2e/test-full-e2e.sh
+++ b/test/e2e/test-full-e2e.sh
@@ -71,6 +71,8 @@ except Exception as e:
 
 # shellcheck source=test/e2e/lib/openclaw-json.sh
 . "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/lib/openclaw-json.sh"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/lib/ci-compatible-inference.sh"
 
 # Determine repo root
 if [ -d /workspace ] && [ -f /workspace/install.sh ]; then
@@ -83,6 +85,7 @@ else
 fi
 
 SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-nightly}"
+nemoclaw_e2e_configure_compatible_inference
 
 # shellcheck source=test/e2e/lib/sandbox-teardown.sh
 . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh"
@@ -114,17 +117,20 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
-else
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for live inference"
+if ! nemoclaw_e2e_require_hosted_inference_key; then
   exit 1
 fi
 
-if curl -sf --max-time 10 https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then
-  pass "Network access to inference-api.nvidia.com"
+HOSTED_INFERENCE_BASE_URL="$(nemoclaw_e2e_hosted_inference_base_url)"
+HOSTED_INFERENCE_MODEL="$(nemoclaw_e2e_hosted_inference_model)"
+HOSTED_INFERENCE_KEY="$(nemoclaw_e2e_hosted_inference_key)"
+
+if curl -sf --max-time 10 \
+  -H "Authorization: Bearer $HOSTED_INFERENCE_KEY" \
+  "${HOSTED_INFERENCE_BASE_URL}/models" >/dev/null 2>&1; then
+  pass "Network access to ${HOSTED_INFERENCE_BASE_URL}"
 else
-  fail "Cannot reach inference-api.nvidia.com"
+  fail "Cannot reach ${HOSTED_INFERENCE_BASE_URL}"
   exit 1
 fi
 
@@ -306,13 +312,13 @@ fi
 section "Phase 4: Live inference"
 
 # ── Test 4a: Direct NVIDIA Endpoints ──
-info "[LIVE] Direct API test → inference-api.nvidia.com..."
+info "[LIVE] Direct API test → ${HOSTED_INFERENCE_BASE_URL}..."
 api_response=$(curl -s --max-time 30 \
-  -X POST https://inference-api.nvidia.com/v1/chat/completions \
+  -X POST "${HOSTED_INFERENCE_BASE_URL}/chat/completions" \
   -H "Content-Type: application/json" \
-  -H "Authorization: Bearer $NVIDIA_INFERENCE_API_KEY" \
+  -H "Authorization: Bearer $HOSTED_INFERENCE_KEY" \
   -d '{
-    "model": "nvidia/nemotron-3-super-120b-a12b",
+    "model": "'"${HOSTED_INFERENCE_MODEL}"'",
     "messages": [{"role": "user", "content": "Reply with exactly one word: PONG"}],
     "max_tokens": 100
   }' 2>/dev/null) || true

From 87c0ec0eeac12fcaa95dcee7648caeac5ff24ec0 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 16:32:06 -0700
Subject: [PATCH 05/11] ci(e2e): use compatible NVIDIA model id

---
 .github/workflows/e2e-script.yaml       |  4 ++--
 test/e2e-script-workflow.test.ts        |  2 ++
 test/e2e/lib/ci-compatible-inference.sh | 11 +++++++++--
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/e2e-script.yaml b/.github/workflows/e2e-script.yaml
index f33cc98370..1bf3b299ac 100644
--- a/.github/workflows/e2e-script.yaml
+++ b/.github/workflows/e2e-script.yaml
@@ -218,8 +218,8 @@ jobs:
             printf 'NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE=1\n'
             printf 'NEMOCLAW_PROVIDER=custom\n'
             printf 'NEMOCLAW_ENDPOINT_URL=https://inference-api.nvidia.com/v1\n'
-            printf 'NEMOCLAW_MODEL=nvidia/nemotron-3-super-120b-a12b\n'
-            printf 'NEMOCLAW_COMPAT_MODEL=nvidia/nemotron-3-super-120b-a12b\n'
+            printf 'NEMOCLAW_MODEL=nvidia/nvidia/nemotron-3-super-v3\n'
+            printf 'NEMOCLAW_COMPAT_MODEL=nvidia/nvidia/nemotron-3-super-v3\n'
             printf 'COMPATIBLE_API_KEY=%s\n' "${NVIDIA_INFERENCE_API_KEY}"
           } >> "$GITHUB_ENV"
 
diff --git a/test/e2e-script-workflow.test.ts b/test/e2e-script-workflow.test.ts
index 29553c0f69..f75dbf9629 100644
--- a/test/e2e-script-workflow.test.ts
+++ b/test/e2e-script-workflow.test.ts
@@ -543,6 +543,8 @@ describe("E2E reusable workflow contract", () => {
     expect(exportStep?.run).toContain("NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE=1");
     expect(exportStep?.run).toContain("NEMOCLAW_PROVIDER=custom");
     expect(exportStep?.run).toContain("NEMOCLAW_ENDPOINT_URL=https://inference-api.nvidia.com/v1");
+    expect(exportStep?.run).toContain("NEMOCLAW_MODEL=nvidia/nvidia/nemotron-3-super-v3");
+    expect(exportStep?.run).toContain("NEMOCLAW_COMPAT_MODEL=nvidia/nvidia/nemotron-3-super-v3");
     expect(exportStep?.run).toContain("COMPATIBLE_API_KEY=%s");
 
     for (const name of expectedJobs) {
diff --git a/test/e2e/lib/ci-compatible-inference.sh b/test/e2e/lib/ci-compatible-inference.sh
index 5ff2ae7b46..63632db4e6 100755
--- a/test/e2e/lib/ci-compatible-inference.sh
+++ b/test/e2e/lib/ci-compatible-inference.sh
@@ -7,6 +7,9 @@
 # of the public NVIDIA Endpoints provider. Keep this helper in test/e2e so the
 # product-facing provider/default endpoint remain unchanged.
 
+NEMOCLAW_E2E_COMPATIBLE_INFERENCE_MODEL_DEFAULT="nvidia/nvidia/nemotron-3-super-v3"
+NEMOCLAW_E2E_NVIDIA_INFERENCE_MODEL_DEFAULT="nvidia/nemotron-3-super-120b-a12b"
+
 nemoclaw_e2e_using_compatible_inference() {
   [ "${NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE:-}" = "1" ]
 }
@@ -18,7 +21,7 @@ nemoclaw_e2e_configure_compatible_inference() {
 
   export NEMOCLAW_PROVIDER="${NEMOCLAW_PROVIDER:-custom}"
   export NEMOCLAW_ENDPOINT_URL="${NEMOCLAW_ENDPOINT_URL:-https://inference-api.nvidia.com/v1}"
-  export NEMOCLAW_MODEL="${NEMOCLAW_MODEL:-${NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL:-nvidia/nemotron-3-super-120b-a12b}}"
+  export NEMOCLAW_MODEL="${NEMOCLAW_MODEL:-${NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL:-$NEMOCLAW_E2E_COMPATIBLE_INFERENCE_MODEL_DEFAULT}}"
   export NEMOCLAW_COMPAT_MODEL="${NEMOCLAW_COMPAT_MODEL:-$NEMOCLAW_MODEL}"
 
   if [ -z "${COMPATIBLE_API_KEY:-}" ] && [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
@@ -43,7 +46,11 @@ nemoclaw_e2e_hosted_inference_base_url() {
 }
 
 nemoclaw_e2e_hosted_inference_model() {
-  printf '%s' "${NEMOCLAW_MODEL:-${NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL:-nvidia/nemotron-3-super-120b-a12b}}"
+  if nemoclaw_e2e_using_compatible_inference; then
+    printf '%s' "${NEMOCLAW_MODEL:-${NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL:-$NEMOCLAW_E2E_COMPATIBLE_INFERENCE_MODEL_DEFAULT}}"
+  else
+    printf '%s' "${NEMOCLAW_MODEL:-${NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL:-$NEMOCLAW_E2E_NVIDIA_INFERENCE_MODEL_DEFAULT}}"
+  fi
 }
 
 nemoclaw_e2e_require_hosted_inference_key() {

From b9d9ffed9089c7067df0d05e01270dfc3dd645bf Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 16:46:43 -0700
Subject: [PATCH 06/11] ci(e2e): accept compatible inference route

---
 .../checks/03-security-checks.sh              | 32 ++++++++++++-------
 test/e2e/test-cloud-onboard-e2e.sh            | 10 +++++-
 test/e2e/test-full-e2e.sh                     |  8 ++++-
 3 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/test/e2e/e2e-cloud-experimental/checks/03-security-checks.sh b/test/e2e/e2e-cloud-experimental/checks/03-security-checks.sh
index dd2a181bdc..0c8a0afe79 100755
--- a/test/e2e/e2e-cloud-experimental/checks/03-security-checks.sh
+++ b/test/e2e/e2e-cloud-experimental/checks/03-security-checks.sh
@@ -11,10 +11,12 @@
 
 set -euo pipefail
 
-# Env var name is assembled from fragments so static secret scanners do not match a single literal token.
-_api_key_env_name_part1='NVIDIA'
-_api_key_env_name_part2='_API_KEY'
-_api_key_env_name="${_api_key_env_name_part1}${_api_key_env_name_part2}"
+# The caller can point this check at the active hosted-inference credential.
+_api_key_env_name="${NEMOCLAW_E2E_CLOUD_API_KEY_ENV:-NVIDIA_INFERENCE_API_KEY}"
+if [[ ! "$_api_key_env_name" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
+  printf '%s\n' "03-security-checks: FAIL: invalid cloud API token env var name: ${_api_key_env_name}" >&2
+  exit 1
+fi
 : "${!_api_key_env_name:?cloud API token env var must be set (export before running)}"
 
 die() {
@@ -33,14 +35,20 @@ while IFS= read -r line; do
   esac
 done <<<"$ps_lines"
 
-# argv-style leak: NAME=<vendor key prefix> (prefix via escapes; no contiguous vendor prefix literal in source).
-_key_argv_prefix_marker=$'\x6e\x76\x61\x70\x69\x2d'
-_key_argv_needle="${_api_key_env_name}=${_key_argv_prefix_marker}"
-while IFS= read -r line; do
-  case "$line" in
-    *"${_key_argv_needle}"*) die "api-key-in-ps: env-style API key argv leak in ps" ;;
-  esac
-done <<<"$ps_lines"
+# argv-style leak: NAME=<vendor key prefix>. The CI-compatible endpoint secret
+# does not use the public NVIDIA key prefix, so keep this marker optional.
+_key_argv_prefix_marker="${NEMOCLAW_E2E_CLOUD_API_KEY_ARGV_PREFIX:-}"
+if [ -z "${NEMOCLAW_E2E_CLOUD_API_KEY_ARGV_PREFIX+x}" ] && [ "$_api_key_env_name" = "NVIDIA_INFERENCE_API_KEY" ]; then
+  _key_argv_prefix_marker=$'\x6e\x76\x61\x70\x69\x2d'
+fi
+if [ -n "$_key_argv_prefix_marker" ]; then
+  _key_argv_needle="${_api_key_env_name}=${_key_argv_prefix_marker}"
+  while IFS= read -r line; do
+    case "$line" in
+      *"${_key_argv_needle}"*) die "api-key-in-ps: env-style API key argv leak in ps" ;;
+    esac
+  done <<<"$ps_lines"
+fi
 
 printf '%s\n' "03-security-checks: OK (api-key-in-ps)"
 exit 0
diff --git a/test/e2e/test-cloud-onboard-e2e.sh b/test/e2e/test-cloud-onboard-e2e.sh
index 724fa53ca4..1ee1ec0451 100755
--- a/test/e2e/test-cloud-onboard-e2e.sh
+++ b/test/e2e/test-cloud-onboard-e2e.sh
@@ -91,6 +91,9 @@ PUBLIC_INSTALL_CWD="${NEMOCLAW_PUBLIC_INSTALL_CWD:-}"
 # shellcheck source=test/e2e/lib/ci-compatible-inference.sh
 . "${E2E_DIR}/lib/ci-compatible-inference.sh"
 nemoclaw_e2e_configure_compatible_inference
+if nemoclaw_e2e_using_compatible_inference; then
+  CLOUD_MODEL="$(nemoclaw_e2e_hosted_inference_model)"
+fi
 register_sandbox_for_teardown "$SANDBOX_NAME"
 
 # ══════════════════════════════════════════════════════════════════════
@@ -280,7 +283,12 @@ fi
 # ══════════════════════════════════════════════════════════════════════
 section "Phase 4: Sandbox checks (Landlock, security, inference.local)"
 
-export SANDBOX_NAME CLOUD_EXPERIMENTAL_MODEL="$CLOUD_MODEL" REPO NVIDIA_INFERENCE_API_KEY
+if nemoclaw_e2e_using_compatible_inference; then
+  export NEMOCLAW_E2E_CLOUD_API_KEY_ENV=COMPATIBLE_API_KEY
+else
+  export NEMOCLAW_E2E_CLOUD_API_KEY_ENV=NVIDIA_INFERENCE_API_KEY
+fi
+export SANDBOX_NAME CLOUD_EXPERIMENTAL_MODEL="$CLOUD_MODEL" REPO NVIDIA_INFERENCE_API_KEY COMPATIBLE_API_KEY
 export PATH="/usr/local/bin:${HOME}/.local/bin:${PATH}"
 
 shopt -s nullglob
diff --git a/test/e2e/test-full-e2e.sh b/test/e2e/test-full-e2e.sh
index cb7ab46903..a562c9b199 100755
--- a/test/e2e/test-full-e2e.sh
+++ b/test/e2e/test-full-e2e.sh
@@ -242,7 +242,13 @@ fi
 # 3c: Inference must be configured by onboard (no fallback — if onboard
 # failed to configure it, that's a bug we want to catch)
 if inf_check=$(openshell inference get 2>&1); then
-  if grep -qi "nvidia-prod" <<<"$inf_check"; then
+  if nemoclaw_e2e_using_compatible_inference; then
+    if grep -qi "Provider:[[:space:]]*custom" <<<"$inf_check" && grep -Fq "$HOSTED_INFERENCE_MODEL" <<<"$inf_check"; then
+      pass "Inference configured via onboard (CI-compatible endpoint)"
+    else
+      fail "Inference not configured — onboard did not set up CI-compatible custom provider"
+    fi
+  elif grep -qi "nvidia-prod" <<<"$inf_check"; then
     pass "Inference configured via onboard"
   else
     fail "Inference not configured — onboard did not set up nvidia-prod provider"

From d8c068949b6eccd8fa8d6ee5bf349e8c419b7067 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 16:52:26 -0700
Subject: [PATCH 07/11] ci(e2e): probe compatible endpoint with chat

---
 test/e2e/lib/ci-compatible-inference.sh | 24 ++++++++++++++++++++++++
 test/e2e/test-cloud-onboard-e2e.sh      |  5 +----
 test/e2e/test-full-e2e.sh               |  4 +---
 3 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/test/e2e/lib/ci-compatible-inference.sh b/test/e2e/lib/ci-compatible-inference.sh
index 63632db4e6..15b64ecdbe 100755
--- a/test/e2e/lib/ci-compatible-inference.sh
+++ b/test/e2e/lib/ci-compatible-inference.sh
@@ -53,6 +53,30 @@ nemoclaw_e2e_hosted_inference_model() {
   fi
 }
 
+nemoclaw_e2e_probe_hosted_inference() {
+  local base_url key
+  base_url="$(nemoclaw_e2e_hosted_inference_base_url)"
+  key="$(nemoclaw_e2e_hosted_inference_key)"
+
+  if nemoclaw_e2e_using_compatible_inference; then
+    local model payload
+    model="$(nemoclaw_e2e_hosted_inference_model)"
+    payload=$(
+      printf '{"model":"%s","messages":[{"role":"user","content":"Respond with OK."}],"temperature":0,"max_tokens":8}' "$model"
+    )
+    curl -sf --max-time 30 \
+      -X POST "${base_url}/chat/completions" \
+      -H "Authorization: Bearer $key" \
+      -H "Content-Type: application/json" \
+      -d "$payload" >/dev/null 2>&1
+    return $?
+  fi
+
+  curl -sf --max-time 10 \
+    -H "Authorization: Bearer $key" \
+    "${base_url}/models" >/dev/null 2>&1
+}
+
 nemoclaw_e2e_require_hosted_inference_key() {
   local key
   key="$(nemoclaw_e2e_hosted_inference_key)"
diff --git a/test/e2e/test-cloud-onboard-e2e.sh b/test/e2e/test-cloud-onboard-e2e.sh
index 1ee1ec0451..84a500e8f1 100755
--- a/test/e2e/test-cloud-onboard-e2e.sh
+++ b/test/e2e/test-cloud-onboard-e2e.sh
@@ -122,11 +122,8 @@ if ! nemoclaw_e2e_require_hosted_inference_key; then
 fi
 
 HOSTED_INFERENCE_BASE_URL="$(nemoclaw_e2e_hosted_inference_base_url)"
-HOSTED_INFERENCE_KEY="$(nemoclaw_e2e_hosted_inference_key)"
 
-if curl -sf --max-time 10 \
-  -H "Authorization: Bearer $HOSTED_INFERENCE_KEY" \
-  "${HOSTED_INFERENCE_BASE_URL}/models" >/dev/null 2>&1; then
+if nemoclaw_e2e_probe_hosted_inference; then
   pass "Network access to ${HOSTED_INFERENCE_BASE_URL}"
 else
   fail "Cannot reach ${HOSTED_INFERENCE_BASE_URL}"
diff --git a/test/e2e/test-full-e2e.sh b/test/e2e/test-full-e2e.sh
index a562c9b199..810839f035 100755
--- a/test/e2e/test-full-e2e.sh
+++ b/test/e2e/test-full-e2e.sh
@@ -125,9 +125,7 @@ HOSTED_INFERENCE_BASE_URL="$(nemoclaw_e2e_hosted_inference_base_url)"
 HOSTED_INFERENCE_MODEL="$(nemoclaw_e2e_hosted_inference_model)"
 HOSTED_INFERENCE_KEY="$(nemoclaw_e2e_hosted_inference_key)"
 
-if curl -sf --max-time 10 \
-  -H "Authorization: Bearer $HOSTED_INFERENCE_KEY" \
-  "${HOSTED_INFERENCE_BASE_URL}/models" >/dev/null 2>&1; then
+if nemoclaw_e2e_probe_hosted_inference; then
   pass "Network access to ${HOSTED_INFERENCE_BASE_URL}"
 else
   fail "Cannot reach ${HOSTED_INFERENCE_BASE_URL}"

From 5d7de67e6b8a437149415783a6c40c7f8104bcf2 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 17:02:46 -0700
Subject: [PATCH 08/11] ci(e2e): accept compatible provider name

---
 test/e2e/test-full-e2e.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/e2e/test-full-e2e.sh b/test/e2e/test-full-e2e.sh
index 810839f035..5bab679bce 100755
--- a/test/e2e/test-full-e2e.sh
+++ b/test/e2e/test-full-e2e.sh
@@ -241,10 +241,10 @@ fi
 # failed to configure it, that's a bug we want to catch)
 if inf_check=$(openshell inference get 2>&1); then
   if nemoclaw_e2e_using_compatible_inference; then
-    if grep -qi "Provider:[[:space:]]*custom" <<<"$inf_check" && grep -Fq "$HOSTED_INFERENCE_MODEL" <<<"$inf_check"; then
+    if grep -Eqi "Provider:[[:space:]]*(custom|compatible-endpoint)" <<<"$inf_check" && grep -Fq "$HOSTED_INFERENCE_MODEL" <<<"$inf_check"; then
       pass "Inference configured via onboard (CI-compatible endpoint)"
     else
-      fail "Inference not configured — onboard did not set up CI-compatible custom provider"
+      fail "Inference not configured — onboard did not set up CI-compatible provider: ${inf_check:0:200}"
     fi
   elif grep -qi "nvidia-prod" <<<"$inf_check"; then
     pass "Inference configured via onboard"

From 7f1c6a158dd2660dd36d8b08ebb622cafa2c0e8f Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 17:12:01 -0700
Subject: [PATCH 09/11] ci(e2e): strip styled inference output

---
 test/e2e/test-full-e2e.sh | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/test/e2e/test-full-e2e.sh b/test/e2e/test-full-e2e.sh
index 5bab679bce..6d144f13ae 100755
--- a/test/e2e/test-full-e2e.sh
+++ b/test/e2e/test-full-e2e.sh
@@ -240,13 +240,14 @@ fi
 # 3c: Inference must be configured by onboard (no fallback — if onboard
 # failed to configure it, that's a bug we want to catch)
 if inf_check=$(openshell inference get 2>&1); then
+  inf_check_plain="$(sed -E $'s/\x1B\\[[0-9;]*[A-Za-z]//g' <<<"$inf_check")"
   if nemoclaw_e2e_using_compatible_inference; then
-    if grep -Eqi "Provider:[[:space:]]*(custom|compatible-endpoint)" <<<"$inf_check" && grep -Fq "$HOSTED_INFERENCE_MODEL" <<<"$inf_check"; then
+    if grep -Eqi "Provider:[[:space:]]*(custom|compatible-endpoint)" <<<"$inf_check_plain" && grep -Fq "$HOSTED_INFERENCE_MODEL" <<<"$inf_check_plain"; then
       pass "Inference configured via onboard (CI-compatible endpoint)"
     else
-      fail "Inference not configured — onboard did not set up CI-compatible provider: ${inf_check:0:200}"
+      fail "Inference not configured — onboard did not set up CI-compatible provider: ${inf_check_plain:0:200}"
     fi
-  elif grep -qi "nvidia-prod" <<<"$inf_check"; then
+  elif grep -qi "nvidia-prod" <<<"$inf_check_plain"; then
     pass "Inference configured via onboard"
   else
     fail "Inference not configured — onboard did not set up nvidia-prod provider"

From f8c8ab004f30c75ecea69f09c9b12e11099e7e56 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 17:35:20 -0700
Subject: [PATCH 10/11] ci(e2e): preserve NVIDIA key alias fallbacks

---
 docs/network-policy/approve-network-requests.mdx    |  3 ++-
 nemoclaw/src/index.ts                               | 10 ++++++----
 scripts/nemoclaw-start.sh                           |  4 ++++
 src/lib/diagnostics/debug.test.ts                   |  2 +-
 src/lib/inference/nim.ts                            |  9 +++++++--
 src/lib/onboard.ts                                  | 13 ++++++++++---
 .../checks/03-security-checks.sh                    |  8 ++++----
 test/e2e/test-hermes-discord-e2e.sh                 |  9 +++++++--
 test/e2e/test-hermes-e2e.sh                         |  4 +++-
 test/e2e/test-kimi-inference-compat.sh              |  2 +-
 test/e2e/test-messaging-providers.sh                |  4 ++++
 test/no-direct-credential-env.test.ts               |  5 +++++
 test/onboard-selection-vllm.test.ts                 |  1 +
 test/rebuild-credential-hydration.test.ts           |  5 +++++
 14 files changed, 60 insertions(+), 19 deletions(-)

diff --git a/docs/network-policy/approve-network-requests.mdx b/docs/network-policy/approve-network-requests.mdx
index 9dc0038996..f311709b9e 100644
--- a/docs/network-policy/approve-network-requests.mdx
+++ b/docs/network-policy/approve-network-requests.mdx
@@ -64,7 +64,8 @@ From the NemoClaw repository root, run the walkthrough script after you have onb
 ```
 
 This script opens a split tmux session with the TUI on the left and the agent on the right.
-The walkthrough requires tmux and the `NVIDIA_INFERENCE_API_KEY` environment variable, and it assumes an existing sandbox to attach to.
+The walkthrough requires tmux and the `NVIDIA_INFERENCE_API_KEY` environment variable.
+It assumes an existing sandbox to attach to.
 
 ## Related Topics
 
diff --git a/nemoclaw/src/index.ts b/nemoclaw/src/index.ts
index 3c24bd761d..188259f061 100644
--- a/nemoclaw/src/index.ts
+++ b/nemoclaw/src/index.ts
@@ -282,10 +282,12 @@ function registeredProviderForConfig(
   activeModel: string,
   providerCredentialEnv: string,
 ): ProviderPlugin {
-  const authLabel =
-    providerCredentialEnv === "NVIDIA_INFERENCE_API_KEY"
-      ? `NVIDIA API Key (${providerCredentialEnv})`
-      : `OpenAI API Key (${providerCredentialEnv})`;
+  const isNvidiaCredential =
+    providerCredentialEnv === "NVIDIA_INFERENCE_API_KEY" ||
+    providerCredentialEnv === "NVIDIA_API_KEY";
+  const authLabel = isNvidiaCredential
+    ? `NVIDIA API Key (${providerCredentialEnv})`
+    : `OpenAI API Key (${providerCredentialEnv})`;
 
   return {
     id: "inference",
diff --git a/scripts/nemoclaw-start.sh b/scripts/nemoclaw-start.sh
index d3924c7080..2ad417f574 100755
--- a/scripts/nemoclaw-start.sh
+++ b/scripts/nemoclaw-start.sh
@@ -1700,6 +1700,10 @@ prepare_gateway_token_for_current_command() {
 
 # Write an auth profile JSON for the NVIDIA API key so the gateway can authenticate.
 write_auth_profile() {
+  if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] && [ -n "${NVIDIA_API_KEY:-}" ]; then
+    export NVIDIA_INFERENCE_API_KEY="$NVIDIA_API_KEY"
+  fi
+
   if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
     return
   fi
diff --git a/src/lib/diagnostics/debug.test.ts b/src/lib/diagnostics/debug.test.ts
index 23716555e9..201583f139 100644
--- a/src/lib/diagnostics/debug.test.ts
+++ b/src/lib/diagnostics/debug.test.ts
@@ -18,7 +18,7 @@ import {
 
 describe("redact", () => {
   it("redacts NVIDIA_INFERENCE_API_KEY=value patterns", () => {
-    const key = ["NVIDIA", "API", "KEY"].join("_");
+    const key = ["NVIDIA", "INFERENCE", "API", "KEY"].join("_");
     expect(redact(`${key}=some-value`)).toBe(`${key}=<REDACTED>`);
   });
 
diff --git a/src/lib/inference/nim.ts b/src/lib/inference/nim.ts
index 5503eeed51..0767c3ebc5 100644
--- a/src/lib/inference/nim.ts
+++ b/src/lib/inference/nim.ts
@@ -821,13 +821,18 @@ export function startNimContainerByName(
     process.exit(1);
   }
 
-  // Resolve the NGC key: explicit arg wins, then NGC_API_KEY, then NVIDIA_INFERENCE_API_KEY
+  // Resolve the NGC key: explicit arg wins, then NGC_API_KEY, then NVIDIA_INFERENCE_API_KEY,
+  // then the legacy NVIDIA_API_KEY alias.
   // (covers users who only set the NVIDIA key for cloud inference but reuse it
   // against NGC). Without this, NIM's in-container model-manifest download
   // returns "Authentication Error" and the container exits 0 a few seconds in.
   // Regression of #210 — see #3333.
   const ngcApiKey =
-    opts.ngcApiKey ?? process.env.NGC_API_KEY ?? process.env.NVIDIA_INFERENCE_API_KEY ?? "";
+    opts.ngcApiKey ??
+    process.env.NGC_API_KEY ??
+    process.env.NVIDIA_INFERENCE_API_KEY ??
+    process.env.NVIDIA_API_KEY ??
+    "";
   // Use `-e KEY` (no value) so the secret never appears in argv; pass the
   // value through the spawn env instead. Docker reads each named var from
   // its own process env and forwards it to the container.
diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts
index e89849e329..336274a86f 100644
--- a/src/lib/onboard.ts
+++ b/src/lib/onboard.ts
@@ -3647,10 +3647,17 @@ async function setupNim(
 
         if (selected.key === "build") {
           // Let NEMOCLAW_PROVIDER_KEY fill the canonical NVIDIA key without
-          // overriding an explicit env or migrated legacy credential.
+          // overriding an explicit env value.
           const _nvProviderKey = (process.env.NEMOCLAW_PROVIDER_KEY || "").trim();
-          const existingNvidiaKey =
-            getCredential("NVIDIA_INFERENCE_API_KEY") || getCredential("NVIDIA_API_KEY") || "";
+          const existingNvidiaInferenceKey = normalizeCredentialValue(
+            // check-direct-credential-env-ignore -- intentional: NEMOCLAW_PROVIDER_KEY only checks current raw env before applying its override
+            process.env.NVIDIA_INFERENCE_API_KEY ?? "",
+          );
+          const existingLegacyNvidiaKey = normalizeCredentialValue(
+            // check-direct-credential-env-ignore -- intentional: legacy alias participates in the same raw-env override check
+            process.env.NVIDIA_API_KEY ?? "",
+          );
+          const existingNvidiaKey = existingNvidiaInferenceKey || existingLegacyNvidiaKey;
           if (_nvProviderKey && !existingNvidiaKey) {
             process.env.NVIDIA_INFERENCE_API_KEY = _nvProviderKey;
           }
diff --git a/test/e2e/e2e-cloud-experimental/checks/03-security-checks.sh b/test/e2e/e2e-cloud-experimental/checks/03-security-checks.sh
index 0c8a0afe79..911bcf13da 100755
--- a/test/e2e/e2e-cloud-experimental/checks/03-security-checks.sh
+++ b/test/e2e/e2e-cloud-experimental/checks/03-security-checks.sh
@@ -35,11 +35,11 @@ while IFS= read -r line; do
   esac
 done <<<"$ps_lines"
 
-# argv-style leak: NAME=<vendor key prefix>. The CI-compatible endpoint secret
-# does not use the public NVIDIA key prefix, so keep this marker optional.
+# argv-style leak: NAME=<first six key characters>. Set
+# NEMOCLAW_E2E_CLOUD_API_KEY_ARGV_PREFIX to override the marker, or to "" to disable it.
 _key_argv_prefix_marker="${NEMOCLAW_E2E_CLOUD_API_KEY_ARGV_PREFIX:-}"
-if [ -z "${NEMOCLAW_E2E_CLOUD_API_KEY_ARGV_PREFIX+x}" ] && [ "$_api_key_env_name" = "NVIDIA_INFERENCE_API_KEY" ]; then
-  _key_argv_prefix_marker=$'\x6e\x76\x61\x70\x69\x2d'
+if [ -z "${NEMOCLAW_E2E_CLOUD_API_KEY_ARGV_PREFIX+x}" ]; then
+  _key_argv_prefix_marker="$(printf '%.6s' "$_api_key_value")"
 fi
 if [ -n "$_key_argv_prefix_marker" ]; then
   _key_argv_needle="${_api_key_env_name}=${_key_argv_prefix_marker}"
diff --git a/test/e2e/test-hermes-discord-e2e.sh b/test/e2e/test-hermes-discord-e2e.sh
index 28574713c2..c3c24c2fa5 100755
--- a/test/e2e/test-hermes-discord-e2e.sh
+++ b/test/e2e/test-hermes-discord-e2e.sh
@@ -602,8 +602,9 @@ if [ -d "$REPO/.tmp" ]; then
 fi
 
 NVIDIA_INFERENCE_API_KEY_BACKUP="${NVIDIA_INFERENCE_API_KEY:-}"
-unset NVIDIA_INFERENCE_API_KEY
-info "NVIDIA_INFERENCE_API_KEY unset; gateway must hold the inference credential"
+NVIDIA_API_KEY_BACKUP="${NVIDIA_API_KEY:-}"
+unset NVIDIA_INFERENCE_API_KEY NVIDIA_API_KEY
+info "NVIDIA_INFERENCE_API_KEY and NVIDIA_API_KEY unset; gateway must hold the inference credential"
 
 HERMES_REBUILD_LOG="/tmp/nc-hermes-rebuild-noenv.log"
 if nemoclaw "$SANDBOX_NAME" rebuild --yes >"$HERMES_REBUILD_LOG" 2>&1; then
@@ -624,7 +625,11 @@ fi
 if [ -n "$NVIDIA_INFERENCE_API_KEY_BACKUP" ]; then
   export NVIDIA_INFERENCE_API_KEY="$NVIDIA_INFERENCE_API_KEY_BACKUP"
 fi
+if [ -n "$NVIDIA_API_KEY_BACKUP" ]; then
+  export NVIDIA_API_KEY="$NVIDIA_API_KEY_BACKUP"
+fi
 unset NVIDIA_INFERENCE_API_KEY_BACKUP
+unset NVIDIA_API_KEY_BACKUP
 
 section "Phase 9: Cleanup"
 
diff --git a/test/e2e/test-hermes-e2e.sh b/test/e2e/test-hermes-e2e.sh
index b10e718755..73db775f3b 100755
--- a/test/e2e/test-hermes-e2e.sh
+++ b/test/e2e/test-hermes-e2e.sh
@@ -203,7 +203,9 @@ else
   exit 1
 fi
 
-if curl -sf --max-time 10 https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then
+if curl -sf --max-time 10 \
+  -H "Authorization: Bearer ${NVIDIA_INFERENCE_API_KEY}" \
+  https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then
   pass "Network access to inference-api.nvidia.com"
 else
   fail "Cannot reach inference-api.nvidia.com"
diff --git a/test/e2e/test-kimi-inference-compat.sh b/test/e2e/test-kimi-inference-compat.sh
index f43f89d84d..31bab4b542 100755
--- a/test/e2e/test-kimi-inference-compat.sh
+++ b/test/e2e/test-kimi-inference-compat.sh
@@ -394,7 +394,7 @@ run_kimi_onboard() {
   export NEMOCLAW_POLICY_TIER=restricted
   export NEMOCLAW_POLICY_MODE=skip
   export COMPATIBLE_API_KEY="$KIMI_MOCK_API_KEY"
-  unset NVIDIA_INFERENCE_API_KEY OPENAI_API_KEY ANTHROPIC_API_KEY GEMINI_API_KEY
+  unset NVIDIA_INFERENCE_API_KEY NVIDIA_API_KEY OPENAI_API_KEY ANTHROPIC_API_KEY GEMINI_API_KEY
   unset TELEGRAM_BOT_TOKEN DISCORD_BOT_TOKEN SLACK_BOT_TOKEN SLACK_APP_TOKEN
 
   prepare_source_cli || prep_exit=$?
diff --git a/test/e2e/test-messaging-providers.sh b/test/e2e/test-messaging-providers.sh
index 817e49abfb..d788037fb6 100755
--- a/test/e2e/test-messaging-providers.sh
+++ b/test/e2e/test-messaging-providers.sh
@@ -633,6 +633,10 @@ openclaw_message_send_exit_code() {
 # ══════════════════════════════════════════════════════════════════
 section "Phase 0: Prerequisites"
 
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] && [ -n "${NVIDIA_API_KEY:-}" ]; then
+  export NVIDIA_INFERENCE_API_KEY="${NVIDIA_API_KEY}"
+  info "Using legacy NVIDIA_API_KEY as fallback for NVIDIA_INFERENCE_API_KEY"
+fi
 if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
   fail "NVIDIA_INFERENCE_API_KEY not set"
   exit 1
diff --git a/test/no-direct-credential-env.test.ts b/test/no-direct-credential-env.test.ts
index cceda10431..ce92749ada 100644
--- a/test/no-direct-credential-env.test.ts
+++ b/test/no-direct-credential-env.test.ts
@@ -20,11 +20,13 @@ describe("direct credential env guard", () => {
   it.each([
     // Assignments (write context) — allowed
     'process.env.NVIDIA_INFERENCE_API_KEY = "test";',
+    'process.env.NVIDIA_API_KEY = "test";',
     "process.env.OPENAI_API_KEY = value;",
     "process.env[credentialEnv] = providerKey;",
 
     // Deletions (write context) — allowed
     "delete process.env.NVIDIA_INFERENCE_API_KEY;",
+    "delete process.env.NVIDIA_API_KEY;",
     "delete process.env.ANTHROPIC_API_KEY;",
 
     // Non-credential env vars — allowed
@@ -56,6 +58,7 @@ describe("direct credential env guard", () => {
   it.each([
     // Static reads of known credential keys
     ["const key = process.env.NVIDIA_INFERENCE_API_KEY;", "NVIDIA_INFERENCE_API_KEY"],
+    ["const key = process.env.NVIDIA_API_KEY;", "NVIDIA_API_KEY"],
     ["const key = process.env.OPENAI_API_KEY;", "OPENAI_API_KEY"],
     ["const key = process.env.ANTHROPIC_API_KEY;", "ANTHROPIC_API_KEY"],
     ["const key = process.env.GEMINI_API_KEY;", "GEMINI_API_KEY"],
@@ -64,9 +67,11 @@ describe("direct credential env guard", () => {
 
     // Conditional check (read context)
     ["if (!process.env.NVIDIA_INFERENCE_API_KEY) {}", "NVIDIA_INFERENCE_API_KEY"],
+    ["if (!process.env.NVIDIA_API_KEY) {}", "NVIDIA_API_KEY"],
 
     // Bracketed string-literal reads
     ['const key = process.env["NVIDIA_INFERENCE_API_KEY"];', "NVIDIA_INFERENCE_API_KEY"],
+    ['const key = process.env["NVIDIA_API_KEY"];', "NVIDIA_API_KEY"],
     ['if (!process.env["OPENAI_API_KEY"]) {}', "OPENAI_API_KEY"],
 
     // Dynamic read with credential-containing variable name
diff --git a/test/onboard-selection-vllm.test.ts b/test/onboard-selection-vllm.test.ts
index 3aacb62b3d..91837942b6 100644
--- a/test/onboard-selection-vllm.test.ts
+++ b/test/onboard-selection-vllm.test.ts
@@ -424,6 +424,7 @@ async function runScenario(scenario) {
   process.env.NEMOCLAW_PROVIDER = "";
   process.env.NEMOCLAW_MODEL = "";
   process.env.NVIDIA_INFERENCE_API_KEY = "";
+  process.env.NVIDIA_API_KEY = "";
   delete require.cache[require.resolve(${onboardPath})];
   const { setupNim } = require(${onboardPath});
   const originalLog = console.log;
diff --git a/test/rebuild-credential-hydration.test.ts b/test/rebuild-credential-hydration.test.ts
index ce82510880..7c8d05391f 100644
--- a/test/rebuild-credential-hydration.test.ts
+++ b/test/rebuild-credential-hydration.test.ts
@@ -113,6 +113,11 @@ describe("Issue #2273 Layer 1: credential hydration from legacy storage", () =>
       credentialEnv: "NVIDIA_INFERENCE_API_KEY",
       value: "nvapi-test-hydrate",
     },
+    {
+      name: "NVIDIA Endpoints legacy alias",
+      credentialEnv: "NVIDIA_API_KEY",
+      value: "nvapi-test-hydrate",
+    },
     { name: "OpenAI", credentialEnv: "OPENAI_API_KEY", value: "sk-test-hydrate" },
     { name: "Anthropic", credentialEnv: "ANTHROPIC_API_KEY", value: "sk-ant-test-hydrate" },
     { name: "Google Gemini", credentialEnv: "GEMINI_API_KEY", value: "gemini-test-hydrate" },

From 90103593a47add41afd77789667ea0583796caa4 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Fri, 12 Jun 2026 17:35:20 -0700
Subject: [PATCH 11/11] ci(e2e): preserve NVIDIA key alias fallbacks

---
 docs/network-policy/approve-network-requests.mdx       |  3 ++-
 nemoclaw/src/index.ts                                  | 10 ++++++----
 scripts/nemoclaw-start.sh                              |  4 ++++
 src/lib/diagnostics/debug.test.ts                      |  2 +-
 src/lib/inference/nim.ts                               |  9 +++++++--
 src/lib/onboard.ts                                     |  8 ++++----
 .../checks/03-security-checks.sh                       |  8 ++++----
 test/e2e/test-hermes-discord-e2e.sh                    |  9 +++++++--
 test/e2e/test-hermes-e2e.sh                            |  4 +++-
 test/e2e/test-kimi-inference-compat.sh                 |  2 +-
 test/e2e/test-messaging-providers.sh                   |  4 ++++
 test/no-direct-credential-env.test.ts                  |  5 +++++
 test/onboard-selection-vllm.test.ts                    |  1 +
 test/rebuild-credential-hydration.test.ts              |  5 +++++
 14 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/docs/network-policy/approve-network-requests.mdx b/docs/network-policy/approve-network-requests.mdx
index 9dc0038996..f311709b9e 100644
--- a/docs/network-policy/approve-network-requests.mdx
+++ b/docs/network-policy/approve-network-requests.mdx
@@ -64,7 +64,8 @@ From the NemoClaw repository root, run the walkthrough script after you have onb
 ```
 
 This script opens a split tmux session with the TUI on the left and the agent on the right.
-The walkthrough requires tmux and the `NVIDIA_INFERENCE_API_KEY` environment variable, and it assumes an existing sandbox to attach to.
+The walkthrough requires tmux and the `NVIDIA_INFERENCE_API_KEY` environment variable.
+It assumes an existing sandbox to attach to.
 
 ## Related Topics
 
diff --git a/nemoclaw/src/index.ts b/nemoclaw/src/index.ts
index 3c24bd761d..188259f061 100644
--- a/nemoclaw/src/index.ts
+++ b/nemoclaw/src/index.ts
@@ -282,10 +282,12 @@ function registeredProviderForConfig(
   activeModel: string,
   providerCredentialEnv: string,
 ): ProviderPlugin {
-  const authLabel =
-    providerCredentialEnv === "NVIDIA_INFERENCE_API_KEY"
-      ? `NVIDIA API Key (${providerCredentialEnv})`
-      : `OpenAI API Key (${providerCredentialEnv})`;
+  const isNvidiaCredential =
+    providerCredentialEnv === "NVIDIA_INFERENCE_API_KEY" ||
+    providerCredentialEnv === "NVIDIA_API_KEY";
+  const authLabel = isNvidiaCredential
+    ? `NVIDIA API Key (${providerCredentialEnv})`
+    : `OpenAI API Key (${providerCredentialEnv})`;
 
   return {
     id: "inference",
diff --git a/scripts/nemoclaw-start.sh b/scripts/nemoclaw-start.sh
index d3924c7080..2ad417f574 100755
--- a/scripts/nemoclaw-start.sh
+++ b/scripts/nemoclaw-start.sh
@@ -1700,6 +1700,10 @@ prepare_gateway_token_for_current_command() {
 
 # Write an auth profile JSON for the NVIDIA API key so the gateway can authenticate.
 write_auth_profile() {
+  if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] && [ -n "${NVIDIA_API_KEY:-}" ]; then
+    export NVIDIA_INFERENCE_API_KEY="$NVIDIA_API_KEY"
+  fi
+
   if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
     return
   fi
diff --git a/src/lib/diagnostics/debug.test.ts b/src/lib/diagnostics/debug.test.ts
index 23716555e9..201583f139 100644
--- a/src/lib/diagnostics/debug.test.ts
+++ b/src/lib/diagnostics/debug.test.ts
@@ -18,7 +18,7 @@ import {
 
 describe("redact", () => {
   it("redacts NVIDIA_INFERENCE_API_KEY=value patterns", () => {
-    const key = ["NVIDIA", "API", "KEY"].join("_");
+    const key = ["NVIDIA", "INFERENCE", "API", "KEY"].join("_");
     expect(redact(`${key}=some-value`)).toBe(`${key}=<REDACTED>`);
   });
 
diff --git a/src/lib/inference/nim.ts b/src/lib/inference/nim.ts
index 5503eeed51..0767c3ebc5 100644
--- a/src/lib/inference/nim.ts
+++ b/src/lib/inference/nim.ts
@@ -821,13 +821,18 @@ export function startNimContainerByName(
     process.exit(1);
   }
 
-  // Resolve the NGC key: explicit arg wins, then NGC_API_KEY, then NVIDIA_INFERENCE_API_KEY
+  // Resolve the NGC key: explicit arg wins, then NGC_API_KEY, then
+  // NVIDIA_INFERENCE_API_KEY, then the legacy NVIDIA_API_KEY alias
   // (covers users who only set the NVIDIA key for cloud inference but reuse it
   // against NGC). Without this, NIM's in-container model-manifest download
   // returns "Authentication Error" and the container exits 0 a few seconds in.
   // Regression of #210 — see #3333.
   const ngcApiKey =
-    opts.ngcApiKey ?? process.env.NGC_API_KEY ?? process.env.NVIDIA_INFERENCE_API_KEY ?? "";
+    opts.ngcApiKey ??
+    process.env.NGC_API_KEY ??
+    process.env.NVIDIA_INFERENCE_API_KEY ??
+    process.env.NVIDIA_API_KEY ??
+    "";
   // Use `-e KEY` (no value) so the secret never appears in argv; pass the
   // value through the spawn env instead. Docker reads each named var from
   // its own process env and forwards it to the container.
diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts
index e89849e329..08d65d27d0 100644
--- a/src/lib/onboard.ts
+++ b/src/lib/onboard.ts
@@ -3646,11 +3646,11 @@ async function setupNim(
         hydrateCredentialEnv(credentialEnv);
 
         if (selected.key === "build") {
-          // Let NEMOCLAW_PROVIDER_KEY fill the canonical NVIDIA key without
-          // overriding an explicit env or migrated legacy credential.
+          // Let NEMOCLAW_PROVIDER_KEY fill NVIDIA_INFERENCE_API_KEY unless it or NVIDIA_API_KEY already has a value in env.
           const _nvProviderKey = (process.env.NEMOCLAW_PROVIDER_KEY || "").trim();
-          const existingNvidiaKey =
-            getCredential("NVIDIA_INFERENCE_API_KEY") || getCredential("NVIDIA_API_KEY") || "";
+          const existingNvidiaKey = ["NVIDIA_INFERENCE_API_KEY", "NVIDIA_API_KEY"]
+            .map((envName) => normalizeCredentialValue(process.env[envName] ?? ""))
+            .find(Boolean);
           if (_nvProviderKey && !existingNvidiaKey) {
             process.env.NVIDIA_INFERENCE_API_KEY = _nvProviderKey;
           }
diff --git a/test/e2e/e2e-cloud-experimental/checks/03-security-checks.sh b/test/e2e/e2e-cloud-experimental/checks/03-security-checks.sh
index 0c8a0afe79..911bcf13da 100755
--- a/test/e2e/e2e-cloud-experimental/checks/03-security-checks.sh
+++ b/test/e2e/e2e-cloud-experimental/checks/03-security-checks.sh
@@ -35,11 +35,11 @@ while IFS= read -r line; do
   esac
 done <<<"$ps_lines"
 
-# argv-style leak: NAME=<vendor key prefix>. The CI-compatible endpoint secret
-# does not use the public NVIDIA key prefix, so keep this marker optional.
+# argv-style leak: NAME=<first six key characters>. Set
+# NEMOCLAW_E2E_CLOUD_API_KEY_ARGV_PREFIX to override the marker, or to "" to disable it.
 _key_argv_prefix_marker="${NEMOCLAW_E2E_CLOUD_API_KEY_ARGV_PREFIX:-}"
-if [ -z "${NEMOCLAW_E2E_CLOUD_API_KEY_ARGV_PREFIX+x}" ] && [ "$_api_key_env_name" = "NVIDIA_INFERENCE_API_KEY" ]; then
-  _key_argv_prefix_marker=$'\x6e\x76\x61\x70\x69\x2d'
+if [ -z "${NEMOCLAW_E2E_CLOUD_API_KEY_ARGV_PREFIX+x}" ]; then
+  _key_argv_prefix_marker="$(printf '%.6s' "$_api_key_value")"
 fi
 if [ -n "$_key_argv_prefix_marker" ]; then
   _key_argv_needle="${_api_key_env_name}=${_key_argv_prefix_marker}"
diff --git a/test/e2e/test-hermes-discord-e2e.sh b/test/e2e/test-hermes-discord-e2e.sh
index 28574713c2..c3c24c2fa5 100755
--- a/test/e2e/test-hermes-discord-e2e.sh
+++ b/test/e2e/test-hermes-discord-e2e.sh
@@ -602,8 +602,9 @@ if [ -d "$REPO/.tmp" ]; then
 fi
 
 NVIDIA_INFERENCE_API_KEY_BACKUP="${NVIDIA_INFERENCE_API_KEY:-}"
-unset NVIDIA_INFERENCE_API_KEY
-info "NVIDIA_INFERENCE_API_KEY unset; gateway must hold the inference credential"
+NVIDIA_API_KEY_BACKUP="${NVIDIA_API_KEY:-}"
+unset NVIDIA_INFERENCE_API_KEY NVIDIA_API_KEY
+info "NVIDIA_INFERENCE_API_KEY and NVIDIA_API_KEY unset; gateway must hold the inference credential"
 
 HERMES_REBUILD_LOG="/tmp/nc-hermes-rebuild-noenv.log"
 if nemoclaw "$SANDBOX_NAME" rebuild --yes >"$HERMES_REBUILD_LOG" 2>&1; then
@@ -624,7 +625,11 @@ fi
 if [ -n "$NVIDIA_INFERENCE_API_KEY_BACKUP" ]; then
   export NVIDIA_INFERENCE_API_KEY="$NVIDIA_INFERENCE_API_KEY_BACKUP"
 fi
+if [ -n "$NVIDIA_API_KEY_BACKUP" ]; then
+  export NVIDIA_API_KEY="$NVIDIA_API_KEY_BACKUP"
+fi
 unset NVIDIA_INFERENCE_API_KEY_BACKUP
+unset NVIDIA_API_KEY_BACKUP
 
 section "Phase 9: Cleanup"
 
diff --git a/test/e2e/test-hermes-e2e.sh b/test/e2e/test-hermes-e2e.sh
index b10e718755..73db775f3b 100755
--- a/test/e2e/test-hermes-e2e.sh
+++ b/test/e2e/test-hermes-e2e.sh
@@ -203,7 +203,9 @@ else
   exit 1
 fi
 
-if curl -sf --max-time 10 https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then
+if curl -sf --max-time 10 \
+  -H "Authorization: Bearer ${NVIDIA_INFERENCE_API_KEY}" \
+  https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then
   pass "Network access to inference-api.nvidia.com"
 else
   fail "Cannot reach inference-api.nvidia.com"
diff --git a/test/e2e/test-kimi-inference-compat.sh b/test/e2e/test-kimi-inference-compat.sh
index f43f89d84d..31bab4b542 100755
--- a/test/e2e/test-kimi-inference-compat.sh
+++ b/test/e2e/test-kimi-inference-compat.sh
@@ -394,7 +394,7 @@ run_kimi_onboard() {
   export NEMOCLAW_POLICY_TIER=restricted
   export NEMOCLAW_POLICY_MODE=skip
   export COMPATIBLE_API_KEY="$KIMI_MOCK_API_KEY"
-  unset NVIDIA_INFERENCE_API_KEY OPENAI_API_KEY ANTHROPIC_API_KEY GEMINI_API_KEY
+  unset NVIDIA_INFERENCE_API_KEY NVIDIA_API_KEY OPENAI_API_KEY ANTHROPIC_API_KEY GEMINI_API_KEY
   unset TELEGRAM_BOT_TOKEN DISCORD_BOT_TOKEN SLACK_BOT_TOKEN SLACK_APP_TOKEN
 
   prepare_source_cli || prep_exit=$?
diff --git a/test/e2e/test-messaging-providers.sh b/test/e2e/test-messaging-providers.sh
index 817e49abfb..d788037fb6 100755
--- a/test/e2e/test-messaging-providers.sh
+++ b/test/e2e/test-messaging-providers.sh
@@ -633,6 +633,10 @@ openclaw_message_send_exit_code() {
 # ══════════════════════════════════════════════════════════════════
 section "Phase 0: Prerequisites"
 
+if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] && [ -n "${NVIDIA_API_KEY:-}" ]; then
+  export NVIDIA_INFERENCE_API_KEY="${NVIDIA_API_KEY}"
+  info "Using legacy NVIDIA_API_KEY as fallback for NVIDIA_INFERENCE_API_KEY"
+fi
 if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
   fail "NVIDIA_INFERENCE_API_KEY not set"
   exit 1
diff --git a/test/no-direct-credential-env.test.ts b/test/no-direct-credential-env.test.ts
index cceda10431..ce92749ada 100644
--- a/test/no-direct-credential-env.test.ts
+++ b/test/no-direct-credential-env.test.ts
@@ -20,11 +20,13 @@ describe("direct credential env guard", () => {
   it.each([
     // Assignments (write context) — allowed
     'process.env.NVIDIA_INFERENCE_API_KEY = "test";',
+    'process.env.NVIDIA_API_KEY = "test";',
     "process.env.OPENAI_API_KEY = value;",
     "process.env[credentialEnv] = providerKey;",
 
     // Deletions (write context) — allowed
     "delete process.env.NVIDIA_INFERENCE_API_KEY;",
+    "delete process.env.NVIDIA_API_KEY;",
     "delete process.env.ANTHROPIC_API_KEY;",
 
     // Non-credential env vars — allowed
@@ -56,6 +58,7 @@ describe("direct credential env guard", () => {
   it.each([
     // Static reads of known credential keys
     ["const key = process.env.NVIDIA_INFERENCE_API_KEY;", "NVIDIA_INFERENCE_API_KEY"],
+    ["const key = process.env.NVIDIA_API_KEY;", "NVIDIA_API_KEY"],
     ["const key = process.env.OPENAI_API_KEY;", "OPENAI_API_KEY"],
     ["const key = process.env.ANTHROPIC_API_KEY;", "ANTHROPIC_API_KEY"],
     ["const key = process.env.GEMINI_API_KEY;", "GEMINI_API_KEY"],
@@ -64,9 +67,11 @@ describe("direct credential env guard", () => {
 
     // Conditional check (read context)
     ["if (!process.env.NVIDIA_INFERENCE_API_KEY) {}", "NVIDIA_INFERENCE_API_KEY"],
+    ["if (!process.env.NVIDIA_API_KEY) {}", "NVIDIA_API_KEY"],
 
     // Bracketed string-literal reads
     ['const key = process.env["NVIDIA_INFERENCE_API_KEY"];', "NVIDIA_INFERENCE_API_KEY"],
+    ['const key = process.env["NVIDIA_API_KEY"];', "NVIDIA_API_KEY"],
     ['if (!process.env["OPENAI_API_KEY"]) {}', "OPENAI_API_KEY"],
 
     // Dynamic read with credential-containing variable name
diff --git a/test/onboard-selection-vllm.test.ts b/test/onboard-selection-vllm.test.ts
index 3aacb62b3d..91837942b6 100644
--- a/test/onboard-selection-vllm.test.ts
+++ b/test/onboard-selection-vllm.test.ts
@@ -424,6 +424,7 @@ async function runScenario(scenario) {
   process.env.NEMOCLAW_PROVIDER = "";
   process.env.NEMOCLAW_MODEL = "";
   process.env.NVIDIA_INFERENCE_API_KEY = "";
+  process.env.NVIDIA_API_KEY = "";
   delete require.cache[require.resolve(${onboardPath})];
   const { setupNim } = require(${onboardPath});
   const originalLog = console.log;
diff --git a/test/rebuild-credential-hydration.test.ts b/test/rebuild-credential-hydration.test.ts
index ce82510880..7c8d05391f 100644
--- a/test/rebuild-credential-hydration.test.ts
+++ b/test/rebuild-credential-hydration.test.ts
@@ -113,6 +113,11 @@ describe("Issue #2273 Layer 1: credential hydration from legacy storage", () =>
       credentialEnv: "NVIDIA_INFERENCE_API_KEY",
       value: "nvapi-test-hydrate",
     },
+    {
+      name: "NVIDIA Endpoints legacy alias",
+      credentialEnv: "NVIDIA_API_KEY",
+      value: "nvapi-test-hydrate",
+    },
     { name: "OpenAI", credentialEnv: "OPENAI_API_KEY", value: "sk-test-hydrate" },
     { name: "Anthropic", credentialEnv: "ANTHROPIC_API_KEY", value: "sk-ant-test-hydrate" },
     { name: "Google Gemini", credentialEnv: "GEMINI_API_KEY", value: "gemini-test-hydrate" },