diff --git a/.github/workflows/e2e-script.yaml b/.github/workflows/e2e-script.yaml
index 3185e08ad1..8fdef8f6ff 100644
--- a/.github/workflows/e2e-script.yaml
+++ b/.github/workflows/e2e-script.yaml
@@ -58,12 +58,7 @@ on:
         type: string
         default: ""
       nvidia_api_key:
-        description: Pass the NVIDIA_INFERENCE_API_KEY secret to the script.
-        required: false
-        type: boolean
-        default: false
-      nvidia_secret_as_compatible_api_key:
-        description: Expose NVIDIA_INFERENCE_API_KEY as COMPATIBLE_API_KEY for CI-only OpenAI-compatible inference.
+        description: Pass the NVIDIA_INFERENCE_API_KEY secret to the script and use it as the hosted CI custom endpoint credential.
         required: false
         type: boolean
         default: false
@@ -217,8 +212,8 @@ jobs:
             echo "::warning::Docker Hub login failed after 3 attempts; continuing with anonymous pulls."
           fi
 
-      - name: Export CI compatible inference environment
-        if: ${{ inputs.nvidia_secret_as_compatible_api_key }}
+      - name: Export hosted CI inference environment
+        if: ${{ inputs.nvidia_api_key }}
         env:
           NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         shell: bash
@@ -226,12 +221,12 @@ jobs:
           set -euo pipefail
 
           if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
-            echo "::error::NVIDIA_INFERENCE_API_KEY secret is required for CI compatible inference." >&2
+            echo "::error::NVIDIA_INFERENCE_API_KEY secret is required for hosted CI inference; it is withheld for workflow_dispatch target_ref runs." >&2
             exit 1
           fi
 
           {
-            printf 'NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE=1\n'
+            printf 'NEMOCLAW_E2E_USE_HOSTED_INFERENCE=1\n'
             printf 'NEMOCLAW_PROVIDER=custom\n'
             printf 'NEMOCLAW_ENDPOINT_URL=https://inference-api.nvidia.com/v1\n'
             printf 'NEMOCLAW_MODEL=nvidia/nvidia/nemotron-3-super-v3\n'
diff --git a/.github/workflows/nightly-e2e.yaml b/.github/workflows/nightly-e2e.yaml
index 5ed70533a2..ce97dce38b 100644
--- a/.github/workflows/nightly-e2e.yaml
+++ b/.github/workflows/nightly-e2e.yaml
@@ -3,9 +3,9 @@
 #
 # Nightly E2E tests:
 #
-#   cloud-e2e                Cloud inference (NVIDIA Endpoint API) on ubuntu-latest.
+#   cloud-e2e                Hosted inference (OpenAI-compatible endpoint) on ubuntu-latest.
 #   agent-turn-latency-e2e   Times one real OpenClaw turn and one real Hermes
-#                            turn through the configured NVIDIA Build model.
+#                            turn through the configured hosted inference model.
 #   messaging-providers-e2e  Validates messaging credential provider/placeholder/L7-proxy chain
 #                            for Telegram + Discord + Slack. Uses fake tokens. Slack additionally
 #                            exercises OpenShell provider-shaped alias resolution (#2085 follow-up).
@@ -99,7 +99,7 @@
 # Runs directly on the runner (not inside Docker) because OpenShell bootstraps
 # a K3s cluster inside a privileged Docker container — nesting would break networking.
 #
-# NVIDIA_INFERENCE_API_KEY for cloud-e2e:
+# NVIDIA_INFERENCE_API_KEY for hosted CI inference:
 #   - Repository secret: Settings → Secrets and variables → Actions → Repository secrets.
 #   - Environment secret: only available if the job sets `environment: <that environment name>`.
 #     (Storing the key under Environments / NVIDIA_INFERENCE_API_KEY without `environment:` here leaves the
@@ -204,10 +204,9 @@ jobs:
       artifact_path: "/tmp/nemoclaw-e2e-install.log"
       env_json: '{"NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE":"1","NEMOCLAW_NON_INTERACTIVE":"1","NEMOCLAW_RECREATE_SANDBOX":"1","NEMOCLAW_SANDBOX_NAME":"e2e-nightly"}'
       nvidia_api_key: true
-      nvidia_secret_as_compatible_api_key: true
       github_token: true
     secrets: &nightly-e2e-default-secrets
-      NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+      NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
       BRAVE_API_KEY: ${{ secrets.BRAVE_API_KEY }}
       DOCKERHUB_USERNAME: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.DOCKERHUB_USERNAME || '' }}
       DOCKERHUB_TOKEN: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.DOCKERHUB_TOKEN || '' }}
@@ -228,7 +227,6 @@ jobs:
       env_json: '{"NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE":"1","NEMOCLAW_NON_INTERACTIVE":"1","NEMOCLAW_POLICY_MODE":"custom","NEMOCLAW_POLICY_PRESETS":"npm,pypi","NEMOCLAW_RECREATE_SANDBOX":"1","NEMOCLAW_SANDBOX_NAME":"e2e-cloud-onboard","NEMOCLAW_TRACE_DIR":"/tmp/nemoclaw-traces"}'
       checked_out_ref_env: "NEMOCLAW_PUBLIC_INSTALL_REF"
       nvidia_api_key: true
-      nvidia_secret_as_compatible_api_key: true
       github_token: true
     secrets: *nightly-e2e-default-secrets
   cloud-inference-e2e:
@@ -245,7 +243,6 @@ jobs:
       artifact_path: "/tmp/nemoclaw-e2e-cloud-inference-install.log"
       env_json: '{"NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE":"1","NEMOCLAW_NON_INTERACTIVE":"1","NEMOCLAW_RECREATE_SANDBOX":"1","NEMOCLAW_SANDBOX_NAME":"e2e-cloud-inference"}'
       nvidia_api_key: true
-      nvidia_secret_as_compatible_api_key: true
     secrets: *nightly-e2e-default-secrets
   cron-preflight-inference-local-e2e:
     if: >-
@@ -277,7 +274,7 @@ jobs:
         /tmp/nemoclaw-e2e-openclaw-turn-latency-install.log
         /tmp/nemoclaw-e2e-hermes-turn-latency-install.log
         /tmp/nemoclaw-e2e-agent-turn-latency.json
-      env_json: '{"NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE":"1","NEMOCLAW_NON_INTERACTIVE":"1","NEMOCLAW_RECREATE_SANDBOX":"1","NEMOCLAW_TURN_LATENCY_MODEL":"nvidia/nemotron-3-ultra-550b-a55b"}'
+      env_json: '{"NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE":"1","NEMOCLAW_NON_INTERACTIVE":"1","NEMOCLAW_RECREATE_SANDBOX":"1"}'
       nvidia_api_key: true
     secrets: *nightly-e2e-default-secrets
   skill-agent-e2e:
@@ -382,7 +379,7 @@ jobs:
       github_token: true
       messaging_live_secrets: ${{ github.event_name != 'workflow_dispatch' || inputs.target_ref == '' }}
     secrets:
-      NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+      NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
       BRAVE_API_KEY: ${{ secrets.BRAVE_API_KEY }}
       DOCKERHUB_USERNAME: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.DOCKERHUB_USERNAME || '' }}
       DOCKERHUB_TOKEN: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.DOCKERHUB_TOKEN || '' }}
@@ -467,7 +464,13 @@ jobs:
 
       - name: Run OpenClaw TUI chat correlation E2E test
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
+          NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
           NEMOCLAW_RECREATE_SANDBOX: "1"
@@ -536,7 +539,13 @@ jobs:
 
       - name: "Run issue #4434 TUI unreachable inference E2E test"
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
+          NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_ISSUE_4434_LIVE: "1"
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
@@ -550,7 +559,7 @@ jobs:
         if: failure()
         shell: bash
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           GITHUB_TOKEN: ${{ github.token }}
         run: |
           set -euo pipefail
@@ -958,8 +967,13 @@ jobs:
 
       - name: Run token rotation E2E test
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
-          NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE: "1"
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
+          NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
           NEMOCLAW_POLICY_TIER: "open"
@@ -1250,7 +1264,13 @@ jobs:
 
       - name: Run sandbox operations E2E test
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
+          NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
           NEMOCLAW_POLICY_TIER: "open"
@@ -1551,11 +1571,12 @@ jobs:
         # COMPATIBLE_API_KEY. Keep checkout credentials disabled, do not pass
         # GITHUB_TOKEN, and rely on reviewed/maintainer-dispatched refs.
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_PROVIDER: custom
           NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
           NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
           NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/credential-migration
           NEMOCLAW_RUN_E2E_SCENARIOS: "1"
           NEMOCLAW_SANDBOX_NAME: "e2e-cred-migration"
@@ -1774,13 +1795,25 @@ jobs:
       - *dockerhub-auth-step
       - name: Install NemoClaw
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
+          NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: bash install.sh --non-interactive --yes-i-accept-third-party-software
       - name: Run onboard repair E2E test
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
+          NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: |
@@ -1812,13 +1845,25 @@ jobs:
       - *dockerhub-auth-step
       - name: Install NemoClaw
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
+          NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: bash install.sh --non-interactive --yes-i-accept-third-party-software
       - name: Run onboard resume E2E test
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
+          NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: |
@@ -1850,13 +1895,25 @@ jobs:
       - *dockerhub-auth-step
       - name: Install NemoClaw
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
+          NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: bash install.sh --non-interactive --yes-i-accept-third-party-software
       - name: Run onboard negative-path E2E test
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
+          NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: |
@@ -1889,13 +1946,25 @@ jobs:
       - *dockerhub-auth-step
       - name: Install NemoClaw
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
+          NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: bash install.sh --non-interactive --yes-i-accept-third-party-software
       - name: Run runtime overrides E2E test
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
+          NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
         run: |
@@ -1928,14 +1997,26 @@ jobs:
       - *dockerhub-auth-step
       - name: Install NemoClaw and onboard sandbox
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
+          NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
           NEMOCLAW_SANDBOX_NAME: "e2e-test"
         run: bash install.sh --non-interactive --yes-i-accept-third-party-software
       - name: Run credential sanitization E2E test
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
+          NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
           NEMOCLAW_SANDBOX_NAME: "e2e-test"
@@ -1970,14 +2051,26 @@ jobs:
       - *dockerhub-auth-step
       - name: Install NemoClaw and onboard sandbox
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
+          NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
           NEMOCLAW_SANDBOX_NAME: "e2e-test"
         run: bash install.sh --non-interactive --yes-i-accept-third-party-software
       - name: Run telegram injection E2E test
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
+          NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
           NEMOCLAW_SANDBOX_NAME: "e2e-test"
@@ -2050,7 +2143,13 @@ jobs:
 
       - name: Run launchable install-flow smoke test
         env:
-          NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
+          NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }}
           NEMOCLAW_NON_INTERACTIVE: "1"
           NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
           NEMOCLAW_SANDBOX_NAME: "e2e-launchable"
diff --git a/ci/env-var-doc-allowlist.json b/ci/env-var-doc-allowlist.json
index c04df7defc..c2b5ec56c8 100644
--- a/ci/env-var-doc-allowlist.json
+++ b/ci/env-var-doc-allowlist.json
@@ -42,5 +42,17 @@
   {
     "name": "NEMOCLAW_E2E_FORCE_FAIL_AT_STEP",
     "reason": "Internal E2E-only selector naming the onboarding step where deterministic fault injection should exit. Used only with NEMOCLAW_E2E_FAILURE_INJECTION in test scripts."
+  },
+  {
+    "name": "NEMOCLAW_E2E_USE_HOSTED_INFERENCE",
+    "reason": "Internal E2E-only sentinel that tells CI to route the repository NVIDIA_INFERENCE_API_KEY secret through the hosted inference-api.nvidia.com OpenAI-compatible endpoint. Not user-facing."
+  },
+  {
+    "name": "NEMOCLAW_COMPAT_MODEL",
+    "reason": "Internal E2E/test override for the model used by OpenAI-compatible endpoint scenarios. User-facing custom endpoint model selection is collected through onboard prompts or NEMOCLAW_MODEL."
+  },
+  {
+    "name": "NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL",
+    "reason": "Legacy E2E-only model override used by cloud and hosted-inference live test scripts. Not a supported production configuration knob."
   }
 ]
diff --git a/src/lib/onboard/providers.test.ts b/src/lib/onboard/providers.test.ts
index b5b25e4d5f..56090df090 100644
--- a/src/lib/onboard/providers.test.ts
+++ b/src/lib/onboard/providers.test.ts
@@ -7,36 +7,87 @@ type RunResult = { status: number; stdout?: string; stderr?: string };
 type RunOptions = { env?: Record<string, string | undefined> };
 type RunOpenshell = (command: string[], opts?: RunOptions) => RunResult;
 
-const { buildProviderArgs, providerExistsInGateway, upsertProvider, upsertMessagingProviders } =
-  require("../../../dist/lib/onboard/providers") as {
-    buildProviderArgs: (
-      action: "create" | "update",
-      name: string,
-      type: string,
-      credentialEnv: string,
-      baseUrl: string | null,
-    ) => string[];
-    providerExistsInGateway: (name: string, runOpenshell: RunOpenshell) => boolean;
-    upsertProvider: (
-      name: string,
-      type: string,
-      credentialEnv: string,
-      baseUrl: string | null,
-      env: Record<string, string | undefined>,
-      runOpenshell: RunOpenshell,
-      options?: { replaceExisting?: boolean },
-    ) => { ok: boolean; status?: number; message?: string };
-    upsertMessagingProviders: (
-      tokenDefs: Array<{
-        name: string;
-        envKey: string;
-        token: string | null;
-        providerType?: string;
-      }>,
-      runOpenshell: RunOpenshell,
-      options?: { replaceExisting?: boolean; bestEffort?: boolean },
-    ) => string[];
-  };
+const {
+  HOSTED_INFERENCE_ENDPOINT_URL,
+  HOSTED_INFERENCE_MODEL,
+  buildProviderArgs,
+  getRequestedModelHint,
+  getRequestedProviderHint,
+  providerExistsInGateway,
+  stageHostedInferenceSourceSecretEnv,
+  upsertProvider,
+  upsertMessagingProviders,
+} = require("../../../dist/lib/onboard/providers") as {
+  HOSTED_INFERENCE_ENDPOINT_URL: string;
+  HOSTED_INFERENCE_MODEL: string;
+  buildProviderArgs: (
+    action: "create" | "update",
+    name: string,
+    type: string,
+    credentialEnv: string,
+    baseUrl: string | null,
+  ) => string[];
+  getRequestedModelHint: (nonInteractive: boolean) => string | null;
+  getRequestedProviderHint: (nonInteractive: boolean) => string | null;
+  providerExistsInGateway: (name: string, runOpenshell: RunOpenshell) => boolean;
+  stageHostedInferenceSourceSecretEnv: () => boolean;
+  upsertProvider: (
+    name: string,
+    type: string,
+    credentialEnv: string,
+    baseUrl: string | null,
+    env: Record<string, string | undefined>,
+    runOpenshell: RunOpenshell,
+    options?: { replaceExisting?: boolean },
+  ) => { ok: boolean; status?: number; message?: string };
+  upsertMessagingProviders: (
+    tokenDefs: Array<{
+      name: string;
+      envKey: string;
+      token: string | null;
+      providerType?: string;
+    }>,
+    runOpenshell: RunOpenshell,
+    options?: { replaceExisting?: boolean; bestEffort?: boolean },
+  ) => string[];
+};
+
+// Runs testBody with the provider env vars cleared and `next` applied, then restores them.
+function withProviderEnv(next: Record<string, string | undefined>, testBody: () => void): void {
+  // These variables are always cleared first; keys named in `next` are managed as well.
+  const managedNames = new Set([
+    "NVIDIA_INFERENCE_API_KEY",
+    "NEMOCLAW_PROVIDER",
+    "NEMOCLAW_ENDPOINT_URL",
+    "NEMOCLAW_MODEL",
+    "NEMOCLAW_COMPAT_MODEL",
+    "NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL",
+    "NEMOCLAW_E2E_USE_HOSTED_INFERENCE",
+    "COMPATIBLE_API_KEY",
+    ...Object.keys(next),
+  ]);
+  // Writes one variable; an undefined value removes it from process.env.
+  const writeEnv = (name: string, value: string | undefined): void => {
+    if (value === undefined) {
+      delete process.env[name];
+    } else {
+      process.env[name] = value;
+    }
+  };
+  // Snapshot the current values before clearing them.
+  const snapshot = new Map<string, string | undefined>();
+  for (const name of managedNames) {
+    snapshot.set(name, process.env[name]);
+    delete process.env[name];
+  }
+  Object.entries(next).forEach(([name, value]) => writeEnv(name, value));
+  try {
+    testBody();
+  } finally {
+    // Put every managed variable back, even when testBody throws.
+    snapshot.forEach((value, name) => writeEnv(name, value));
+  }
+}
 
 describe("onboard provider helpers", () => {
   it("builds create arguments for generic providers", () => {
@@ -239,6 +290,55 @@ describe("onboard provider helpers", () => {
     expect(commands[1]).toMatch(/--credential NVIDIA_INFERENCE_API_KEY/);
   });
 
+  it("stages non-nvapi NVIDIA_INFERENCE_API_KEY as hosted custom inference", () => {
+    // Only the source secret is set; its padding shows that the staged copy is trimmed.
+    const sourceOnlyEnv = { NVIDIA_INFERENCE_API_KEY: "  repo-hosted-key  " };
+    const assertHostedDefaults = (): void => {
+      // The staging call reports that it applied the hosted values.
+      expect(stageHostedInferenceSourceSecretEnv()).toBe(true);
+      expect(getRequestedProviderHint(true)).toBe("custom");
+      expect(getRequestedModelHint(true)).toBe(HOSTED_INFERENCE_MODEL);
+      // The staged values are expected to be visible in process.env afterwards.
+      expect(process.env.NEMOCLAW_PROVIDER).toBe("custom");
+      expect(process.env.NEMOCLAW_ENDPOINT_URL).toBe(HOSTED_INFERENCE_ENDPOINT_URL);
+      expect(process.env.NEMOCLAW_MODEL).toBe(HOSTED_INFERENCE_MODEL);
+      expect(process.env.NEMOCLAW_COMPAT_MODEL).toBe(HOSTED_INFERENCE_MODEL);
+      expect(process.env.COMPATIBLE_API_KEY).toBe("repo-hosted-key");
+    };
+    withProviderEnv(sourceOnlyEnv, assertHostedDefaults);
+  });
+
+  it("keeps explicit cloud provider selection on the Build provider path", () => {
+    // An explicit "cloud" provider must not be redirected to the hosted custom endpoint.
+    const explicitCloudEnv = {
+      NVIDIA_INFERENCE_API_KEY: "repo-hosted-key",
+      NEMOCLAW_PROVIDER: "cloud",
+    };
+    withProviderEnv(explicitCloudEnv, () => {
+      // Nothing is staged, and the "cloud" alias is reported as "build".
+      expect(stageHostedInferenceSourceSecretEnv()).toBe(false);
+      expect(getRequestedProviderHint(true)).toBe("build");
+      expect(process.env.COMPATIBLE_API_KEY).toBeUndefined();
+      expect(process.env.NEMOCLAW_ENDPOINT_URL).toBeUndefined();
+    });
+  });
+
+  it("preserves explicit custom provider credentials when NVIDIA_INFERENCE_API_KEY is unrelated", () => {
+    // A caller-supplied COMPATIBLE_API_KEY must survive alongside the source secret.
+    const explicitCustomEnv = {
+      COMPATIBLE_API_KEY: "custom-endpoint-key",
+      NVIDIA_INFERENCE_API_KEY: "repo-hosted-key",
+      NEMOCLAW_PROVIDER: "custom",
+    };
+    withProviderEnv(explicitCustomEnv, () => {
+      // No hosted endpoint is staged and the existing credential is left untouched.
+      expect(stageHostedInferenceSourceSecretEnv()).toBe(false);
+      expect(getRequestedProviderHint(true)).toBe("custom");
+      expect(process.env.COMPATIBLE_API_KEY).toBe("custom-endpoint-key");
+      expect(process.env.NEMOCLAW_ENDPOINT_URL).toBeUndefined();
+    });
+  });
+
   it("returns redacted error details when create or update fails", () => {
     const result = upsertProvider("bad-provider", "generic", "SOME_KEY", null, {}, (command) => {
       if (command.includes("get")) return { status: 1, stdout: "", stderr: "" };
diff --git a/src/lib/onboard/providers.ts b/src/lib/onboard/providers.ts
index edb2d117ef..4ba749c2e1 100644
--- a/src/lib/onboard/providers.ts
+++ b/src/lib/onboard/providers.ts
@@ -5,6 +5,7 @@
 // Provider metadata, lookup helpers, and gateway provider CRUD.
 
 const { redact } = require("../runner");
+const { normalizeCredentialValue } = require("../credentials/store");
 const {
   DEFAULT_CLOUD_MODEL,
   DEFAULT_HERMES_PROVIDER_MODEL,
@@ -22,6 +23,10 @@ const OPENAI_ENDPOINT_URL = "https://api.openai.com/v1";
 const ANTHROPIC_ENDPOINT_URL = "https://api.anthropic.com";
 const GEMINI_ENDPOINT_URL = "https://generativelanguage.googleapis.com/v1beta/openai/";
 const HERMES_INFERENCE_ENDPOINT_URL = "https://inference-api.nousresearch.com/v1";
+const HOSTED_INFERENCE_SOURCE_ENV = "NVIDIA_INFERENCE_API_KEY";
+const HOSTED_INFERENCE_CREDENTIAL_ENV = "COMPATIBLE_API_KEY";
+const HOSTED_INFERENCE_ENDPOINT_URL = "https://inference-api.nvidia.com/v1";
+const HOSTED_INFERENCE_MODEL = "nvidia/nvidia/nemotron-3-super-v3";
 
 const REMOTE_PROVIDER_CONFIG = {
   build: {
@@ -167,6 +172,7 @@ function getEffectiveProviderName(providerKey) {
 // ── Non-interactive helpers ──────────────────────────────────────
 
 function getNonInteractiveProvider() {
+  stageHostedInferenceSourceSecretEnv();
   const providerKey = (process.env.NEMOCLAW_PROVIDER || "").trim().toLowerCase();
   if (!providerKey) return null;
   const aliases = {
@@ -208,6 +214,50 @@ function getNonInteractiveProvider() {
   return normalized;
 }
 
+/**
+ * Stage the hosted-inference source secret as the custom-endpoint credential.
+ *
+ * If NVIDIA_INFERENCE_API_KEY normalizes to a non-empty value and the run
+ * qualifies, copy it into COMPATIBLE_API_KEY and default the provider,
+ * endpoint URL and model variables. Endpoint and model values that are
+ * already set (after trimming) are kept.
+ *
+ * A run qualifies when NEMOCLAW_PROVIDER is unset or "custom" and either
+ * NEMOCLAW_E2E_USE_HOSTED_INFERENCE=1 or the source key has no "nvapi-" prefix.
+ * Without the hosted flag, an explicit "custom" provider that already has a
+ * COMPATIBLE_API_KEY is left untouched. Other providers are never staged.
+ *
+ * @returns {boolean} true when process.env was updated, false otherwise.
+ */
+function stageHostedInferenceSourceSecretEnv() {
+  const env = process.env;
+  const trimmed = (name) => (env[name] || "").trim();
+
+  const sourceKey = normalizeCredentialValue(env[HOSTED_INFERENCE_SOURCE_ENV] ?? "");
+  if (!sourceKey) return false;
+
+  // Only "unset" and "custom" can ever stage, so no provider alias table is needed here.
+  const provider = trimmed("NEMOCLAW_PROVIDER").toLowerCase();
+  if (provider && provider !== "custom") return false;
+
+  const hostedFlag = trimmed("NEMOCLAW_E2E_USE_HOSTED_INFERENCE") === "1";
+  const compatibleKey = normalizeCredentialValue(env[HOSTED_INFERENCE_CREDENTIAL_ENV] ?? "");
+  const looksHosted = !sourceKey.startsWith("nvapi-") && !(provider && compatibleKey);
+  if (!hostedFlag && !looksHosted) return false;
+
+  if (!provider) env.NEMOCLAW_PROVIDER = "custom";
+  env.NEMOCLAW_ENDPOINT_URL = trimmed("NEMOCLAW_ENDPOINT_URL") || HOSTED_INFERENCE_ENDPOINT_URL;
+  const model =
+    trimmed("NEMOCLAW_MODEL") ||
+    trimmed("NEMOCLAW_COMPAT_MODEL") ||
+    trimmed("NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL") ||
+    HOSTED_INFERENCE_MODEL;
+  env.NEMOCLAW_MODEL = model;
+  env.NEMOCLAW_COMPAT_MODEL = trimmed("NEMOCLAW_COMPAT_MODEL") || model;
+  env[HOSTED_INFERENCE_CREDENTIAL_ENV] = sourceKey;
+  return true;
+}
+
 function getNonInteractiveModel(providerKey) {
   const model = (process.env.NEMOCLAW_MODEL || "").trim();
   if (!model) return null;
@@ -399,8 +449,13 @@ module.exports = {
   OLLAMA_PROXY_CREDENTIAL_ENV,
   VLLM_LOCAL_CREDENTIAL_ENV,
   DISCORD_SNOWFLAKE_RE,
+  HOSTED_INFERENCE_SOURCE_ENV,
+  HOSTED_INFERENCE_CREDENTIAL_ENV,
+  HOSTED_INFERENCE_ENDPOINT_URL,
+  HOSTED_INFERENCE_MODEL,
   getProviderLabel,
   getEffectiveProviderName,
+  stageHostedInferenceSourceSecretEnv,
   getNonInteractiveProvider,
   getNonInteractiveModel,
   getRequestedProviderHint,
diff --git a/test/e2e-script-workflow.test.ts b/test/e2e-script-workflow.test.ts
index 512e9d377a..5fb007becb 100644
--- a/test/e2e-script-workflow.test.ts
+++ b/test/e2e-script-workflow.test.ts
@@ -45,6 +45,9 @@ const require = createRequire(import.meta.url);
 const traceTiming = require("../scripts/scorecard/analyze-trace-timing.ts") as TraceTimingAnalyzer;
 
 const TRACE_SUMMARY_FILE = "cloud-onboard-trace-timing-summary.json";
+const TRUSTED_REF_GUARD = "github.event_name != 'workflow_dispatch' || inputs.target_ref == ''";
+const GUARDED_HOSTED_INFERENCE_SECRET = `\${{ (${TRUSTED_REF_GUARD}) && secrets.NVIDIA_INFERENCE_API_KEY || '' }}`;
+const RAW_HOSTED_INFERENCE_SECRET = "${{ secrets.NVIDIA_INFERENCE_API_KEY }}";
 
 function timingSummary(
   phases: Record<string, number> = { "nemoclaw.onboard.phase.preflight": 1000 },
@@ -127,6 +130,12 @@ function traceGithubFixture(options: {
   return github;
 }
 
+// True when any value in the given env map mentions the hosted inference secret expression.
+function envReferencesHostedInferenceSecret(env?: Record<string, string>): boolean {
+  const mentionsSecret = (v: unknown) => String(v).includes("secrets.NVIDIA_INFERENCE_API_KEY");
+  return Object.values(env ?? {}).some(mentionsSecret);
+}
+
 // Direct legacy bash E2Es are being migrated toward Vitest coverage. Keep the
 // top-level shell suite frozen so new coverage starts in the newer E2E surface
 // unless maintainers intentionally update this allowlist.
@@ -385,22 +394,21 @@ describe("E2E reusable workflow contract", () => {
   it("passes only named secrets to reusable nightly jobs", () => {
     const reusableJobs = reusableNightlyJobs(nightlyWorkflow);
     const defaultSecrets = {
-      NVIDIA_INFERENCE_API_KEY: "${{ secrets.NVIDIA_INFERENCE_API_KEY }}",
+      NVIDIA_INFERENCE_API_KEY: GUARDED_HOSTED_INFERENCE_SECRET,
       BRAVE_API_KEY: "${{ secrets.BRAVE_API_KEY }}",
       DOCKERHUB_USERNAME:
         "${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.DOCKERHUB_USERNAME || '' }}",
       DOCKERHUB_TOKEN:
         "${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.DOCKERHUB_TOKEN || '' }}",
     };
-    const trustedRefGuard = "github.event_name != 'workflow_dispatch' || inputs.target_ref == ''";
     const messagingLiveSecrets = {
-      TELEGRAM_BOT_TOKEN_REAL: `\${{ (${trustedRefGuard}) && secrets.TELEGRAM_BOT_TOKEN_REAL || '' }}`,
-      TELEGRAM_CHAT_ID_E2E: `\${{ (${trustedRefGuard}) && secrets.TELEGRAM_CHAT_ID_E2E || '' }}`,
-      DISCORD_BOT_TOKEN_REAL: `\${{ (${trustedRefGuard}) && secrets.DISCORD_BOT_TOKEN_REAL || '' }}`,
-      DISCORD_CHANNEL_ID_E2E: `\${{ (${trustedRefGuard}) && secrets.DISCORD_CHANNEL_ID_E2E || '' }}`,
-      SLACK_BOT_TOKEN_REAL: `\${{ (${trustedRefGuard}) && secrets.SLACK_BOT_TOKEN_REAL || '' }}`,
-      SLACK_APP_TOKEN_REAL: `\${{ (${trustedRefGuard}) && secrets.SLACK_APP_TOKEN_REAL || '' }}`,
-      SLACK_CHANNEL_ID_E2E: `\${{ (${trustedRefGuard}) && secrets.SLACK_CHANNEL_ID_E2E || '' }}`,
+      TELEGRAM_BOT_TOKEN_REAL: `\${{ (${TRUSTED_REF_GUARD}) && secrets.TELEGRAM_BOT_TOKEN_REAL || '' }}`,
+      TELEGRAM_CHAT_ID_E2E: `\${{ (${TRUSTED_REF_GUARD}) && secrets.TELEGRAM_CHAT_ID_E2E || '' }}`,
+      DISCORD_BOT_TOKEN_REAL: `\${{ (${TRUSTED_REF_GUARD}) && secrets.DISCORD_BOT_TOKEN_REAL || '' }}`,
+      DISCORD_CHANNEL_ID_E2E: `\${{ (${TRUSTED_REF_GUARD}) && secrets.DISCORD_CHANNEL_ID_E2E || '' }}`,
+      SLACK_BOT_TOKEN_REAL: `\${{ (${TRUSTED_REF_GUARD}) && secrets.SLACK_BOT_TOKEN_REAL || '' }}`,
+      SLACK_APP_TOKEN_REAL: `\${{ (${TRUSTED_REF_GUARD}) && secrets.SLACK_APP_TOKEN_REAL || '' }}`,
+      SLACK_CHANNEL_ID_E2E: `\${{ (${TRUSTED_REF_GUARD}) && secrets.SLACK_CHANNEL_ID_E2E || '' }}`,
     };
 
     expect(reusableJobs.length).toBeGreaterThan(20);
@@ -534,7 +542,12 @@ describe("E2E reusable workflow contract", () => {
     expect(runStep?.run).toContain("npx vitest run --project e2e-scenarios-live");
     expect(runStep?.run).toContain("test/e2e-scenario/live/credential-migration.test.ts");
     expect(runStep?.run).not.toContain("test/e2e/test-credential-migration.sh");
-    expect(runStep?.env?.NVIDIA_INFERENCE_API_KEY).toBe("${{ secrets.NVIDIA_INFERENCE_API_KEY }}");
+    expect(runStep?.env?.NVIDIA_INFERENCE_API_KEY).toBe(GUARDED_HOSTED_INFERENCE_SECRET);
+    expect(runStep?.env?.NEMOCLAW_PROVIDER).toBe("custom");
+    expect(runStep?.env?.NEMOCLAW_ENDPOINT_URL).toBe("https://inference-api.nvidia.com/v1");
+    expect(runStep?.env?.NEMOCLAW_MODEL).toBe("nvidia/nvidia/nemotron-3-super-v3");
+    expect(runStep?.env?.NEMOCLAW_COMPAT_MODEL).toBe("nvidia/nvidia/nemotron-3-super-v3");
+    expect(runStep?.env?.COMPATIBLE_API_KEY).toBe(GUARDED_HOSTED_INFERENCE_SECRET);
     expect(runStep?.env?.GITHUB_TOKEN).toBeUndefined();
     expect(runStep?.env?.NEMOCLAW_RUN_E2E_SCENARIOS).toBe("1");
     expect(runStep?.env?.NEMOCLAW_SANDBOX_NAME).toBe("e2e-cred-migration");
@@ -869,42 +882,96 @@ describe("E2E reusable workflow contract", () => {
     expect(exportStep?.run).toContain('>> "$GITHUB_ENV"');
   });
 
-  it("can route selected reusable jobs through the CI compatible inference endpoint", () => {
+  it("routes reusable hosted inference jobs through the hosted custom endpoint", () => {
     const exportStep = runnerWorkflow.jobs.run.steps.find(
-      (step) => step.name === "Export CI compatible inference environment",
+      (step) => step.name === "Export hosted CI inference environment",
     );
-    const expectedJobs = ["cloud-e2e", "cloud-onboard-e2e", "cloud-inference-e2e"];
     const workflowCall = runnerWorkflow.on?.workflow_call ?? runnerWorkflow.true?.workflow_call;
+    const hostedJobs = reusableNightlyJobs(nightlyWorkflow).filter(
+      ([, job]) => String(job.with?.nvidia_api_key) === "true",
+    );
 
-    expect(workflowCall?.inputs?.nvidia_secret_as_compatible_api_key).toMatchObject({
+    expect(workflowCall?.inputs?.nvidia_api_key).toMatchObject({
       required: false,
       type: "boolean",
       default: false,
     });
-    expect(exportStep?.if).toBe("${{ inputs.nvidia_secret_as_compatible_api_key }}");
-    expect(exportStep?.env?.NVIDIA_INFERENCE_API_KEY).toBe(
-      "${{ secrets.NVIDIA_INFERENCE_API_KEY }}",
-    );
-    expect(exportStep?.run).toContain("NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE=1");
+    expect(workflowCall?.inputs?.nvidia_secret_as_compatible_api_key).toBeUndefined();
+    expect(exportStep?.if).toBe("${{ inputs.nvidia_api_key }}");
+    expect(exportStep?.env?.NVIDIA_INFERENCE_API_KEY).toBe(RAW_HOSTED_INFERENCE_SECRET);
+    expect(exportStep?.run).toContain("withheld for workflow_dispatch target_ref runs");
+    expect(exportStep?.run).toContain("NEMOCLAW_E2E_USE_HOSTED_INFERENCE=1");
     expect(exportStep?.run).toContain("NEMOCLAW_PROVIDER=custom");
     expect(exportStep?.run).toContain("NEMOCLAW_ENDPOINT_URL=https://inference-api.nvidia.com/v1");
     expect(exportStep?.run).toContain("NEMOCLAW_MODEL=nvidia/nvidia/nemotron-3-super-v3");
     expect(exportStep?.run).toContain("NEMOCLAW_COMPAT_MODEL=nvidia/nvidia/nemotron-3-super-v3");
     expect(exportStep?.run).toContain("COMPATIBLE_API_KEY=%s");
 
-    for (const name of expectedJobs) {
-      expect(nightlyWorkflow.jobs[name].with?.nvidia_secret_as_compatible_api_key, name).toBe(true);
+    expect(hostedJobs.length).toBeGreaterThan(20);
+    for (const [name, job] of hostedJobs) {
+      expect(job.with?.nvidia_secret_as_compatible_api_key, name).toBeUndefined();
     }
   });
 
-  it("routes legacy token rotation through the CI compatible inference endpoint", () => {
+  it("routes direct hosted-secret jobs through the hosted custom inference endpoint", () => {
+    const trustedWorkflowSecretExceptions = new Set([
+      "issue-4434-tui-unreachable-inference-e2e:Sanitize issue #4434 logs on failure",
+    ]);
+    const directSecretSteps = Object.entries(nightlyWorkflow.jobs).flatMap(([jobName, job]) =>
+      job.uses
+        ? []
+        : (job.steps ?? [])
+            .filter((step) => envReferencesHostedInferenceSecret(step.env))
+            .map((step) => ({ jobName, step })),
+    );
+    const directSecretStepNames = directSecretSteps.map(
+      ({ jobName, step }) => `${jobName}:${step.name ?? "<unnamed>"}`,
+    );
+
+    expect(directSecretStepNames).toEqual(
+      expect.arrayContaining([
+        "openclaw-tui-chat-correlation-e2e:Run OpenClaw TUI chat correlation E2E test",
+        "issue-4434-tui-unreachable-inference-e2e:Run issue #4434 TUI unreachable inference E2E test",
+        "issue-4434-tui-unreachable-inference-e2e:Sanitize issue #4434 logs on failure",
+        "token-rotation-e2e:Run token rotation E2E test",
+        "sandbox-operations-e2e:Run sandbox operations E2E test",
+        "credential-migration-e2e:Run credential migration Vitest test",
+        "onboard-repair-e2e:Install NemoClaw",
+        "onboard-repair-e2e:Run onboard repair E2E test",
+        "onboard-resume-e2e:Install NemoClaw",
+        "onboard-resume-e2e:Run onboard resume E2E test",
+        "onboard-negative-paths-e2e:Install NemoClaw",
+        "onboard-negative-paths-e2e:Run onboard negative-path E2E test",
+        "runtime-overrides-e2e:Install NemoClaw",
+        "runtime-overrides-e2e:Run runtime overrides E2E test",
+        "credential-sanitization-e2e:Install NemoClaw and onboard sandbox",
+        "telegram-injection-e2e:Install NemoClaw and onboard sandbox",
+        "launchable-smoke-e2e:Run launchable install-flow smoke test",
+      ]),
+    );
+
+    expect(directSecretSteps.length).toBeGreaterThanOrEqual(17);
+    for (const { jobName, step } of directSecretSteps) {
+      const stepKey = `${jobName}:${step.name ?? "<unnamed>"}`;
+      expect(step.env?.NVIDIA_INFERENCE_API_KEY, stepKey).toBe(GUARDED_HOSTED_INFERENCE_SECRET);
+      if (trustedWorkflowSecretExceptions.has(stepKey)) {
+        expect(step.run, stepKey).toContain("[REDACTED_NVIDIA_INFERENCE_API_KEY]");
+        continue;
+      }
+      expect(step.env?.NEMOCLAW_PROVIDER, jobName).toBe("custom");
+      expect(step.env?.NEMOCLAW_ENDPOINT_URL, jobName).toBe("https://inference-api.nvidia.com/v1");
+      expect(step.env?.NEMOCLAW_MODEL, jobName).toBe("nvidia/nvidia/nemotron-3-super-v3");
+      expect(step.env?.NEMOCLAW_COMPAT_MODEL, jobName).toBe("nvidia/nvidia/nemotron-3-super-v3");
+      expect(step.env?.COMPATIBLE_API_KEY, jobName).toBe(GUARDED_HOSTED_INFERENCE_SECRET);
+    }
+
     const runStep = nightlyWorkflow.jobs["token-rotation-e2e"].steps?.find(
       (step) => step.name === "Run token rotation E2E test",
     );
     const script = readFileSync(new URL("./e2e/test-token-rotation.sh", import.meta.url), "utf8");
 
-    expect(runStep?.env?.NVIDIA_INFERENCE_API_KEY).toBe("${{ secrets.NVIDIA_INFERENCE_API_KEY }}");
-    expect(runStep?.env?.NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE).toBe("1");
+    expect(runStep?.env?.NVIDIA_INFERENCE_API_KEY).toBe(GUARDED_HOSTED_INFERENCE_SECRET);
+    expect(runStep?.env?.NEMOCLAW_E2E_USE_HOSTED_INFERENCE).toBe("1");
     expect(script).toContain("lib/ci-compatible-inference.sh");
     expect(script).toContain("nemoclaw_e2e_configure_compatible_inference");
   });
diff --git a/test/e2e/lib/ci-compatible-inference.sh b/test/e2e/lib/ci-compatible-inference.sh
index 15b64ecdbe..94c3cb1867 100755
--- a/test/e2e/lib/ci-compatible-inference.sh
+++ b/test/e2e/lib/ci-compatible-inference.sh
@@ -2,16 +2,25 @@
 # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-# CI-only compatibility shim: some live E2E lanes use the repository's
-# NVIDIA_INFERENCE_API_KEY secret against an OpenAI-compatible endpoint instead
-# of the public NVIDIA Endpoints provider. Keep this helper in test/e2e so the
+# CI-only hosted inference shim: live E2E lanes use the repository's
+# NVIDIA_INFERENCE_API_KEY secret against the hosted OpenAI-compatible endpoint
+# at inference-api.nvidia.com. Keep this helper in test/e2e so the
 # product-facing provider/default endpoint remain unchanged.
 
 NEMOCLAW_E2E_COMPATIBLE_INFERENCE_MODEL_DEFAULT="nvidia/nvidia/nemotron-3-super-v3"
+NEMOCLAW_E2E_HOSTED_INFERENCE_PROVIDER_DEFAULT="compatible-endpoint"
 NEMOCLAW_E2E_NVIDIA_INFERENCE_MODEL_DEFAULT="nvidia/nemotron-3-super-120b-a12b"
 
 nemoclaw_e2e_using_compatible_inference() {
-  [ "${NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE:-}" = "1" ]
+  if [ "${NEMOCLAW_E2E_USE_HOSTED_INFERENCE:-}" = "1" ]; then
+    return 0
+  fi
+  case "${NEMOCLAW_PROVIDER:-}" in
+    build | cloud | nvidia | nvidia-prod)
+      return 1
+      ;;
+  esac
+  [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" != nvapi-* ]]
 }
 
 nemoclaw_e2e_configure_compatible_inference() {
@@ -19,22 +28,20 @@ nemoclaw_e2e_configure_compatible_inference() {
     return 0
   fi
 
+  if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
+    echo "ERROR: NVIDIA_INFERENCE_API_KEY is required for hosted CI inference" >&2
+    return 1
+  fi
+
   export NEMOCLAW_PROVIDER="${NEMOCLAW_PROVIDER:-custom}"
   export NEMOCLAW_ENDPOINT_URL="${NEMOCLAW_ENDPOINT_URL:-https://inference-api.nvidia.com/v1}"
   export NEMOCLAW_MODEL="${NEMOCLAW_MODEL:-${NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL:-$NEMOCLAW_E2E_COMPATIBLE_INFERENCE_MODEL_DEFAULT}}"
   export NEMOCLAW_COMPAT_MODEL="${NEMOCLAW_COMPAT_MODEL:-$NEMOCLAW_MODEL}"
-
-  if [ -z "${COMPATIBLE_API_KEY:-}" ] && [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
-    export COMPATIBLE_API_KEY="$NVIDIA_INFERENCE_API_KEY"
-  fi
+  export COMPATIBLE_API_KEY="$NVIDIA_INFERENCE_API_KEY"
 }
 
 nemoclaw_e2e_hosted_inference_key() {
-  if nemoclaw_e2e_using_compatible_inference; then
-    printf '%s' "${COMPATIBLE_API_KEY:-${NVIDIA_INFERENCE_API_KEY:-}}"
-  else
-    printf '%s' "${NVIDIA_INFERENCE_API_KEY:-}"
-  fi
+  printf '%s' "${NVIDIA_INFERENCE_API_KEY:-}"
 }
 
 nemoclaw_e2e_hosted_inference_base_url() {
@@ -45,6 +52,49 @@ nemoclaw_e2e_hosted_inference_base_url() {
   fi
 }
 
+# Print the route provider name expected for the active inference mode (no trailing newline).
+nemoclaw_e2e_expected_route_provider() {
+  local route="nvidia-prod"
+  nemoclaw_e2e_using_compatible_inference \
+    && route="$NEMOCLAW_E2E_HOSTED_INFERENCE_PROVIDER_DEFAULT"
+  printf '%s' "$route"
+}
+
+# Strip ANSI escapes from stdin (perl: OSC and CSI; sed fallback when perl is absent: CSI only).
+nemoclaw_e2e_strip_ansi() {
+  command -v perl >/dev/null 2>&1 \
+    && { perl -pe 's/\x1b\][^\a]*(?:\a|\x1b\\)//g; s/\x1b\[[0-9;?]*[ -\/]*[@-~]//g'; return; }
+  # "|" as the s-delimiter lets "/" sit unescaped in [...]; a "\" there is literal on BSD sed.
+  sed -E $'s|\x1B\\[[0-9;?]*[ -/]*[@-~]||g'
+}
+
+# Succeed when OUTPUT ($1), with ANSI escapes removed, has a "Provider: <$2>" field
+# (case-insensitive, $2 used as an ERE) and, if MODEL ($3) is given, contains it verbatim.
+nemoclaw_e2e_inference_output_matches() {
+  local want_provider="$2" want_model="${3:-}" text
+
+  text="$(printf '%s' "$1" | nemoclaw_e2e_strip_ansi)"
+  grep -Eqi "Provider:[[:space:]]*${want_provider}" <<<"$text" || return 1
+  if [ -z "$want_model" ]; then return 0; fi
+  grep -Fq "$want_model" <<<"$text"
+}
+
+# Report a passing check through a pass() function when one is defined, else print "PASS: ...".
+nemoclaw_e2e_note_pass() {
+  declare -F pass >/dev/null 2>&1 || {
+    printf 'PASS: %s\n' "$*"
+    return
+  }
+  pass "$@"
+}
+
+# Report a failed check through a fail() function when one is defined, else print "ERROR: ..." to stderr.
+nemoclaw_e2e_note_fail() {
+  declare -F fail >/dev/null 2>&1 || {
+    printf 'ERROR: %s\n' "$*" >&2
+    return
+  }
+  fail "$@"
+}
+
 nemoclaw_e2e_hosted_inference_model() {
   if nemoclaw_e2e_using_compatible_inference; then
     printf '%s' "${NEMOCLAW_MODEL:-${NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL:-$NEMOCLAW_E2E_COMPATIBLE_INFERENCE_MODEL_DEFAULT}}"
@@ -83,18 +133,18 @@ nemoclaw_e2e_require_hosted_inference_key() {
 
   if nemoclaw_e2e_using_compatible_inference; then
     if [ -n "$key" ]; then
-      pass "COMPATIBLE_API_KEY is set for CI compatible inference"
+      nemoclaw_e2e_note_pass "NVIDIA_INFERENCE_API_KEY is set for hosted CI inference"
     else
-      fail "COMPATIBLE_API_KEY not set — required for CI compatible inference"
+      nemoclaw_e2e_note_fail "NVIDIA_INFERENCE_API_KEY not set - required for hosted CI inference"
       return 1
     fi
     return 0
   fi
 
   if [ -n "$key" ] && [[ "$key" == nvapi-* ]]; then
-    pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
+    nemoclaw_e2e_note_pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
   else
-    fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for live inference"
+    nemoclaw_e2e_note_fail "NVIDIA_INFERENCE_API_KEY not set or invalid - required for live inference"
     return 1
   fi
 }
diff --git a/test/e2e/test-agent-turn-latency-e2e.sh b/test/e2e/test-agent-turn-latency-e2e.sh
index 8172bfc030..694317f7a1 100755
--- a/test/e2e/test-agent-turn-latency-e2e.sh
+++ b/test/e2e/test-agent-turn-latency-e2e.sh
@@ -4,13 +4,13 @@
 #
 # Real agent turn latency E2E.
 #
-# Installs one OpenClaw sandbox and one Hermes sandbox against NVIDIA Endpoints,
-# verifies that both are configured for the requested model, and times one real
-# model-backed turn through each runtime.
+# Installs one OpenClaw sandbox and one Hermes sandbox against the configured
+# hosted inference endpoint, verifies that both are configured for the requested
+# model, and times one real model-backed turn through each runtime.
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set for hosted inference
 #   - NEMOCLAW_NON_INTERACTIVE=1
 #   - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 #
@@ -32,6 +32,8 @@ source "${SCRIPT_DIR}/e2e-timeout.sh"
 source "${SCRIPT_DIR}/lib/openclaw-json.sh"
 # shellcheck source=test/e2e/lib/sandbox-teardown.sh
 source "${SCRIPT_DIR}/lib/sandbox-teardown.sh"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "${SCRIPT_DIR}/lib/ci-compatible-inference.sh"
 # shellcheck source=test/e2e/lib/install-path-refresh.sh
 source "${SCRIPT_DIR}/lib/install-path-refresh.sh"
 
@@ -91,7 +93,7 @@ PY
 }
 
 strip_ansi() {
-  python3 -c 'import re, sys; sys.stdout.write(re.sub(r"\x1b\[[0-9;]*m", "", sys.stdin.read()))'
+  nemoclaw_e2e_strip_ansi
 }
 
 parse_chat_content() {
@@ -136,8 +138,7 @@ assert_route() {
   fi
   plain_output=$(printf '%s' "$output" | strip_ansi)
 
-  if grep -Fq "Provider: ${EXPECTED_ROUTE_PROVIDER}" <<<"$plain_output" \
-    && grep -Fq "Model: ${TURN_MODEL}" <<<"$plain_output"; then
+  if nemoclaw_e2e_inference_output_matches "$plain_output" "$EXPECTED_ROUTE_PROVIDER" "$TURN_MODEL"; then
     pass "${label}: OpenShell route is ${EXPECTED_ROUTE_PROVIDER} / ${TURN_MODEL}"
   else
     fail "${label}: route is not ${EXPECTED_ROUTE_PROVIDER} / ${TURN_MODEL}: ${plain_output:0:400}"
@@ -551,14 +552,14 @@ else
   exit 1
 fi
 
-TURN_MODEL="${NEMOCLAW_TURN_LATENCY_MODEL:-${NEMOCLAW_MODEL:-nvidia/nemotron-3-ultra-550b-a55b}}"
-TURN_PROVIDER_KEY="${NEMOCLAW_TURN_LATENCY_PROVIDER:-build}"
-EXPECTED_ROUTE_PROVIDER="${NEMOCLAW_TURN_LATENCY_ROUTE_PROVIDER:-nvidia-prod}"
 OPENCLAW_SANDBOX_NAME="${NEMOCLAW_OPENCLAW_TURN_LATENCY_SANDBOX_NAME:-e2e-openclaw-turn-latency}"
 HERMES_SANDBOX_NAME="${NEMOCLAW_HERMES_TURN_LATENCY_SANDBOX_NAME:-e2e-hermes-turn-latency}"
 OPENCLAW_INSTALL_LOG="/tmp/nemoclaw-e2e-openclaw-turn-latency-install.log"
 HERMES_INSTALL_LOG="/tmp/nemoclaw-e2e-hermes-turn-latency-install.log"
 RESULTS_JSON="/tmp/nemoclaw-e2e-agent-turn-latency.json"
+TURN_MODEL=""
+TURN_PROVIDER_KEY=""
+EXPECTED_ROUTE_PROVIDER=""
 
 MAX_TURN_SECONDS="${NEMOCLAW_TURN_LATENCY_MAX_SECONDS:-300}"
 is_positive_int "$MAX_TURN_SECONDS" || MAX_TURN_SECONDS=300
@@ -575,6 +576,20 @@ HERMES_REPLY=""
 register_sandbox_for_teardown "$OPENCLAW_SANDBOX_NAME"
 register_sandbox_for_teardown "$HERMES_SANDBOX_NAME"
 nemoclaw_ensure_local_bin_on_path
+nemoclaw_e2e_configure_compatible_inference || {
+  fail "Hosted CI inference could not be configured"
+  finish
+}
+
+if nemoclaw_e2e_using_compatible_inference; then
+  TURN_MODEL="${NEMOCLAW_TURN_LATENCY_MODEL:-$(nemoclaw_e2e_hosted_inference_model)}"
+  TURN_PROVIDER_KEY="${NEMOCLAW_TURN_LATENCY_PROVIDER:-custom}"
+  EXPECTED_ROUTE_PROVIDER="${NEMOCLAW_TURN_LATENCY_ROUTE_PROVIDER:-$(nemoclaw_e2e_expected_route_provider)}"
+else
+  TURN_MODEL="${NEMOCLAW_TURN_LATENCY_MODEL:-${NEMOCLAW_MODEL:-nvidia/nemotron-3-ultra-550b-a55b}}"
+  TURN_PROVIDER_KEY="${NEMOCLAW_TURN_LATENCY_PROVIDER:-build}"
+  EXPECTED_ROUTE_PROVIDER="${NEMOCLAW_TURN_LATENCY_ROUTE_PROVIDER:-nvidia-prod}"
+fi
 
 section "Prerequisites"
 if docker info >/dev/null 2>&1; then
@@ -584,10 +599,7 @@ else
   finish
 fi
 
-if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_INFERENCE_API_KEY is set"
-else
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
+if ! nemoclaw_e2e_require_hosted_inference_key; then
   finish
 fi
 
diff --git a/test/e2e/test-common-egress-agent-e2e.sh b/test/e2e/test-common-egress-agent-e2e.sh
index 16e65212a2..53415555a0 100755
--- a/test/e2e/test-common-egress-agent-e2e.sh
+++ b/test/e2e/test-common-egress-agent-e2e.sh
@@ -11,7 +11,7 @@
 #       and the Hermes agent fetches Wikidata through its API-server agent path.
 #
 # Required env:
-#   NVIDIA_INFERENCE_API_KEY                         real NVIDIA Endpoints key for inference
+#   NVIDIA_INFERENCE_API_KEY                         hosted inference credential
 #   NEMOCLAW_NON_INTERACTIVE=1             required
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 required
 #
@@ -392,6 +392,10 @@ echo "  Common Egress Agent E2E"
 echo "  $(date)"
 echo "============================================================"
 
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/lib/ci-compatible-inference.sh"
+nemoclaw_e2e_configure_compatible_inference || { fail "Hosted CI inference could not be configured"; summary; } # count the failure before summarizing
+
 section "Phase 0: Prerequisites"
 load_shell_path
 info "Repo: $REPO"
@@ -402,11 +406,9 @@ if ! docker info >/dev/null 2>&1; then
 fi
 pass "Docker is running"
 
-if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] || [[ "${NVIDIA_INFERENCE_API_KEY}" != nvapi-* ]]; then
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
+if ! nemoclaw_e2e_require_hosted_inference_key; then
   summary
 fi
-pass "NVIDIA_INFERENCE_API_KEY is set"
 
 if [ "${NEMOCLAW_NON_INTERACTIVE:-}" != "1" ]; then
   fail "NEMOCLAW_NON_INTERACTIVE=1 is required"
diff --git a/test/e2e/test-cron-preflight-inference-local-e2e.sh b/test/e2e/test-cron-preflight-inference-local-e2e.sh
index 0ee7e92ab0..89fd2e5daf 100755
--- a/test/e2e/test-cron-preflight-inference-local-e2e.sh
+++ b/test/e2e/test-cron-preflight-inference-local-e2e.sh
@@ -4,12 +4,12 @@
 #
 # Cron preflight inference.local E2E.
 #
-# Onboards a fresh sandbox against the managed cloud provider (whose base URL
-# resolves through `inference.local`), then loads OpenClaw's cron isolated-agent
-# preflight runtime directly from the in-sandbox dist and invokes
-# `preflightCronModelProvider` against the onboarded provider/model. Asserts
-# the call returns `status: "available"` and never reports `EAI_AGAIN` or the
-# "local provider endpoint is not reachable" message.
+# Onboards a fresh sandbox against the configured hosted inference provider
+# (whose base URL resolves through `inference.local`), then loads OpenClaw's
+# cron isolated-agent preflight runtime directly from the in-sandbox dist and
+# invokes `preflightCronModelProvider` against the onboarded provider/model.
+# Asserts the call returns `status: "available"` and never reports `EAI_AGAIN`
+# or the "local provider endpoint is not reachable" message.
 #
 # This probes the exact runtime path Patch 6 modifies — the cron CLI surfaces
 # (`openclaw cron add` / `openclaw cron run`) need `operator.admin` scope, which
@@ -19,18 +19,18 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set for hosted inference
 #   - NEMOCLAW_NON_INTERACTIVE=1, NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 #
 # Environment:
 #   NEMOCLAW_SANDBOX_NAME                  — sandbox name (default: e2e-cron-preflight)
 #   NEMOCLAW_RECREATE_SANDBOX=1            — destroy + recreate if exists
-#   NEMOCLAW_CRON_PREFLIGHT_MODEL          — cloud model (default: nvidia/nemotron-3-super-120b-a12b)
+#   NEMOCLAW_CRON_PREFLIGHT_MODEL          — model for non-hosted provider runs
 #   NEMOCLAW_CRON_PREFLIGHT_KEEP=1         — keep the sandbox after the test for inspection
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-cron-preflight-inference-local-e2e.sh
+#     NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-cron-preflight-inference-local-e2e.sh
 
 set -uo pipefail
 
@@ -86,6 +86,8 @@ INSTALL_LOG="/tmp/nemoclaw-e2e-cron-preflight-install.log"
 . "${E2E_DIR}/lib/sandbox-teardown.sh"
 # shellcheck source=test/e2e/lib/install-path-refresh.sh
 . "${E2E_DIR}/lib/install-path-refresh.sh"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "${E2E_DIR}/lib/ci-compatible-inference.sh"
 
 # ── Prereqs ──
 section "Prerequisites"
@@ -99,15 +101,27 @@ if ! command -v jq >/dev/null 2>&1; then
   echo "  Total: $TOTAL  Pass: $PASS  Fail: $FAIL  Skip: $SKIP"
   exit 0
 fi
-if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
-  skip "NVIDIA_INFERENCE_API_KEY not set"
+if ! nemoclaw_e2e_configure_compatible_inference; then
+  fail "hosted CI inference could not be configured"
   echo "  Total: $TOTAL  Pass: $PASS  Fail: $FAIL  Skip: $SKIP"
-  exit 0
+  exit 1
 fi
-if [ "${NVIDIA_INFERENCE_API_KEY:0:6}" != "nvapi-" ]; then
-  skip "NVIDIA_INFERENCE_API_KEY does not start with nvapi-"
-  echo "  Total: $TOTAL  Pass: $PASS  Fail: $FAIL  Skip: $SKIP"
-  exit 0
+if nemoclaw_e2e_using_compatible_inference; then
+  if ! nemoclaw_e2e_require_hosted_inference_key; then
+    echo "  Total: $TOTAL  Pass: $PASS  Fail: $FAIL  Skip: $SKIP"
+    exit 1
+  fi
+else
+  if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
+    skip "NVIDIA_INFERENCE_API_KEY not set"
+    echo "  Total: $TOTAL  Pass: $PASS  Fail: $FAIL  Skip: $SKIP"
+    exit 0
+  fi
+  if [ "${NVIDIA_INFERENCE_API_KEY:0:6}" != "nvapi-" ]; then
+    skip "NVIDIA_INFERENCE_API_KEY does not start with nvapi-"
+    echo "  Total: $TOTAL  Pass: $PASS  Fail: $FAIL  Skip: $SKIP"
+    exit 0
+  fi
 fi
 if [ "${NEMOCLAW_NON_INTERACTIVE:-}" != "1" ]; then
   skip "NEMOCLAW_NON_INTERACTIVE must be 1; refusing to risk an interactive onboard prompt"
diff --git a/test/e2e/test-hermes-discord-e2e.sh b/test/e2e/test-hermes-discord-e2e.sh
index c3c24c2fa5..1aabaa0dba 100755
--- a/test/e2e/test-hermes-discord-e2e.sh
+++ b/test/e2e/test-hermes-discord-e2e.sh
@@ -29,7 +29,7 @@
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-hermes-discord-e2e.sh
+#     NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-hermes-discord-e2e.sh
 
 set -uo pipefail
 
@@ -183,7 +183,10 @@ export DISCORD_REQUIRE_MENTION="${DISCORD_REQUIRE_MENTION:-0}"
 
 # shellcheck source=test/e2e/lib/sandbox-teardown.sh
 . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "$(dirname "${BASH_SOURCE[0]}")/lib/ci-compatible-inference.sh"
 register_sandbox_for_teardown "$SANDBOX_NAME"
+nemoclaw_e2e_configure_compatible_inference || exit 1
 
 # shellcheck source=test/e2e/lib/discord-gateway-proof.sh
 . "$(dirname "${BASH_SOURCE[0]}")/lib/discord-gateway-proof.sh"
@@ -197,10 +200,7 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
-else
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
+if ! nemoclaw_e2e_require_hosted_inference_key; then
   exit 1
 fi
 
diff --git a/test/e2e/test-hermes-e2e.sh b/test/e2e/test-hermes-e2e.sh
index 73db775f3b..e0589eea2d 100755
--- a/test/e2e/test-hermes-e2e.sh
+++ b/test/e2e/test-hermes-e2e.sh
@@ -11,7 +11,7 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set for hosted inference
 #   - Network access to inference-api.nvidia.com
 #
 # Environment variables:
@@ -22,10 +22,10 @@
 #   NEMOCLAW_RECREATE_SANDBOX=1            — recreate sandbox if it exists from a previous run
 #   NEMOCLAW_E2E_HERMES_DASHBOARD=1        — validate the built-in Hermes web dashboard end-to-end
 #   NEMOCLAW_HERMES_DASHBOARD_TUI=1        — enable Hermes' optional in-browser TUI tab during onboard
-#   NVIDIA_INFERENCE_API_KEY                         — required for NVIDIA Endpoints inference
+#   NVIDIA_INFERENCE_API_KEY                         — required for hosted inference
 #
 # Usage:
-#   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-hermes-e2e.sh
+#   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-hermes-e2e.sh
 
 set -uo pipefail
 
@@ -114,6 +114,9 @@ is_truthy_env_value() {
   esac
 }
 
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/lib/ci-compatible-inference.sh"
+
 hermes_dashboard_e2e_enabled() {
   is_truthy_env_value "${NEMOCLAW_E2E_HERMES_DASHBOARD:-}" \
     || is_truthy_env_value "${NEMOCLAW_HERMES_DASHBOARD:-}"
@@ -159,6 +162,10 @@ fi
 
 SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-hermes}"
 export NEMOCLAW_AGENT="${NEMOCLAW_AGENT:-hermes}"
+nemoclaw_e2e_configure_compatible_inference || exit 1
+HOSTED_INFERENCE_BASE_URL="$(nemoclaw_e2e_hosted_inference_base_url)"
+HOSTED_INFERENCE_MODEL="$(nemoclaw_e2e_hosted_inference_model)"
+HOSTED_INFERENCE_KEY="$(nemoclaw_e2e_hosted_inference_key)"
 
 # shellcheck source=test/e2e/lib/sandbox-teardown.sh
 . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh"
@@ -196,19 +203,14 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
-else
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for live inference"
+if ! nemoclaw_e2e_require_hosted_inference_key; then
   exit 1
 fi
 
-if curl -sf --max-time 10 \
-  -H "Authorization: Bearer ${NVIDIA_INFERENCE_API_KEY}" \
-  https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then
-  pass "Network access to inference-api.nvidia.com"
+if nemoclaw_e2e_probe_hosted_inference; then
+  pass "Network access to ${HOSTED_INFERENCE_BASE_URL}"
 else
-  fail "Cannot reach inference-api.nvidia.com"
+  fail "Cannot reach ${HOSTED_INFERENCE_BASE_URL}"
   exit 1
 fi
 
@@ -358,10 +360,16 @@ fi
 
 # 3d: Inference must be configured by onboard
 if inf_check=$(openshell inference get 2>&1); then
-  if grep -qi "nvidia-prod" <<<"$inf_check"; then
-    pass "Inference configured via onboard"
+  expected_provider="$(nemoclaw_e2e_expected_route_provider)"
+  expected_model=""
+  if nemoclaw_e2e_using_compatible_inference; then
+    expected_model="$HOSTED_INFERENCE_MODEL"
+  fi
+  if nemoclaw_e2e_inference_output_matches "$inf_check" "$expected_provider" "$expected_model"; then
+    pass "Inference configured via onboard (${expected_provider})"
   else
-    fail "Inference not configured — onboard did not set up nvidia-prod provider"
+    inf_check_plain="$(printf '%s' "$inf_check" | nemoclaw_e2e_strip_ansi)"
+    fail "Inference not configured - onboard did not set up ${expected_provider}: ${inf_check_plain:0:200}"
   fi
 else
   fail "openshell inference get failed: ${inf_check:0:200}"
@@ -600,17 +608,13 @@ rm -f "$ssh_config"
 # ══════════════════════════════════════════════════════════════════
 section "Phase 5: Live inference"
 
-# ── Test 5a: Direct NVIDIA Endpoints ──
-info "[LIVE] Direct API test → inference-api.nvidia.com..."
+# ── Test 5a: Direct hosted inference endpoint ──
+info "[LIVE] Direct API test → ${HOSTED_INFERENCE_BASE_URL}..."
 api_response=$(curl -s --max-time 30 \
-  -X POST https://inference-api.nvidia.com/v1/chat/completions \
+  -X POST "${HOSTED_INFERENCE_BASE_URL}/chat/completions" \
   -H "Content-Type: application/json" \
-  -H "Authorization: Bearer $NVIDIA_INFERENCE_API_KEY" \
-  -d '{
-    "model": "nvidia/nemotron-3-super-120b-a12b",
-    "messages": [{"role": "user", "content": "Reply with exactly one word: PONG"}],
-    "max_tokens": 100
-  }' 2>/dev/null) || true
+  -H "Authorization: Bearer $HOSTED_INFERENCE_KEY" \
+  -d "$(printf '{"model":"%s","messages":[{"role":"user","content":"Reply with exactly one word: PONG"}],"max_tokens":100}' "$HOSTED_INFERENCE_MODEL")" 2>/dev/null) || true
 
 if [ -n "$api_response" ]; then
   api_content=$(echo "$api_response" | parse_chat_content 2>/dev/null) || true
@@ -645,7 +649,7 @@ if openshell sandbox ssh-config "$SANDBOX_NAME" >"$ssh_config" 2>/dev/null; then
     "openshell-${SANDBOX_NAME}" \
     "curl -s --max-time 60 https://inference.local/v1/chat/completions \
       -H 'Content-Type: application/json' \
-      -d '{\"model\":\"nvidia/nemotron-3-super-120b-a12b\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":100}'" \
+      -d '{\"model\":\"$HOSTED_INFERENCE_MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":100}'" \
     2>&1) || true
 fi
 rm -f "$ssh_config"
@@ -653,8 +657,8 @@ rm -f "$ssh_config"
 if [ -n "$sandbox_response" ]; then
   sandbox_content=$(echo "$sandbox_response" | parse_chat_content 2>/dev/null) || true
   if grep -qi "PONG" <<<"$sandbox_content"; then
-    pass "[ROUTING] inference.local: OpenShell routed curl to NVIDIA Endpoints and returned PONG"
-    info "Routing path proven: sandbox curl → DNS forwarder → gateway proxy → NVIDIA Endpoints (does not exercise the Hermes agent runtime or openclaw HTTP client)"
+    pass "[ROUTING] inference.local: OpenShell routed curl to the hosted inference endpoint and returned PONG"
+    info "Routing path proven: sandbox curl → DNS forwarder → gateway proxy → hosted inference endpoint (does not exercise the Hermes agent runtime or openclaw HTTP client)"
   else
     fail "[ROUTING] inference.local: expected PONG, got: ${sandbox_content:0:200}"
   fi
diff --git a/test/e2e/test-hermes-inference-switch.sh b/test/e2e/test-hermes-inference-switch.sh
index 521a18b5ee..b38c64f2f8 100755
--- a/test/e2e/test-hermes-inference-switch.sh
+++ b/test/e2e/test-hermes-inference-switch.sh
@@ -10,7 +10,7 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set for hosted inference
 #   - NEMOCLAW_NON_INTERACTIVE=1
 #   - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 
@@ -441,9 +441,16 @@ E2E_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 . "${E2E_DIR}/lib/inference-switch-retry.sh"
 # shellcheck source=test/e2e/lib/anthropic-switch-provider.sh
 . "${E2E_DIR}/lib/anthropic-switch-provider.sh"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "${E2E_DIR}/lib/ci-compatible-inference.sh"
 SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-hermes-inference-switch}"
-SWITCH_PROVIDER="${NEMOCLAW_SWITCH_PROVIDER:-nvidia-prod}"
-SWITCH_MODEL="${NEMOCLAW_SWITCH_MODEL:-z-ai/glm-5.1}"
+if nemoclaw_e2e_using_compatible_inference; then
+  SWITCH_PROVIDER="${NEMOCLAW_SWITCH_PROVIDER:-$(nemoclaw_e2e_expected_route_provider)}"
+  SWITCH_MODEL="${NEMOCLAW_SWITCH_MODEL:-$(nemoclaw_e2e_hosted_inference_model)}"
+else
+  SWITCH_PROVIDER="${NEMOCLAW_SWITCH_PROVIDER:-nvidia-prod}"
+  SWITCH_MODEL="${NEMOCLAW_SWITCH_MODEL:-z-ai/glm-5.1}"
+fi
 SWITCH_INFERENCE_API="${NEMOCLAW_SWITCH_INFERENCE_API:-openai-completions}"
 # shellcheck disable=SC2034 # consumed by sourced anthropic-switch-provider.sh
 SWITCH_ENDPOINT_URL="${NEMOCLAW_SWITCH_ENDPOINT_URL:-}"
@@ -462,6 +469,7 @@ trap 'stop_mock_anthropic_switch_provider; _nemoclaw_sandbox_teardown' EXIT
 # shellcheck source=test/e2e/lib/install-path-refresh.sh
 . "${E2E_DIR}/lib/install-path-refresh.sh"
 register_sandbox_for_teardown "$SANDBOX_NAME"
+nemoclaw_e2e_configure_compatible_inference || exit 1
 
 section "Phase 0: Pre-cleanup"
 if command -v nemohermes >/dev/null 2>&1; then
@@ -483,10 +491,7 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_INFERENCE_API_KEY is set"
-else
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
+if ! nemoclaw_e2e_require_hosted_inference_key; then
   exit 1
 fi
 
diff --git a/test/e2e/test-hermes-slack-e2e.sh b/test/e2e/test-hermes-slack-e2e.sh
index 74f70f6369..8934dad48c 100755
--- a/test/e2e/test-hermes-slack-e2e.sh
+++ b/test/e2e/test-hermes-slack-e2e.sh
@@ -23,7 +23,7 @@
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-hermes-slack-e2e.sh
+#     NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-hermes-slack-e2e.sh
 
 set -uo pipefail
 
@@ -173,7 +173,10 @@ fi
 
 # shellcheck source=test/e2e/lib/sandbox-teardown.sh
 . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "$(dirname "${BASH_SOURCE[0]}")/lib/ci-compatible-inference.sh"
 register_sandbox_for_teardown "$SANDBOX_NAME"
+nemoclaw_e2e_configure_compatible_inference || exit 1
 
 section "Phase 0: Prerequisites"
 
@@ -184,10 +187,7 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
-else
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
+if ! nemoclaw_e2e_require_hosted_inference_key; then
   exit 1
 fi
 
diff --git a/test/e2e/test-issue-4434-tui-unreachable-inference.sh b/test/e2e/test-issue-4434-tui-unreachable-inference.sh
index 69004be5b2..e0a04c5bf5 100755
--- a/test/e2e/test-issue-4434-tui-unreachable-inference.sh
+++ b/test/e2e/test-issue-4434-tui-unreachable-inference.sh
@@ -8,12 +8,14 @@
 #
 # This mutates host firewall state. Run only on a Linux Docker host you control:
 #
-#   NEMOCLAW_ISSUE_4434_LIVE=1 NVIDIA_INFERENCE_API_KEY=nvapi-... \
+#   NEMOCLAW_ISSUE_4434_LIVE=1 NVIDIA_INFERENCE_API_KEY=... \
 #     bash test/e2e/test-issue-4434-tui-unreachable-inference.sh
 
 set -euo pipefail
 
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "${SCRIPT_DIR}/lib/ci-compatible-inference.sh"
 
 SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-issue-4434-tui-unreachable}"
 INSTALL_LOG="${E2E_ISSUE_4434_INSTALL_LOG:-/tmp/nemoclaw-e2e-issue-4434-install.log}"
@@ -63,6 +65,11 @@ if [ "${NEMOCLAW_ISSUE_4434_LIVE:-0}" != "1" ]; then
   exit 0
 fi
 
+if nemoclaw_e2e_using_compatible_inference; then
+  info "skipping: hosted compatible inference is gateway-managed; this repro only blocks sandbox egress"
+  exit 0
+fi
+
 if [ "$(uname -s)" != "Linux" ]; then
   fail "Linux host required for DOCKER-USER iptables repro"
 fi
@@ -71,9 +78,8 @@ for command in docker sudo expect curl timeout perl; do
 done
 docker info >/dev/null 2>&1 || fail "Docker is not running"
 sudo -n true >/dev/null 2>&1 || fail "passwordless sudo is required for non-interactive iptables cleanup"
-if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] || [[ "${NVIDIA_INFERENCE_API_KEY}" != nvapi-* ]]; then
-  fail "NVIDIA_INFERENCE_API_KEY must be set and start with nvapi-"
-fi
+nemoclaw_e2e_configure_compatible_inference || fail "hosted CI inference could not be configured"
+nemoclaw_e2e_require_hosted_inference_key || exit 1
 
 mkdir -p "$CAPTURE_DIR"
 CLEANUP_SANDBOX=1
@@ -110,7 +116,33 @@ if ! nemoclaw "$SANDBOX_NAME" status >"$status_log" 2>&1; then
   fail "nemoclaw ${SANDBOX_NAME} status failed before firewall block"
 fi
 if ! grep -Eiq "inference.*healthy|healthy.*inference" "$status_log"; then
-  fail "pre-block status did not report healthy inference"
+  if grep -Eiq "Inference:[[:space:]]*not probed" "$status_log"; then
+    info "status skipped inference reachability; probing inference.local directly"
+  else
+    fail "pre-block status did not report healthy or not-probed inference"
+  fi
+fi
+
+route_log="${CAPTURE_DIR}/openshell-inference-before-block.log"
+if ! route_output=$(openshell inference get 2>&1); then
+  printf '%s\n' "$route_output" >"$route_log"
+  fail "openshell inference get failed before firewall block"
+fi
+printf '%s\n' "$route_output" >"$route_log"
+expected_provider="$(nemoclaw_e2e_expected_route_provider)"
+expected_model="$(nemoclaw_e2e_hosted_inference_model)"
+if ! nemoclaw_e2e_inference_output_matches "$route_output" "$expected_provider" "$expected_model"; then
+  route_plain="$(printf '%s' "$route_output" | nemoclaw_e2e_strip_ansi)"
+  fail "pre-block OpenShell route was not ${expected_provider} / ${expected_model}: ${route_plain:0:240}"
+fi
+
+preblock_probe_log="${CAPTURE_DIR}/inference-local-before-block.log" # receives curl's stderr; the response body is discarded
+preblock_payload="$(printf '{"model":"%s","messages":[{"role":"user","content":"Reply with OK."}],"max_tokens":8}' "$expected_model")"
+preblock_payload_arg="$(printf '%q' "$preblock_payload")" # shell-quote the JSON so it survives the inner sh -lc command string
+if ! timeout 90 openshell sandbox exec --name "$SANDBOX_NAME" -- sh -lc \
+  "curl -sS -f --max-time 60 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' -d $preblock_payload_arg >/dev/null" \
+  >"$preblock_probe_log" 2>&1; then
+  fail "inference.local was not reachable from inside the sandbox before firewall block"
 fi
 
 connect_probe_log="${CAPTURE_DIR}/nemoclaw-connect-probe-before-block.log"
diff --git a/test/e2e/test-launchable-smoke.sh b/test/e2e/test-launchable-smoke.sh
index b28cc89750..73d993f227 100755
--- a/test/e2e/test-launchable-smoke.sh
+++ b/test/e2e/test-launchable-smoke.sh
@@ -20,7 +20,7 @@
 # What this tests:
 #   1. Run brev-launchable-ci-cpu.sh with NEMOCLAW_REF=current branch
 #   2. Verify installation artifacts (nemoclaw, openshell, Node.js ≥22, Docker, sentinel)
-#   3. nemoclaw onboard --non-interactive with NVIDIA_INFERENCE_API_KEY (cloud provider)
+#   3. nemoclaw onboard --non-interactive with hosted inference
 #   4. Sandbox health: nemoclaw list, status, gateway running
 #   5. Live inference through the sandbox (same pattern as test-full-e2e.sh Phase 4)
 #   6. Destroy + cleanup
@@ -28,7 +28,7 @@
 # Prerequisites:
 #   - Ubuntu runner (ubuntu-latest)
 #   - Docker running
-#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set for hosted inference
 #   - Network access to inference-api.nvidia.com
 #   - NEMOCLAW_NON_INTERACTIVE=1
 #   - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
@@ -37,12 +37,12 @@
 #   NEMOCLAW_REF              — git ref for brev-launchable-ci-cpu.sh (default: current branch)
 #   NEMOCLAW_SANDBOX_NAME     — sandbox name (default: e2e-launchable)
 #   NEMOCLAW_RECREATE_SANDBOX — set to 1 to recreate if exists
-#   NVIDIA_INFERENCE_API_KEY            — required for NVIDIA Endpoints inference
+#   NVIDIA_INFERENCE_API_KEY  — required for hosted inference
 #   SKIP_DOCKER_PULL          — set to 1 to skip Docker image pre-pulls (speeds up CI)
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-launchable-smoke.sh
+#     NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-launchable-smoke.sh
 #
 # See: https://github.com/NVIDIA/NemoClaw/issues/2599
 
@@ -97,6 +97,9 @@ except Exception as e:
 "
 }
 
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/lib/ci-compatible-inference.sh"
+
 # Determine repo root
 if [ -f "$(cd "$(dirname "$0")/../.." && pwd)/scripts/brev-launchable-ci-cpu.sh" ]; then
   REPO="$(cd "$(dirname "$0")/../.." && pwd)"
@@ -133,6 +136,10 @@ exec > >(tee -a "$TEST_LOG") 2>&1
 # shellcheck source=test/e2e/lib/sandbox-teardown.sh
 . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh"
 register_sandbox_for_teardown "$SANDBOX_NAME"
+nemoclaw_e2e_configure_compatible_inference || exit 1
+HOSTED_INFERENCE_BASE_URL="$(nemoclaw_e2e_hosted_inference_base_url)"
+HOSTED_INFERENCE_MODEL="$(nemoclaw_e2e_hosted_inference_model)"
+HOSTED_INFERENCE_KEY="$(nemoclaw_e2e_hosted_inference_key)"
 
 # ══════════════════════════════════════════════════════════════════
 # Phase 0: Pre-cleanup
@@ -177,17 +184,14 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
-else
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for live inference"
+if ! nemoclaw_e2e_require_hosted_inference_key; then
   exit 1
 fi
 
-if curl -sf --max-time 10 https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then
-  pass "Network access to inference-api.nvidia.com"
+if nemoclaw_e2e_probe_hosted_inference; then
+  pass "Network access to ${HOSTED_INFERENCE_BASE_URL}"
 else
-  fail "Cannot reach inference-api.nvidia.com"
+  fail "Cannot reach ${HOSTED_INFERENCE_BASE_URL}"
   exit 1
 fi
 
@@ -341,9 +345,9 @@ else
 fi
 
 # ══════════════════════════════════════════════════════════════════
-# Phase 4: Onboard (non-interactive, cloud provider)
+# Phase 4: Onboard (non-interactive, hosted inference)
 # ══════════════════════════════════════════════════════════════════
-section "Phase 4: Onboard (non-interactive, NVIDIA Endpoints)"
+section "Phase 4: Onboard (non-interactive, hosted inference)"
 
 # Run onboard from the launchable clone directory — this is the real
 # community path: the user's NemoClaw is in ~/NemoClaw, not a CI checkout.
@@ -353,7 +357,7 @@ cd "$NEMOCLAW_CLONE_DIR" || {
 }
 
 info "Running nemoclaw onboard --non-interactive..."
-info "Provider: NVIDIA Endpoints (cloud)"
+info "Provider: ${NEMOCLAW_PROVIDER:-configured hosted inference}"
 info "Sandbox name: $SANDBOX_NAME"
 
 ONBOARD_LOG="/tmp/nemoclaw-launchable-onboard.log"
@@ -403,10 +407,16 @@ fi
 
 # 5c: Inference configured by onboard
 if inf_check=$(openshell inference get 2>&1); then
-  if grep -qi "nvidia-prod" <<<"$inf_check"; then
-    pass "Inference configured via onboard (nvidia-prod)"
+  expected_provider="$(nemoclaw_e2e_expected_route_provider)"
+  expected_model=""
+  if nemoclaw_e2e_using_compatible_inference; then
+    expected_model="$HOSTED_INFERENCE_MODEL"
+  fi
+  if nemoclaw_e2e_inference_output_matches "$inf_check" "$expected_provider" "$expected_model"; then
+    pass "Inference configured via onboard (${expected_provider})"
   else
-    fail "Inference not configured — onboard did not set up nvidia-prod provider"
+    inf_check_plain="$(printf '%s' "$inf_check" | nemoclaw_e2e_strip_ansi)"
+    fail "Inference not configured - onboard did not set up ${expected_provider}: ${inf_check_plain:0:200}"
   fi
 else
   fail "openshell inference get failed: ${inf_check:0:200}"
@@ -424,17 +434,13 @@ fi
 # ══════════════════════════════════════════════════════════════════
 section "Phase 6: Live inference"
 
-# ── Test 6a: Direct NVIDIA Endpoints (sanity check) ──
-info "[LIVE] Direct API test → inference-api.nvidia.com..."
+# ── Test 6a: Direct hosted inference endpoint (sanity check) ──
+info "[LIVE] Direct API test → ${HOSTED_INFERENCE_BASE_URL}..."
 api_response=$(curl -s --max-time 30 \
-  -X POST https://inference-api.nvidia.com/v1/chat/completions \
+  -X POST "${HOSTED_INFERENCE_BASE_URL}/chat/completions" \
   -H "Content-Type: application/json" \
-  -H "Authorization: Bearer $NVIDIA_INFERENCE_API_KEY" \
-  -d '{
-    "model": "nvidia/nemotron-3-super-120b-a12b",
-    "messages": [{"role": "user", "content": "Reply with exactly one word: PONG"}],
-    "max_tokens": 100
-  }' 2>/dev/null) || true
+  -H "Authorization: Bearer $HOSTED_INFERENCE_KEY" \
+  -d "$(printf '{"model":"%s","messages":[{"role":"user","content":"Reply with exactly one word: PONG"}],"max_tokens":100}' "$HOSTED_INFERENCE_MODEL")" 2>/dev/null) || true
 
 if [ -n "$api_response" ]; then
   api_content=$(echo "$api_response" | parse_chat_content 2>/dev/null) || true
@@ -461,7 +467,7 @@ if openshell sandbox ssh-config "$SANDBOX_NAME" >"$ssh_config" 2>/dev/null; then
     "openshell-${SANDBOX_NAME}" \
     "curl -s --max-time 60 https://inference.local/v1/chat/completions \
       -H 'Content-Type: application/json' \
-      -d '{\"model\":\"nvidia/nemotron-3-super-120b-a12b\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":100}'" \
+      -d '{\"model\":\"$HOSTED_INFERENCE_MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":100}'" \
     2>&1) || true
 fi
 rm -f "$ssh_config"
@@ -494,14 +500,14 @@ for pong_attempt in 1 2 3; do
       "openshell-${SANDBOX_NAME}" \
       "curl -s --max-time 60 https://inference.local/v1/chat/completions \
         -H 'Content-Type: application/json' \
-        -d '{\"model\":\"nvidia/nemotron-3-super-120b-a12b\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":100}'" \
+        -d '{\"model\":\"$HOSTED_INFERENCE_MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":100}'" \
       2>&1) || true
   fi
   rm -f "$ssh_config"
 done
 
 if $pong_ok; then
-  pass "[ROUTING] inference.local: OpenShell routed curl to NVIDIA Endpoints and returned PONG"
+  pass "[ROUTING] inference.local: OpenShell routed curl to the hosted inference endpoint and returned PONG"
 else
   fail "[ROUTING] inference.local: expected PONG after 3 attempts, got: ${sandbox_content:0:200}"
 fi
@@ -576,9 +582,9 @@ echo ""
 echo "  What this tested (issue #2599):"
 echo "    - brev-launchable-ci-cpu.sh bootstrap (Docker, Node.js, OpenShell, NemoClaw)"
 echo "    - Installation artifacts (binaries on PATH, sentinel file, built outputs)"
-echo "    - Onboard via launchable-installed NemoClaw (cloud provider)"
+echo "    - Onboard via launchable-installed NemoClaw (hosted inference)"
 echo "    - Sandbox health (list, status, inference config, gateway)"
-echo "    - Direct NVIDIA Endpoints inference"
+echo "    - Direct hosted inference"
 echo "    - Sandbox inference routing (curl → inference.local)"
 echo "    - openclaw agent mediated inference (the full stack)"
 echo "    - Destroy + cleanup"
diff --git a/test/e2e/test-onboard-negative-paths.sh b/test/e2e/test-onboard-negative-paths.sh
index be7b094621..30ff8c8580 100755
--- a/test/e2e/test-onboard-negative-paths.sh
+++ b/test/e2e/test-onboard-negative-paths.sh
@@ -24,6 +24,8 @@ export NEMOCLAW_E2E_DEFAULT_TIMEOUT=1800
 SCRIPT_DIR_TIMEOUT="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
 # shellcheck source=test/e2e/e2e-timeout.sh
 source "${SCRIPT_DIR_TIMEOUT}/e2e-timeout.sh"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "${SCRIPT_DIR_TIMEOUT}/lib/ci-compatible-inference.sh"
 
 LOG_FILE="${NEMOCLAW_E2E_LOG:-/tmp/nemoclaw-e2e-onboard-negative-paths.log}"
 exec > >(tee "$LOG_FILE") 2>&1
@@ -73,11 +75,35 @@ if ! command -v nemoclaw >/dev/null 2>&1; then
 fi
 
 SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-onboard-negative}"
-CLOUD_MODEL="${NEMOCLAW_ONBOARD_NEGATIVE_MODEL:-nvidia/nemotron-3-super-120b-a12b}"
 PORT_CONFLICT_PORT="${NEMOCLAW_ONBOARD_NEGATIVE_CONFLICT_PORT:-18080}"
 SESSION_FILE="$HOME/.nemoclaw/onboard-session.json"
 REGISTRY_FILE="$HOME/.nemoclaw/sandboxes.json"
 RESTORE_API_KEY="${NVIDIA_INFERENCE_API_KEY:-}"
+if [ -n "$RESTORE_API_KEY" ]; then
+  export NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY"
+fi
+nemoclaw_e2e_configure_compatible_inference || {
+  fail "Hosted CI inference could not be configured"
+  exit 1
+}
+CLOUD_MODEL="${NEMOCLAW_ONBOARD_NEGATIVE_MODEL:-$(nemoclaw_e2e_hosted_inference_model)}"
+HOSTED_INFERENCE_BASE_URL="$(nemoclaw_e2e_hosted_inference_base_url)"
+EXPECTED_PROVIDER="$(nemoclaw_e2e_expected_route_provider)"
+ONBOARD_INFERENCE_ENV=(
+  "NEMOCLAW_PROVIDER=cloud"
+  "NEMOCLAW_MODEL=$CLOUD_MODEL"
+  "NVIDIA_INFERENCE_API_KEY=$RESTORE_API_KEY"
+)
+if nemoclaw_e2e_using_compatible_inference; then
+  ONBOARD_INFERENCE_ENV=(
+    "NEMOCLAW_PROVIDER=custom"
+    "NEMOCLAW_ENDPOINT_URL=$HOSTED_INFERENCE_BASE_URL"
+    "NEMOCLAW_MODEL=$CLOUD_MODEL"
+    "NEMOCLAW_COMPAT_MODEL=$CLOUD_MODEL"
+    "COMPATIBLE_API_KEY=$RESTORE_API_KEY"
+    "NVIDIA_INFERENCE_API_KEY=$RESTORE_API_KEY"
+  )
+fi
 
 # shellcheck source=test/e2e/lib/sandbox-teardown.sh
 . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh"
@@ -294,10 +320,7 @@ else
   exit 1
 fi
 
-if [[ -n "$RESTORE_API_KEY" && "$RESTORE_API_KEY" == nvapi-* ]]; then
-  pass "NVIDIA_INFERENCE_API_KEY is set"
-else
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid; required for live onboard scenarios"
+if ! nemoclaw_e2e_require_hosted_inference_key; then
   print_summary
   exit 1
 fi
@@ -333,11 +356,10 @@ section "Phase 3: Entry option validation"
 
 FROM_GUARD_LOG="$(mktemp)"
 env -u NEMOCLAW_SANDBOX_NAME \
+  "${ONBOARD_INFERENCE_ENV[@]}" \
   NEMOCLAW_NON_INTERACTIVE=1 \
   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-  NEMOCLAW_PROVIDER=cloud \
   NEMOCLAW_POLICY_MODE=skip \
-  NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY" \
   node "$REPO/bin/nemoclaw.js" onboard --non-interactive --from "$REPO/Dockerfile" \
   >"$FROM_GUARD_LOG" 2>&1
 from_guard_exit=$?
@@ -365,12 +387,11 @@ fi
 
 FROM_ENV_NAME_LOG="$(mktemp)"
 env \
+  "${ONBOARD_INFERENCE_ENV[@]}" \
   NEMOCLAW_NON_INTERACTIVE=1 \
   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
   NEMOCLAW_SANDBOX_NAME="bad name" \
-  NEMOCLAW_PROVIDER=cloud \
   NEMOCLAW_POLICY_MODE=skip \
-  NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY" \
   node "$REPO/bin/nemoclaw.js" onboard --non-interactive --from "$REPO/Dockerfile" \
   >"$FROM_ENV_NAME_LOG" 2>&1
 from_env_name_exit=$?
@@ -446,14 +467,14 @@ else
 fi
 
 PORT_CONFLICT_LOG="$(mktemp)"
-NEMOCLAW_NON_INTERACTIVE=1 \
+env \
+  "${ONBOARD_INFERENCE_ENV[@]}" \
+  NEMOCLAW_NON_INTERACTIVE=1 \
   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
   NEMOCLAW_SANDBOX_NAME="${SANDBOX_NAME}-port" \
   NEMOCLAW_RECREATE_SANDBOX=1 \
   NEMOCLAW_GATEWAY_PORT="$PORT_CONFLICT_PORT" \
-  NEMOCLAW_PROVIDER=cloud \
   NEMOCLAW_POLICY_MODE=skip \
-  NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY" \
   node "$REPO/bin/nemoclaw.js" onboard --non-interactive >"$PORT_CONFLICT_LOG" 2>&1
 port_conflict_exit=$?
 port_conflict_output="$(cat "$PORT_CONFLICT_LOG")"
@@ -487,15 +508,14 @@ fi
 section "Phase 6: Live non-interactive onboard honors presets and model"
 
 LIVE_LOG="$(mktemp)"
-NEMOCLAW_NON_INTERACTIVE=1 \
+env \
+  "${ONBOARD_INFERENCE_ENV[@]}" \
+  NEMOCLAW_NON_INTERACTIVE=1 \
   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
   NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
   NEMOCLAW_RECREATE_SANDBOX=1 \
-  NEMOCLAW_PROVIDER=cloud \
-  NEMOCLAW_MODEL="$CLOUD_MODEL" \
   NEMOCLAW_POLICY_MODE=custom \
   NEMOCLAW_POLICY_PRESETS=npm,pypi \
-  NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY" \
   node "$REPO/bin/nemoclaw.js" onboard --non-interactive >"$LIVE_LOG" 2>&1
 live_exit=$?
 live_output="$(cat "$LIVE_LOG")"
@@ -510,20 +530,20 @@ else
   exit 1
 fi
 
-if printf '%s\n' "$live_output" | grep -q "Using NVIDIA Endpoints with model: ${CLOUD_MODEL}"; then
-  pass "Live onboard selected requested cloud model"
+if printf '%s\n' "$live_output" | grep -Fq "$CLOUD_MODEL"; then
+  pass "Live onboard selected requested hosted model"
 else
-  fail "Live onboard output did not confirm requested cloud model"
+  fail "Live onboard output did not confirm requested hosted model"
 fi
 
-if node - "$REGISTRY_FILE" "$SANDBOX_NAME" "$CLOUD_MODEL" <<'NODE'; then
+if node - "$REGISTRY_FILE" "$SANDBOX_NAME" "$CLOUD_MODEL" "$EXPECTED_PROVIDER" <<'NODE'; then
 const fs = require("node:fs");
-const [registryPath, sandboxName, expectedModel] = process.argv.slice(2);
+const [registryPath, sandboxName, expectedModel, expectedProvider] = process.argv.slice(2);
 const registry = JSON.parse(fs.readFileSync(registryPath, "utf8"));
 const sandbox = registry.sandboxes && registry.sandboxes[sandboxName];
 if (!sandbox) throw new Error(`missing sandbox registry entry: ${sandboxName}`);
-if (sandbox.provider !== "nvidia-prod") {
-  throw new Error(`expected provider nvidia-prod, got ${sandbox.provider}`);
+if (sandbox.provider !== expectedProvider) {
+  throw new Error(`expected provider ${expectedProvider}, got ${sandbox.provider}`);
 }
 if (sandbox.model !== expectedModel) {
   throw new Error(`expected model ${expectedModel}, got ${sandbox.model}`);
@@ -540,13 +560,13 @@ else
   fail "Registry did not record requested provider, model, and policy presets"
 fi
 
-if node - "$SESSION_FILE" "$SANDBOX_NAME" "$CLOUD_MODEL" <<'NODE'; then
+if node - "$SESSION_FILE" "$SANDBOX_NAME" "$CLOUD_MODEL" "$EXPECTED_PROVIDER" <<'NODE'; then
 const fs = require("node:fs");
-const [sessionPath, sandboxName, expectedModel] = process.argv.slice(2);
+const [sessionPath, sandboxName, expectedModel, expectedProvider] = process.argv.slice(2);
 const session = JSON.parse(fs.readFileSync(sessionPath, "utf8"));
 if (session.status !== "complete") throw new Error(`session status ${session.status}`);
 if (session.sandboxName !== sandboxName) throw new Error(`session sandbox ${session.sandboxName}`);
-if (session.provider !== "nvidia-prod") throw new Error(`session provider ${session.provider}`);
+if (session.provider !== expectedProvider) throw new Error(`session provider ${session.provider}`);
 if (session.model !== expectedModel) throw new Error(`session model ${session.model}`);
 const presets = Array.isArray(session.policyPresets) ? session.policyPresets : [];
 for (const preset of ["npm", "pypi"]) {
diff --git a/test/e2e/test-onboard-repair.sh b/test/e2e/test-onboard-repair.sh
index 958bd68bea..c62db5ce00 100755
--- a/test/e2e/test-onboard-repair.sh
+++ b/test/e2e/test-onboard-repair.sh
@@ -14,10 +14,10 @@
 #   - Docker running
 #   - openshell CLI installed
 #   - Node.js available
-#   - NVIDIA_INFERENCE_API_KEY set to a valid nvapi-* key before starting the test
+#   - NVIDIA_INFERENCE_API_KEY set before starting the test
 #
 # Usage:
-#   NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-onboard-repair.sh
+#   NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-onboard-repair.sh
 
 set -uo pipefail
 
@@ -73,6 +73,8 @@ fi
 
 # shellcheck source=test/e2e/lib/sandbox-teardown.sh
 . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "$(dirname "${BASH_SOURCE[0]}")/lib/ci-compatible-inference.sh"
 register_sandbox_for_teardown "$SANDBOX_NAME"
 register_sandbox_for_teardown "$OTHER_SANDBOX_NAME"
 if [ -n "$INSTALL_SANDBOX_NAME" ]; then
@@ -148,14 +150,14 @@ else
   exit 1
 fi
 
-if [[ -n "$RESTORE_API_KEY" && "$RESTORE_API_KEY" == nvapi-* ]]; then
-  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
-else
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for resume completion"
+if [[ -z "$RESTORE_API_KEY" ]]; then # presence-only check; the key prefix is no longer validated
+  fail "NVIDIA_INFERENCE_API_KEY not set — required for resume completion"
   exit 1
 fi
+pass "NVIDIA_INFERENCE_API_KEY is set"
 
 export NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY"
+nemoclaw_e2e_configure_compatible_inference || exit 1
 pass "Exported NVIDIA_INFERENCE_API_KEY for the repair run (host writes nothing to disk; OpenShell gateway is the system of record)"
 
 # ══════════════════════════════════════════════════════════════════
@@ -226,7 +228,7 @@ else
 fi
 
 REPAIR_LOG="$(mktemp)"
-env -u NVIDIA_INFERENCE_API_KEY \
+env -u NVIDIA_INFERENCE_API_KEY -u COMPATIBLE_API_KEY \
   NEMOCLAW_NON_INTERACTIVE=1 \
   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
   NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
@@ -299,7 +301,7 @@ pass "Re-created interrupted session for conflict tests"
 info "Attempting resume with a different sandbox name..."
 
 SANDBOX_CONFLICT_LOG="$(mktemp)"
-env -u NVIDIA_INFERENCE_API_KEY \
+env -u NVIDIA_INFERENCE_API_KEY -u COMPATIBLE_API_KEY \
   NEMOCLAW_NON_INTERACTIVE=1 \
   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
   NEMOCLAW_SANDBOX_NAME="$OTHER_SANDBOX_NAME" \
@@ -328,7 +330,7 @@ section "Phase 5: Reject conflicting provider and model"
 info "Attempting resume with conflicting provider/model inputs..."
 
 PROVIDER_CONFLICT_LOG="$(mktemp)"
-env -u NVIDIA_INFERENCE_API_KEY \
+env -u NVIDIA_INFERENCE_API_KEY -u COMPATIBLE_API_KEY \
   NEMOCLAW_NON_INTERACTIVE=1 \
   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
   NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
diff --git a/test/e2e/test-onboard-resume.sh b/test/e2e/test-onboard-resume.sh
index 0810354968..3145f5013d 100755
--- a/test/e2e/test-onboard-resume.sh
+++ b/test/e2e/test-onboard-resume.sh
@@ -15,10 +15,10 @@
 #   - Docker running
 #   - openshell CLI installed
 #   - Node.js available
-#   - NVIDIA_INFERENCE_API_KEY set to a valid nvapi-* key before starting the test
+#   - NVIDIA_INFERENCE_API_KEY set before starting the test
 #
 # Usage:
-#   NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-onboard-resume.sh
+#   NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-onboard-resume.sh
 
 set -uo pipefail
 
@@ -77,6 +77,8 @@ fi
 
 # shellcheck source=test/e2e/lib/sandbox-teardown.sh
 . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "$(dirname "${BASH_SOURCE[0]}")/lib/ci-compatible-inference.sh"
 register_sandbox_for_teardown "$SANDBOX_NAME"
 
 SESSION_FILE="$HOME/.nemoclaw/onboard-session.json"
@@ -121,21 +123,24 @@ else
   exit 1
 fi
 
-if [[ -n "$RESTORE_API_KEY" && "$RESTORE_API_KEY" == nvapi-* ]]; then
-  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
-else
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for resume completion"
+if [[ -z "$RESTORE_API_KEY" ]]; then
+  fail "NVIDIA_INFERENCE_API_KEY not set — required for resume completion"
   exit 1
 fi
+pass "NVIDIA_INFERENCE_API_KEY is set"
+
+export NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY"
+nemoclaw_e2e_configure_compatible_inference || exit 1
+HOSTED_INFERENCE_BASE_URL="$(nemoclaw_e2e_hosted_inference_base_url)"
+EXPECTED_PROVIDER="$(nemoclaw_e2e_expected_route_provider)"
 
-if curl -sf --max-time 10 https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then
-  pass "Network access to inference-api.nvidia.com"
+if nemoclaw_e2e_probe_hosted_inference; then
+  pass "Network access to ${HOSTED_INFERENCE_BASE_URL}"
 else
-  fail "Cannot reach inference-api.nvidia.com"
+  fail "Cannot reach ${HOSTED_INFERENCE_BASE_URL}"
   exit 1
 fi
 
-export NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY"
 pass "Exported NVIDIA_INFERENCE_API_KEY for the resume run (host writes nothing to disk; OpenShell gateway is the system of record)"
 
 # ══════════════════════════════════════════════════════════════════
@@ -212,7 +217,7 @@ section "Phase 3: Resume"
 info "Running onboard --resume with NVIDIA_INFERENCE_API_KEY removed from env..."
 
 RESUME_LOG="$(mktemp)"
-env -u NVIDIA_INFERENCE_API_KEY \
+env -u NVIDIA_INFERENCE_API_KEY -u COMPATIBLE_API_KEY \
   NEMOCLAW_NON_INTERACTIVE=1 \
   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
   NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
@@ -286,9 +291,10 @@ fi
 node -e '
 const fs = require("fs");
 const file = process.argv[1];
+const expectedProvider = process.argv[2];
 const data = JSON.parse(fs.readFileSync(file, "utf8"));
 if (data.status !== "complete") process.exit(1);
-if (data.provider !== "nvidia-prod") process.exit(2);
+if (data.provider !== expectedProvider) process.exit(2);
 if (data.steps.preflight.status !== "complete") process.exit(3);
 if (data.steps.gateway.status !== "complete") process.exit(4);
 if (data.steps.sandbox.status !== "complete") process.exit(5);
@@ -296,7 +302,7 @@ if (data.steps.provider_selection.status !== "complete") process.exit(6);
 if (data.steps.inference.status !== "complete") process.exit(7);
 if (data.steps.openclaw.status !== "complete") process.exit(8);
 if (data.steps.policies.status !== "complete") process.exit(9);
-' "$SESSION_FILE"
+' "$SESSION_FILE" "$EXPECTED_PROVIDER"
 case $? in
   0) pass "Session file recorded full completion after resume" ;;
   *) fail "Session file did not record the expected completed state after resume" ;;
diff --git a/test/e2e/test-openclaw-inference-switch.sh b/test/e2e/test-openclaw-inference-switch.sh
index 276dbf2e9c..d513f68390 100755
--- a/test/e2e/test-openclaw-inference-switch.sh
+++ b/test/e2e/test-openclaw-inference-switch.sh
@@ -10,7 +10,7 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set for hosted inference
 #   - NEMOCLAW_NON_INTERACTIVE=1
 #   - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 
@@ -353,9 +353,16 @@ E2E_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 . "${E2E_DIR}/lib/inference-switch-retry.sh"
 # shellcheck source=test/e2e/lib/anthropic-switch-provider.sh
 . "${E2E_DIR}/lib/anthropic-switch-provider.sh"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "${E2E_DIR}/lib/ci-compatible-inference.sh"
 SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-openclaw-inference-switch}"
-SWITCH_PROVIDER="${NEMOCLAW_SWITCH_PROVIDER:-nvidia-prod}"
-SWITCH_MODEL="${NEMOCLAW_SWITCH_MODEL:-z-ai/glm-5.1}"
+if nemoclaw_e2e_using_compatible_inference; then
+  SWITCH_PROVIDER="${NEMOCLAW_SWITCH_PROVIDER:-$(nemoclaw_e2e_expected_route_provider)}"
+  SWITCH_MODEL="${NEMOCLAW_SWITCH_MODEL:-$(nemoclaw_e2e_hosted_inference_model)}"
+else
+  SWITCH_PROVIDER="${NEMOCLAW_SWITCH_PROVIDER:-nvidia-prod}"
+  SWITCH_MODEL="${NEMOCLAW_SWITCH_MODEL:-z-ai/glm-5.1}"
+fi
 SWITCH_INFERENCE_API="${NEMOCLAW_SWITCH_INFERENCE_API:-openai-completions}"
 # shellcheck disable=SC2034  # consumed by anthropic-switch-provider.sh helpers
 SWITCH_ENDPOINT_URL="${NEMOCLAW_SWITCH_ENDPOINT_URL:-}"
@@ -371,6 +378,7 @@ trap 'stop_mock_anthropic_switch_provider; _nemoclaw_sandbox_teardown' EXIT
 # shellcheck source=test/e2e/lib/install-path-refresh.sh
 . "${E2E_DIR}/lib/install-path-refresh.sh"
 register_sandbox_for_teardown "$SANDBOX_NAME"
+nemoclaw_e2e_configure_compatible_inference || exit 1
 
 section "Phase 0: Pre-cleanup"
 if command -v nemoclaw >/dev/null 2>&1; then
@@ -390,10 +398,7 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_INFERENCE_API_KEY is set"
-else
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
+if ! nemoclaw_e2e_require_hosted_inference_key; then
   exit 1
 fi
 
diff --git a/test/e2e/test-openclaw-skill-cli-e2e.sh b/test/e2e/test-openclaw-skill-cli-e2e.sh
index 2eb6084d69..8229b6893a 100755
--- a/test/e2e/test-openclaw-skill-cli-e2e.sh
+++ b/test/e2e/test-openclaw-skill-cli-e2e.sh
@@ -25,7 +25,7 @@
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-openclaw-skill-cli-e2e.sh
+#     NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-openclaw-skill-cli-e2e.sh
 
 # shellcheck disable=SC2317
 set -uo pipefail
@@ -71,7 +71,10 @@ SKILL_DESCRIPTION="E2E fixture proving openclaw skills install + list roundtrip"
 # Source shared teardown helper
 # shellcheck source=test/e2e/lib/sandbox-teardown.sh
 . "${E2E_DIR}/lib/sandbox-teardown.sh"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "${E2E_DIR}/lib/ci-compatible-inference.sh"
 register_sandbox_for_teardown "$SANDBOX_NAME"
+nemoclaw_e2e_configure_compatible_inference || exit 1
 
 # ══════════════════════════════════════════════════════════════════════
 # Phase 1: Install + Prerequisites
@@ -84,11 +87,9 @@ if ! docker info >/dev/null 2>&1; then
 fi
 pass "Docker is running"
 
-if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] || [[ "${NVIDIA_INFERENCE_API_KEY}" != nvapi-* ]]; then
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
+if ! nemoclaw_e2e_require_hosted_inference_key; then
   exit 1
 fi
-pass "NVIDIA_INFERENCE_API_KEY is set"
 
 cd "$REPO" || {
   fail "Could not cd to repo root"
diff --git a/test/e2e/test-overlayfs-autofix.sh b/test/e2e/test-overlayfs-autofix.sh
index 5d9143a024..686f9f0023 100755
--- a/test/e2e/test-overlayfs-autofix.sh
+++ b/test/e2e/test-overlayfs-autofix.sh
@@ -59,7 +59,7 @@
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 \
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#   NVIDIA_INFERENCE_API_KEY=nvapi-... \
+#   NVIDIA_INFERENCE_API_KEY=... \
 #     bash test/e2e/test-overlayfs-autofix.sh
 
 # ShellCheck cannot see EXIT trap invocations of cleanup helpers in this E2E script.
@@ -70,6 +70,8 @@ export NEMOCLAW_E2E_DEFAULT_TIMEOUT=1500
 SCRIPT_DIR_TIMEOUT="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
 # shellcheck source=test/e2e/e2e-timeout.sh
 source "${SCRIPT_DIR_TIMEOUT}/e2e-timeout.sh"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "${SCRIPT_DIR_TIMEOUT}/lib/ci-compatible-inference.sh"
 
 PASS=0
 FAIL=0
@@ -127,6 +129,7 @@ ONBOARD_LOG_NEGATIVE="/tmp/nemoclaw-e2e-onboard-negative.log"
 # shellcheck source=test/e2e/lib/sandbox-teardown.sh
 . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh"
 register_sandbox_for_teardown "$SANDBOX_NAME"
+nemoclaw_e2e_configure_compatible_inference || exit 1
 
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
 REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
@@ -172,10 +175,7 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_INFERENCE_API_KEY is set"
-else
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
+if ! nemoclaw_e2e_require_hosted_inference_key; then
   exit 1
 fi
 
diff --git a/test/e2e/test-sandbox-survival.sh b/test/e2e/test-sandbox-survival.sh
index bc952247ba..39117cf5b0 100755
--- a/test/e2e/test-sandbox-survival.sh
+++ b/test/e2e/test-sandbox-survival.sh
@@ -22,20 +22,20 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set for hosted inference
 #   - Network access to inference-api.nvidia.com
 #
 # Environment variables:
 #   NEMOCLAW_NON_INTERACTIVE=1             — required
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 — required
-#   NVIDIA_INFERENCE_API_KEY                         — required for real NVIDIA Endpoints inference
+#   NVIDIA_INFERENCE_API_KEY                         — required for hosted inference
 #   NEMOCLAW_SANDBOX_NAME                  — sandbox name (default: e2e-survival)
 #   NEMOCLAW_E2E_TIMEOUT_SECONDS           — overall timeout (default: 900)
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 \
 #   NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#   NVIDIA_INFERENCE_API_KEY=nvapi-... \
+#   NVIDIA_INFERENCE_API_KEY=... \
 #     bash test/e2e/test-sandbox-survival.sh
 
 set -uo pipefail
@@ -44,6 +44,8 @@ export NEMOCLAW_E2E_DEFAULT_TIMEOUT=900
 SCRIPT_DIR_TIMEOUT="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
 # shellcheck source=test/e2e/e2e-timeout.sh
 source "${SCRIPT_DIR_TIMEOUT}/e2e-timeout.sh"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "${SCRIPT_DIR_TIMEOUT}/lib/ci-compatible-inference.sh"
 
 PASS=0
 FAIL=0
@@ -93,6 +95,9 @@ version_gte() {
 }
 
 SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-survival}"
+nemoclaw_e2e_configure_compatible_inference || exit 1
+HOSTED_INFERENCE_BASE_URL="$(nemoclaw_e2e_hosted_inference_base_url)"
+MODEL="$(nemoclaw_e2e_hosted_inference_model)"
 
 # shellcheck source=test/e2e/lib/sandbox-teardown.sh
 . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh"
@@ -102,7 +107,6 @@ REGISTRY="$HOME/.nemoclaw/sandboxes.json"
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
 REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
 MIN_OPENSHELL="0.0.24"
-MODEL="nvidia/nemotron-3-super-120b-a12b"
 
 # SSH helper — sets up SSH config and common options for sandbox access
 # Sets: ssh_config, SSH_OPTS, SSH_TARGET
@@ -207,17 +211,14 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)"
-else
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for live inference"
+if ! nemoclaw_e2e_require_hosted_inference_key; then
   exit 1
 fi
 
-if curl -sf --max-time 10 https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then
-  pass "Network access to inference-api.nvidia.com"
+if nemoclaw_e2e_probe_hosted_inference; then
+  pass "Network access to ${HOSTED_INFERENCE_BASE_URL}"
 else
-  fail "Cannot reach inference-api.nvidia.com"
+  fail "Cannot reach ${HOSTED_INFERENCE_BASE_URL}"
   exit 1
 fi
 
@@ -382,7 +383,7 @@ else
 fi
 
 # 4b: Live inference through sandbox
-info "[LIVE] Baseline inference: user → sandbox → gateway → NVIDIA Endpoints..."
+info "[LIVE] Baseline inference: user → sandbox → gateway → hosted inference endpoint..."
 # shellcheck disable=SC2029  # client-side expansion is intentional
 baseline_response=$(run_with_timeout 90 ssh "${SSH_OPTS[@]}" "$SSH_TARGET" \
   "curl -s --max-time 60 https://inference.local/v1/chat/completions \
@@ -718,7 +719,7 @@ fi
 # ══════════════════════════════════════════════════════════════════
 section "Phase 10: Live inference after restart (THE definitive test)"
 
-info "[LIVE] Post-restart inference: user → sandbox → gateway → NVIDIA Endpoints..."
+info "[LIVE] Post-restart inference: user → sandbox → gateway → hosted inference endpoint..."
 # shellcheck disable=SC2029
 post_response=$(run_with_timeout 90 ssh "${SSH_OPTS[@]}" "$SSH_TARGET" \
   "curl -s --max-time 60 https://inference.local/v1/chat/completions \
@@ -750,7 +751,7 @@ for pong_attempt in 1 2 3; do
 done
 if $pong_ok; then
   pass "[LIVE] Post-restart: model responded with PONG through sandbox"
-  info "Full path proven: user → sandbox → openshell gateway (resumed) → NVIDIA Endpoints → response"
+  info "Full path proven: user → sandbox → openshell gateway (resumed) → hosted inference endpoint → response"
   info "This proves #859's ask: reliable non-destructive gateway lifecycle with working inference"
 else
   fail "[LIVE] Post-restart: expected PONG after 3 attempts, got: ${post_content:0:200}"
diff --git a/test/e2e/test-shields-config.sh b/test/e2e/test-shields-config.sh
index a806bbdc2b..20b1529fee 100755
--- a/test/e2e/test-shields-config.sh
+++ b/test/e2e/test-shields-config.sh
@@ -19,7 +19,7 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set for hosted inference
 #
 # Environment variables:
 #   NEMOCLAW_NON_INTERACTIVE=1             — required
@@ -34,6 +34,8 @@ export NEMOCLAW_E2E_DEFAULT_TIMEOUT=900
 SCRIPT_DIR_TIMEOUT="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
 # shellcheck source=test/e2e/e2e-timeout.sh
 source "${SCRIPT_DIR_TIMEOUT}/e2e-timeout.sh"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "${SCRIPT_DIR_TIMEOUT}/lib/ci-compatible-inference.sh"
 
 PASS=0
 FAIL=0
@@ -60,6 +62,7 @@ SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-shields}"
 # shellcheck source=test/e2e/lib/sandbox-teardown.sh
 . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh"
 register_sandbox_for_teardown "$SANDBOX_NAME"
+nemoclaw_e2e_configure_compatible_inference || exit 1
 
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
 REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
@@ -79,10 +82,7 @@ else
   exit 1
 fi
 
-if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then
-  pass "NVIDIA_INFERENCE_API_KEY is set"
-else
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
+if ! nemoclaw_e2e_require_hosted_inference_key; then
   exit 1
 fi
 
diff --git a/test/e2e/test-skill-agent-e2e.sh b/test/e2e/test-skill-agent-e2e.sh
index ff93b92a3b..f4ef86a929 100755
--- a/test/e2e/test-skill-agent-e2e.sh
+++ b/test/e2e/test-skill-agent-e2e.sh
@@ -13,7 +13,7 @@
 #
 # Prerequisites:
 #   - Docker running
-#   - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-)
+#   - NVIDIA_INFERENCE_API_KEY set for hosted inference
 #   - NEMOCLAW_NON_INTERACTIVE=1, NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1
 #
 # Environment:
@@ -24,7 +24,7 @@
 #
 # Usage:
 #   NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
-#     NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-skill-agent-e2e.sh
+#     NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-skill-agent-e2e.sh
 
 # ShellCheck cannot see EXIT trap invocations of cleanup helpers in this E2E script.
 # shellcheck disable=SC2317
@@ -98,7 +98,10 @@ RETRY_SLEEP="${E2E_SKILL_AGENT_RETRY_SLEEP_SEC:-15}"
 # Source shared teardown helper
 # shellcheck source=test/e2e/lib/sandbox-teardown.sh
 . "${E2E_DIR}/lib/sandbox-teardown.sh"
+# shellcheck source=test/e2e/lib/ci-compatible-inference.sh
+. "${E2E_DIR}/lib/ci-compatible-inference.sh"
 register_sandbox_for_teardown "$SANDBOX_NAME"
+nemoclaw_e2e_configure_compatible_inference || exit 1
 
 # ══════════════════════════════════════════════════════════════════════
 # Phase 1: Install + Prerequisites
@@ -111,11 +114,9 @@ if ! docker info >/dev/null 2>&1; then
 fi
 pass "Docker is running"
 
-if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] || [[ "${NVIDIA_INFERENCE_API_KEY}" != nvapi-* ]]; then
-  fail "NVIDIA_INFERENCE_API_KEY not set or invalid"
+if ! nemoclaw_e2e_require_hosted_inference_key; then
   exit 1
 fi
-pass "NVIDIA_INFERENCE_API_KEY is set"
 
 cd "$REPO" || {
   fail "Could not cd to repo root"
diff --git a/test/onboard-selection.test.ts b/test/onboard-selection.test.ts
index 58dc04c203..8b422643c2 100644
--- a/test/onboard-selection.test.ts
+++ b/test/onboard-selection.test.ts
@@ -3923,7 +3923,7 @@ const { setupNim } = require(${onboardPath});
     assert.equal(payload.messages.filter((message: string) => /Choose \[/.test(message)).length, 2);
   });
 
-  it("fails early in non-interactive mode when NVIDIA_INFERENCE_API_KEY is not an nvapi- key", () => {
+  it("fails early in non-interactive mode when explicit cloud provider key is not nvapi-", () => {
     const repoRoot = path.join(import.meta.dirname, "..");
     const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-build-noninteractive-"));
     const fakeBin = path.join(tmpDir, "bin");
@@ -3964,7 +3964,7 @@ onboardModule._compile(injected, onboardFile);
 const { setupNim, __setNonInteractive } = onboardModule.exports;
 
 (async () => {
-  process.env.NVIDIA_INFERENCE_API_KEY = "sk-test";
+  process.env.NVIDIA_INFERENCE_API_KEY = "sk-test"; process.env.NEMOCLAW_PROVIDER = "cloud";
   __setNonInteractive(true);
   const originalLog = console.log;
   const originalError = console.error;