diff --git a/.github/workflows/e2e-script.yaml b/.github/workflows/e2e-script.yaml index 3185e08ad1..8fdef8f6ff 100644 --- a/.github/workflows/e2e-script.yaml +++ b/.github/workflows/e2e-script.yaml @@ -58,12 +58,7 @@ on: type: string default: "" nvidia_api_key: - description: Pass the NVIDIA_INFERENCE_API_KEY secret to the script. - required: false - type: boolean - default: false - nvidia_secret_as_compatible_api_key: - description: Expose NVIDIA_INFERENCE_API_KEY as COMPATIBLE_API_KEY for CI-only OpenAI-compatible inference. + description: Pass the hosted inference source secret as the CI custom endpoint credential. required: false type: boolean default: false @@ -217,8 +212,8 @@ jobs: echo "::warning::Docker Hub login failed after 3 attempts; continuing with anonymous pulls." fi - - name: Export CI compatible inference environment - if: ${{ inputs.nvidia_secret_as_compatible_api_key }} + - name: Export hosted CI inference environment + if: ${{ inputs.nvidia_api_key }} env: NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} shell: bash @@ -226,12 +221,12 @@ jobs: set -euo pipefail if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then - echo "::error::NVIDIA_INFERENCE_API_KEY secret is required for CI compatible inference." >&2 + echo "::error::NVIDIA_INFERENCE_API_KEY secret is required for hosted CI inference; it is withheld for workflow_dispatch target_ref runs." 
>&2 exit 1 fi { - printf 'NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE=1\n' + printf 'NEMOCLAW_E2E_USE_HOSTED_INFERENCE=1\n' printf 'NEMOCLAW_PROVIDER=custom\n' printf 'NEMOCLAW_ENDPOINT_URL=https://inference-api.nvidia.com/v1\n' printf 'NEMOCLAW_MODEL=nvidia/nvidia/nemotron-3-super-v3\n' diff --git a/.github/workflows/nightly-e2e.yaml b/.github/workflows/nightly-e2e.yaml index 5ed70533a2..ce97dce38b 100644 --- a/.github/workflows/nightly-e2e.yaml +++ b/.github/workflows/nightly-e2e.yaml @@ -3,9 +3,9 @@ # # Nightly E2E tests: # -# cloud-e2e Cloud inference (NVIDIA Endpoint API) on ubuntu-latest. +# cloud-e2e Hosted inference (OpenAI-compatible endpoint) on ubuntu-latest. # agent-turn-latency-e2e Times one real OpenClaw turn and one real Hermes -# turn through the configured NVIDIA Build model. +# turn through the configured hosted inference model. # messaging-providers-e2e Validates messaging credential provider/placeholder/L7-proxy chain # for Telegram + Discord + Slack. Uses fake tokens. Slack additionally # exercises OpenShell provider-shaped alias resolution (#2085 follow-up). @@ -99,7 +99,7 @@ # Runs directly on the runner (not inside Docker) because OpenShell bootstraps # a K3s cluster inside a privileged Docker container — nesting would break networking. # -# NVIDIA_INFERENCE_API_KEY for cloud-e2e: +# NVIDIA_INFERENCE_API_KEY for hosted CI inference: # - Repository secret: Settings → Secrets and variables → Actions → Repository secrets. # - Environment secret: only available if the job sets `environment: <name>`.
# (Storing the key under Environments / NVIDIA_INFERENCE_API_KEY without `environment:` here leaves the @@ -204,10 +204,9 @@ jobs: artifact_path: "/tmp/nemoclaw-e2e-install.log" env_json: '{"NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE":"1","NEMOCLAW_NON_INTERACTIVE":"1","NEMOCLAW_RECREATE_SANDBOX":"1","NEMOCLAW_SANDBOX_NAME":"e2e-nightly"}' nvidia_api_key: true - nvidia_secret_as_compatible_api_key: true github_token: true secrets: &nightly-e2e-default-secrets - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} BRAVE_API_KEY: ${{ secrets.BRAVE_API_KEY }} DOCKERHUB_USERNAME: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.DOCKERHUB_USERNAME || '' }} DOCKERHUB_TOKEN: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.DOCKERHUB_TOKEN || '' }} @@ -228,7 +227,6 @@ jobs: env_json: '{"NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE":"1","NEMOCLAW_NON_INTERACTIVE":"1","NEMOCLAW_POLICY_MODE":"custom","NEMOCLAW_POLICY_PRESETS":"npm,pypi","NEMOCLAW_RECREATE_SANDBOX":"1","NEMOCLAW_SANDBOX_NAME":"e2e-cloud-onboard","NEMOCLAW_TRACE_DIR":"/tmp/nemoclaw-traces"}' checked_out_ref_env: "NEMOCLAW_PUBLIC_INSTALL_REF" nvidia_api_key: true - nvidia_secret_as_compatible_api_key: true github_token: true secrets: *nightly-e2e-default-secrets cloud-inference-e2e: @@ -245,7 +243,6 @@ jobs: artifact_path: "/tmp/nemoclaw-e2e-cloud-inference-install.log" env_json: '{"NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE":"1","NEMOCLAW_NON_INTERACTIVE":"1","NEMOCLAW_RECREATE_SANDBOX":"1","NEMOCLAW_SANDBOX_NAME":"e2e-cloud-inference"}' nvidia_api_key: true - nvidia_secret_as_compatible_api_key: true secrets: *nightly-e2e-default-secrets cron-preflight-inference-local-e2e: if: >- @@ -277,7 +274,7 @@ jobs: /tmp/nemoclaw-e2e-openclaw-turn-latency-install.log 
/tmp/nemoclaw-e2e-hermes-turn-latency-install.log /tmp/nemoclaw-e2e-agent-turn-latency.json - env_json: '{"NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE":"1","NEMOCLAW_NON_INTERACTIVE":"1","NEMOCLAW_RECREATE_SANDBOX":"1","NEMOCLAW_TURN_LATENCY_MODEL":"nvidia/nemotron-3-ultra-550b-a55b"}' + env_json: '{"NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE":"1","NEMOCLAW_NON_INTERACTIVE":"1","NEMOCLAW_RECREATE_SANDBOX":"1"}' nvidia_api_key: true secrets: *nightly-e2e-default-secrets skill-agent-e2e: @@ -382,7 +379,7 @@ jobs: github_token: true messaging_live_secrets: ${{ github.event_name != 'workflow_dispatch' || inputs.target_ref == '' }} secrets: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} BRAVE_API_KEY: ${{ secrets.BRAVE_API_KEY }} DOCKERHUB_USERNAME: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.DOCKERHUB_USERNAME || '' }} DOCKERHUB_TOKEN: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.DOCKERHUB_TOKEN || '' }} @@ -467,7 +464,13 @@ jobs: - name: Run OpenClaw TUI chat correlation E2E test env: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} + NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" + NEMOCLAW_PROVIDER: custom + NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" NEMOCLAW_RECREATE_SANDBOX: "1" @@ -536,7 +539,13 @@ jobs: - name: "Run issue #4434 TUI 
unreachable inference E2E test" env: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} + NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" + NEMOCLAW_PROVIDER: custom + NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_ISSUE_4434_LIVE: "1" NEMOCLAW_NON_INTERACTIVE: "1" NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" @@ -550,7 +559,7 @@ jobs: if: failure() shell: bash env: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} GITHUB_TOKEN: ${{ github.token }} run: | set -euo pipefail @@ -958,8 +967,13 @@ jobs: - name: Run token rotation E2E test env: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} - NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE: "1" + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} + NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" + NEMOCLAW_PROVIDER: custom + NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" NEMOCLAW_POLICY_TIER: "open" @@ -1250,7 +1264,13 @@ jobs: - name: Run sandbox operations E2E test env: - NVIDIA_INFERENCE_API_KEY: ${{ 
secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} + NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" + NEMOCLAW_PROVIDER: custom + NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" NEMOCLAW_POLICY_TIER: "open" @@ -1551,11 +1571,12 @@ jobs: # COMPATIBLE_API_KEY. Keep checkout credentials disabled, do not pass # GITHUB_TOKEN, and rely on reviewed/maintainer-dispatched refs. env: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_PROVIDER: custom NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/credential-migration NEMOCLAW_RUN_E2E_SCENARIOS: "1" NEMOCLAW_SANDBOX_NAME: "e2e-cred-migration" @@ -1774,13 +1795,25 @@ jobs: - *dockerhub-auth-step - name: Install NemoClaw env: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} + NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" + NEMOCLAW_PROVIDER: custom + NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 + NEMOCLAW_MODEL: 
nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" run: bash install.sh --non-interactive --yes-i-accept-third-party-software - name: Run onboard repair E2E test env: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} + NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" + NEMOCLAW_PROVIDER: custom + NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" run: | @@ -1812,13 +1845,25 @@ jobs: - *dockerhub-auth-step - name: Install NemoClaw env: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} + NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" + NEMOCLAW_PROVIDER: custom + NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" run: bash install.sh --non-interactive --yes-i-accept-third-party-software - name: Run onboard resume E2E test env: - NVIDIA_INFERENCE_API_KEY: ${{ 
secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} + NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" + NEMOCLAW_PROVIDER: custom + NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" run: | @@ -1850,13 +1895,25 @@ jobs: - *dockerhub-auth-step - name: Install NemoClaw env: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} + NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" + NEMOCLAW_PROVIDER: custom + NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" run: bash install.sh --non-interactive --yes-i-accept-third-party-software - name: Run onboard negative-path E2E test env: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} + NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" + NEMOCLAW_PROVIDER: custom + NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: 
${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" run: | @@ -1889,13 +1946,25 @@ jobs: - *dockerhub-auth-step - name: Install NemoClaw env: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} + NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" + NEMOCLAW_PROVIDER: custom + NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" run: bash install.sh --non-interactive --yes-i-accept-third-party-software - name: Run runtime overrides E2E test env: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} + NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" + NEMOCLAW_PROVIDER: custom + NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" run: | @@ -1928,14 +1997,26 @@ jobs: - *dockerhub-auth-step - name: Install NemoClaw and onboard sandbox env: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || 
inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} + NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" + NEMOCLAW_PROVIDER: custom + NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" NEMOCLAW_SANDBOX_NAME: "e2e-test" run: bash install.sh --non-interactive --yes-i-accept-third-party-software - name: Run credential sanitization E2E test env: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} + NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" + NEMOCLAW_PROVIDER: custom + NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" NEMOCLAW_SANDBOX_NAME: "e2e-test" @@ -1970,14 +2051,26 @@ jobs: - *dockerhub-auth-step - name: Install NemoClaw and onboard sandbox env: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} + NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" + NEMOCLAW_PROVIDER: custom + NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: ${{ (github.event_name != 
'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" NEMOCLAW_SANDBOX_NAME: "e2e-test" run: bash install.sh --non-interactive --yes-i-accept-third-party-software - name: Run telegram injection E2E test env: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} + NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" + NEMOCLAW_PROVIDER: custom + NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" NEMOCLAW_SANDBOX_NAME: "e2e-test" @@ -2050,7 +2143,13 @@ jobs: - name: Run launchable install-flow smoke test env: - NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }} + NVIDIA_INFERENCE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} + NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1" + NEMOCLAW_PROVIDER: custom + NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1 + NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3 + NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3 + COMPATIBLE_API_KEY: ${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.NVIDIA_INFERENCE_API_KEY || '' }} NEMOCLAW_NON_INTERACTIVE: "1" NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1" NEMOCLAW_SANDBOX_NAME: "e2e-launchable" diff --git a/ci/env-var-doc-allowlist.json b/ci/env-var-doc-allowlist.json index c04df7defc..c2b5ec56c8 100644 --- a/ci/env-var-doc-allowlist.json +++ b/ci/env-var-doc-allowlist.json @@ 
-42,5 +42,17 @@ { "name": "NEMOCLAW_E2E_FORCE_FAIL_AT_STEP", "reason": "Internal E2E-only selector naming the onboarding step where deterministic fault injection should exit. Used only with NEMOCLAW_E2E_FAILURE_INJECTION in test scripts." + }, + { + "name": "NEMOCLAW_E2E_USE_HOSTED_INFERENCE", + "reason": "Internal E2E-only sentinel that tells CI to route the repository NVIDIA_INFERENCE_API_KEY secret through the hosted inference-api.nvidia.com OpenAI-compatible endpoint. Not user-facing." + }, + { + "name": "NEMOCLAW_COMPAT_MODEL", + "reason": "Internal E2E/test override for the model used by OpenAI-compatible endpoint scenarios. User-facing custom endpoint model selection is collected through onboard prompts or NEMOCLAW_MODEL." + }, + { + "name": "NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL", + "reason": "Legacy E2E-only model override used by cloud and hosted-inference live test scripts. Not a supported production configuration knob." } ] diff --git a/src/lib/onboard/providers.test.ts b/src/lib/onboard/providers.test.ts index b5b25e4d5f..56090df090 100644 --- a/src/lib/onboard/providers.test.ts +++ b/src/lib/onboard/providers.test.ts @@ -7,36 +7,87 @@ type RunResult = { status: number; stdout?: string; stderr?: string }; type RunOptions = { env?: Record }; type RunOpenshell = (command: string[], opts?: RunOptions) => RunResult; -const { buildProviderArgs, providerExistsInGateway, upsertProvider, upsertMessagingProviders } = - require("../../../dist/lib/onboard/providers") as { - buildProviderArgs: ( - action: "create" | "update", - name: string, - type: string, - credentialEnv: string, - baseUrl: string | null, - ) => string[]; - providerExistsInGateway: (name: string, runOpenshell: RunOpenshell) => boolean; - upsertProvider: ( - name: string, - type: string, - credentialEnv: string, - baseUrl: string | null, - env: Record, - runOpenshell: RunOpenshell, - options?: { replaceExisting?: boolean }, - ) => { ok: boolean; status?: number; message?: string }; - 
upsertMessagingProviders: ( - tokenDefs: Array<{ - name: string; - envKey: string; - token: string | null; - providerType?: string; - }>, - runOpenshell: RunOpenshell, - options?: { replaceExisting?: boolean; bestEffort?: boolean }, - ) => string[]; - }; +const { + HOSTED_INFERENCE_ENDPOINT_URL, + HOSTED_INFERENCE_MODEL, + buildProviderArgs, + getRequestedModelHint, + getRequestedProviderHint, + providerExistsInGateway, + stageHostedInferenceSourceSecretEnv, + upsertProvider, + upsertMessagingProviders, +} = require("../../../dist/lib/onboard/providers") as { + HOSTED_INFERENCE_ENDPOINT_URL: string; + HOSTED_INFERENCE_MODEL: string; + buildProviderArgs: ( + action: "create" | "update", + name: string, + type: string, + credentialEnv: string, + baseUrl: string | null, + ) => string[]; + getRequestedModelHint: (nonInteractive: boolean) => string | null; + getRequestedProviderHint: (nonInteractive: boolean) => string | null; + providerExistsInGateway: (name: string, runOpenshell: RunOpenshell) => boolean; + stageHostedInferenceSourceSecretEnv: () => boolean; + upsertProvider: ( + name: string, + type: string, + credentialEnv: string, + baseUrl: string | null, + env: Record, + runOpenshell: RunOpenshell, + options?: { replaceExisting?: boolean }, + ) => { ok: boolean; status?: number; message?: string }; + upsertMessagingProviders: ( + tokenDefs: Array<{ + name: string; + envKey: string; + token: string | null; + providerType?: string; + }>, + runOpenshell: RunOpenshell, + options?: { replaceExisting?: boolean; bestEffort?: boolean }, + ) => string[]; +}; + +function withProviderEnv(next: Record, testBody: () => void): void { /* Test helper: runs testBody against an isolated process.env. The provider and hosted-inference variables listed in keys, plus every key of next, are saved and deleted; next is then applied (an undefined value leaves the key deleted); the finally block restores each saved value, or deletes keys that were previously unset, even when testBody throws. testBody is invoked synchronously. NOTE(review): Record and Map appear without type arguments in this view, presumably lost in extraction; confirm against the repository file. */ + const keys = new Set([ + "NVIDIA_INFERENCE_API_KEY", + "NEMOCLAW_PROVIDER", + "NEMOCLAW_ENDPOINT_URL", + "NEMOCLAW_MODEL", + "NEMOCLAW_COMPAT_MODEL", + "NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL", + "NEMOCLAW_E2E_USE_HOSTED_INFERENCE", + "COMPATIBLE_API_KEY", + ...Object.keys(next), + ]); + const previous = new Map(); /* snapshot of the original values, keyed by variable name */ + for (const key of keys) { +
previous.set(key, process.env[key]); + delete process.env[key]; + } + for (const [key, value] of Object.entries(next)) { + if (value === undefined) { + delete process.env[key]; + } else { + process.env[key] = value; + } + } + try { + testBody(); + } finally { /* always put the caller's environment back */ + for (const [key, value] of previous.entries()) { + if (value === undefined) { + delete process.env[key]; + } else { + process.env[key] = value; + } + } + } +} describe("onboard provider helpers", () => { it("builds create arguments for generic providers", () => { @@ -239,6 +290,55 @@ describe("onboard provider helpers", () => { expect(commands[1]).toMatch(/--credential NVIDIA_INFERENCE_API_KEY/); }); + it("stages non-nvapi NVIDIA_INFERENCE_API_KEY as hosted custom inference", () => { + withProviderEnv( + { + NVIDIA_INFERENCE_API_KEY: " repo-hosted-key ", + }, + () => { + expect(stageHostedInferenceSourceSecretEnv()).toBe(true); + expect(getRequestedProviderHint(true)).toBe("custom"); + expect(getRequestedModelHint(true)).toBe(HOSTED_INFERENCE_MODEL); + expect(process.env.NEMOCLAW_PROVIDER).toBe("custom"); + expect(process.env.NEMOCLAW_ENDPOINT_URL).toBe(HOSTED_INFERENCE_ENDPOINT_URL); + expect(process.env.NEMOCLAW_MODEL).toBe(HOSTED_INFERENCE_MODEL); + expect(process.env.NEMOCLAW_COMPAT_MODEL).toBe(HOSTED_INFERENCE_MODEL); + expect(process.env.COMPATIBLE_API_KEY).toBe("repo-hosted-key"); + }, + ); + }); + + it("keeps explicit cloud provider selection on the Build provider path", () => { + withProviderEnv( + { + NVIDIA_INFERENCE_API_KEY: "repo-hosted-key", + NEMOCLAW_PROVIDER: "cloud", + }, + () => { + expect(stageHostedInferenceSourceSecretEnv()).toBe(false); + expect(getRequestedProviderHint(true)).toBe("build"); + expect(process.env.COMPATIBLE_API_KEY).toBeUndefined(); + expect(process.env.NEMOCLAW_ENDPOINT_URL).toBeUndefined(); + }, + ); + }); + + it("preserves explicit custom provider credentials when NVIDIA_INFERENCE_API_KEY is unrelated", () => { + withProviderEnv( + { + COMPATIBLE_API_KEY: 
"custom-endpoint-key", + NVIDIA_INFERENCE_API_KEY: "repo-hosted-key", + NEMOCLAW_PROVIDER: "custom", + }, + () => { + expect(stageHostedInferenceSourceSecretEnv()).toBe(false); + expect(getRequestedProviderHint(true)).toBe("custom"); + expect(process.env.COMPATIBLE_API_KEY).toBe("custom-endpoint-key"); + expect(process.env.NEMOCLAW_ENDPOINT_URL).toBeUndefined(); + }, + ); + }); + it("returns redacted error details when create or update fails", () => { const result = upsertProvider("bad-provider", "generic", "SOME_KEY", null, {}, (command) => { if (command.includes("get")) return { status: 1, stdout: "", stderr: "" }; diff --git a/src/lib/onboard/providers.ts b/src/lib/onboard/providers.ts index edb2d117ef..4ba749c2e1 100644 --- a/src/lib/onboard/providers.ts +++ b/src/lib/onboard/providers.ts @@ -5,6 +5,7 @@ // Provider metadata, lookup helpers, and gateway provider CRUD. const { redact } = require("../runner"); +const { normalizeCredentialValue } = require("../credentials/store"); const { DEFAULT_CLOUD_MODEL, DEFAULT_HERMES_PROVIDER_MODEL, @@ -22,6 +23,10 @@ const OPENAI_ENDPOINT_URL = "https://api.openai.com/v1"; const ANTHROPIC_ENDPOINT_URL = "https://api.anthropic.com"; const GEMINI_ENDPOINT_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"; const HERMES_INFERENCE_ENDPOINT_URL = "https://inference-api.nousresearch.com/v1"; +const HOSTED_INFERENCE_SOURCE_ENV = "NVIDIA_INFERENCE_API_KEY"; +const HOSTED_INFERENCE_CREDENTIAL_ENV = "COMPATIBLE_API_KEY"; +const HOSTED_INFERENCE_ENDPOINT_URL = "https://inference-api.nvidia.com/v1"; +const HOSTED_INFERENCE_MODEL = "nvidia/nvidia/nemotron-3-super-v3"; const REMOTE_PROVIDER_CONFIG = { build: { @@ -167,6 +172,7 @@ function getEffectiveProviderName(providerKey) { // ── Non-interactive helpers ────────────────────────────────────── function getNonInteractiveProvider() { + stageHostedInferenceSourceSecretEnv(); const providerKey = (process.env.NEMOCLAW_PROVIDER || "").trim().toLowerCase(); if (!providerKey) 
return null; const aliases = { @@ -208,6 +214,50 @@ function getNonInteractiveProvider() { return normalized; } +function stageHostedInferenceSourceSecretEnv() { /* Stages the hosted custom-endpoint environment from the HOSTED_INFERENCE_SOURCE_ENV variable. Returns false and leaves process.env untouched when the normalized source key is empty or the staging conditions below are not met. Otherwise it mutates process.env (provider, endpoint URL, model, compat model, compatible credential) and returns true. */ + const sourceKey = normalizeCredentialValue(process.env[HOSTED_INFERENCE_SOURCE_ENV] ?? ""); + if (!sourceKey) return false; /* no source secret: nothing to stage */ + + const rawProvider = (process.env.NEMOCLAW_PROVIDER || "").trim().toLowerCase(); + const aliases = { + cloud: "build", + anthropiccompatible: "anthropicCompatible", + hermes: "hermesProvider", + "hermes-provider": "hermesProvider", + hermesprovider: "hermesProvider", + nous: "hermesProvider", + "nous-portal": "hermesProvider", + }; + const normalizedProvider = aliases[rawProvider] || rawProvider; /* unknown names pass through lower-cased; empty string means no provider was requested */ + const hostedFlag = (process.env.NEMOCLAW_E2E_USE_HOSTED_INFERENCE || "").trim() === "1"; + const compatibleKey = normalizeCredentialValue( + process.env[HOSTED_INFERENCE_CREDENTIAL_ENV] ?? "", + ); /* Explicit "custom": stage when the hosted flag is set, or when no compatible key is present and the source key lacks the "nvapi-" prefix. */ + const explicitHostedCustom = + normalizedProvider === "custom" && + (hostedFlag || (!compatibleKey && !sourceKey.startsWith("nvapi-"))); /* No provider requested: stage when the hosted flag is set or the source key lacks the "nvapi-" prefix. */ + const implicitHostedCustom = + !normalizedProvider && (hostedFlag || !sourceKey.startsWith("nvapi-")); + const shouldStage = explicitHostedCustom || implicitHostedCustom; + + if (!shouldStage) return false; /* any other provider value (for example the "build" alias target) is left alone */ + + if (!normalizedProvider) { + process.env.NEMOCLAW_PROVIDER = "custom"; + } /* Existing non-blank endpoint and model values win (trimmed); the hosted defaults only fill blanks. */ + process.env.NEMOCLAW_ENDPOINT_URL = + (process.env.NEMOCLAW_ENDPOINT_URL || "").trim() || HOSTED_INFERENCE_ENDPOINT_URL; + const model = + (process.env.NEMOCLAW_MODEL || "").trim() || + (process.env.NEMOCLAW_COMPAT_MODEL || "").trim() || + (process.env.NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL || "").trim() || + HOSTED_INFERENCE_MODEL; + process.env.NEMOCLAW_MODEL = model; + process.env.NEMOCLAW_COMPAT_MODEL = (process.env.NEMOCLAW_COMPAT_MODEL || "").trim() || model; /* The compatible credential is always overwritten with the normalized source key once staging is chosen. */ + process.env[HOSTED_INFERENCE_CREDENTIAL_ENV] = sourceKey; + return true; +} + function getNonInteractiveModel(providerKey) { const model = (process.env.NEMOCLAW_MODEL || "").trim(); if (!model) return null; @@
-399,8 +449,13 @@ module.exports = { OLLAMA_PROXY_CREDENTIAL_ENV, VLLM_LOCAL_CREDENTIAL_ENV, DISCORD_SNOWFLAKE_RE, + HOSTED_INFERENCE_SOURCE_ENV, + HOSTED_INFERENCE_CREDENTIAL_ENV, + HOSTED_INFERENCE_ENDPOINT_URL, + HOSTED_INFERENCE_MODEL, getProviderLabel, getEffectiveProviderName, + stageHostedInferenceSourceSecretEnv, getNonInteractiveProvider, getNonInteractiveModel, getRequestedProviderHint, diff --git a/test/e2e-script-workflow.test.ts b/test/e2e-script-workflow.test.ts index 512e9d377a..5fb007becb 100644 --- a/test/e2e-script-workflow.test.ts +++ b/test/e2e-script-workflow.test.ts @@ -45,6 +45,9 @@ const require = createRequire(import.meta.url); const traceTiming = require("../scripts/scorecard/analyze-trace-timing.ts") as TraceTimingAnalyzer; const TRACE_SUMMARY_FILE = "cloud-onboard-trace-timing-summary.json"; +const TRUSTED_REF_GUARD = "github.event_name != 'workflow_dispatch' || inputs.target_ref == ''"; +const GUARDED_HOSTED_INFERENCE_SECRET = `\${{ (${TRUSTED_REF_GUARD}) && secrets.NVIDIA_INFERENCE_API_KEY || '' }}`; +const RAW_HOSTED_INFERENCE_SECRET = "${{ secrets.NVIDIA_INFERENCE_API_KEY }}"; function timingSummary( phases: Record = { "nemoclaw.onboard.phase.preflight": 1000 }, @@ -127,6 +130,12 @@ function traceGithubFixture(options: { return github; } +function envReferencesHostedInferenceSecret(env?: Record): boolean { /* Returns true when at least one value of env, converted with String(), contains the substring "secrets.NVIDIA_INFERENCE_API_KEY". An omitted env is treated as an empty object and yields false. This is a substring match, so guarded and raw secret expressions both count. */ + return Object.values(env ?? {}).some((value) => + String(value).includes("secrets.NVIDIA_INFERENCE_API_KEY"), + ); +} + // Direct legacy bash E2Es are being migrated toward Vitest coverage. Keep the // top-level shell suite frozen so new coverage starts in the newer E2E surface // unless maintainers intentionally update this allowlist.
@@ -385,22 +394,21 @@ describe("E2E reusable workflow contract", () => { it("passes only named secrets to reusable nightly jobs", () => { const reusableJobs = reusableNightlyJobs(nightlyWorkflow); const defaultSecrets = { - NVIDIA_INFERENCE_API_KEY: "${{ secrets.NVIDIA_INFERENCE_API_KEY }}", + NVIDIA_INFERENCE_API_KEY: GUARDED_HOSTED_INFERENCE_SECRET, BRAVE_API_KEY: "${{ secrets.BRAVE_API_KEY }}", DOCKERHUB_USERNAME: "${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.DOCKERHUB_USERNAME || '' }}", DOCKERHUB_TOKEN: "${{ (github.event_name != 'workflow_dispatch' || inputs.target_ref == '') && secrets.DOCKERHUB_TOKEN || '' }}", }; - const trustedRefGuard = "github.event_name != 'workflow_dispatch' || inputs.target_ref == ''"; const messagingLiveSecrets = { - TELEGRAM_BOT_TOKEN_REAL: `\${{ (${trustedRefGuard}) && secrets.TELEGRAM_BOT_TOKEN_REAL || '' }}`, - TELEGRAM_CHAT_ID_E2E: `\${{ (${trustedRefGuard}) && secrets.TELEGRAM_CHAT_ID_E2E || '' }}`, - DISCORD_BOT_TOKEN_REAL: `\${{ (${trustedRefGuard}) && secrets.DISCORD_BOT_TOKEN_REAL || '' }}`, - DISCORD_CHANNEL_ID_E2E: `\${{ (${trustedRefGuard}) && secrets.DISCORD_CHANNEL_ID_E2E || '' }}`, - SLACK_BOT_TOKEN_REAL: `\${{ (${trustedRefGuard}) && secrets.SLACK_BOT_TOKEN_REAL || '' }}`, - SLACK_APP_TOKEN_REAL: `\${{ (${trustedRefGuard}) && secrets.SLACK_APP_TOKEN_REAL || '' }}`, - SLACK_CHANNEL_ID_E2E: `\${{ (${trustedRefGuard}) && secrets.SLACK_CHANNEL_ID_E2E || '' }}`, + TELEGRAM_BOT_TOKEN_REAL: `\${{ (${TRUSTED_REF_GUARD}) && secrets.TELEGRAM_BOT_TOKEN_REAL || '' }}`, + TELEGRAM_CHAT_ID_E2E: `\${{ (${TRUSTED_REF_GUARD}) && secrets.TELEGRAM_CHAT_ID_E2E || '' }}`, + DISCORD_BOT_TOKEN_REAL: `\${{ (${TRUSTED_REF_GUARD}) && secrets.DISCORD_BOT_TOKEN_REAL || '' }}`, + DISCORD_CHANNEL_ID_E2E: `\${{ (${TRUSTED_REF_GUARD}) && secrets.DISCORD_CHANNEL_ID_E2E || '' }}`, + SLACK_BOT_TOKEN_REAL: `\${{ (${TRUSTED_REF_GUARD}) && secrets.SLACK_BOT_TOKEN_REAL || '' }}`, + SLACK_APP_TOKEN_REAL: `\${{ 
(${TRUSTED_REF_GUARD}) && secrets.SLACK_APP_TOKEN_REAL || '' }}`, + SLACK_CHANNEL_ID_E2E: `\${{ (${TRUSTED_REF_GUARD}) && secrets.SLACK_CHANNEL_ID_E2E || '' }}`, }; expect(reusableJobs.length).toBeGreaterThan(20); @@ -534,7 +542,12 @@ describe("E2E reusable workflow contract", () => { expect(runStep?.run).toContain("npx vitest run --project e2e-scenarios-live"); expect(runStep?.run).toContain("test/e2e-scenario/live/credential-migration.test.ts"); expect(runStep?.run).not.toContain("test/e2e/test-credential-migration.sh"); - expect(runStep?.env?.NVIDIA_INFERENCE_API_KEY).toBe("${{ secrets.NVIDIA_INFERENCE_API_KEY }}"); + expect(runStep?.env?.NVIDIA_INFERENCE_API_KEY).toBe(GUARDED_HOSTED_INFERENCE_SECRET); + expect(runStep?.env?.NEMOCLAW_PROVIDER).toBe("custom"); + expect(runStep?.env?.NEMOCLAW_ENDPOINT_URL).toBe("https://inference-api.nvidia.com/v1"); + expect(runStep?.env?.NEMOCLAW_MODEL).toBe("nvidia/nvidia/nemotron-3-super-v3"); + expect(runStep?.env?.NEMOCLAW_COMPAT_MODEL).toBe("nvidia/nvidia/nemotron-3-super-v3"); + expect(runStep?.env?.COMPATIBLE_API_KEY).toBe(GUARDED_HOSTED_INFERENCE_SECRET); expect(runStep?.env?.GITHUB_TOKEN).toBeUndefined(); expect(runStep?.env?.NEMOCLAW_RUN_E2E_SCENARIOS).toBe("1"); expect(runStep?.env?.NEMOCLAW_SANDBOX_NAME).toBe("e2e-cred-migration"); @@ -869,42 +882,96 @@ describe("E2E reusable workflow contract", () => { expect(exportStep?.run).toContain('>> "$GITHUB_ENV"'); }); - it("can route selected reusable jobs through the CI compatible inference endpoint", () => { + it("routes reusable hosted inference jobs through the hosted custom endpoint", () => { const exportStep = runnerWorkflow.jobs.run.steps.find( - (step) => step.name === "Export CI compatible inference environment", + (step) => step.name === "Export hosted CI inference environment", ); - const expectedJobs = ["cloud-e2e", "cloud-onboard-e2e", "cloud-inference-e2e"]; const workflowCall = runnerWorkflow.on?.workflow_call ?? 
runnerWorkflow.true?.workflow_call; + const hostedJobs = reusableNightlyJobs(nightlyWorkflow).filter( + ([, job]) => String(job.with?.nvidia_api_key) === "true", + ); - expect(workflowCall?.inputs?.nvidia_secret_as_compatible_api_key).toMatchObject({ + expect(workflowCall?.inputs?.nvidia_api_key).toMatchObject({ required: false, type: "boolean", default: false, }); - expect(exportStep?.if).toBe("${{ inputs.nvidia_secret_as_compatible_api_key }}"); - expect(exportStep?.env?.NVIDIA_INFERENCE_API_KEY).toBe( - "${{ secrets.NVIDIA_INFERENCE_API_KEY }}", - ); - expect(exportStep?.run).toContain("NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE=1"); + expect(workflowCall?.inputs?.nvidia_secret_as_compatible_api_key).toBeUndefined(); + expect(exportStep?.if).toBe("${{ inputs.nvidia_api_key }}"); + expect(exportStep?.env?.NVIDIA_INFERENCE_API_KEY).toBe(RAW_HOSTED_INFERENCE_SECRET); + expect(exportStep?.run).toContain("withheld for workflow_dispatch target_ref runs"); + expect(exportStep?.run).toContain("NEMOCLAW_E2E_USE_HOSTED_INFERENCE=1"); expect(exportStep?.run).toContain("NEMOCLAW_PROVIDER=custom"); expect(exportStep?.run).toContain("NEMOCLAW_ENDPOINT_URL=https://inference-api.nvidia.com/v1"); expect(exportStep?.run).toContain("NEMOCLAW_MODEL=nvidia/nvidia/nemotron-3-super-v3"); expect(exportStep?.run).toContain("NEMOCLAW_COMPAT_MODEL=nvidia/nvidia/nemotron-3-super-v3"); expect(exportStep?.run).toContain("COMPATIBLE_API_KEY=%s"); - for (const name of expectedJobs) { - expect(nightlyWorkflow.jobs[name].with?.nvidia_secret_as_compatible_api_key, name).toBe(true); + expect(hostedJobs.length).toBeGreaterThan(20); + for (const [name, job] of hostedJobs) { + expect(job.with?.nvidia_secret_as_compatible_api_key, name).toBeUndefined(); } }); - it("routes legacy token rotation through the CI compatible inference endpoint", () => { + it("routes direct hosted-secret jobs through the hosted custom inference endpoint", () => { + const trustedWorkflowSecretExceptions = new Set([ + 
"issue-4434-tui-unreachable-inference-e2e:Sanitize issue #4434 logs on failure", + ]); + const directSecretSteps = Object.entries(nightlyWorkflow.jobs).flatMap(([jobName, job]) => + job.uses + ? [] + : (job.steps ?? []) + .filter((step) => envReferencesHostedInferenceSecret(step.env)) + .map((step) => ({ jobName, step })), + ); + const directSecretStepNames = directSecretSteps.map( + ({ jobName, step }) => `${jobName}:${step.name ?? ""}`, + ); + + expect(directSecretStepNames).toEqual( + expect.arrayContaining([ + "openclaw-tui-chat-correlation-e2e:Run OpenClaw TUI chat correlation E2E test", + "issue-4434-tui-unreachable-inference-e2e:Run issue #4434 TUI unreachable inference E2E test", + "issue-4434-tui-unreachable-inference-e2e:Sanitize issue #4434 logs on failure", + "token-rotation-e2e:Run token rotation E2E test", + "sandbox-operations-e2e:Run sandbox operations E2E test", + "credential-migration-e2e:Run credential migration Vitest test", + "onboard-repair-e2e:Install NemoClaw", + "onboard-repair-e2e:Run onboard repair E2E test", + "onboard-resume-e2e:Install NemoClaw", + "onboard-resume-e2e:Run onboard resume E2E test", + "onboard-negative-paths-e2e:Install NemoClaw", + "onboard-negative-paths-e2e:Run onboard negative-path E2E test", + "runtime-overrides-e2e:Install NemoClaw", + "runtime-overrides-e2e:Run runtime overrides E2E test", + "credential-sanitization-e2e:Install NemoClaw and onboard sandbox", + "telegram-injection-e2e:Install NemoClaw and onboard sandbox", + "launchable-smoke-e2e:Run launchable install-flow smoke test", + ]), + ); + + expect(directSecretSteps.length).toBeGreaterThanOrEqual(17); + for (const { jobName, step } of directSecretSteps) { + const stepKey = `${jobName}:${step.name ?? 
""}`; + expect(step.env?.NVIDIA_INFERENCE_API_KEY, stepKey).toBe(GUARDED_HOSTED_INFERENCE_SECRET); + if (trustedWorkflowSecretExceptions.has(stepKey)) { + expect(step.run, stepKey).toContain("[REDACTED_NVIDIA_INFERENCE_API_KEY]"); + continue; + } + expect(step.env?.NEMOCLAW_PROVIDER, jobName).toBe("custom"); + expect(step.env?.NEMOCLAW_ENDPOINT_URL, jobName).toBe("https://inference-api.nvidia.com/v1"); + expect(step.env?.NEMOCLAW_MODEL, jobName).toBe("nvidia/nvidia/nemotron-3-super-v3"); + expect(step.env?.NEMOCLAW_COMPAT_MODEL, jobName).toBe("nvidia/nvidia/nemotron-3-super-v3"); + expect(step.env?.COMPATIBLE_API_KEY, jobName).toBe(GUARDED_HOSTED_INFERENCE_SECRET); + } + const runStep = nightlyWorkflow.jobs["token-rotation-e2e"].steps?.find( (step) => step.name === "Run token rotation E2E test", ); const script = readFileSync(new URL("./e2e/test-token-rotation.sh", import.meta.url), "utf8"); - expect(runStep?.env?.NVIDIA_INFERENCE_API_KEY).toBe("${{ secrets.NVIDIA_INFERENCE_API_KEY }}"); - expect(runStep?.env?.NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE).toBe("1"); + expect(runStep?.env?.NVIDIA_INFERENCE_API_KEY).toBe(GUARDED_HOSTED_INFERENCE_SECRET); + expect(runStep?.env?.NEMOCLAW_E2E_USE_HOSTED_INFERENCE).toBe("1"); expect(script).toContain("lib/ci-compatible-inference.sh"); expect(script).toContain("nemoclaw_e2e_configure_compatible_inference"); }); diff --git a/test/e2e/lib/ci-compatible-inference.sh b/test/e2e/lib/ci-compatible-inference.sh index 15b64ecdbe..94c3cb1867 100755 --- a/test/e2e/lib/ci-compatible-inference.sh +++ b/test/e2e/lib/ci-compatible-inference.sh @@ -2,16 +2,25 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -# CI-only compatibility shim: some live E2E lanes use the repository's -# NVIDIA_INFERENCE_API_KEY secret against an OpenAI-compatible endpoint instead -# of the public NVIDIA Endpoints provider. 
Keep this helper in test/e2e so the +# CI-only hosted inference shim: live E2E lanes use the repository's +# NVIDIA_INFERENCE_API_KEY secret against the hosted OpenAI-compatible endpoint +# at inference-api.nvidia.com. Keep this helper in test/e2e so the # product-facing provider/default endpoint remain unchanged. NEMOCLAW_E2E_COMPATIBLE_INFERENCE_MODEL_DEFAULT="nvidia/nvidia/nemotron-3-super-v3" +NEMOCLAW_E2E_HOSTED_INFERENCE_PROVIDER_DEFAULT="compatible-endpoint" NEMOCLAW_E2E_NVIDIA_INFERENCE_MODEL_DEFAULT="nvidia/nemotron-3-super-120b-a12b" nemoclaw_e2e_using_compatible_inference() { - [ "${NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE:-}" = "1" ] + if [ "${NEMOCLAW_E2E_USE_HOSTED_INFERENCE:-}" = "1" ]; then + return 0 + fi + case "${NEMOCLAW_PROVIDER:-}" in + build | cloud | nvidia | nvidia-prod) + return 1 + ;; + esac + [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" != nvapi-* ]] } nemoclaw_e2e_configure_compatible_inference() { @@ -19,22 +28,20 @@ nemoclaw_e2e_configure_compatible_inference() { return 0 fi + if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then + echo "ERROR: NVIDIA_INFERENCE_API_KEY is required for hosted CI inference" >&2 + return 1 + fi + export NEMOCLAW_PROVIDER="${NEMOCLAW_PROVIDER:-custom}" export NEMOCLAW_ENDPOINT_URL="${NEMOCLAW_ENDPOINT_URL:-https://inference-api.nvidia.com/v1}" export NEMOCLAW_MODEL="${NEMOCLAW_MODEL:-${NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL:-$NEMOCLAW_E2E_COMPATIBLE_INFERENCE_MODEL_DEFAULT}}" export NEMOCLAW_COMPAT_MODEL="${NEMOCLAW_COMPAT_MODEL:-$NEMOCLAW_MODEL}" - - if [ -z "${COMPATIBLE_API_KEY:-}" ] && [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ]; then - export COMPATIBLE_API_KEY="$NVIDIA_INFERENCE_API_KEY" - fi + export COMPATIBLE_API_KEY="$NVIDIA_INFERENCE_API_KEY" } nemoclaw_e2e_hosted_inference_key() { - if nemoclaw_e2e_using_compatible_inference; then - printf '%s' "${COMPATIBLE_API_KEY:-${NVIDIA_INFERENCE_API_KEY:-}}" - else - printf '%s' "${NVIDIA_INFERENCE_API_KEY:-}" - fi + printf '%s' 
"${NVIDIA_INFERENCE_API_KEY:-}" } nemoclaw_e2e_hosted_inference_base_url() { @@ -45,6 +52,49 @@ nemoclaw_e2e_hosted_inference_base_url() { fi } +nemoclaw_e2e_expected_route_provider() { + if nemoclaw_e2e_using_compatible_inference; then + printf '%s' "$NEMOCLAW_E2E_HOSTED_INFERENCE_PROVIDER_DEFAULT" + else + printf '%s' "nvidia-prod" + fi +} + +nemoclaw_e2e_strip_ansi() { + if command -v perl >/dev/null 2>&1; then + perl -pe 's/\x1b\][^\a]*(?:\a|\x1b\\)//g; s/\x1b\[[0-9;?]*[ -\/]*[@-~]//g' + else + sed -E $'s/\x1B\\[[0-9;?]*[ -\\/]*[@-~]//g' + fi +} + +nemoclaw_e2e_inference_output_matches() { + local output="$1" + local provider="$2" + local model="${3:-}" + local plain + + plain="$(printf '%s' "$output" | nemoclaw_e2e_strip_ansi)" + grep -Eqi "Provider:[[:space:]]*${provider}" <<<"$plain" || return 1 + [ -z "$model" ] || grep -Fq "$model" <<<"$plain" +} + +nemoclaw_e2e_note_pass() { + if declare -F pass >/dev/null 2>&1; then + pass "$@" + else + printf 'PASS: %s\n' "$*" + fi +} + +nemoclaw_e2e_note_fail() { + if declare -F fail >/dev/null 2>&1; then + fail "$@" + else + printf 'ERROR: %s\n' "$*" >&2 + fi +} + nemoclaw_e2e_hosted_inference_model() { if nemoclaw_e2e_using_compatible_inference; then printf '%s' "${NEMOCLAW_MODEL:-${NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL:-$NEMOCLAW_E2E_COMPATIBLE_INFERENCE_MODEL_DEFAULT}}" @@ -83,18 +133,18 @@ nemoclaw_e2e_require_hosted_inference_key() { if nemoclaw_e2e_using_compatible_inference; then if [ -n "$key" ]; then - pass "COMPATIBLE_API_KEY is set for CI compatible inference" + nemoclaw_e2e_note_pass "NVIDIA_INFERENCE_API_KEY is set for hosted CI inference" else - fail "COMPATIBLE_API_KEY not set — required for CI compatible inference" + nemoclaw_e2e_note_fail "NVIDIA_INFERENCE_API_KEY not set - required for hosted CI inference" return 1 fi return 0 fi if [ -n "$key" ] && [[ "$key" == nvapi-* ]]; then - pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)" + nemoclaw_e2e_note_pass "NVIDIA_INFERENCE_API_KEY is set (starts 
with nvapi-)" else - fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for live inference" + nemoclaw_e2e_note_fail "NVIDIA_INFERENCE_API_KEY not set or invalid - required for live inference" return 1 fi } diff --git a/test/e2e/test-agent-turn-latency-e2e.sh b/test/e2e/test-agent-turn-latency-e2e.sh index 8172bfc030..694317f7a1 100755 --- a/test/e2e/test-agent-turn-latency-e2e.sh +++ b/test/e2e/test-agent-turn-latency-e2e.sh @@ -4,13 +4,13 @@ # # Real agent turn latency E2E. # -# Installs one OpenClaw sandbox and one Hermes sandbox against NVIDIA Endpoints, -# verifies that both are configured for the requested model, and times one real -# model-backed turn through each runtime. +# Installs one OpenClaw sandbox and one Hermes sandbox against the configured +# hosted inference endpoint, verifies that both are configured for the requested +# model, and times one real model-backed turn through each runtime. # # Prerequisites: # - Docker running -# - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-) +# - NVIDIA_INFERENCE_API_KEY set for hosted inference # - NEMOCLAW_NON_INTERACTIVE=1 # - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 # @@ -32,6 +32,8 @@ source "${SCRIPT_DIR}/e2e-timeout.sh" source "${SCRIPT_DIR}/lib/openclaw-json.sh" # shellcheck source=test/e2e/lib/sandbox-teardown.sh source "${SCRIPT_DIR}/lib/sandbox-teardown.sh" +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. 
"${SCRIPT_DIR}/lib/ci-compatible-inference.sh" # shellcheck source=test/e2e/lib/install-path-refresh.sh source "${SCRIPT_DIR}/lib/install-path-refresh.sh" @@ -91,7 +93,7 @@ PY } strip_ansi() { - python3 -c 'import re, sys; sys.stdout.write(re.sub(r"\x1b\[[0-9;]*m", "", sys.stdin.read()))' + nemoclaw_e2e_strip_ansi } parse_chat_content() { @@ -136,8 +138,7 @@ assert_route() { fi plain_output=$(printf '%s' "$output" | strip_ansi) - if grep -Fq "Provider: ${EXPECTED_ROUTE_PROVIDER}" <<<"$plain_output" \ - && grep -Fq "Model: ${TURN_MODEL}" <<<"$plain_output"; then + if nemoclaw_e2e_inference_output_matches "$plain_output" "$EXPECTED_ROUTE_PROVIDER" "$TURN_MODEL"; then pass "${label}: OpenShell route is ${EXPECTED_ROUTE_PROVIDER} / ${TURN_MODEL}" else fail "${label}: route is not ${EXPECTED_ROUTE_PROVIDER} / ${TURN_MODEL}: ${plain_output:0:400}" @@ -551,14 +552,14 @@ else exit 1 fi -TURN_MODEL="${NEMOCLAW_TURN_LATENCY_MODEL:-${NEMOCLAW_MODEL:-nvidia/nemotron-3-ultra-550b-a55b}}" -TURN_PROVIDER_KEY="${NEMOCLAW_TURN_LATENCY_PROVIDER:-build}" -EXPECTED_ROUTE_PROVIDER="${NEMOCLAW_TURN_LATENCY_ROUTE_PROVIDER:-nvidia-prod}" OPENCLAW_SANDBOX_NAME="${NEMOCLAW_OPENCLAW_TURN_LATENCY_SANDBOX_NAME:-e2e-openclaw-turn-latency}" HERMES_SANDBOX_NAME="${NEMOCLAW_HERMES_TURN_LATENCY_SANDBOX_NAME:-e2e-hermes-turn-latency}" OPENCLAW_INSTALL_LOG="/tmp/nemoclaw-e2e-openclaw-turn-latency-install.log" HERMES_INSTALL_LOG="/tmp/nemoclaw-e2e-hermes-turn-latency-install.log" RESULTS_JSON="/tmp/nemoclaw-e2e-agent-turn-latency.json" +TURN_MODEL="" +TURN_PROVIDER_KEY="" +EXPECTED_ROUTE_PROVIDER="" MAX_TURN_SECONDS="${NEMOCLAW_TURN_LATENCY_MAX_SECONDS:-300}" is_positive_int "$MAX_TURN_SECONDS" || MAX_TURN_SECONDS=300 @@ -575,6 +576,20 @@ HERMES_REPLY="" register_sandbox_for_teardown "$OPENCLAW_SANDBOX_NAME" register_sandbox_for_teardown "$HERMES_SANDBOX_NAME" nemoclaw_ensure_local_bin_on_path +nemoclaw_e2e_configure_compatible_inference || { + fail "Hosted CI inference could not be configured" + 
finish +} + +if nemoclaw_e2e_using_compatible_inference; then + TURN_MODEL="${NEMOCLAW_TURN_LATENCY_MODEL:-$(nemoclaw_e2e_hosted_inference_model)}" + TURN_PROVIDER_KEY="${NEMOCLAW_TURN_LATENCY_PROVIDER:-custom}" + EXPECTED_ROUTE_PROVIDER="${NEMOCLAW_TURN_LATENCY_ROUTE_PROVIDER:-$(nemoclaw_e2e_expected_route_provider)}" +else + TURN_MODEL="${NEMOCLAW_TURN_LATENCY_MODEL:-${NEMOCLAW_MODEL:-nvidia/nemotron-3-ultra-550b-a55b}}" + TURN_PROVIDER_KEY="${NEMOCLAW_TURN_LATENCY_PROVIDER:-build}" + EXPECTED_ROUTE_PROVIDER="${NEMOCLAW_TURN_LATENCY_ROUTE_PROVIDER:-nvidia-prod}" +fi section "Prerequisites" if docker info >/dev/null 2>&1; then @@ -584,10 +599,7 @@ else finish fi -if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then - pass "NVIDIA_INFERENCE_API_KEY is set" -else - fail "NVIDIA_INFERENCE_API_KEY not set or invalid" +if ! nemoclaw_e2e_require_hosted_inference_key; then finish fi diff --git a/test/e2e/test-common-egress-agent-e2e.sh b/test/e2e/test-common-egress-agent-e2e.sh index 16e65212a2..53415555a0 100755 --- a/test/e2e/test-common-egress-agent-e2e.sh +++ b/test/e2e/test-common-egress-agent-e2e.sh @@ -11,7 +11,7 @@ # and the Hermes agent fetches Wikidata through its API-server agent path. # # Required env: -# NVIDIA_INFERENCE_API_KEY real NVIDIA Endpoints key for inference +# NVIDIA_INFERENCE_API_KEY hosted inference credential # NEMOCLAW_NON_INTERACTIVE=1 required # NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 required # @@ -392,6 +392,10 @@ echo " Common Egress Agent E2E" echo " $(date)" echo "============================================================" +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/lib/ci-compatible-inference.sh" +nemoclaw_e2e_configure_compatible_inference || summary + section "Phase 0: Prerequisites" load_shell_path info "Repo: $REPO" @@ -402,11 +406,9 @@ if ! 
docker info >/dev/null 2>&1; then fi pass "Docker is running" -if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] || [[ "${NVIDIA_INFERENCE_API_KEY}" != nvapi-* ]]; then - fail "NVIDIA_INFERENCE_API_KEY not set or invalid" +if ! nemoclaw_e2e_require_hosted_inference_key; then summary fi -pass "NVIDIA_INFERENCE_API_KEY is set" if [ "${NEMOCLAW_NON_INTERACTIVE:-}" != "1" ]; then fail "NEMOCLAW_NON_INTERACTIVE=1 is required" diff --git a/test/e2e/test-cron-preflight-inference-local-e2e.sh b/test/e2e/test-cron-preflight-inference-local-e2e.sh index 0ee7e92ab0..89fd2e5daf 100755 --- a/test/e2e/test-cron-preflight-inference-local-e2e.sh +++ b/test/e2e/test-cron-preflight-inference-local-e2e.sh @@ -4,12 +4,12 @@ # # Cron preflight inference.local E2E. # -# Onboards a fresh sandbox against the managed cloud provider (whose base URL -# resolves through `inference.local`), then loads OpenClaw's cron isolated-agent -# preflight runtime directly from the in-sandbox dist and invokes -# `preflightCronModelProvider` against the onboarded provider/model. Asserts -# the call returns `status: "available"` and never reports `EAI_AGAIN` or the -# "local provider endpoint is not reachable" message. +# Onboards a fresh sandbox against the configured hosted inference provider +# (whose base URL resolves through `inference.local`), then loads OpenClaw's +# cron isolated-agent preflight runtime directly from the in-sandbox dist and +# invokes `preflightCronModelProvider` against the onboarded provider/model. +# Asserts the call returns `status: "available"` and never reports `EAI_AGAIN` +# or the "local provider endpoint is not reachable" message. 
# # This probes the exact runtime path Patch 6 modifies — the cron CLI surfaces # (`openclaw cron add` / `openclaw cron run`) need `operator.admin` scope, which @@ -19,18 +19,18 @@ # # Prerequisites: # - Docker running -# - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-) +# - NVIDIA_INFERENCE_API_KEY set for hosted inference # - NEMOCLAW_NON_INTERACTIVE=1, NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 # # Environment: # NEMOCLAW_SANDBOX_NAME — sandbox name (default: e2e-cron-preflight) # NEMOCLAW_RECREATE_SANDBOX=1 — destroy + recreate if exists -# NEMOCLAW_CRON_PREFLIGHT_MODEL — cloud model (default: nvidia/nemotron-3-super-120b-a12b) +# NEMOCLAW_CRON_PREFLIGHT_MODEL — model for non-hosted provider runs # NEMOCLAW_CRON_PREFLIGHT_KEEP=1 — keep the sandbox after the test for inspection # # Usage: # NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ -# NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-cron-preflight-inference-local-e2e.sh +# NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-cron-preflight-inference-local-e2e.sh set -uo pipefail @@ -86,6 +86,8 @@ INSTALL_LOG="/tmp/nemoclaw-e2e-cron-preflight-install.log" . "${E2E_DIR}/lib/sandbox-teardown.sh" # shellcheck source=test/e2e/lib/install-path-refresh.sh . "${E2E_DIR}/lib/install-path-refresh.sh" +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. "${E2E_DIR}/lib/ci-compatible-inference.sh" # ── Prereqs ── section "Prerequisites" @@ -99,15 +101,27 @@ if ! command -v jq >/dev/null 2>&1; then echo " Total: $TOTAL Pass: $PASS Fail: $FAIL Skip: $SKIP" exit 0 fi -if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then - skip "NVIDIA_INFERENCE_API_KEY not set" +if ! 
nemoclaw_e2e_configure_compatible_inference; then + fail "hosted CI inference could not be configured" echo " Total: $TOTAL Pass: $PASS Fail: $FAIL Skip: $SKIP" - exit 0 + exit 1 fi -if [ "${NVIDIA_INFERENCE_API_KEY:0:6}" != "nvapi-" ]; then - skip "NVIDIA_INFERENCE_API_KEY does not start with nvapi-" - echo " Total: $TOTAL Pass: $PASS Fail: $FAIL Skip: $SKIP" - exit 0 +if nemoclaw_e2e_using_compatible_inference; then + if ! nemoclaw_e2e_require_hosted_inference_key; then + echo " Total: $TOTAL Pass: $PASS Fail: $FAIL Skip: $SKIP" + exit 1 + fi +else + if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then + skip "NVIDIA_INFERENCE_API_KEY not set" + echo " Total: $TOTAL Pass: $PASS Fail: $FAIL Skip: $SKIP" + exit 0 + fi + if [ "${NVIDIA_INFERENCE_API_KEY:0:6}" != "nvapi-" ]; then + skip "NVIDIA_INFERENCE_API_KEY does not start with nvapi-" + echo " Total: $TOTAL Pass: $PASS Fail: $FAIL Skip: $SKIP" + exit 0 + fi fi if [ "${NEMOCLAW_NON_INTERACTIVE:-}" != "1" ]; then skip "NEMOCLAW_NON_INTERACTIVE must be 1; refusing to risk an interactive onboard prompt" diff --git a/test/e2e/test-hermes-discord-e2e.sh b/test/e2e/test-hermes-discord-e2e.sh index c3c24c2fa5..1aabaa0dba 100755 --- a/test/e2e/test-hermes-discord-e2e.sh +++ b/test/e2e/test-hermes-discord-e2e.sh @@ -29,7 +29,7 @@ # # Usage: # NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ -# NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-hermes-discord-e2e.sh +# NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-hermes-discord-e2e.sh set -uo pipefail @@ -183,7 +183,10 @@ export DISCORD_REQUIRE_MENTION="${DISCORD_REQUIRE_MENTION:-0}" # shellcheck source=test/e2e/lib/sandbox-teardown.sh . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh" +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. 
"$(dirname "${BASH_SOURCE[0]}")/lib/ci-compatible-inference.sh" register_sandbox_for_teardown "$SANDBOX_NAME" +nemoclaw_e2e_configure_compatible_inference || exit 1 # shellcheck source=test/e2e/lib/discord-gateway-proof.sh . "$(dirname "${BASH_SOURCE[0]}")/lib/discord-gateway-proof.sh" @@ -197,10 +200,7 @@ else exit 1 fi -if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then - pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)" -else - fail "NVIDIA_INFERENCE_API_KEY not set or invalid" +if ! nemoclaw_e2e_require_hosted_inference_key; then exit 1 fi diff --git a/test/e2e/test-hermes-e2e.sh b/test/e2e/test-hermes-e2e.sh index 73db775f3b..e0589eea2d 100755 --- a/test/e2e/test-hermes-e2e.sh +++ b/test/e2e/test-hermes-e2e.sh @@ -11,7 +11,7 @@ # # Prerequisites: # - Docker running -# - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-) +# - NVIDIA_INFERENCE_API_KEY set for hosted inference # - Network access to inference-api.nvidia.com # # Environment variables: @@ -22,10 +22,10 @@ # NEMOCLAW_RECREATE_SANDBOX=1 — recreate sandbox if it exists from a previous run # NEMOCLAW_E2E_HERMES_DASHBOARD=1 — validate the built-in Hermes web dashboard end-to-end # NEMOCLAW_HERMES_DASHBOARD_TUI=1 — enable Hermes' optional in-browser TUI tab during onboard -# NVIDIA_INFERENCE_API_KEY — required for NVIDIA Endpoints inference +# NVIDIA_INFERENCE_API_KEY — required for hosted inference # # Usage: -# NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-hermes-e2e.sh +# NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-hermes-e2e.sh set -uo pipefail @@ -114,6 +114,9 @@ is_truthy_env_value() { esac } +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. 
"$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/lib/ci-compatible-inference.sh" + hermes_dashboard_e2e_enabled() { is_truthy_env_value "${NEMOCLAW_E2E_HERMES_DASHBOARD:-}" \ || is_truthy_env_value "${NEMOCLAW_HERMES_DASHBOARD:-}" @@ -159,6 +162,10 @@ fi SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-hermes}" export NEMOCLAW_AGENT="${NEMOCLAW_AGENT:-hermes}" +nemoclaw_e2e_configure_compatible_inference || exit 1 +HOSTED_INFERENCE_BASE_URL="$(nemoclaw_e2e_hosted_inference_base_url)" +HOSTED_INFERENCE_MODEL="$(nemoclaw_e2e_hosted_inference_model)" +HOSTED_INFERENCE_KEY="$(nemoclaw_e2e_hosted_inference_key)" # shellcheck source=test/e2e/lib/sandbox-teardown.sh . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh" @@ -196,19 +203,14 @@ else exit 1 fi -if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then - pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)" -else - fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for live inference" +if ! 
nemoclaw_e2e_require_hosted_inference_key; then exit 1 fi -if curl -sf --max-time 10 \ - -H "Authorization: Bearer ${NVIDIA_INFERENCE_API_KEY}" \ - https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then - pass "Network access to inference-api.nvidia.com" +if nemoclaw_e2e_probe_hosted_inference; then + pass "Network access to ${HOSTED_INFERENCE_BASE_URL}" else - fail "Cannot reach inference-api.nvidia.com" + fail "Cannot reach ${HOSTED_INFERENCE_BASE_URL}" exit 1 fi @@ -358,10 +360,16 @@ fi # 3d: Inference must be configured by onboard if inf_check=$(openshell inference get 2>&1); then - if grep -qi "nvidia-prod" <<<"$inf_check"; then - pass "Inference configured via onboard" + expected_provider="$(nemoclaw_e2e_expected_route_provider)" + expected_model="" + if nemoclaw_e2e_using_compatible_inference; then + expected_model="$HOSTED_INFERENCE_MODEL" + fi + if nemoclaw_e2e_inference_output_matches "$inf_check" "$expected_provider" "$expected_model"; then + pass "Inference configured via onboard (${expected_provider})" else - fail "Inference not configured — onboard did not set up nvidia-prod provider" + inf_check_plain="$(printf '%s' "$inf_check" | nemoclaw_e2e_strip_ansi)" + fail "Inference not configured - onboard did not set up ${expected_provider}: ${inf_check_plain:0:200}" fi else fail "openshell inference get failed: ${inf_check:0:200}" @@ -600,17 +608,13 @@ rm -f "$ssh_config" # ══════════════════════════════════════════════════════════════════ section "Phase 5: Live inference" -# ── Test 5a: Direct NVIDIA Endpoints ── -info "[LIVE] Direct API test → inference-api.nvidia.com..." +# ── Test 5a: Direct hosted inference endpoint ── +info "[LIVE] Direct API test → ${HOSTED_INFERENCE_BASE_URL}..." 
api_response=$(curl -s --max-time 30 \ - -X POST https://inference-api.nvidia.com/v1/chat/completions \ + -X POST "${HOSTED_INFERENCE_BASE_URL}/chat/completions" \ -H "Content-Type: application/json" \ - -H "Authorization: Bearer $NVIDIA_INFERENCE_API_KEY" \ - -d '{ - "model": "nvidia/nemotron-3-super-120b-a12b", - "messages": [{"role": "user", "content": "Reply with exactly one word: PONG"}], - "max_tokens": 100 - }' 2>/dev/null) || true + -H "Authorization: Bearer $HOSTED_INFERENCE_KEY" \ + -d "$(printf '{"model":"%s","messages":[{"role":"user","content":"Reply with exactly one word: PONG"}],"max_tokens":100}' "$HOSTED_INFERENCE_MODEL")" 2>/dev/null) || true if [ -n "$api_response" ]; then api_content=$(echo "$api_response" | parse_chat_content 2>/dev/null) || true @@ -645,7 +649,7 @@ if openshell sandbox ssh-config "$SANDBOX_NAME" >"$ssh_config" 2>/dev/null; then "openshell-${SANDBOX_NAME}" \ "curl -s --max-time 60 https://inference.local/v1/chat/completions \ -H 'Content-Type: application/json' \ - -d '{\"model\":\"nvidia/nemotron-3-super-120b-a12b\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":100}'" \ + -d '{\"model\":\"$HOSTED_INFERENCE_MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":100}'" \ 2>&1) || true fi rm -f "$ssh_config" @@ -653,8 +657,8 @@ rm -f "$ssh_config" if [ -n "$sandbox_response" ]; then sandbox_content=$(echo "$sandbox_response" | parse_chat_content 2>/dev/null) || true if grep -qi "PONG" <<<"$sandbox_content"; then - pass "[ROUTING] inference.local: OpenShell routed curl to NVIDIA Endpoints and returned PONG" - info "Routing path proven: sandbox curl → DNS forwarder → gateway proxy → NVIDIA Endpoints (does not exercise the Hermes agent runtime or openclaw HTTP client)" + pass "[ROUTING] inference.local: OpenShell routed curl to the hosted inference endpoint and returned PONG" + info "Routing path proven: sandbox curl → DNS 
forwarder → gateway proxy → hosted inference endpoint (does not exercise the Hermes agent runtime or openclaw HTTP client)" else fail "[ROUTING] inference.local: expected PONG, got: ${sandbox_content:0:200}" fi diff --git a/test/e2e/test-hermes-inference-switch.sh b/test/e2e/test-hermes-inference-switch.sh index 521a18b5ee..b38c64f2f8 100755 --- a/test/e2e/test-hermes-inference-switch.sh +++ b/test/e2e/test-hermes-inference-switch.sh @@ -10,7 +10,7 @@ # # Prerequisites: # - Docker running -# - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-) +# - NVIDIA_INFERENCE_API_KEY set for hosted inference # - NEMOCLAW_NON_INTERACTIVE=1 # - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 @@ -441,9 +441,16 @@ E2E_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" . "${E2E_DIR}/lib/inference-switch-retry.sh" # shellcheck source=test/e2e/lib/anthropic-switch-provider.sh . "${E2E_DIR}/lib/anthropic-switch-provider.sh" +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. "${E2E_DIR}/lib/ci-compatible-inference.sh" SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-hermes-inference-switch}" -SWITCH_PROVIDER="${NEMOCLAW_SWITCH_PROVIDER:-nvidia-prod}" -SWITCH_MODEL="${NEMOCLAW_SWITCH_MODEL:-z-ai/glm-5.1}" +if nemoclaw_e2e_using_compatible_inference; then + SWITCH_PROVIDER="${NEMOCLAW_SWITCH_PROVIDER:-$(nemoclaw_e2e_expected_route_provider)}" + SWITCH_MODEL="${NEMOCLAW_SWITCH_MODEL:-$(nemoclaw_e2e_hosted_inference_model)}" +else + SWITCH_PROVIDER="${NEMOCLAW_SWITCH_PROVIDER:-nvidia-prod}" + SWITCH_MODEL="${NEMOCLAW_SWITCH_MODEL:-z-ai/glm-5.1}" +fi SWITCH_INFERENCE_API="${NEMOCLAW_SWITCH_INFERENCE_API:-openai-completions}" # shellcheck disable=SC2034 # consumed by sourced anthropic-switch-provider.sh SWITCH_ENDPOINT_URL="${NEMOCLAW_SWITCH_ENDPOINT_URL:-}" @@ -462,6 +469,7 @@ trap 'stop_mock_anthropic_switch_provider; _nemoclaw_sandbox_teardown' EXIT # shellcheck source=test/e2e/lib/install-path-refresh.sh . 
"${E2E_DIR}/lib/install-path-refresh.sh" register_sandbox_for_teardown "$SANDBOX_NAME" +nemoclaw_e2e_configure_compatible_inference || exit 1 section "Phase 0: Pre-cleanup" if command -v nemohermes >/dev/null 2>&1; then @@ -483,10 +491,7 @@ else exit 1 fi -if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then - pass "NVIDIA_INFERENCE_API_KEY is set" -else - fail "NVIDIA_INFERENCE_API_KEY not set or invalid" +if ! nemoclaw_e2e_require_hosted_inference_key; then exit 1 fi diff --git a/test/e2e/test-hermes-slack-e2e.sh b/test/e2e/test-hermes-slack-e2e.sh index 74f70f6369..8934dad48c 100755 --- a/test/e2e/test-hermes-slack-e2e.sh +++ b/test/e2e/test-hermes-slack-e2e.sh @@ -23,7 +23,7 @@ # # Usage: # NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ -# NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-hermes-slack-e2e.sh +# NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-hermes-slack-e2e.sh set -uo pipefail @@ -173,7 +173,10 @@ fi # shellcheck source=test/e2e/lib/sandbox-teardown.sh . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh" +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. "$(dirname "${BASH_SOURCE[0]}")/lib/ci-compatible-inference.sh" register_sandbox_for_teardown "$SANDBOX_NAME" +nemoclaw_e2e_configure_compatible_inference || exit 1 section "Phase 0: Prerequisites" @@ -184,10 +187,7 @@ else exit 1 fi -if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then - pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)" -else - fail "NVIDIA_INFERENCE_API_KEY not set or invalid" +if ! 
nemoclaw_e2e_require_hosted_inference_key; then exit 1 fi diff --git a/test/e2e/test-issue-4434-tui-unreachable-inference.sh b/test/e2e/test-issue-4434-tui-unreachable-inference.sh index 69004be5b2..e0a04c5bf5 100755 --- a/test/e2e/test-issue-4434-tui-unreachable-inference.sh +++ b/test/e2e/test-issue-4434-tui-unreachable-inference.sh @@ -8,12 +8,14 @@ # # This mutates host firewall state. Run only on a Linux Docker host you control: # -# NEMOCLAW_ISSUE_4434_LIVE=1 NVIDIA_INFERENCE_API_KEY=nvapi-... \ +# NEMOCLAW_ISSUE_4434_LIVE=1 NVIDIA_INFERENCE_API_KEY=... \ # bash test/e2e/test-issue-4434-tui-unreachable-inference.sh set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. "${SCRIPT_DIR}/lib/ci-compatible-inference.sh" SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-issue-4434-tui-unreachable}" INSTALL_LOG="${E2E_ISSUE_4434_INSTALL_LOG:-/tmp/nemoclaw-e2e-issue-4434-install.log}" @@ -63,6 +65,11 @@ if [ "${NEMOCLAW_ISSUE_4434_LIVE:-0}" != "1" ]; then exit 0 fi +if nemoclaw_e2e_using_compatible_inference; then + info "skipping: hosted compatible inference is gateway-managed; this repro only blocks sandbox egress" + exit 0 +fi + if [ "$(uname -s)" != "Linux" ]; then fail "Linux host required for DOCKER-USER iptables repro" fi @@ -71,9 +78,8 @@ for command in docker sudo expect curl timeout perl; do done docker info >/dev/null 2>&1 || fail "Docker is not running" sudo -n true >/dev/null 2>&1 || fail "passwordless sudo is required for non-interactive iptables cleanup" -if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] || [[ "${NVIDIA_INFERENCE_API_KEY}" != nvapi-* ]]; then - fail "NVIDIA_INFERENCE_API_KEY must be set and start with nvapi-" -fi +nemoclaw_e2e_configure_compatible_inference || fail "hosted CI inference could not be configured" +nemoclaw_e2e_require_hosted_inference_key || exit 1 mkdir -p "$CAPTURE_DIR" CLEANUP_SANDBOX=1 @@ -110,7 +116,33 @@ if ! 
nemoclaw "$SANDBOX_NAME" status >"$status_log" 2>&1; then fail "nemoclaw ${SANDBOX_NAME} status failed before firewall block" fi if ! grep -Eiq "inference.*healthy|healthy.*inference" "$status_log"; then - fail "pre-block status did not report healthy inference" + if grep -Eiq "Inference:[[:space:]]*not probed" "$status_log"; then + info "status skipped inference reachability; probing inference.local directly" + else + fail "pre-block status did not report healthy or not-probed inference" + fi +fi + +route_log="${CAPTURE_DIR}/openshell-inference-before-block.log" +if ! route_output=$(openshell inference get 2>&1); then + printf '%s\n' "$route_output" >"$route_log" + fail "openshell inference get failed before firewall block" +fi +printf '%s\n' "$route_output" >"$route_log" +expected_provider="$(nemoclaw_e2e_expected_route_provider)" +expected_model="$(nemoclaw_e2e_hosted_inference_model)" +if ! nemoclaw_e2e_inference_output_matches "$route_output" "$expected_provider" "$expected_model"; then + route_plain="$(printf '%s' "$route_output" | nemoclaw_e2e_strip_ansi)" + fail "pre-block OpenShell route was not ${expected_provider} / ${expected_model}: ${route_plain:0:240}" +fi + +preblock_probe_log="${CAPTURE_DIR}/inference-local-before-block.log" +preblock_payload="$(printf '{"model":"%s","messages":[{"role":"user","content":"Reply with OK."}],"max_tokens":8}' "$expected_model")" +preblock_payload_arg="$(printf '%q' "$preblock_payload")" +if ! 
timeout 90 openshell sandbox exec --name "$SANDBOX_NAME" -- sh -lc \ + "curl -sf --max-time 60 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' -d $preblock_payload_arg >/dev/null" \ + >"$preblock_probe_log" 2>&1; then + fail "inference.local was not reachable from inside the sandbox before firewall block" fi connect_probe_log="${CAPTURE_DIR}/nemoclaw-connect-probe-before-block.log" diff --git a/test/e2e/test-launchable-smoke.sh b/test/e2e/test-launchable-smoke.sh index b28cc89750..73d993f227 100755 --- a/test/e2e/test-launchable-smoke.sh +++ b/test/e2e/test-launchable-smoke.sh @@ -20,7 +20,7 @@ # What this tests: # 1. Run brev-launchable-ci-cpu.sh with NEMOCLAW_REF=current branch # 2. Verify installation artifacts (nemoclaw, openshell, Node.js ≥22, Docker, sentinel) -# 3. nemoclaw onboard --non-interactive with NVIDIA_INFERENCE_API_KEY (cloud provider) +# 3. nemoclaw onboard --non-interactive with hosted inference # 4. Sandbox health: nemoclaw list, status, gateway running # 5. Live inference through the sandbox (same pattern as test-full-e2e.sh Phase 4) # 6. 
Destroy + cleanup @@ -28,7 +28,7 @@ # Prerequisites: # - Ubuntu runner (ubuntu-latest) # - Docker running -# - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-) +# - NVIDIA_INFERENCE_API_KEY set for hosted inference # - Network access to inference-api.nvidia.com # - NEMOCLAW_NON_INTERACTIVE=1 # - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 @@ -37,12 +37,12 @@ # NEMOCLAW_REF — git ref for brev-launchable-ci-cpu.sh (default: current branch) # NEMOCLAW_SANDBOX_NAME — sandbox name (default: e2e-launchable) # NEMOCLAW_RECREATE_SANDBOX — set to 1 to recreate if exists -# NVIDIA_INFERENCE_API_KEY — required for NVIDIA Endpoints inference +# NVIDIA_INFERENCE_API_KEY — required for hosted inference # SKIP_DOCKER_PULL — set to 1 to skip Docker image pre-pulls (speeds up CI) # # Usage: # NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ -# NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-launchable-smoke.sh +# NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-launchable-smoke.sh # # See: https://github.com/NVIDIA/NemoClaw/issues/2599 @@ -97,6 +97,9 @@ except Exception as e: " } +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/lib/ci-compatible-inference.sh" + # Determine repo root if [ -f "$(cd "$(dirname "$0")/../.." && pwd)/scripts/brev-launchable-ci-cpu.sh" ]; then REPO="$(cd "$(dirname "$0")/../.." && pwd)" @@ -133,6 +136,10 @@ exec > >(tee -a "$TEST_LOG") 2>&1 # shellcheck source=test/e2e/lib/sandbox-teardown.sh . 
"$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh" register_sandbox_for_teardown "$SANDBOX_NAME" +nemoclaw_e2e_configure_compatible_inference || exit 1 +HOSTED_INFERENCE_BASE_URL="$(nemoclaw_e2e_hosted_inference_base_url)" +HOSTED_INFERENCE_MODEL="$(nemoclaw_e2e_hosted_inference_model)" +HOSTED_INFERENCE_KEY="$(nemoclaw_e2e_hosted_inference_key)" # ══════════════════════════════════════════════════════════════════ # Phase 0: Pre-cleanup @@ -177,17 +184,14 @@ else exit 1 fi -if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then - pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)" -else - fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for live inference" +if ! nemoclaw_e2e_require_hosted_inference_key; then exit 1 fi -if curl -sf --max-time 10 https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then - pass "Network access to inference-api.nvidia.com" +if nemoclaw_e2e_probe_hosted_inference; then + pass "Network access to ${HOSTED_INFERENCE_BASE_URL}" else - fail "Cannot reach inference-api.nvidia.com" + fail "Cannot reach ${HOSTED_INFERENCE_BASE_URL}" exit 1 fi @@ -341,9 +345,9 @@ else fi # ══════════════════════════════════════════════════════════════════ -# Phase 4: Onboard (non-interactive, cloud provider) +# Phase 4: Onboard (non-interactive, hosted inference) # ══════════════════════════════════════════════════════════════════ -section "Phase 4: Onboard (non-interactive, NVIDIA Endpoints)" +section "Phase 4: Onboard (non-interactive, hosted inference)" # Run onboard from the launchable clone directory — this is the real # community path: the user's NemoClaw is in ~/NemoClaw, not a CI checkout. @@ -353,7 +357,7 @@ cd "$NEMOCLAW_CLONE_DIR" || { } info "Running nemoclaw onboard --non-interactive..." 
-info "Provider: NVIDIA Endpoints (cloud)" +info "Provider: ${NEMOCLAW_PROVIDER:-configured hosted inference}" info "Sandbox name: $SANDBOX_NAME" ONBOARD_LOG="/tmp/nemoclaw-launchable-onboard.log" @@ -403,10 +407,16 @@ fi # 5c: Inference configured by onboard if inf_check=$(openshell inference get 2>&1); then - if grep -qi "nvidia-prod" <<<"$inf_check"; then - pass "Inference configured via onboard (nvidia-prod)" + expected_provider="$(nemoclaw_e2e_expected_route_provider)" + expected_model="" + if nemoclaw_e2e_using_compatible_inference; then + expected_model="$HOSTED_INFERENCE_MODEL" + fi + if nemoclaw_e2e_inference_output_matches "$inf_check" "$expected_provider" "$expected_model"; then + pass "Inference configured via onboard (${expected_provider})" else - fail "Inference not configured — onboard did not set up nvidia-prod provider" + inf_check_plain="$(printf '%s' "$inf_check" | nemoclaw_e2e_strip_ansi)" + fail "Inference not configured - onboard did not set up ${expected_provider}: ${inf_check_plain:0:200}" fi else fail "openshell inference get failed: ${inf_check:0:200}" @@ -424,17 +434,13 @@ fi # ══════════════════════════════════════════════════════════════════ section "Phase 6: Live inference" -# ── Test 6a: Direct NVIDIA Endpoints (sanity check) ── -info "[LIVE] Direct API test → inference-api.nvidia.com..." +# ── Test 6a: Direct hosted inference endpoint (sanity check) ── +info "[LIVE] Direct API test → ${HOSTED_INFERENCE_BASE_URL}..." 
api_response=$(curl -s --max-time 30 \ - -X POST https://inference-api.nvidia.com/v1/chat/completions \ + -X POST "${HOSTED_INFERENCE_BASE_URL}/chat/completions" \ -H "Content-Type: application/json" \ - -H "Authorization: Bearer $NVIDIA_INFERENCE_API_KEY" \ - -d '{ - "model": "nvidia/nemotron-3-super-120b-a12b", - "messages": [{"role": "user", "content": "Reply with exactly one word: PONG"}], - "max_tokens": 100 - }' 2>/dev/null) || true + -H "Authorization: Bearer $HOSTED_INFERENCE_KEY" \ + -d "$(printf '{"model":"%s","messages":[{"role":"user","content":"Reply with exactly one word: PONG"}],"max_tokens":100}' "$HOSTED_INFERENCE_MODEL")" 2>/dev/null) || true if [ -n "$api_response" ]; then api_content=$(echo "$api_response" | parse_chat_content 2>/dev/null) || true @@ -461,7 +467,7 @@ if openshell sandbox ssh-config "$SANDBOX_NAME" >"$ssh_config" 2>/dev/null; then "openshell-${SANDBOX_NAME}" \ "curl -s --max-time 60 https://inference.local/v1/chat/completions \ -H 'Content-Type: application/json' \ - -d '{\"model\":\"nvidia/nemotron-3-super-120b-a12b\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":100}'" \ + -d '{\"model\":\"$HOSTED_INFERENCE_MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":100}'" \ 2>&1) || true fi rm -f "$ssh_config" @@ -494,14 +500,14 @@ for pong_attempt in 1 2 3; do "openshell-${SANDBOX_NAME}" \ "curl -s --max-time 60 https://inference.local/v1/chat/completions \ -H 'Content-Type: application/json' \ - -d '{\"model\":\"nvidia/nemotron-3-super-120b-a12b\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":100}'" \ + -d '{\"model\":\"$HOSTED_INFERENCE_MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with exactly one word: PONG\"}],\"max_tokens\":100}'" \ 2>&1) || true fi rm -f "$ssh_config" done if $pong_ok; then - pass "[ROUTING] inference.local: OpenShell routed curl to 
NVIDIA Endpoints and returned PONG" + pass "[ROUTING] inference.local: OpenShell routed curl to the hosted inference endpoint and returned PONG" else fail "[ROUTING] inference.local: expected PONG after 3 attempts, got: ${sandbox_content:0:200}" fi @@ -576,9 +582,9 @@ echo "" echo " What this tested (issue #2599):" echo " - brev-launchable-ci-cpu.sh bootstrap (Docker, Node.js, OpenShell, NemoClaw)" echo " - Installation artifacts (binaries on PATH, sentinel file, built outputs)" -echo " - Onboard via launchable-installed NemoClaw (cloud provider)" +echo " - Onboard via launchable-installed NemoClaw (hosted inference)" echo " - Sandbox health (list, status, inference config, gateway)" -echo " - Direct NVIDIA Endpoints inference" +echo " - Direct hosted inference" echo " - Sandbox inference routing (curl → inference.local)" echo " - openclaw agent mediated inference (the full stack)" echo " - Destroy + cleanup" diff --git a/test/e2e/test-onboard-negative-paths.sh b/test/e2e/test-onboard-negative-paths.sh index be7b094621..30ff8c8580 100755 --- a/test/e2e/test-onboard-negative-paths.sh +++ b/test/e2e/test-onboard-negative-paths.sh @@ -24,6 +24,8 @@ export NEMOCLAW_E2E_DEFAULT_TIMEOUT=1800 SCRIPT_DIR_TIMEOUT="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" # shellcheck source=test/e2e/e2e-timeout.sh source "${SCRIPT_DIR_TIMEOUT}/e2e-timeout.sh" +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. "${SCRIPT_DIR_TIMEOUT}/lib/ci-compatible-inference.sh" LOG_FILE="${NEMOCLAW_E2E_LOG:-/tmp/nemoclaw-e2e-onboard-negative-paths.log}" exec > >(tee "$LOG_FILE") 2>&1 @@ -73,11 +75,35 @@ if ! 
command -v nemoclaw >/dev/null 2>&1; then fi SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-onboard-negative}" -CLOUD_MODEL="${NEMOCLAW_ONBOARD_NEGATIVE_MODEL:-nvidia/nemotron-3-super-120b-a12b}" PORT_CONFLICT_PORT="${NEMOCLAW_ONBOARD_NEGATIVE_CONFLICT_PORT:-18080}" SESSION_FILE="$HOME/.nemoclaw/onboard-session.json" REGISTRY_FILE="$HOME/.nemoclaw/sandboxes.json" RESTORE_API_KEY="${NVIDIA_INFERENCE_API_KEY:-}" +if [ -n "$RESTORE_API_KEY" ]; then + export NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY" +fi +nemoclaw_e2e_configure_compatible_inference || { + fail "Hosted CI inference could not be configured" + exit 1 +} +CLOUD_MODEL="${NEMOCLAW_ONBOARD_NEGATIVE_MODEL:-$(nemoclaw_e2e_hosted_inference_model)}" +HOSTED_INFERENCE_BASE_URL="$(nemoclaw_e2e_hosted_inference_base_url)" +EXPECTED_PROVIDER="$(nemoclaw_e2e_expected_route_provider)" +ONBOARD_INFERENCE_ENV=( + "NEMOCLAW_PROVIDER=cloud" + "NEMOCLAW_MODEL=$CLOUD_MODEL" + "NVIDIA_INFERENCE_API_KEY=$RESTORE_API_KEY" +) +if nemoclaw_e2e_using_compatible_inference; then + ONBOARD_INFERENCE_ENV=( + "NEMOCLAW_PROVIDER=custom" + "NEMOCLAW_ENDPOINT_URL=$HOSTED_INFERENCE_BASE_URL" + "NEMOCLAW_MODEL=$CLOUD_MODEL" + "NEMOCLAW_COMPAT_MODEL=$CLOUD_MODEL" + "COMPATIBLE_API_KEY=$RESTORE_API_KEY" + "NVIDIA_INFERENCE_API_KEY=$RESTORE_API_KEY" + ) +fi # shellcheck source=test/e2e/lib/sandbox-teardown.sh . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh" @@ -294,10 +320,7 @@ else exit 1 fi -if [[ -n "$RESTORE_API_KEY" && "$RESTORE_API_KEY" == nvapi-* ]]; then - pass "NVIDIA_INFERENCE_API_KEY is set" -else - fail "NVIDIA_INFERENCE_API_KEY not set or invalid; required for live onboard scenarios" +if ! 
nemoclaw_e2e_require_hosted_inference_key; then print_summary exit 1 fi @@ -333,11 +356,10 @@ section "Phase 3: Entry option validation" FROM_GUARD_LOG="$(mktemp)" env -u NEMOCLAW_SANDBOX_NAME \ + "${ONBOARD_INFERENCE_ENV[@]}" \ NEMOCLAW_NON_INTERACTIVE=1 \ NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ - NEMOCLAW_PROVIDER=cloud \ NEMOCLAW_POLICY_MODE=skip \ - NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY" \ node "$REPO/bin/nemoclaw.js" onboard --non-interactive --from "$REPO/Dockerfile" \ >"$FROM_GUARD_LOG" 2>&1 from_guard_exit=$? @@ -365,12 +387,11 @@ fi FROM_ENV_NAME_LOG="$(mktemp)" env \ + "${ONBOARD_INFERENCE_ENV[@]}" \ NEMOCLAW_NON_INTERACTIVE=1 \ NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ NEMOCLAW_SANDBOX_NAME="bad name" \ - NEMOCLAW_PROVIDER=cloud \ NEMOCLAW_POLICY_MODE=skip \ - NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY" \ node "$REPO/bin/nemoclaw.js" onboard --non-interactive --from "$REPO/Dockerfile" \ >"$FROM_ENV_NAME_LOG" 2>&1 from_env_name_exit=$? @@ -446,14 +467,14 @@ else fi PORT_CONFLICT_LOG="$(mktemp)" -NEMOCLAW_NON_INTERACTIVE=1 \ +env \ + "${ONBOARD_INFERENCE_ENV[@]}" \ + NEMOCLAW_NON_INTERACTIVE=1 \ NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ NEMOCLAW_SANDBOX_NAME="${SANDBOX_NAME}-port" \ NEMOCLAW_RECREATE_SANDBOX=1 \ NEMOCLAW_GATEWAY_PORT="$PORT_CONFLICT_PORT" \ - NEMOCLAW_PROVIDER=cloud \ NEMOCLAW_POLICY_MODE=skip \ - NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY" \ node "$REPO/bin/nemoclaw.js" onboard --non-interactive >"$PORT_CONFLICT_LOG" 2>&1 port_conflict_exit=$? 
port_conflict_output="$(cat "$PORT_CONFLICT_LOG")" @@ -487,15 +508,14 @@ fi section "Phase 6: Live non-interactive onboard honors presets and model" LIVE_LOG="$(mktemp)" -NEMOCLAW_NON_INTERACTIVE=1 \ +env \ + "${ONBOARD_INFERENCE_ENV[@]}" \ + NEMOCLAW_NON_INTERACTIVE=1 \ NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \ NEMOCLAW_RECREATE_SANDBOX=1 \ - NEMOCLAW_PROVIDER=cloud \ - NEMOCLAW_MODEL="$CLOUD_MODEL" \ NEMOCLAW_POLICY_MODE=custom \ NEMOCLAW_POLICY_PRESETS=npm,pypi \ - NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY" \ node "$REPO/bin/nemoclaw.js" onboard --non-interactive >"$LIVE_LOG" 2>&1 live_exit=$? live_output="$(cat "$LIVE_LOG")" @@ -510,20 +530,20 @@ else exit 1 fi -if printf '%s\n' "$live_output" | grep -q "Using NVIDIA Endpoints with model: ${CLOUD_MODEL}"; then - pass "Live onboard selected requested cloud model" +if printf '%s\n' "$live_output" | grep -Fq "$CLOUD_MODEL"; then + pass "Live onboard selected requested hosted model" else - fail "Live onboard output did not confirm requested cloud model" + fail "Live onboard output did not confirm requested hosted model" fi -if node - "$REGISTRY_FILE" "$SANDBOX_NAME" "$CLOUD_MODEL" <<'NODE'; then +if node - "$REGISTRY_FILE" "$SANDBOX_NAME" "$CLOUD_MODEL" "$EXPECTED_PROVIDER" <<'NODE'; then const fs = require("node:fs"); -const [registryPath, sandboxName, expectedModel] = process.argv.slice(2); +const [registryPath, sandboxName, expectedModel, expectedProvider] = process.argv.slice(2); const registry = JSON.parse(fs.readFileSync(registryPath, "utf8")); const sandbox = registry.sandboxes && registry.sandboxes[sandboxName]; if (!sandbox) throw new Error(`missing sandbox registry entry: ${sandboxName}`); -if (sandbox.provider !== "nvidia-prod") { - throw new Error(`expected provider nvidia-prod, got ${sandbox.provider}`); +if (sandbox.provider !== expectedProvider) { + throw new Error(`expected provider ${expectedProvider}, got ${sandbox.provider}`); } if (sandbox.model !== 
expectedModel) { throw new Error(`expected model ${expectedModel}, got ${sandbox.model}`); @@ -540,13 +560,13 @@ else fail "Registry did not record requested provider, model, and policy presets" fi -if node - "$SESSION_FILE" "$SANDBOX_NAME" "$CLOUD_MODEL" <<'NODE'; then +if node - "$SESSION_FILE" "$SANDBOX_NAME" "$CLOUD_MODEL" "$EXPECTED_PROVIDER" <<'NODE'; then const fs = require("node:fs"); -const [sessionPath, sandboxName, expectedModel] = process.argv.slice(2); +const [sessionPath, sandboxName, expectedModel, expectedProvider] = process.argv.slice(2); const session = JSON.parse(fs.readFileSync(sessionPath, "utf8")); if (session.status !== "complete") throw new Error(`session status ${session.status}`); if (session.sandboxName !== sandboxName) throw new Error(`session sandbox ${session.sandboxName}`); -if (session.provider !== "nvidia-prod") throw new Error(`session provider ${session.provider}`); +if (session.provider !== expectedProvider) throw new Error(`session provider ${session.provider}`); if (session.model !== expectedModel) throw new Error(`session model ${session.model}`); const presets = Array.isArray(session.policyPresets) ? session.policyPresets : []; for (const preset of ["npm", "pypi"]) { diff --git a/test/e2e/test-onboard-repair.sh b/test/e2e/test-onboard-repair.sh index 958bd68bea..c62db5ce00 100755 --- a/test/e2e/test-onboard-repair.sh +++ b/test/e2e/test-onboard-repair.sh @@ -14,10 +14,10 @@ # - Docker running # - openshell CLI installed # - Node.js available -# - NVIDIA_INFERENCE_API_KEY set to a valid nvapi-* key before starting the test +# - NVIDIA_INFERENCE_API_KEY set before starting the test # # Usage: -# NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-onboard-repair.sh +# NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-onboard-repair.sh set -uo pipefail @@ -73,6 +73,8 @@ fi # shellcheck source=test/e2e/lib/sandbox-teardown.sh . 
"$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh" +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. "$(dirname "${BASH_SOURCE[0]}")/lib/ci-compatible-inference.sh" register_sandbox_for_teardown "$SANDBOX_NAME" register_sandbox_for_teardown "$OTHER_SANDBOX_NAME" if [ -n "$INSTALL_SANDBOX_NAME" ]; then @@ -148,14 +150,14 @@ else exit 1 fi -if [[ -n "$RESTORE_API_KEY" && "$RESTORE_API_KEY" == nvapi-* ]]; then - pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)" -else +if [[ -z "$RESTORE_API_KEY" ]]; then fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for resume completion" exit 1 fi +pass "NVIDIA_INFERENCE_API_KEY is set" export NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY" +nemoclaw_e2e_configure_compatible_inference || exit 1 pass "Exported NVIDIA_INFERENCE_API_KEY for the repair run (host writes nothing to disk; OpenShell gateway is the system of record)" # ══════════════════════════════════════════════════════════════════ @@ -226,7 +228,7 @@ else fi REPAIR_LOG="$(mktemp)" -env -u NVIDIA_INFERENCE_API_KEY \ +env -u NVIDIA_INFERENCE_API_KEY -u COMPATIBLE_API_KEY \ NEMOCLAW_NON_INTERACTIVE=1 \ NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \ @@ -299,7 +301,7 @@ pass "Re-created interrupted session for conflict tests" info "Attempting resume with a different sandbox name..." SANDBOX_CONFLICT_LOG="$(mktemp)" -env -u NVIDIA_INFERENCE_API_KEY \ +env -u NVIDIA_INFERENCE_API_KEY -u COMPATIBLE_API_KEY \ NEMOCLAW_NON_INTERACTIVE=1 \ NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ NEMOCLAW_SANDBOX_NAME="$OTHER_SANDBOX_NAME" \ @@ -328,7 +330,7 @@ section "Phase 5: Reject conflicting provider and model" info "Attempting resume with conflicting provider/model inputs..." 
PROVIDER_CONFLICT_LOG="$(mktemp)" -env -u NVIDIA_INFERENCE_API_KEY \ +env -u NVIDIA_INFERENCE_API_KEY -u COMPATIBLE_API_KEY \ NEMOCLAW_NON_INTERACTIVE=1 \ NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \ diff --git a/test/e2e/test-onboard-resume.sh b/test/e2e/test-onboard-resume.sh index 0810354968..3145f5013d 100755 --- a/test/e2e/test-onboard-resume.sh +++ b/test/e2e/test-onboard-resume.sh @@ -15,10 +15,10 @@ # - Docker running # - openshell CLI installed # - Node.js available -# - NVIDIA_INFERENCE_API_KEY set to a valid nvapi-* key before starting the test +# - NVIDIA_INFERENCE_API_KEY set before starting the test # # Usage: -# NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-onboard-resume.sh +# NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-onboard-resume.sh set -uo pipefail @@ -77,6 +77,8 @@ fi # shellcheck source=test/e2e/lib/sandbox-teardown.sh . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh" +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. 
"$(dirname "${BASH_SOURCE[0]}")/lib/ci-compatible-inference.sh" register_sandbox_for_teardown "$SANDBOX_NAME" SESSION_FILE="$HOME/.nemoclaw/onboard-session.json" @@ -121,21 +123,24 @@ else exit 1 fi -if [[ -n "$RESTORE_API_KEY" && "$RESTORE_API_KEY" == nvapi-* ]]; then - pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)" -else +if [[ -z "$RESTORE_API_KEY" ]]; then fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for resume completion" exit 1 fi +pass "NVIDIA_INFERENCE_API_KEY is set" + +export NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY" +nemoclaw_e2e_configure_compatible_inference || exit 1 +HOSTED_INFERENCE_BASE_URL="$(nemoclaw_e2e_hosted_inference_base_url)" +EXPECTED_PROVIDER="$(nemoclaw_e2e_expected_route_provider)" -if curl -sf --max-time 10 https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then - pass "Network access to inference-api.nvidia.com" +if nemoclaw_e2e_probe_hosted_inference; then + pass "Network access to ${HOSTED_INFERENCE_BASE_URL}" else - fail "Cannot reach inference-api.nvidia.com" + fail "Cannot reach ${HOSTED_INFERENCE_BASE_URL}" exit 1 fi -export NVIDIA_INFERENCE_API_KEY="$RESTORE_API_KEY" pass "Exported NVIDIA_INFERENCE_API_KEY for the resume run (host writes nothing to disk; OpenShell gateway is the system of record)" # ══════════════════════════════════════════════════════════════════ @@ -212,7 +217,7 @@ section "Phase 3: Resume" info "Running onboard --resume with NVIDIA_INFERENCE_API_KEY removed from env..." 
RESUME_LOG="$(mktemp)" -env -u NVIDIA_INFERENCE_API_KEY \ +env -u NVIDIA_INFERENCE_API_KEY -u COMPATIBLE_API_KEY \ NEMOCLAW_NON_INTERACTIVE=1 \ NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \ @@ -286,9 +291,10 @@ fi node -e ' const fs = require("fs"); const file = process.argv[1]; +const expectedProvider = process.argv[2]; const data = JSON.parse(fs.readFileSync(file, "utf8")); if (data.status !== "complete") process.exit(1); -if (data.provider !== "nvidia-prod") process.exit(2); +if (data.provider !== expectedProvider) process.exit(2); if (data.steps.preflight.status !== "complete") process.exit(3); if (data.steps.gateway.status !== "complete") process.exit(4); if (data.steps.sandbox.status !== "complete") process.exit(5); @@ -296,7 +302,7 @@ if (data.steps.provider_selection.status !== "complete") process.exit(6); if (data.steps.inference.status !== "complete") process.exit(7); if (data.steps.openclaw.status !== "complete") process.exit(8); if (data.steps.policies.status !== "complete") process.exit(9); -' "$SESSION_FILE" +' "$SESSION_FILE" "$EXPECTED_PROVIDER" case $? in 0) pass "Session file recorded full completion after resume" ;; *) fail "Session file did not record the expected completed state after resume" ;; diff --git a/test/e2e/test-openclaw-inference-switch.sh b/test/e2e/test-openclaw-inference-switch.sh index 276dbf2e9c..d513f68390 100755 --- a/test/e2e/test-openclaw-inference-switch.sh +++ b/test/e2e/test-openclaw-inference-switch.sh @@ -10,7 +10,7 @@ # # Prerequisites: # - Docker running -# - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-) +# - NVIDIA_INFERENCE_API_KEY set for hosted inference # - NEMOCLAW_NON_INTERACTIVE=1 # - NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 @@ -353,9 +353,16 @@ E2E_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" . "${E2E_DIR}/lib/inference-switch-retry.sh" # shellcheck source=test/e2e/lib/anthropic-switch-provider.sh . 
"${E2E_DIR}/lib/anthropic-switch-provider.sh" +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. "${E2E_DIR}/lib/ci-compatible-inference.sh" SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-openclaw-inference-switch}" -SWITCH_PROVIDER="${NEMOCLAW_SWITCH_PROVIDER:-nvidia-prod}" -SWITCH_MODEL="${NEMOCLAW_SWITCH_MODEL:-z-ai/glm-5.1}" +if nemoclaw_e2e_using_compatible_inference; then + SWITCH_PROVIDER="${NEMOCLAW_SWITCH_PROVIDER:-$(nemoclaw_e2e_expected_route_provider)}" + SWITCH_MODEL="${NEMOCLAW_SWITCH_MODEL:-$(nemoclaw_e2e_hosted_inference_model)}" +else + SWITCH_PROVIDER="${NEMOCLAW_SWITCH_PROVIDER:-nvidia-prod}" + SWITCH_MODEL="${NEMOCLAW_SWITCH_MODEL:-z-ai/glm-5.1}" +fi SWITCH_INFERENCE_API="${NEMOCLAW_SWITCH_INFERENCE_API:-openai-completions}" # shellcheck disable=SC2034 # consumed by anthropic-switch-provider.sh helpers SWITCH_ENDPOINT_URL="${NEMOCLAW_SWITCH_ENDPOINT_URL:-}" @@ -371,6 +378,7 @@ trap 'stop_mock_anthropic_switch_provider; _nemoclaw_sandbox_teardown' EXIT # shellcheck source=test/e2e/lib/install-path-refresh.sh . "${E2E_DIR}/lib/install-path-refresh.sh" register_sandbox_for_teardown "$SANDBOX_NAME" +nemoclaw_e2e_configure_compatible_inference || exit 1 section "Phase 0: Pre-cleanup" if command -v nemoclaw >/dev/null 2>&1; then @@ -390,10 +398,7 @@ else exit 1 fi -if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then - pass "NVIDIA_INFERENCE_API_KEY is set" -else - fail "NVIDIA_INFERENCE_API_KEY not set or invalid" +if ! nemoclaw_e2e_require_hosted_inference_key; then exit 1 fi diff --git a/test/e2e/test-openclaw-skill-cli-e2e.sh b/test/e2e/test-openclaw-skill-cli-e2e.sh index 2eb6084d69..8229b6893a 100755 --- a/test/e2e/test-openclaw-skill-cli-e2e.sh +++ b/test/e2e/test-openclaw-skill-cli-e2e.sh @@ -25,7 +25,7 @@ # # Usage: # NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ -# NVIDIA_INFERENCE_API_KEY=nvapi-... 
bash test/e2e/test-openclaw-skill-cli-e2e.sh +# NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-openclaw-skill-cli-e2e.sh # shellcheck disable=SC2317 set -uo pipefail @@ -71,7 +71,10 @@ SKILL_DESCRIPTION="E2E fixture proving openclaw skills install + list roundtrip" # Source shared teardown helper # shellcheck source=test/e2e/lib/sandbox-teardown.sh . "${E2E_DIR}/lib/sandbox-teardown.sh" +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. "${E2E_DIR}/lib/ci-compatible-inference.sh" register_sandbox_for_teardown "$SANDBOX_NAME" +nemoclaw_e2e_configure_compatible_inference || exit 1 # ══════════════════════════════════════════════════════════════════════ # Phase 1: Install + Prerequisites @@ -84,11 +87,9 @@ if ! docker info >/dev/null 2>&1; then fi pass "Docker is running" -if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] || [[ "${NVIDIA_INFERENCE_API_KEY}" != nvapi-* ]]; then - fail "NVIDIA_INFERENCE_API_KEY not set or invalid" +if ! nemoclaw_e2e_require_hosted_inference_key; then exit 1 fi -pass "NVIDIA_INFERENCE_API_KEY is set" cd "$REPO" || { fail "Could not cd to repo root" diff --git a/test/e2e/test-overlayfs-autofix.sh b/test/e2e/test-overlayfs-autofix.sh index 5d9143a024..686f9f0023 100755 --- a/test/e2e/test-overlayfs-autofix.sh +++ b/test/e2e/test-overlayfs-autofix.sh @@ -59,7 +59,7 @@ # Usage: # NEMOCLAW_NON_INTERACTIVE=1 \ # NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ -# NVIDIA_INFERENCE_API_KEY=nvapi-... \ +# NVIDIA_INFERENCE_API_KEY=... \ # bash test/e2e/test-overlayfs-autofix.sh # ShellCheck cannot see EXIT trap invocations of cleanup helpers in this E2E script. @@ -70,6 +70,8 @@ export NEMOCLAW_E2E_DEFAULT_TIMEOUT=1500 SCRIPT_DIR_TIMEOUT="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" # shellcheck source=test/e2e/e2e-timeout.sh source "${SCRIPT_DIR_TIMEOUT}/e2e-timeout.sh" +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. 
"${SCRIPT_DIR_TIMEOUT}/lib/ci-compatible-inference.sh" PASS=0 FAIL=0 @@ -127,6 +129,7 @@ ONBOARD_LOG_NEGATIVE="/tmp/nemoclaw-e2e-onboard-negative.log" # shellcheck source=test/e2e/lib/sandbox-teardown.sh . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh" register_sandbox_for_teardown "$SANDBOX_NAME" +nemoclaw_e2e_configure_compatible_inference || exit 1 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" @@ -172,10 +175,7 @@ else exit 1 fi -if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then - pass "NVIDIA_INFERENCE_API_KEY is set" -else - fail "NVIDIA_INFERENCE_API_KEY not set or invalid" +if ! nemoclaw_e2e_require_hosted_inference_key; then exit 1 fi diff --git a/test/e2e/test-sandbox-survival.sh b/test/e2e/test-sandbox-survival.sh index bc952247ba..39117cf5b0 100755 --- a/test/e2e/test-sandbox-survival.sh +++ b/test/e2e/test-sandbox-survival.sh @@ -22,20 +22,20 @@ # # Prerequisites: # - Docker running -# - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-) +# - NVIDIA_INFERENCE_API_KEY set for hosted inference # - Network access to inference-api.nvidia.com # # Environment variables: # NEMOCLAW_NON_INTERACTIVE=1 — required # NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 — required -# NVIDIA_INFERENCE_API_KEY — required for real NVIDIA Endpoints inference +# NVIDIA_INFERENCE_API_KEY — required for hosted inference # NEMOCLAW_SANDBOX_NAME — sandbox name (default: e2e-survival) # NEMOCLAW_E2E_TIMEOUT_SECONDS — overall timeout (default: 900) # # Usage: # NEMOCLAW_NON_INTERACTIVE=1 \ # NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ -# NVIDIA_INFERENCE_API_KEY=nvapi-... \ +# NVIDIA_INFERENCE_API_KEY=... 
\ # bash test/e2e/test-sandbox-survival.sh set -uo pipefail @@ -44,6 +44,8 @@ export NEMOCLAW_E2E_DEFAULT_TIMEOUT=900 SCRIPT_DIR_TIMEOUT="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" # shellcheck source=test/e2e/e2e-timeout.sh source "${SCRIPT_DIR_TIMEOUT}/e2e-timeout.sh" +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. "${SCRIPT_DIR_TIMEOUT}/lib/ci-compatible-inference.sh" PASS=0 FAIL=0 @@ -93,6 +95,9 @@ version_gte() { } SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-survival}" +nemoclaw_e2e_configure_compatible_inference || exit 1 +HOSTED_INFERENCE_BASE_URL="$(nemoclaw_e2e_hosted_inference_base_url)" +MODEL="$(nemoclaw_e2e_hosted_inference_model)" # shellcheck source=test/e2e/lib/sandbox-teardown.sh . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh" @@ -102,7 +107,6 @@ REGISTRY="$HOME/.nemoclaw/sandboxes.json" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" MIN_OPENSHELL="0.0.24" -MODEL="nvidia/nemotron-3-super-120b-a12b" # SSH helper — sets up SSH config and common options for sandbox access # Sets: ssh_config, SSH_OPTS, SSH_TARGET @@ -207,17 +211,14 @@ else exit 1 fi -if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then - pass "NVIDIA_INFERENCE_API_KEY is set (starts with nvapi-)" -else - fail "NVIDIA_INFERENCE_API_KEY not set or invalid — required for live inference" +if ! nemoclaw_e2e_require_hosted_inference_key; then exit 1 fi -if curl -sf --max-time 10 https://inference-api.nvidia.com/v1/models >/dev/null 2>&1; then - pass "Network access to inference-api.nvidia.com" +if nemoclaw_e2e_probe_hosted_inference; then + pass "Network access to ${HOSTED_INFERENCE_BASE_URL}" else - fail "Cannot reach inference-api.nvidia.com" + fail "Cannot reach ${HOSTED_INFERENCE_BASE_URL}" exit 1 fi @@ -382,7 +383,7 @@ else fi # 4b: Live inference through sandbox -info "[LIVE] Baseline inference: user → sandbox → gateway → NVIDIA Endpoints..." 
+info "[LIVE] Baseline inference: user → sandbox → gateway → hosted inference endpoint..." # shellcheck disable=SC2029 # client-side expansion is intentional baseline_response=$(run_with_timeout 90 ssh "${SSH_OPTS[@]}" "$SSH_TARGET" \ "curl -s --max-time 60 https://inference.local/v1/chat/completions \ @@ -718,7 +719,7 @@ fi # ══════════════════════════════════════════════════════════════════ section "Phase 10: Live inference after restart (THE definitive test)" -info "[LIVE] Post-restart inference: user → sandbox → gateway → NVIDIA Endpoints..." +info "[LIVE] Post-restart inference: user → sandbox → gateway → hosted inference endpoint..." # shellcheck disable=SC2029 post_response=$(run_with_timeout 90 ssh "${SSH_OPTS[@]}" "$SSH_TARGET" \ "curl -s --max-time 60 https://inference.local/v1/chat/completions \ @@ -750,7 +751,7 @@ for pong_attempt in 1 2 3; do done if $pong_ok; then pass "[LIVE] Post-restart: model responded with PONG through sandbox" - info "Full path proven: user → sandbox → openshell gateway (resumed) → NVIDIA Endpoints → response" + info "Full path proven: user → sandbox → openshell gateway (resumed) → hosted inference endpoint → response" info "This proves #859's ask: reliable non-destructive gateway lifecycle with working inference" else fail "[LIVE] Post-restart: expected PONG after 3 attempts, got: ${post_content:0:200}" diff --git a/test/e2e/test-shields-config.sh b/test/e2e/test-shields-config.sh index a806bbdc2b..20b1529fee 100755 --- a/test/e2e/test-shields-config.sh +++ b/test/e2e/test-shields-config.sh @@ -19,7 +19,7 @@ # # Prerequisites: # - Docker running -# - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-) +# - NVIDIA_INFERENCE_API_KEY set for hosted inference # # Environment variables: # NEMOCLAW_NON_INTERACTIVE=1 — required @@ -34,6 +34,8 @@ export NEMOCLAW_E2E_DEFAULT_TIMEOUT=900 SCRIPT_DIR_TIMEOUT="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" # shellcheck source=test/e2e/e2e-timeout.sh source 
"${SCRIPT_DIR_TIMEOUT}/e2e-timeout.sh" +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. "${SCRIPT_DIR_TIMEOUT}/lib/ci-compatible-inference.sh" PASS=0 FAIL=0 @@ -60,6 +62,7 @@ SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-shields}" # shellcheck source=test/e2e/lib/sandbox-teardown.sh . "$(dirname "${BASH_SOURCE[0]}")/lib/sandbox-teardown.sh" register_sandbox_for_teardown "$SANDBOX_NAME" +nemoclaw_e2e_configure_compatible_inference || exit 1 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" @@ -79,10 +82,7 @@ else exit 1 fi -if [ -n "${NVIDIA_INFERENCE_API_KEY:-}" ] && [[ "${NVIDIA_INFERENCE_API_KEY}" == nvapi-* ]]; then - pass "NVIDIA_INFERENCE_API_KEY is set" -else - fail "NVIDIA_INFERENCE_API_KEY not set or invalid" +if ! nemoclaw_e2e_require_hosted_inference_key; then exit 1 fi diff --git a/test/e2e/test-skill-agent-e2e.sh b/test/e2e/test-skill-agent-e2e.sh index ff93b92a3b..f4ef86a929 100755 --- a/test/e2e/test-skill-agent-e2e.sh +++ b/test/e2e/test-skill-agent-e2e.sh @@ -13,7 +13,7 @@ # # Prerequisites: # - Docker running -# - NVIDIA_INFERENCE_API_KEY set (real key, starts with nvapi-) +# - NVIDIA_INFERENCE_API_KEY set for hosted inference # - NEMOCLAW_NON_INTERACTIVE=1, NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 # # Environment: @@ -24,7 +24,7 @@ # # Usage: # NEMOCLAW_NON_INTERACTIVE=1 NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \ -# NVIDIA_INFERENCE_API_KEY=nvapi-... bash test/e2e/test-skill-agent-e2e.sh +# NVIDIA_INFERENCE_API_KEY=... bash test/e2e/test-skill-agent-e2e.sh # ShellCheck cannot see EXIT trap invocations of cleanup helpers in this E2E script. # shellcheck disable=SC2317 @@ -98,7 +98,10 @@ RETRY_SLEEP="${E2E_SKILL_AGENT_RETRY_SLEEP_SEC:-15}" # Source shared teardown helper # shellcheck source=test/e2e/lib/sandbox-teardown.sh . "${E2E_DIR}/lib/sandbox-teardown.sh" +# shellcheck source=test/e2e/lib/ci-compatible-inference.sh +. 
"${E2E_DIR}/lib/ci-compatible-inference.sh" register_sandbox_for_teardown "$SANDBOX_NAME" +nemoclaw_e2e_configure_compatible_inference || exit 1 # ══════════════════════════════════════════════════════════════════════ # Phase 1: Install + Prerequisites @@ -111,11 +114,9 @@ if ! docker info >/dev/null 2>&1; then fi pass "Docker is running" -if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ] || [[ "${NVIDIA_INFERENCE_API_KEY}" != nvapi-* ]]; then - fail "NVIDIA_INFERENCE_API_KEY not set or invalid" +if ! nemoclaw_e2e_require_hosted_inference_key; then exit 1 fi -pass "NVIDIA_INFERENCE_API_KEY is set" cd "$REPO" || { fail "Could not cd to repo root" diff --git a/test/onboard-selection.test.ts b/test/onboard-selection.test.ts index 58dc04c203..8b422643c2 100644 --- a/test/onboard-selection.test.ts +++ b/test/onboard-selection.test.ts @@ -3923,7 +3923,7 @@ const { setupNim } = require(${onboardPath}); assert.equal(payload.messages.filter((message: string) => /Choose \[/.test(message)).length, 2); }); - it("fails early in non-interactive mode when NVIDIA_INFERENCE_API_KEY is not an nvapi- key", () => { + it("fails early in non-interactive mode when explicit cloud provider key is not nvapi-", () => { const repoRoot = path.join(import.meta.dirname, ".."); const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-build-noninteractive-")); const fakeBin = path.join(tmpDir, "bin"); @@ -3964,7 +3964,7 @@ onboardModule._compile(injected, onboardFile); const { setupNim, __setNonInteractive } = onboardModule.exports; (async () => { - process.env.NVIDIA_INFERENCE_API_KEY = "sk-test"; + process.env.NVIDIA_INFERENCE_API_KEY = "sk-test"; process.env.NEMOCLAW_PROVIDER = "cloud"; __setNonInteractive(true); const originalLog = console.log; const originalError = console.error;