NVIDIA · cv · Jun 13, 2026 · Jun 13, 2026 · Jun 13, 2026 · Jun 13, 2026
diff --git a/.github/workflows/e2e-script.yaml b/.github/workflows/e2e-script.yaml
@@ -58,12 +58,7 @@ on:
         type: string
         default: ""
       nvidia_api_key:
-        description: Pass the NVIDIA_INFERENCE_API_KEY secret to the script.
-        required: false
-        type: boolean
-        default: false
-      nvidia_secret_as_compatible_api_key:
-        description: Expose NVIDIA_INFERENCE_API_KEY as COMPATIBLE_API_KEY for CI-only OpenAI-compatible inference.
+        description: Pass the hosted inference source secret as the CI custom endpoint credential.
         required: false
         type: boolean
         default: false
@@ -217,21 +212,21 @@ jobs:
             echo "::warning::Docker Hub login failed after 3 attempts; continuing with anonymous pulls."
           fi
 
-      - name: Export CI compatible inference environment
-        if: ${{ inputs.nvidia_secret_as_compatible_api_key }}
+      - name: Export hosted CI inference environment
+        if: ${{ inputs.nvidia_api_key }}
         env:
           NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         shell: bash
         run: |
           set -euo pipefail
 
           if [ -z "${NVIDIA_INFERENCE_API_KEY:-}" ]; then
-            echo "::error::NVIDIA_INFERENCE_API_KEY secret is required for CI compatible inference." >&2
+            echo "::error::NVIDIA_INFERENCE_API_KEY secret is required for hosted CI inference; it is withheld for workflow_dispatch target_ref runs." >&2
             exit 1
           fi
 
           {
-            printf 'NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE=1\n'
+            printf 'NEMOCLAW_E2E_USE_HOSTED_INFERENCE=1\n'
             printf 'NEMOCLAW_PROVIDER=custom\n'
             printf 'NEMOCLAW_ENDPOINT_URL=https://inference-api.nvidia.com/v1\n'
             printf 'NEMOCLAW_MODEL=nvidia/nvidia/nemotron-3-super-v3\n'

diff --git a/.github/workflows/nightly-e2e.yaml b/.github/workflows/nightly-e2e.yaml
diff --git a/ci/env-var-doc-allowlist.json b/ci/env-var-doc-allowlist.json
@@ -42,5 +42,17 @@
   {
     "name": "NEMOCLAW_E2E_FORCE_FAIL_AT_STEP",
     "reason": "Internal E2E-only selector naming the onboarding step where deterministic fault injection should exit. Used only with NEMOCLAW_E2E_FAILURE_INJECTION in test scripts."
+  },
+  {
+    "name": "NEMOCLAW_E2E_USE_HOSTED_INFERENCE",
+    "reason": "Internal E2E-only sentinel that tells CI to route the repository NVIDIA_INFERENCE_API_KEY secret through the hosted inference-api.nvidia.com OpenAI-compatible endpoint. Not user-facing."
+  },
+  {
+    "name": "NEMOCLAW_COMPAT_MODEL",
+    "reason": "Internal E2E/test override for the model used by OpenAI-compatible endpoint scenarios. User-facing custom endpoint model selection is collected through onboard prompts or NEMOCLAW_MODEL."
+  },
+  {
+    "name": "NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL",
+    "reason": "Legacy E2E-only model override used by cloud and hosted-inference live test scripts. Not a supported production configuration knob."
   }
 ]
diff --git a/src/lib/onboard/providers.test.ts b/src/lib/onboard/providers.test.ts
@@ -7,36 +7,87 @@ type RunResult = { status: number; stdout?: string; stderr?: string };
 type RunOptions = { env?: Record<string, string | undefined> };
 type RunOpenshell = (command: string[], opts?: RunOptions) => RunResult;
 
-const { buildProviderArgs, providerExistsInGateway, upsertProvider, upsertMessagingProviders } =
-  require("../../../dist/lib/onboard/providers") as {
-    buildProviderArgs: (
-      action: "create" | "update",
-      name: string,
-      type: string,
-      credentialEnv: string,
-      baseUrl: string | null,
-    ) => string[];
-    providerExistsInGateway: (name: string, runOpenshell: RunOpenshell) => boolean;
-    upsertProvider: (
-      name: string,
-      type: string,
-      credentialEnv: string,
-      baseUrl: string | null,
-      env: Record<string, string | undefined>,
-      runOpenshell: RunOpenshell,
-      options?: { replaceExisting?: boolean },
-    ) => { ok: boolean; status?: number; message?: string };
-    upsertMessagingProviders: (
-      tokenDefs: Array<{
-        name: string;
-        envKey: string;
-        token: string | null;
-        providerType?: string;
-      }>,
-      runOpenshell: RunOpenshell,
-      options?: { replaceExisting?: boolean; bestEffort?: boolean },
-    ) => string[];
-  };
+const {
+  HOSTED_INFERENCE_ENDPOINT_URL,
+  HOSTED_INFERENCE_MODEL,
+  buildProviderArgs,
+  getRequestedModelHint,
+  getRequestedProviderHint,
+  providerExistsInGateway,
+  stageHostedInferenceSourceSecretEnv,
+  upsertProvider,
+  upsertMessagingProviders,
+} = require("../../../dist/lib/onboard/providers") as {
+  HOSTED_INFERENCE_ENDPOINT_URL: string;
+  HOSTED_INFERENCE_MODEL: string;
+  buildProviderArgs: (
+    action: "create" | "update",
+    name: string,
+    type: string,
+    credentialEnv: string,
+    baseUrl: string | null,
+  ) => string[];
+  getRequestedModelHint: (nonInteractive: boolean) => string | null;
+  getRequestedProviderHint: (nonInteractive: boolean) => string | null;
+  providerExistsInGateway: (name: string, runOpenshell: RunOpenshell) => boolean;
+  stageHostedInferenceSourceSecretEnv: () => boolean;
+  upsertProvider: (
+    name: string,
+    type: string,
+    credentialEnv: string,
+    baseUrl: string | null,
+    env: Record<string, string | undefined>,
+    runOpenshell: RunOpenshell,
+    options?: { replaceExisting?: boolean },
+  ) => { ok: boolean; status?: number; message?: string };
+  upsertMessagingProviders: (
+    tokenDefs: Array<{
+      name: string;
+      envKey: string;
+      token: string | null;
+      providerType?: string;
+    }>,
+    runOpenshell: RunOpenshell,
+    options?: { replaceExisting?: boolean; bestEffort?: boolean },
+  ) => string[];
+};
+
+function withProviderEnv(next: Record<string, string | undefined>, testBody: () => void): void { // Test helper: runs testBody with the listed provider env vars cleared and the overrides in next applied, then restores the prior values.
+  const keys = new Set([ // vars scrubbed for every test, plus whatever extra keys the caller overrides
+    "NVIDIA_INFERENCE_API_KEY",
+    "NEMOCLAW_PROVIDER",
+    "NEMOCLAW_ENDPOINT_URL",
+    "NEMOCLAW_MODEL",
+    "NEMOCLAW_COMPAT_MODEL",
+    "NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL",
+    "NEMOCLAW_E2E_USE_HOSTED_INFERENCE",
+    "COMPATIBLE_API_KEY",
+    ...Object.keys(next),
+  ]);
+  const previous = new Map<string, string | undefined>(); // snapshot used by the finally block to restore the environment
+  for (const key of keys) {
+    previous.set(key, process.env[key]);
+    delete process.env[key]; // start each test from a clean slate
+  }
+  for (const [key, value] of Object.entries(next)) { // apply the caller's overrides on top of the scrubbed env
+    if (value === undefined) {
+      delete process.env[key];
+    } else {
+      process.env[key] = value;
+    }
+  }
+  try {
+    testBody(); // invoked synchronously and not awaited; the env is restored as soon as this call returns or throws
+  } finally {
+    for (const [key, value] of previous.entries()) {
+      if (value === undefined) {
+        delete process.env[key]; // the var was unset before the test, so unset it again
+      } else {
+        process.env[key] = value;
+      }
+    }
+  }
+}
 
 describe("onboard provider helpers", () => {
   it("builds create arguments for generic providers", () => {
@@ -239,6 +290,55 @@ describe("onboard provider helpers", () => {
     expect(commands[1]).toMatch(/--credential NVIDIA_INFERENCE_API_KEY/);
   });
 
+  it("stages non-nvapi NVIDIA_INFERENCE_API_KEY as hosted custom inference", () => {
+    withProviderEnv(
+      {
+        NVIDIA_INFERENCE_API_KEY: "  repo-hosted-key  ", // padded on purpose: the staged credential asserted below is the trimmed value
+      },
+      () => {
+        expect(stageHostedInferenceSourceSecretEnv()).toBe(true); // no provider requested and the key lacks the nvapi- prefix
+        expect(getRequestedProviderHint(true)).toBe("custom");
+        expect(getRequestedModelHint(true)).toBe(HOSTED_INFERENCE_MODEL);
+        expect(process.env.NEMOCLAW_PROVIDER).toBe("custom");
+        expect(process.env.NEMOCLAW_ENDPOINT_URL).toBe(HOSTED_INFERENCE_ENDPOINT_URL);
+        expect(process.env.NEMOCLAW_MODEL).toBe(HOSTED_INFERENCE_MODEL);
+        expect(process.env.NEMOCLAW_COMPAT_MODEL).toBe(HOSTED_INFERENCE_MODEL);
+        expect(process.env.COMPATIBLE_API_KEY).toBe("repo-hosted-key");
+      },
+    );
+  });
+
+  it("keeps explicit cloud provider selection on the Build provider path", () => {
+    withProviderEnv(
+      {
+        NVIDIA_INFERENCE_API_KEY: "repo-hosted-key",
+        NEMOCLAW_PROVIDER: "cloud", // explicit non-custom provider; expected to surface as "build" in the hint asserted below
+      },
+      () => {
+        expect(stageHostedInferenceSourceSecretEnv()).toBe(false); // an explicit non-custom provider must not be rerouted
+        expect(getRequestedProviderHint(true)).toBe("build");
+        expect(process.env.COMPATIBLE_API_KEY).toBeUndefined(); // nothing was staged, so no credential or endpoint appears
+        expect(process.env.NEMOCLAW_ENDPOINT_URL).toBeUndefined();
+      },
+    );
+  });
+
+  it("preserves explicit custom provider credentials when NVIDIA_INFERENCE_API_KEY is unrelated", () => {
+    withProviderEnv(
+      {
+        COMPATIBLE_API_KEY: "custom-endpoint-key", // caller already supplied a credential for the custom endpoint
+        NVIDIA_INFERENCE_API_KEY: "repo-hosted-key",
+        NEMOCLAW_PROVIDER: "custom",
+      },
+      () => {
+        expect(stageHostedInferenceSourceSecretEnv()).toBe(false); // E2E flag is unset and COMPATIBLE_API_KEY exists, so no staging
+        expect(getRequestedProviderHint(true)).toBe("custom");
+        expect(process.env.COMPATIBLE_API_KEY).toBe("custom-endpoint-key"); // the caller's key must not be overwritten
+        expect(process.env.NEMOCLAW_ENDPOINT_URL).toBeUndefined();
+      },
+    );
+  });
+
   it("returns redacted error details when create or update fails", () => {
     const result = upsertProvider("bad-provider", "generic", "SOME_KEY", null, {}, (command) => {
       if (command.includes("get")) return { status: 1, stdout: "", stderr: "" };

diff --git a/src/lib/onboard/providers.ts b/src/lib/onboard/providers.ts
@@ -5,6 +5,7 @@
 // Provider metadata, lookup helpers, and gateway provider CRUD.
 
 const { redact } = require("../runner");
+const { normalizeCredentialValue } = require("../credentials/store");
 const {
   DEFAULT_CLOUD_MODEL,
   DEFAULT_HERMES_PROVIDER_MODEL,
@@ -22,6 +23,10 @@ const OPENAI_ENDPOINT_URL = "https://api.openai.com/v1";
 const ANTHROPIC_ENDPOINT_URL = "https://api.anthropic.com";
 const GEMINI_ENDPOINT_URL = "https://generativelanguage.googleapis.com/v1beta/openai/";
 const HERMES_INFERENCE_ENDPOINT_URL = "https://inference-api.nousresearch.com/v1";
+const HOSTED_INFERENCE_SOURCE_ENV = "NVIDIA_INFERENCE_API_KEY"; // env var the hosted-inference source secret is read from
+const HOSTED_INFERENCE_CREDENTIAL_ENV = "COMPATIBLE_API_KEY"; // env var the staged secret is written to
+const HOSTED_INFERENCE_ENDPOINT_URL = "https://inference-api.nvidia.com/v1"; // endpoint used when NEMOCLAW_ENDPOINT_URL is unset or blank
+const HOSTED_INFERENCE_MODEL = "nvidia/nvidia/nemotron-3-super-v3"; // fallback model when none of the model env vars is set
 
 const REMOTE_PROVIDER_CONFIG = {
   build: {
@@ -167,6 +172,7 @@ function getEffectiveProviderName(providerKey) {
 // ── Non-interactive helpers ──────────────────────────────────────
 
 function getNonInteractiveProvider() {
+  stageHostedInferenceSourceSecretEnv();
   const providerKey = (process.env.NEMOCLAW_PROVIDER || "").trim().toLowerCase();
   if (!providerKey) return null;
   const aliases = {
@@ -208,6 +214,50 @@ function getNonInteractiveProvider() {
   return normalized;
 }
 
+function stageHostedInferenceSourceSecretEnv() { // Stages the NVIDIA_INFERENCE_API_KEY secret into the custom-endpoint env vars; returns true only when it wrote to process.env.
+  const sourceKey = normalizeCredentialValue(process.env[HOSTED_INFERENCE_SOURCE_ENV] ?? ""); // presumably trims surrounding whitespace; verify against credentials/store
+  if (!sourceKey) return false; // nothing to stage without a source secret
+
+  const rawProvider = (process.env.NEMOCLAW_PROVIDER || "").trim().toLowerCase();
+  const aliases = { // NOTE(review): getNonInteractiveProvider declares its own aliases table; confirm the two stay in sync
+    cloud: "build",
+    anthropiccompatible: "anthropicCompatible",
+    hermes: "hermesProvider",
+    "hermes-provider": "hermesProvider",
+    hermesprovider: "hermesProvider",
+    nous: "hermesProvider",
+    "nous-portal": "hermesProvider",
+  };
+  const normalizedProvider = aliases[rawProvider] || rawProvider; // empty string when no provider was requested
+  const hostedFlag = (process.env.NEMOCLAW_E2E_USE_HOSTED_INFERENCE || "").trim() === "1"; // only the exact value "1" enables the flag
+  const compatibleKey = normalizeCredentialValue(
+    process.env[HOSTED_INFERENCE_CREDENTIAL_ENV] ?? "",
+  );
+  const explicitHostedCustom =
+    normalizedProvider === "custom" && // caller asked for the custom provider by name
+    (hostedFlag || (!compatibleKey && !sourceKey.startsWith("nvapi-"))); // flag forces staging; otherwise only when no COMPATIBLE_API_KEY is set and the secret lacks the nvapi- prefix
+  const implicitHostedCustom =
+    !normalizedProvider && (hostedFlag || !sourceKey.startsWith("nvapi-")); // no provider requested: stage when flagged or when the secret lacks the nvapi- prefix
+  const shouldStage = explicitHostedCustom || implicitHostedCustom;
+
+  if (!shouldStage) return false; // every other provider choice leaves process.env untouched
+
+  if (!normalizedProvider) {
+    process.env.NEMOCLAW_PROVIDER = "custom"; // fill in the provider only when none was requested
+  }
+  process.env.NEMOCLAW_ENDPOINT_URL =
+    (process.env.NEMOCLAW_ENDPOINT_URL || "").trim() || HOSTED_INFERENCE_ENDPOINT_URL; // a non-blank caller-supplied endpoint wins; NOTE(review): the source secret is then staged as that endpoint's credential, confirm this is intended
+  const model = // first non-blank of the three model env vars, else the hosted default
+    (process.env.NEMOCLAW_MODEL || "").trim() ||
+    (process.env.NEMOCLAW_COMPAT_MODEL || "").trim() ||
+    (process.env.NEMOCLAW_CLOUD_EXPERIMENTAL_MODEL || "").trim() ||
+    HOSTED_INFERENCE_MODEL;
+  process.env.NEMOCLAW_MODEL = model;
+  process.env.NEMOCLAW_COMPAT_MODEL = (process.env.NEMOCLAW_COMPAT_MODEL || "").trim() || model; // keep an existing non-blank compat model, otherwise mirror the chosen model
+  process.env[HOSTED_INFERENCE_CREDENTIAL_ENV] = sourceKey; // replaces any existing value with the normalized source secret
+  return true;
+}
+
 function getNonInteractiveModel(providerKey) {
   const model = (process.env.NEMOCLAW_MODEL || "").trim();
   if (!model) return null;
@@ -399,8 +449,13 @@ module.exports = {
   OLLAMA_PROXY_CREDENTIAL_ENV,
   VLLM_LOCAL_CREDENTIAL_ENV,
   DISCORD_SNOWFLAKE_RE,
+  HOSTED_INFERENCE_SOURCE_ENV,
+  HOSTED_INFERENCE_CREDENTIAL_ENV,
+  HOSTED_INFERENCE_ENDPOINT_URL,
+  HOSTED_INFERENCE_MODEL,
   getProviderLabel,
   getEffectiveProviderName,
+  stageHostedInferenceSourceSecretEnv,
   getNonInteractiveProvider,
   getNonInteractiveModel,
   getRequestedProviderHint,