From b636c0017995cda5f162afde5670f05a2f2f0018 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Sat, 13 Jun 2026 00:30:08 -0700
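Subject: [PATCH 1/3] fix(e2e): support compatible credential migration

Add a hosted-inference E2E fixture that resolves the credential,
provider, endpoint, and model for the credential-migration live test.
When NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE=1, the key is staged
as COMPATIBLE_API_KEY for the custom provider and the nvapi- prefix
check is skipped; otherwise the NVIDIA_INFERENCE_API_KEY path and its
prefix check are kept. Both E2E workflows set the flag.

Signed-off-by: Carlos Villela <cvillela@nvidia.com>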
---
 .github/workflows/e2e-vitest-scenarios.yaml   | 12 ++-
 .github/workflows/nightly-e2e.yaml            | 14 ++-
 .../e2e-scenario/fixtures/hosted-inference.ts | 86 +++++++++++++++++++
 .../live/credential-migration.test.ts         | 53 +++++++-----
 .../support-tests/hosted-inference.test.ts    | 72 ++++++++++++++++
 5 files changed, 208 insertions(+), 29 deletions(-)
 create mode 100644 test/e2e-scenario/fixtures/hosted-inference.ts
 create mode 100644 test/e2e-scenario/support-tests/hosted-inference.test.ts

diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml
index c3c3afa3e1..c9a1027707 100644
--- a/.github/workflows/e2e-vitest-scenarios.yaml
+++ b/.github/workflows/e2e-vitest-scenarios.yaml
@@ -982,11 +982,17 @@ jobs:
 
       - name: Run credential migration live test
         # Migrated from test/e2e/test-credential-migration.sh. This live test
-        # needs NVIDIA_INFERENCE_API_KEY only as the staged legacy credential value; it
-        # preserves the default NVIDIA provider/key migration path while
-        # pinning a lower-quota catalog model in the test fixture.
+        # stages the hosted inference credential through legacy credentials.json.
+        # CI uses the compatible-provider route so repository-scoped E2E
+        # credentials do not need an nvapi- prefix.
         env:
           NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: |
           set -euo pipefail
           npx vitest run --project e2e-scenarios-live \
diff --git a/.github/workflows/nightly-e2e.yaml b/.github/workflows/nightly-e2e.yaml
index 284f3ec72a..07e5fc054d 100644
--- a/.github/workflows/nightly-e2e.yaml
+++ b/.github/workflows/nightly-e2e.yaml
@@ -1544,11 +1544,19 @@ jobs:
 
       - name: Run credential migration Vitest test
         # Trusted-code boundary: this job runs the checked-out target ref with
-        # NVIDIA_INFERENCE_API_KEY because it validates live credential migration into the
-        # OpenShell gateway. Keep checkout credentials disabled, do not pass
-        # GITHUB_TOKEN, and rely on reviewed/maintainer-dispatched refs.
+        # the hosted inference credential because it validates live credential
+        # migration into the OpenShell gateway. CI uses the compatible-provider
+        # route so repository-scoped E2E credentials do not need an nvapi-
+        # prefix. Keep checkout credentials disabled, do not pass GITHUB_TOKEN,
+        # and rely on reviewed/maintainer-dispatched refs.
         env:
           NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
+          NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE: "1"
+          NEMOCLAW_PROVIDER: custom
+          NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
+          NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
+          COMPATIBLE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/credential-migration
           NEMOCLAW_RUN_E2E_SCENARIOS: "1"
           NEMOCLAW_SANDBOX_NAME: "e2e-cred-migration"
diff --git a/test/e2e-scenario/fixtures/hosted-inference.ts b/test/e2e-scenario/fixtures/hosted-inference.ts
new file mode 100644
index 0000000000..49d77822d8
--- /dev/null
+++ b/test/e2e-scenario/fixtures/hosted-inference.ts
@@ -0,0 +1,86 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+const COMPATIBLE_INFERENCE_FLAG = "NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE";
+const DEFAULT_COMPATIBLE_BASE_URL = "https://inference-api.nvidia.com/v1";
+const DEFAULT_COMPATIBLE_MODEL = "nvidia/nvidia/nemotron-3-super-v3";
+
+export interface HostedInferenceSecrets {
+  optional(name: string): string | undefined;
+  required(name: string): string;
+}
+
+export interface HostedInferenceOptions {
+  nvidiaSecretName?: "NVIDIA_INFERENCE_API_KEY" | "NVIDIA_API_KEY";
+  nvidiaModel?: string;
+}
+
+export interface HostedInferenceConfig {
+  apiKey: string;
+  credentialEnv: "NVIDIA_INFERENCE_API_KEY" | "NVIDIA_API_KEY" | "COMPATIBLE_API_KEY";
+  provider: "nvidia" | "compatible";
+  providerName: "nvidia-prod" | "compatible-endpoint";
+  env: NodeJS.ProcessEnv;
+  model: string;
+  endpointUrl: string;
+  contractLabel: string;
+}
+
+export function usingCiCompatibleInference(env: NodeJS.ProcessEnv = process.env): boolean {
+  return env[COMPATIBLE_INFERENCE_FLAG] === "1";
+}
+
+export function requireHostedInferenceConfig(
+  secrets: HostedInferenceSecrets,
+  env: NodeJS.ProcessEnv = process.env,
+  options: HostedInferenceOptions = {},
+): HostedInferenceConfig {
+  const nvidiaSecretName = options.nvidiaSecretName ?? "NVIDIA_INFERENCE_API_KEY";
+
+  if (usingCiCompatibleInference(env)) {
+    const apiKey =
+      secrets.optional("COMPATIBLE_API_KEY") ??
+      secrets.optional("NVIDIA_INFERENCE_API_KEY") ??
+      secrets.required(nvidiaSecretName);
+    const endpointUrl = env.NEMOCLAW_ENDPOINT_URL || DEFAULT_COMPATIBLE_BASE_URL;
+    const model = env.NEMOCLAW_MODEL || env.NEMOCLAW_COMPAT_MODEL || DEFAULT_COMPATIBLE_MODEL;
+    return {
+      apiKey,
+      credentialEnv: "COMPATIBLE_API_KEY",
+      provider: "compatible",
+      providerName: "compatible-endpoint",
+      endpointUrl,
+      model,
+      env: {
+        NEMOCLAW_PROVIDER: "custom",
+        NEMOCLAW_ENDPOINT_URL: endpointUrl,
+        NEMOCLAW_MODEL: model,
+        NEMOCLAW_COMPAT_MODEL: model,
+        COMPATIBLE_API_KEY: apiKey,
+      },
+      contractLabel: "CI compatible inference credential is present",
+    };
+  }
+
+  const apiKey = secrets.required(nvidiaSecretName);
+  if (!apiKey.startsWith("nvapi-")) {
+    throw new Error(
+      `${nvidiaSecretName} must start with nvapi- unless ${COMPATIBLE_INFERENCE_FLAG}=1 is set`,
+    );
+  }
+
+  const model = options.nvidiaModel ?? env.NEMOCLAW_MODEL ?? "";
+  return {
+    apiKey,
+    credentialEnv: nvidiaSecretName,
+    provider: "nvidia",
+    providerName: "nvidia-prod",
+    endpointUrl: DEFAULT_COMPATIBLE_BASE_URL,
+    model,
+    env: {
+      [nvidiaSecretName]: apiKey,
+      ...(model ? { NEMOCLAW_MODEL: model } : {}),
+    },
+    contractLabel: `${nvidiaSecretName} is present and nvapi-prefixed`,
+  };
+}
diff --git a/test/e2e-scenario/live/credential-migration.test.ts b/test/e2e-scenario/live/credential-migration.test.ts
index 3629bc339e..a2be17a895 100644
--- a/test/e2e-scenario/live/credential-migration.test.ts
+++ b/test/e2e-scenario/live/credential-migration.test.ts
@@ -9,6 +9,7 @@ import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
 import type { HostCliClient } from "../fixtures/clients/host.ts";
 import { validateSandboxName } from "../fixtures/clients/sandbox.ts";
 import { expect, test } from "../fixtures/e2e-test.ts";
+import { requireHostedInferenceConfig } from "../fixtures/hosted-inference.ts";
 import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
 
 // Migrated from test/e2e/test-credential-migration.sh. This is a focused live
@@ -17,10 +18,9 @@ import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
 // a successful real onboard registers the migrated value with the OpenShell
 // gateway, the plaintext file is removed after success, credentials list reads
 // from the gateway, and secure unlink removes a planted symlink without touching
-// its target. The live onboard intentionally follows the legacy default NVIDIA
-// Endpoints path: NVIDIA_INFERENCE_API_KEY is present only in the legacy file, absent from
-// the onboard child env, and must migrate into the nvidia-prod gateway provider.
-// No registry, migration ledger, or shared helper is introduced.
+// its target. By default the live onboard follows the legacy NVIDIA Endpoints
+// path. When CI opts into the compatible-provider secret path, the same
+// migration contract runs against COMPATIBLE_API_KEY and compatible-endpoint.
 
 const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
 const CLI_ENTRYPOINT = path.join(REPO_ROOT, "bin", "nemoclaw.js");
@@ -107,7 +107,10 @@ async function cleanupCredentialMigrationState(host: HostCliClient, home: string
     host.command("node", [CLI_ENTRYPOINT, SANDBOX_NAME, "destroy", "--yes"], {
       artifactName: "cleanup-nemoclaw-destroy",
       env,
-      redactionValues: [process.env.NVIDIA_INFERENCE_API_KEY ?? ""],
+      redactionValues: [
+        process.env.NVIDIA_INFERENCE_API_KEY ?? "",
+        process.env.COMPATIBLE_API_KEY ?? "",
+      ],
       timeoutMs: 120_000,
     }),
   );
@@ -138,15 +141,18 @@ runCredentialMigrationTest(
   "credential migration stages legacy file into gateway and removes plaintext safely",
   { timeout: ONBOARD_TIMEOUT_MS + INSTALL_TIMEOUT_MS + 5 * 60_000 },
   async ({ artifacts, cleanup, host, secrets, skip }) => {
-    // Use the existing nightly secret as the legacy NVIDIA credential. The
-    // onboard child env below deliberately does not receive NVIDIA_INFERENCE_API_KEY, so
+    // Use the existing nightly secret as the legacy provider credential. The
+    // onboard child env below deliberately does not receive that credential, so
     // the only source is ~/.nemoclaw/credentials.json — matching the retired
     // shell lane's migration contract.
-    const migratedCredentialValue = secrets.required("NVIDIA_INFERENCE_API_KEY");
-    expect(
-      migratedCredentialValue.startsWith("nvapi-"),
-      "NVIDIA_INFERENCE_API_KEY must start with nvapi-",
-    ).toBe(true);
+    const hostedInference = requireHostedInferenceConfig(secrets, process.env, {
+      nvidiaModel: CREDENTIAL_MIGRATION_MODEL,
+    });
+    const migratedCredentialValue = hostedInference.apiKey;
+    const {
+      [hostedInference.credentialEnv]: _omittedCredential,
+      ...hostedInferenceEnvWithoutCredential
+    } = hostedInference.env;
     expect(fs.existsSync(CLI_ENTRYPOINT), "bin/nemoclaw.js missing").toBe(true);
     expect(
       fs.existsSync(DIST_CREDENTIAL_STORE),
@@ -183,8 +189,8 @@ runCredentialMigrationTest(
       sandboxName: SANDBOX_NAME,
       contracts: [
         "legacy credentials.json stages allowlisted provider keys into onboard env",
-        "successful default NVIDIA Endpoints onboard registers the migrated value with OpenShell gateway",
-        "onboard keeps the default NVIDIA provider/key/endpoint/policy path while pinning a low-quota catalog model",
+        `successful onboard registers the migrated value with the ${hostedInference.providerName} OpenShell gateway provider`,
+        `onboard uses the ${hostedInference.provider} provider/key/endpoint/policy path`,
         "successful onboard removes plaintext credentials.json",
         "tampered non-credential keys do not become gateway providers",
         "credentials list reads providers from the gateway, not disk",
@@ -201,7 +207,7 @@ runCredentialMigrationTest(
       legacyFile,
       JSON.stringify(
         {
-          NVIDIA_INFERENCE_API_KEY: migratedCredentialValue,
+          [hostedInference.credentialEnv]: migratedCredentialValue,
           OPENSHELL_GATEWAY: "evil-gw-from-tampered-file",
           NODE_OPTIONS: "--require=/tmp/evil.js",
         },
@@ -214,11 +220,9 @@ runCredentialMigrationTest(
     const onboard = await host.command("node", [CLI_ENTRYPOINT, "onboard", "--non-interactive"], {
       artifactName: "onboard-from-legacy-credentials",
       env: testEnv(home, {
+        ...hostedInferenceEnvWithoutCredential,
         NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
         NEMOCLAW_RECREATE_SANDBOX: "1",
-        // Keep the default NVIDIA provider/key/endpoint/policy path while
-        // avoiding the high-quota default Nemotron validation model.
-        NEMOCLAW_MODEL: CREDENTIAL_MIGRATION_MODEL,
       }),
       redactionValues: [migratedCredentialValue],
       timeoutMs: ONBOARD_TIMEOUT_MS,
@@ -247,9 +251,10 @@ runCredentialMigrationTest(
       .split(/\r?\n/)
       .map((line) => line.trim())
       .filter((line) => /^[a-zA-Z][a-zA-Z0-9_-]*$/.test(line));
-    expect(providerNames, `expected migrated NVIDIA provider\n${providersText}`).toContain(
-      "nvidia-prod",
-    );
+    expect(
+      providerNames,
+      `expected migrated ${hostedInference.providerName} provider\n${providersText}`,
+    ).toContain(hostedInference.providerName);
     expect(providerNames).not.toContain("OPENSHELL_GATEWAY");
     expect(providerNames).not.toContain("NODE_OPTIONS");
 
@@ -292,7 +297,9 @@ runCredentialMigrationTest(
     await artifacts.writeJson("scenario-result.json", {
       id: "credential-migration",
       sandboxName: SANDBOX_NAME,
-      model: CREDENTIAL_MIGRATION_MODEL,
+      model: hostedInference.model || CREDENTIAL_MIGRATION_MODEL,
+      provider: hostedInference.providerName,
+      credentialEnv: hostedInference.credentialEnv,
       providerNames,
       assertions: {
         onboardSucceeded: onboard.exitCode === 0,
@@ -300,7 +307,7 @@ runCredentialMigrationTest(
           "Staged 1 legacy credential(s) for migration to the OpenShell gateway.",
         ),
         legacyFileRemovedAfterOnboard: !fs.existsSync(legacyFile),
-        migratedNvidiaProviderRegistered: providerNames.includes("nvidia-prod"),
+        migratedProviderRegistered: providerNames.includes(hostedInference.providerName),
         tamperedKeysExcluded:
           !providerNames.includes("OPENSHELL_GATEWAY") && !providerNames.includes("NODE_OPTIONS"),
         credentialsListReadsGateway: credentialsText.includes(
diff --git a/test/e2e-scenario/support-tests/hosted-inference.test.ts b/test/e2e-scenario/support-tests/hosted-inference.test.ts
new file mode 100644
index 0000000000..d1602de9a6
--- /dev/null
+++ b/test/e2e-scenario/support-tests/hosted-inference.test.ts
@@ -0,0 +1,72 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { describe, expect, it } from "vitest";
+
+import {
+  requireHostedInferenceConfig,
+  usingCiCompatibleInference,
+} from "../fixtures/hosted-inference.ts";
+
+function secrets(values: Record<string, string | undefined>) {
+  return {
+    optional: (name: string) => values[name],
+    required: (name: string) => {
+      const value = values[name];
+      if (!value) throw new Error(`missing ${name}`);
+      return value;
+    },
+  };
+}
+
+describe("hosted inference E2E config", () => {
+  it("requires an nvapi-prefixed NVIDIA key by default", () => {
+    const cfg = requireHostedInferenceConfig(
+      secrets({ NVIDIA_INFERENCE_API_KEY: "nvapi-test-key" }),
+      {},
+    );
+
+    expect(cfg.provider).toBe("nvidia");
+    expect(cfg.credentialEnv).toBe("NVIDIA_INFERENCE_API_KEY");
+    expect(cfg.env.NVIDIA_INFERENCE_API_KEY).toBe("nvapi-test-key");
+  });
+
+  it("rejects a non-NVIDIA key unless the compatible-provider flag is set", () => {
+    expect(() =>
+      requireHostedInferenceConfig(secrets({ NVIDIA_INFERENCE_API_KEY: "sk-compatible-key" }), {}),
+    ).toThrow(/must start with nvapi-/);
+  });
+
+  it("accepts a compatible-provider credential when CI enables the compatibility flag", () => {
+    const cfg = requireHostedInferenceConfig(
+      secrets({
+        COMPATIBLE_API_KEY: "sk-compatible-key",
+        NVIDIA_INFERENCE_API_KEY: "sk-compatible-key",
+      }),
+      { NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE: "1" },
+    );
+
+    expect(cfg.provider).toBe("compatible");
+    expect(cfg.credentialEnv).toBe("COMPATIBLE_API_KEY");
+    expect(cfg.env).toMatchObject({
+      NEMOCLAW_PROVIDER: "custom",
+      NEMOCLAW_ENDPOINT_URL: "https://inference-api.nvidia.com/v1",
+      NEMOCLAW_MODEL: "nvidia/nvidia/nemotron-3-super-v3",
+      COMPATIBLE_API_KEY: "sk-compatible-key",
+    });
+  });
+
+  it("falls back to the configured NVIDIA secret name for reusable workflow compatibility", () => {
+    const cfg = requireHostedInferenceConfig(
+      secrets({ NVIDIA_API_KEY: "provider-key" }),
+      { NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE: "1" },
+      { nvidiaSecretName: "NVIDIA_API_KEY" },
+    );
+
+    expect(cfg.provider).toBe("compatible");
+    expect(cfg.apiKey).toBe("provider-key");
+    expect(usingCiCompatibleInference({ NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE: "1" })).toBe(
+      true,
+    );
+  });
+});

From d7f39ba01152017f78926fb6eebecd21232a316d Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Sat, 13 Jun 2026 00:49:49 -0700
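Subject: [PATCH 2/3] fix(e2e): simplify hosted inference secrets

Drop the optional() secret lookup and the nvidiaSecretName option from
the hosted-inference fixture. The compatible route now requires
COMPATIBLE_API_KEY and the default route requires
NVIDIA_INFERENCE_API_KEY, with no fallback chain between them. The
support test for the NVIDIA_API_KEY fallback is removed accordingly.

Signed-off-by: Carlos Villela <cvillela@nvidia.com>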
---
 .../e2e-scenario/fixtures/hosted-inference.ts | 21 +++++++------------
 .../support-tests/hosted-inference.test.ts    | 21 +------------------
 2 files changed, 8 insertions(+), 34 deletions(-)

diff --git a/test/e2e-scenario/fixtures/hosted-inference.ts b/test/e2e-scenario/fixtures/hosted-inference.ts
index 49d77822d8..ba8ce85a87 100644
--- a/test/e2e-scenario/fixtures/hosted-inference.ts
+++ b/test/e2e-scenario/fixtures/hosted-inference.ts
@@ -6,18 +6,16 @@ const DEFAULT_COMPATIBLE_BASE_URL = "https://inference-api.nvidia.com/v1";
 const DEFAULT_COMPATIBLE_MODEL = "nvidia/nvidia/nemotron-3-super-v3";
 
 export interface HostedInferenceSecrets {
-  optional(name: string): string | undefined;
   required(name: string): string;
 }
 
 export interface HostedInferenceOptions {
-  nvidiaSecretName?: "NVIDIA_INFERENCE_API_KEY" | "NVIDIA_API_KEY";
   nvidiaModel?: string;
 }
 
 export interface HostedInferenceConfig {
   apiKey: string;
-  credentialEnv: "NVIDIA_INFERENCE_API_KEY" | "NVIDIA_API_KEY" | "COMPATIBLE_API_KEY";
+  credentialEnv: "NVIDIA_INFERENCE_API_KEY" | "COMPATIBLE_API_KEY";
   provider: "nvidia" | "compatible";
   providerName: "nvidia-prod" | "compatible-endpoint";
   env: NodeJS.ProcessEnv;
@@ -35,13 +33,8 @@ export function requireHostedInferenceConfig(
   env: NodeJS.ProcessEnv = process.env,
   options: HostedInferenceOptions = {},
 ): HostedInferenceConfig {
-  const nvidiaSecretName = options.nvidiaSecretName ?? "NVIDIA_INFERENCE_API_KEY";
-
   if (usingCiCompatibleInference(env)) {
-    const apiKey =
-      secrets.optional("COMPATIBLE_API_KEY") ??
-      secrets.optional("NVIDIA_INFERENCE_API_KEY") ??
-      secrets.required(nvidiaSecretName);
+    const apiKey = secrets.required("COMPATIBLE_API_KEY");
     const endpointUrl = env.NEMOCLAW_ENDPOINT_URL || DEFAULT_COMPATIBLE_BASE_URL;
     const model = env.NEMOCLAW_MODEL || env.NEMOCLAW_COMPAT_MODEL || DEFAULT_COMPATIBLE_MODEL;
     return {
@@ -62,25 +55,25 @@ export function requireHostedInferenceConfig(
     };
   }
 
-  const apiKey = secrets.required(nvidiaSecretName);
+  const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
   if (!apiKey.startsWith("nvapi-")) {
     throw new Error(
-      `${nvidiaSecretName} must start with nvapi- unless ${COMPATIBLE_INFERENCE_FLAG}=1 is set`,
+      `NVIDIA_INFERENCE_API_KEY must start with nvapi- unless ${COMPATIBLE_INFERENCE_FLAG}=1 is set`,
     );
   }
 
   const model = options.nvidiaModel ?? env.NEMOCLAW_MODEL ?? "";
   return {
     apiKey,
-    credentialEnv: nvidiaSecretName,
+    credentialEnv: "NVIDIA_INFERENCE_API_KEY",
     provider: "nvidia",
     providerName: "nvidia-prod",
     endpointUrl: DEFAULT_COMPATIBLE_BASE_URL,
     model,
     env: {
-      [nvidiaSecretName]: apiKey,
+      NVIDIA_INFERENCE_API_KEY: apiKey,
       ...(model ? { NEMOCLAW_MODEL: model } : {}),
     },
-    contractLabel: `${nvidiaSecretName} is present and nvapi-prefixed`,
+    contractLabel: "NVIDIA_INFERENCE_API_KEY is present and nvapi-prefixed",
   };
 }
diff --git a/test/e2e-scenario/support-tests/hosted-inference.test.ts b/test/e2e-scenario/support-tests/hosted-inference.test.ts
index d1602de9a6..8f008ae14c 100644
--- a/test/e2e-scenario/support-tests/hosted-inference.test.ts
+++ b/test/e2e-scenario/support-tests/hosted-inference.test.ts
@@ -3,14 +3,10 @@
 
 import { describe, expect, it } from "vitest";
 
-import {
-  requireHostedInferenceConfig,
-  usingCiCompatibleInference,
-} from "../fixtures/hosted-inference.ts";
+import { requireHostedInferenceConfig } from "../fixtures/hosted-inference.ts";
 
 function secrets(values: Record<string, string | undefined>) {
   return {
-    optional: (name: string) => values[name],
     required: (name: string) => {
       const value = values[name];
       if (!value) throw new Error(`missing ${name}`);
@@ -41,7 +37,6 @@ describe("hosted inference E2E config", () => {
     const cfg = requireHostedInferenceConfig(
       secrets({
         COMPATIBLE_API_KEY: "sk-compatible-key",
-        NVIDIA_INFERENCE_API_KEY: "sk-compatible-key",
       }),
       { NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE: "1" },
     );
@@ -55,18 +50,4 @@ describe("hosted inference E2E config", () => {
       COMPATIBLE_API_KEY: "sk-compatible-key",
     });
   });
-
-  it("falls back to the configured NVIDIA secret name for reusable workflow compatibility", () => {
-    const cfg = requireHostedInferenceConfig(
-      secrets({ NVIDIA_API_KEY: "provider-key" }),
-      { NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE: "1" },
-      { nvidiaSecretName: "NVIDIA_API_KEY" },
-    );
-
-    expect(cfg.provider).toBe("compatible");
-    expect(cfg.apiKey).toBe("provider-key");
-    expect(usingCiCompatibleInference({ NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE: "1" })).toBe(
-      true,
-    );
-  });
 });

From 39b9d6f8f41990a9a1fda0eb15791120da98d9c0 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Sat, 13 Jun 2026 01:00:04 -0700
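Subject: [PATCH 3/3] fix(e2e): route hosted inference as compatible

The repo-scoped NVIDIA_INFERENCE_API_KEY secret belongs to the
OpenAI-compatible inference-api.nvidia.com service, not NVIDIA
Endpoints. Remove the NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE
opt-in and the nvapi- prefix check: the fixture now always reads
NVIDIA_INFERENCE_API_KEY and stages it as COMPATIBLE_API_KEY for the
custom provider, and the workflows stop exporting COMPATIBLE_API_KEY.

The model is resolved in this order: NEMOCLAW_MODEL,
NEMOCLAW_COMPAT_MODEL, options.model, then the built-in default, so a
model set by the workflow environment overrides the one the test
passes in.

Signed-off-by: Carlos Villela <cvillela@nvidia.com>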
---
 .github/workflows/e2e-vitest-scenarios.yaml   |  9 +--
 .github/workflows/nightly-e2e.yaml            | 13 ++--
 .../e2e-scenario/fixtures/hosted-inference.ts | 77 +++++++------------
 .../live/credential-migration.test.ts         | 12 +--
 .../support-tests/hosted-inference.test.ts    | 39 ++++++----
 5 files changed, 68 insertions(+), 82 deletions(-)

diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml
index c9a1027707..b8a7c6816a 100644
--- a/.github/workflows/e2e-vitest-scenarios.yaml
+++ b/.github/workflows/e2e-vitest-scenarios.yaml
@@ -982,17 +982,16 @@ jobs:
 
       - name: Run credential migration live test
         # Migrated from test/e2e/test-credential-migration.sh. This live test
-        # stages the hosted inference credential through legacy credentials.json.
-        # CI uses the compatible-provider route so repository-scoped E2E
-        # credentials do not need an nvapi- prefix.
+        # stages NVIDIA_INFERENCE_API_KEY through legacy credentials.json as the
+        # custom provider's COMPATIBLE_API_KEY. The hosted service behind this
+        # repo-scoped secret is inference-api.nvidia.com, not Build/NVIDIA
+        # Endpoints, so the test must exercise the compatible-provider route.
         env:
           NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
-          NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE: "1"
           NEMOCLAW_PROVIDER: custom
           NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
           NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
           NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
-          COMPATIBLE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
         run: |
           set -euo pipefail
           npx vitest run --project e2e-scenarios-live \
diff --git a/.github/workflows/nightly-e2e.yaml b/.github/workflows/nightly-e2e.yaml
index 07e5fc054d..5ed70533a2 100644
--- a/.github/workflows/nightly-e2e.yaml
+++ b/.github/workflows/nightly-e2e.yaml
@@ -1544,19 +1544,18 @@ jobs:
 
       - name: Run credential migration Vitest test
         # Trusted-code boundary: this job runs the checked-out target ref with
-        # the hosted inference credential because it validates live credential
-        # migration into the OpenShell gateway. CI uses the compatible-provider
-        # route so repository-scoped E2E credentials do not need an nvapi-
-        # prefix. Keep checkout credentials disabled, do not pass GITHUB_TOKEN,
-        # and rely on reviewed/maintainer-dispatched refs.
+        # NVIDIA_INFERENCE_API_KEY because it validates live credential
+        # migration into the OpenShell gateway. The hosted service behind this
+        # repo-scoped secret is inference-api.nvidia.com, not Build/NVIDIA
+        # Endpoints, so the test stages it as the custom provider's
+        # COMPATIBLE_API_KEY. Keep checkout credentials disabled, do not pass
+        # GITHUB_TOKEN, and rely on reviewed/maintainer-dispatched refs.
         env:
           NVIDIA_INFERENCE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
-          NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE: "1"
           NEMOCLAW_PROVIDER: custom
           NEMOCLAW_ENDPOINT_URL: https://inference-api.nvidia.com/v1
           NEMOCLAW_MODEL: nvidia/nvidia/nemotron-3-super-v3
           NEMOCLAW_COMPAT_MODEL: nvidia/nvidia/nemotron-3-super-v3
-          COMPATIBLE_API_KEY: ${{ secrets.NVIDIA_INFERENCE_API_KEY }}
           E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/credential-migration
           NEMOCLAW_RUN_E2E_SCENARIOS: "1"
           NEMOCLAW_SANDBOX_NAME: "e2e-cred-migration"
diff --git a/test/e2e-scenario/fixtures/hosted-inference.ts b/test/e2e-scenario/fixtures/hosted-inference.ts
index ba8ce85a87..f46b446bf7 100644
--- a/test/e2e-scenario/fixtures/hosted-inference.ts
+++ b/test/e2e-scenario/fixtures/hosted-inference.ts
@@ -1,79 +1,60 @@
 // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-const COMPATIBLE_INFERENCE_FLAG = "NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE";
-const DEFAULT_COMPATIBLE_BASE_URL = "https://inference-api.nvidia.com/v1";
-const DEFAULT_COMPATIBLE_MODEL = "nvidia/nvidia/nemotron-3-super-v3";
+const HOSTED_INFERENCE_SECRET = "NVIDIA_INFERENCE_API_KEY";
+const HOSTED_INFERENCE_CREDENTIAL_ENV = "COMPATIBLE_API_KEY";
+const HOSTED_INFERENCE_PROVIDER = "custom";
+const HOSTED_INFERENCE_PROVIDER_NAME = "compatible-endpoint";
+const DEFAULT_HOSTED_INFERENCE_BASE_URL = "https://inference-api.nvidia.com/v1";
+const DEFAULT_HOSTED_INFERENCE_MODEL = "nvidia/nvidia/nemotron-3-super-v3";
 
 export interface HostedInferenceSecrets {
   required(name: string): string;
 }
 
 export interface HostedInferenceOptions {
-  nvidiaModel?: string;
+  model?: string;
 }
 
 export interface HostedInferenceConfig {
   apiKey: string;
-  credentialEnv: "NVIDIA_INFERENCE_API_KEY" | "COMPATIBLE_API_KEY";
-  provider: "nvidia" | "compatible";
-  providerName: "nvidia-prod" | "compatible-endpoint";
+  sourceSecretName: typeof HOSTED_INFERENCE_SECRET;
+  credentialEnv: typeof HOSTED_INFERENCE_CREDENTIAL_ENV;
+  provider: typeof HOSTED_INFERENCE_PROVIDER;
+  providerName: typeof HOSTED_INFERENCE_PROVIDER_NAME;
   env: NodeJS.ProcessEnv;
   model: string;
   endpointUrl: string;
   contractLabel: string;
 }
 
-export function usingCiCompatibleInference(env: NodeJS.ProcessEnv = process.env): boolean {
-  return env[COMPATIBLE_INFERENCE_FLAG] === "1";
-}
-
 export function requireHostedInferenceConfig(
   secrets: HostedInferenceSecrets,
   env: NodeJS.ProcessEnv = process.env,
   options: HostedInferenceOptions = {},
 ): HostedInferenceConfig {
-  if (usingCiCompatibleInference(env)) {
-    const apiKey = secrets.required("COMPATIBLE_API_KEY");
-    const endpointUrl = env.NEMOCLAW_ENDPOINT_URL || DEFAULT_COMPATIBLE_BASE_URL;
-    const model = env.NEMOCLAW_MODEL || env.NEMOCLAW_COMPAT_MODEL || DEFAULT_COMPATIBLE_MODEL;
-    return {
-      apiKey,
-      credentialEnv: "COMPATIBLE_API_KEY",
-      provider: "compatible",
-      providerName: "compatible-endpoint",
-      endpointUrl,
-      model,
-      env: {
-        NEMOCLAW_PROVIDER: "custom",
-        NEMOCLAW_ENDPOINT_URL: endpointUrl,
-        NEMOCLAW_MODEL: model,
-        NEMOCLAW_COMPAT_MODEL: model,
-        COMPATIBLE_API_KEY: apiKey,
-      },
-      contractLabel: "CI compatible inference credential is present",
-    };
-  }
-
-  const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
-  if (!apiKey.startsWith("nvapi-")) {
-    throw new Error(
-      `NVIDIA_INFERENCE_API_KEY must start with nvapi- unless ${COMPATIBLE_INFERENCE_FLAG}=1 is set`,
-    );
-  }
-
-  const model = options.nvidiaModel ?? env.NEMOCLAW_MODEL ?? "";
+  const apiKey = secrets.required(HOSTED_INFERENCE_SECRET);
+  const endpointUrl = env.NEMOCLAW_ENDPOINT_URL || DEFAULT_HOSTED_INFERENCE_BASE_URL;
+  const model =
+    env.NEMOCLAW_MODEL ||
+    env.NEMOCLAW_COMPAT_MODEL ||
+    options.model ||
+    DEFAULT_HOSTED_INFERENCE_MODEL;
   return {
     apiKey,
-    credentialEnv: "NVIDIA_INFERENCE_API_KEY",
-    provider: "nvidia",
-    providerName: "nvidia-prod",
-    endpointUrl: DEFAULT_COMPATIBLE_BASE_URL,
+    sourceSecretName: HOSTED_INFERENCE_SECRET,
+    credentialEnv: HOSTED_INFERENCE_CREDENTIAL_ENV,
+    provider: HOSTED_INFERENCE_PROVIDER,
+    providerName: HOSTED_INFERENCE_PROVIDER_NAME,
+    endpointUrl,
     model,
     env: {
-      NVIDIA_INFERENCE_API_KEY: apiKey,
-      ...(model ? { NEMOCLAW_MODEL: model } : {}),
+      NEMOCLAW_PROVIDER: HOSTED_INFERENCE_PROVIDER,
+      NEMOCLAW_ENDPOINT_URL: endpointUrl,
+      NEMOCLAW_MODEL: model,
+      NEMOCLAW_COMPAT_MODEL: model,
+      [HOSTED_INFERENCE_CREDENTIAL_ENV]: apiKey,
     },
-    contractLabel: "NVIDIA_INFERENCE_API_KEY is present and nvapi-prefixed",
+    contractLabel: "NVIDIA_INFERENCE_API_KEY is staged as the compatible endpoint credential",
   };
 }
diff --git a/test/e2e-scenario/live/credential-migration.test.ts b/test/e2e-scenario/live/credential-migration.test.ts
index a2be17a895..4068840372 100644
--- a/test/e2e-scenario/live/credential-migration.test.ts
+++ b/test/e2e-scenario/live/credential-migration.test.ts
@@ -18,9 +18,10 @@ import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
 // a successful real onboard registers the migrated value with the OpenShell
 // gateway, the plaintext file is removed after success, credentials list reads
 // from the gateway, and secure unlink removes a planted symlink without touching
-// its target. By default the live onboard follows the legacy NVIDIA Endpoints
-// path. When CI opts into the compatible-provider secret path, the same
-// migration contract runs against COMPATIBLE_API_KEY and compatible-endpoint.
+// its target. The repository secret is named NVIDIA_INFERENCE_API_KEY, but the
+// hosted E2E service is the OpenAI-compatible inference-api.nvidia.com endpoint,
+// so the migration contract stages that value as COMPATIBLE_API_KEY and expects
+// the compatible-endpoint gateway provider.
 
 const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
 const CLI_ENTRYPOINT = path.join(REPO_ROOT, "bin", "nemoclaw.js");
@@ -146,7 +147,7 @@ runCredentialMigrationTest(
     // the only source is ~/.nemoclaw/credentials.json — matching the retired
     // shell lane's migration contract.
     const hostedInference = requireHostedInferenceConfig(secrets, process.env, {
-      nvidiaModel: CREDENTIAL_MIGRATION_MODEL,
+      model: CREDENTIAL_MIGRATION_MODEL,
     });
     const migratedCredentialValue = hostedInference.apiKey;
     const {
@@ -190,7 +191,8 @@ runCredentialMigrationTest(
       contracts: [
         "legacy credentials.json stages allowlisted provider keys into onboard env",
         `successful onboard registers the migrated value with the ${hostedInference.providerName} OpenShell gateway provider`,
-        `onboard uses the ${hostedInference.provider} provider/key/endpoint/policy path`,
+        `${hostedInference.sourceSecretName} is migrated into the ${hostedInference.credentialEnv} provider credential`,
+        `onboard uses the ${hostedInference.provider} provider and ${hostedInference.endpointUrl} endpoint path`,
         "successful onboard removes plaintext credentials.json",
         "tampered non-credential keys do not become gateway providers",
         "credentials list reads providers from the gateway, not disk",
diff --git a/test/e2e-scenario/support-tests/hosted-inference.test.ts b/test/e2e-scenario/support-tests/hosted-inference.test.ts
index 8f008ae14c..eaa2a60bc4 100644
--- a/test/e2e-scenario/support-tests/hosted-inference.test.ts
+++ b/test/e2e-scenario/support-tests/hosted-inference.test.ts
@@ -16,38 +16,43 @@ function secrets(values: Record<string, string | undefined>) {
 }
 
 describe("hosted inference E2E config", () => {
-  it("requires an nvapi-prefixed NVIDIA key by default", () => {
+  it("uses NVIDIA_INFERENCE_API_KEY as the hosted compatible endpoint source secret", () => {
     const cfg = requireHostedInferenceConfig(
-      secrets({ NVIDIA_INFERENCE_API_KEY: "nvapi-test-key" }),
+      secrets({ NVIDIA_INFERENCE_API_KEY: "repo-hosted-key" }),
       {},
     );
 
-    expect(cfg.provider).toBe("nvidia");
-    expect(cfg.credentialEnv).toBe("NVIDIA_INFERENCE_API_KEY");
-    expect(cfg.env.NVIDIA_INFERENCE_API_KEY).toBe("nvapi-test-key");
-  });
-
-  it("rejects a non-NVIDIA key unless the compatible-provider flag is set", () => {
-    expect(() =>
-      requireHostedInferenceConfig(secrets({ NVIDIA_INFERENCE_API_KEY: "sk-compatible-key" }), {}),
-    ).toThrow(/must start with nvapi-/);
+    expect(cfg.sourceSecretName).toBe("NVIDIA_INFERENCE_API_KEY");
+    expect(cfg.provider).toBe("custom");
+    expect(cfg.providerName).toBe("compatible-endpoint");
+    expect(cfg.credentialEnv).toBe("COMPATIBLE_API_KEY");
+    expect(cfg.env.COMPATIBLE_API_KEY).toBe("repo-hosted-key");
   });
 
-  it("accepts a compatible-provider credential when CI enables the compatibility flag", () => {
+  it("does not require an nvapi-prefixed source secret", () => {
     const cfg = requireHostedInferenceConfig(
       secrets({
-        COMPATIBLE_API_KEY: "sk-compatible-key",
+        NVIDIA_INFERENCE_API_KEY: "sk-compatible-key",
       }),
-      { NEMOCLAW_E2E_USE_NVIDIA_SECRET_AS_COMPATIBLE: "1" },
+      {},
     );
 
-    expect(cfg.provider).toBe("compatible");
+    expect(cfg.apiKey).toBe("sk-compatible-key");
     expect(cfg.credentialEnv).toBe("COMPATIBLE_API_KEY");
+  });
+
+  it("configures the custom provider route for inference-api.nvidia.com", () => {
+    const cfg = requireHostedInferenceConfig(
+      secrets({ NVIDIA_INFERENCE_API_KEY: "repo-hosted-key" }),
+      { NEMOCLAW_MODEL: "nvidia/custom-model" },
+    );
+
     expect(cfg.env).toMatchObject({
       NEMOCLAW_PROVIDER: "custom",
       NEMOCLAW_ENDPOINT_URL: "https://inference-api.nvidia.com/v1",
-      NEMOCLAW_MODEL: "nvidia/nvidia/nemotron-3-super-v3",
-      COMPATIBLE_API_KEY: "sk-compatible-key",
+      NEMOCLAW_MODEL: "nvidia/custom-model",
+      NEMOCLAW_COMPAT_MODEL: "nvidia/custom-model",
+      COMPATIBLE_API_KEY: "repo-hosted-key",
     });
   });
 });