From b35230dd366f460bd739dda3bf392415157b731c Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Mon, 15 Jun 2026 21:54:01 -0700
Subject: [PATCH 1/6] test(e2e): migrate GPU double onboard to Vitest

Signed-off-by: Carlos Villela <cvillela@nvidia.com>

From f435eb5e9b9d2bd6d82252062b884d6826d66ec9 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Tue, 16 Jun 2026 08:40:45 -0700
Subject: [PATCH 2/6] test(e2e): add GPU double onboard Vitest migration

---
 .github/workflows/e2e-vitest-scenarios.yaml   |  66 +++++
 .../live/gpu-double-onboard.test.ts           | 264 ++++++++++++++++++
 2 files changed, 330 insertions(+)
 create mode 100644 test/e2e-scenario/live/gpu-double-onboard.test.ts

diff --git a/.github/workflows/e2e-vitest-scenarios.yaml b/.github/workflows/e2e-vitest-scenarios.yaml
index 4b5d9b2d3f..7ce28e1c64 100644
--- a/.github/workflows/e2e-vitest-scenarios.yaml
+++ b/.github/workflows/e2e-vitest-scenarios.yaml
@@ -1784,6 +1784,71 @@ jobs:
           if-no-files-found: ignore
           retention-days: 14
 
+
+  gpu-double-onboard-vitest: # free-standing GPU job for test/e2e-scenario/live/gpu-double-onboard.test.ts
+    needs: generate-matrix
+    if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',gpu-double-onboard-vitest,') || contains(format(',{0},', inputs.scenarios), ',gpu-double-onboard,') }}
+    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
+    timeout-minutes: 100 # above the 90-minute per-test timeout (LIVE_TIMEOUT_MS) in the test file
+    env:
+      FREE_STANDING_VITEST_JOB: "1"
+      FREE_STANDING_SCENARIO_ID: "gpu-double-onboard"
+      E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/gpu-double-onboard
+      NEMOCLAW_CLI_BIN: ${{ github.workspace }}/bin/nemoclaw.js
+      NEMOCLAW_RUN_E2E_SCENARIOS: "1"
+      NEMOCLAW_NON_INTERACTIVE: "1"
+      NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1"
+      NEMOCLAW_SANDBOX_NAME: "e2e-gpu-double-onboard-vitest" # same value the test falls back to
+      NEMOCLAW_PROVIDER: "ollama"
+      NEMOCLAW_OLLAMA_PROXY_PORT: "11435" # same port the test falls back to
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+        with:
+          persist-credentials: false
+
+      - name: Set up Node
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.0.0
+        with:
+          node-version: 22
+          cache: npm
+
+      - name: Install root dependencies
+        run: npm ci --ignore-scripts
+
+      - name: Build CLI
+        run: npm run build:cli
+
+      - name: Install OpenShell CLI
+        run: bash scripts/install-openshell.sh
+
+      - name: Run gpu-double-onboard live Vitest test
+        run: |
+          set -euo pipefail
+          export PATH="$HOME/.local/bin:$HOME/.npm-global/bin:$PATH"
+          if command -v openshell >/dev/null 2>&1; then
+            OPENSHELL_BIN="$(command -v openshell)"
+          elif [ -x "$HOME/.local/bin/openshell" ]; then
+            OPENSHELL_BIN="$HOME/.local/bin/openshell"
+          else
+            echo "::error::OpenShell CLI not found after install"
+            exit 1
+          fi
+          export OPENSHELL_BIN
+          "$OPENSHELL_BIN" --version
+          npx vitest run --project e2e-scenarios-live \
+            test/e2e-scenario/live/gpu-double-onboard.test.ts \
+            --silent=false --reporter=default
+
+      - name: Upload gpu-double-onboard artifacts
+        if: always() # upload artifacts even when the test step failed
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: e2e-vitest-scenarios-gpu-double-onboard
+          path: e2e-artifacts/vitest/gpu-double-onboard/
+          include-hidden-files: false
+          if-no-files-found: ignore # a run that wrote no artifacts must not fail the upload step
+          retention-days: 14
+
   token-rotation-vitest:
     needs: generate-matrix
     if: ${{ (inputs.jobs == '' && inputs.scenarios == '') || contains(format(',{0},', inputs.jobs), ',token-rotation-vitest,') || contains(format(',{0},', inputs.scenarios), ',token-rotation,') }}
@@ -2986,6 +3051,7 @@ jobs:
         messaging-providers-vitest,
         launchable-smoke-vitest,
         double-onboard-vitest,
+        gpu-double-onboard-vitest,
         model-router-provider-routed-inference-vitest,
         sandbox-survival-vitest,
         gateway-drift-preflight-vitest,
diff --git a/test/e2e-scenario/live/gpu-double-onboard.test.ts b/test/e2e-scenario/live/gpu-double-onboard.test.ts
new file mode 100644
index 0000000000..07c7ea3c27
--- /dev/null
+++ b/test/e2e-scenario/live/gpu-double-onboard.test.ts
@@ -0,0 +1,264 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+/** Live Vitest replacement for test/e2e/test-gpu-double-onboard.sh. */
+
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+
+import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
+import { type HostCliClient } from "../fixtures/clients/host.ts";
+import { type SandboxClient, validateSandboxName } from "../fixtures/clients/sandbox.ts";
+import { expect, test } from "../fixtures/e2e-test.ts";
+import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
+import type { ShellProbeResult } from "../fixtures/shell-probe.ts";
+
+const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
+const CLI_ENTRYPOINT = path.join(REPO_ROOT, "bin", "nemoclaw.js");
+const SANDBOX_NAME = process.env.NEMOCLAW_SANDBOX_NAME ?? "e2e-gpu-double-onboard-vitest";
+const PROXY_PORT = process.env.NEMOCLAW_OLLAMA_PROXY_PORT ?? "11435";
+const TOKEN_FILE = path.join(os.homedir(), ".nemoclaw", "ollama-proxy-token");
+const LIVE_TIMEOUT_MS = 90 * 60_000;
+const liveTest = shouldRunLiveE2EScenarios() ? test : test.skip;
+
+validateSandboxName(SANDBOX_NAME);
+process.env.NEMOCLAW_CLI_BIN ??= CLI_ENTRYPOINT;
+
+function env(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv {
+  const probeEnv = buildAvailabilityProbeEnv();
+  const scenarioEnv: NodeJS.ProcessEnv = {
+    PATH: `${os.homedir()}/.local/bin:${os.homedir()}/.npm-global/bin:${process.env.PATH ?? ""}`,
+    NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
+    NEMOCLAW_NON_INTERACTIVE: "1",
+    NEMOCLAW_OLLAMA_PROXY_PORT: PROXY_PORT,
+    NEMOCLAW_PROVIDER: "ollama",
+    NEMOCLAW_RECREATE_SANDBOX: "1",
+    NEMOCLAW_SANDBOX_NAME: SANDBOX_NAME,
+    OPENSHELL_GATEWAY: "nemoclaw",
+  };
+  return { ...probeEnv, ...scenarioEnv, ...extra }; // later spreads win: probe < scenario < extra
+}
+
+function resultText(result: Pick<ShellProbeResult, "stdout" | "stderr">): string {
+  return [result.stdout, result.stderr].filter((part) => Boolean(part)).join("\n"); // skip empties
+}
+
+async function nemoclaw(
+  host: HostCliClient,
+  args: string[],
+  artifactName: string,
+  extraEnv: NodeJS.ProcessEnv = {},
+  timeoutMs = 20 * 60_000,
+): Promise<ShellProbeResult> {
+  // Run the repo CLI entrypoint under the current Node binary with the scenario env.
+  const argv = [CLI_ENTRYPOINT, ...args];
+  const options = { artifactName, env: env(extraEnv), timeoutMs };
+  const result = await host.command(process.execPath, argv, options);
+  return result;
+}
+
+/** Best-effort teardown of the sandbox, gateway, and host Ollama processes; failures are ignored. */
+async function cleanup(host: HostCliClient, sandbox: SandboxClient): Promise<void> {
+  await nemoclaw(host, [SANDBOX_NAME, "destroy", "--yes"], "cleanup-nemoclaw-destroy").catch(
+    () => undefined,
+  );
+  await sandbox
+    .openshell(["sandbox", "delete", SANDBOX_NAME], {
+      artifactName: "cleanup-openshell-sandbox-delete",
+      env: env(),
+      timeoutMs: 60_000,
+    })
+    .catch(() => undefined);
+  await sandbox
+    .openshell(["gateway", "destroy", "-g", "nemoclaw"], {
+      artifactName: "cleanup-openshell-gateway-destroy",
+      env: env(),
+      timeoutMs: 60_000,
+    })
+    .catch(() => undefined);
+  // `pkill -f` matches whole command lines, and this wrapper shell's own command line
+  // contains the patterns, so a plain pattern would signal the shell before the second
+  // pkill runs. The `[o]` bracket matches the same processes but not the wrapper itself.
+  await host
+    .command(
+      "bash",
+      [
+        "-lc",
+        "pkill -f '[o]llama serve' 2>/dev/null || true; pkill -f '[o]llama-auth-proxy' 2>/dev/null || true",
+      ],
+      { artifactName: "cleanup-ollama-processes", env: env(), timeoutMs: 30_000 },
+    )
+    .catch(() => undefined);
+}
+
+/** Curl `url` and return the probe result; its stdout is the HTTP status code. */
+async function httpStatus(host: HostCliClient, url: string, artifactName: string, token?: string) {
+  // Pass argv directly instead of composing a `bash -lc` string: the token and URL can
+  // then never be re-parsed as shell syntax, whatever characters they contain.
+  const auth = token ? ["-H", `Authorization: Bearer ${token}`] : [];
+  const args = ["-s", "-o", "/dev/null", "-w", "%{http_code}", "--connect-timeout", "5"];
+  return await host.command("curl", [...args, ...auth, url], {
+    artifactName,
+    env: env(),
+    timeoutMs: 30_000,
+  });
+}
+
+function parseReplyCommand(): string {
+  return String.raw`python3 - <<'PY'
+import json, sys
+try:
+    doc=json.load(sys.stdin)
+    msg=doc['choices'][0]['message']
+    print((msg.get('content') or msg.get('reasoning_content') or msg.get('reasoning') or '').strip())
+except Exception as exc:
+    print(f'PARSE_ERROR: {exc}', file=sys.stderr)
+    sys.exit(1)
+PY`;
+}
+
+liveTest(
+  "gpu double onboard keeps Ollama auth proxy token consistent after re-onboard",
+  { timeout: LIVE_TIMEOUT_MS },
+  async ({ artifacts, cleanup: cleanupRegistry, host, sandbox, skip }) => {
+    await artifacts.writeJson("scenario.json", {
+      id: "gpu-double-onboard",
+      legacySource: "test/e2e/test-gpu-double-onboard.sh",
+      sandboxName: SANDBOX_NAME,
+      proxyPort: PROXY_PORT,
+      contracts: [
+        "GPU and Docker prerequisites are present",
+        "install.sh onboards with the Ollama provider",
+        "the persisted Ollama auth-proxy token works after first onboard",
+        "nemoclaw onboard --non-interactive --yes recreates the sandbox",
+        "the running proxy accepts the persisted token after re-onboard and rejects unauthenticated/wrong-token requests",
+        "sandbox inference.local reaches Ollama after re-onboard",
+      ],
+    });
+
+    const docker = await host.command("docker", ["info"], {
+      artifactName: "phase-0-docker-info",
+      env: env(),
+      timeoutMs: 30_000,
+    });
+    if (docker.exitCode !== 0) {
+      if (process.env.GITHUB_ACTIONS === "true") throw new Error(resultText(docker));
+      skip(`Docker is required: ${resultText(docker)}`);
+    }
+    const smi = await host.command("nvidia-smi", [], {
+      artifactName: "phase-0-nvidia-smi",
+      env: env(),
+      timeoutMs: 30_000,
+    });
+    if (smi.exitCode !== 0) {
+      if (process.env.GITHUB_ACTIONS === "true") throw new Error(resultText(smi));
+      skip(`NVIDIA GPU is required: ${resultText(smi)}`);
+    }
+
+    cleanupRegistry.add("remove gpu double-onboard state", () => cleanup(host, sandbox));
+    await cleanup(host, sandbox);
+
+    const installOllama = await host.command(
+      "bash",
+      ["-lc", "command -v ollama >/dev/null 2>&1 || curl -fsSL https://ollama.com/install.sh | sh"],
+      {
+        artifactName: "phase-1-install-ollama",
+        env: env(),
+        timeoutMs: 5 * 60_000,
+      },
+    );
+    expect(installOllama.exitCode, resultText(installOllama)).toBe(0);
+    // Stop any Ollama service/process left on the host before the first onboard.
+    // `pkill -f` matches whole command lines, and this wrapper shell's own command line
+    // contains the patterns, so a plain pattern would signal the shell before the second
+    // pkill runs. The `[o]` bracket matches the same processes but not the wrapper itself.
+    await host.command(
+      "bash",
+      [
+        "-lc",
+        "systemctl --user stop ollama 2>/dev/null || true; systemctl stop ollama 2>/dev/null || true; pkill -f '[o]llama serve' 2>/dev/null || true; pkill -f '[o]llama-auth-proxy' 2>/dev/null || true",
+      ],
+      { artifactName: "phase-1-stop-preexisting-ollama", env: env(), timeoutMs: 60_000 },
+    );
+
+    const first = await host.command("bash", ["install.sh", "--non-interactive"], {
+      artifactName: "phase-2-install-sh-first-onboard",
+      cwd: REPO_ROOT,
+      env: env(),
+      timeoutMs: 30 * 60_000,
+    });
+    expect(first.exitCode, resultText(first)).toBe(0);
+
+    const list = await nemoclaw(host, ["list"], "phase-3-nemoclaw-list");
+    expect(list.exitCode, resultText(list)).toBe(0);
+    expect(list.stdout).toContain(SANDBOX_NAME);
+    expect(fs.existsSync(TOKEN_FILE), `${TOKEN_FILE} missing`).toBe(true);
+    const tokenAfterFirst = fs.readFileSync(TOKEN_FILE, "utf8").trim();
+    expect(tokenAfterFirst.length).toBeGreaterThan(10);
+
+    const firstTokenStatus = await httpStatus(
+      host,
+      `http://127.0.0.1:${PROXY_PORT}/v1/models`,
+      "phase-3-proxy-token-status",
+      tokenAfterFirst,
+    );
+    expect(firstTokenStatus.stdout.trim(), resultText(firstTokenStatus)).toBe("200");
+
+    const reonboard = await nemoclaw(
+      host,
+      ["onboard", "--non-interactive", "--yes"],
+      "phase-4-reonboard",
+      { NEMOCLAW_RECREATE_SANDBOX: "1" },
+      30 * 60_000,
+    );
+    expect(reonboard.exitCode, resultText(reonboard)).toBe(0);
+    expect(fs.existsSync(TOKEN_FILE), `${TOKEN_FILE} missing after re-onboard`).toBe(true);
+    const tokenAfterSecond = fs.readFileSync(TOKEN_FILE, "utf8").trim();
+    expect(tokenAfterSecond.length).toBeGreaterThan(10);
+
+    const liveStatus = await httpStatus(
+      host,
+      `http://127.0.0.1:${PROXY_PORT}/api/tags`,
+      "phase-5-proxy-live-status",
+    );
+    expect(liveStatus.stdout.trim()).toMatch(/^[1-9][0-9]{2}$/);
+    const authStatus = await httpStatus(
+      host,
+      `http://127.0.0.1:${PROXY_PORT}/v1/models`,
+      "phase-5-proxy-persisted-token-status",
+      tokenAfterSecond,
+    );
+    expect(authStatus.stdout.trim(), resultText(authStatus)).toBe("200");
+    const wrongStatus = await httpStatus(
+      host,
+      `http://127.0.0.1:${PROXY_PORT}/v1/models`,
+      "phase-5-proxy-wrong-token-status",
+      `wrong-${Date.now()}`,
+    );
+    expect(wrongStatus.stdout.trim()).toBe("401");
+
+    const model = process.env.NEMOCLAW_MODEL ?? "llama3.2:1b";
+    const response = await sandbox.exec(
+      SANDBOX_NAME,
+      [
+        "sh",
+        "-lc",
+        `curl -fsS --max-time 120 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' --data '${JSON.stringify({ model, messages: [{ role: "user", content: "Reply with exactly one word: PONG" }], max_tokens: 200 })}' | ${parseReplyCommand()}`,
+      ],
+      {
+        artifactName: "phase-6-sandbox-inference-after-reonboard",
+        env: env(),
+        timeoutMs: 150_000,
+      },
+    );
+    expect(response.exitCode, resultText(response)).toBe(0);
+    expect(response.stdout).toMatch(/PONG/i);
+
+    await cleanup(host, sandbox);
+    await artifacts.writeJson("scenario-result.json", {
+      id: "gpu-double-onboard",
+      status: "passed",
+    });
+  },
+);

From a9fbcd93ce001baa3aa4eecd066803bd4b73aefa Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Tue, 16 Jun 2026 09:13:01 -0700
Subject: [PATCH 3/6] test(e2e): avoid heredoc in GPU sandbox inference

---
 .../live/gpu-double-onboard.test.ts             | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/test/e2e-scenario/live/gpu-double-onboard.test.ts b/test/e2e-scenario/live/gpu-double-onboard.test.ts
index 07c7ea3c27..2926dbed32 100644
--- a/test/e2e-scenario/live/gpu-double-onboard.test.ts
+++ b/test/e2e-scenario/live/gpu-double-onboard.test.ts
@@ -106,16 +106,13 @@ async function httpStatus(host: HostCliClient, url: string, artifactName: string
 }
 
 function parseReplyCommand(): string {
-  return String.raw`python3 - <<'PY'
-import json, sys
-try:
-    doc=json.load(sys.stdin)
-    msg=doc['choices'][0]['message']
-    print((msg.get('content') or msg.get('reasoning_content') or msg.get('reasoning') or '').strip())
-except Exception as exc:
-    print(f'PARSE_ERROR: {exc}', file=sys.stderr)
-    sys.exit(1)
-PY`;
+  return "python3 -c ";
+  import json,
+  sys;
+  d = json.load(sys.stdin);
+  m = d["choices"][0]["message"];
+  print((m.get('content') or m.get('reasoning_content') or m.get('reasoning') or '').strip()
+  )""
 }
 
 liveTest(

From f5e48b44721174cb888cf20a61992fde3544dd54 Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Tue, 16 Jun 2026 09:13:32 -0700
Subject: [PATCH 4/6] test(e2e): fix GPU inference parser quoting

---
 test/e2e-scenario/live/gpu-double-onboard.test.ts | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/test/e2e-scenario/live/gpu-double-onboard.test.ts b/test/e2e-scenario/live/gpu-double-onboard.test.ts
index 2926dbed32..ab3e2bcd73 100644
--- a/test/e2e-scenario/live/gpu-double-onboard.test.ts
+++ b/test/e2e-scenario/live/gpu-double-onboard.test.ts
@@ -106,13 +106,7 @@ async function httpStatus(host: HostCliClient, url: string, artifactName: string
 }
 
 function parseReplyCommand(): string {
-  return "python3 -c ";
-  import json,
-  sys;
-  d = json.load(sys.stdin);
-  m = d["choices"][0]["message"];
-  print((m.get('content') or m.get('reasoning_content') or m.get('reasoning') or '').strip()
-  )""
+  return String.raw`python3 -c 'import json,sys; d=json.load(sys.stdin); m=d["choices"][0]["message"]; print((m.get("content") or m.get("reasoning_content") or m.get("reasoning") or "").strip())'`;
 }
 
 liveTest(

From cd198f3bdd144758bfe24df9fbe9c8bcdb8df75e Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Tue, 16 Jun 2026 09:35:19 -0700
Subject: [PATCH 5/6] test(e2e): avoid prompt echo in GPU sandbox inference

---
 test/e2e-scenario/live/gpu-double-onboard.test.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/e2e-scenario/live/gpu-double-onboard.test.ts b/test/e2e-scenario/live/gpu-double-onboard.test.ts
index ab3e2bcd73..45481e5a49 100644
--- a/test/e2e-scenario/live/gpu-double-onboard.test.ts
+++ b/test/e2e-scenario/live/gpu-double-onboard.test.ts
@@ -235,7 +235,7 @@ liveTest(
       [
         "sh",
         "-lc",
-        `curl -fsS --max-time 120 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' --data '${JSON.stringify({ model, messages: [{ role: "user", content: "Reply with exactly one word: PONG" }], max_tokens: 200 })}' | ${parseReplyCommand()}`,
+        `curl -fsS --max-time 120 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' --data '${JSON.stringify({ model, messages: [{ role: "user", content: "What is 6 multiplied by 7? Reply with only the integer, no extra words." }], max_tokens: 200 })}' | ${parseReplyCommand()}`,
       ],
       {
         artifactName: "phase-6-sandbox-inference-after-reonboard",
@@ -244,7 +244,7 @@ liveTest(
       },
     );
     expect(response.exitCode, resultText(response)).toBe(0);
-    expect(response.stdout).toMatch(/PONG/i);
+    expect(response.stdout).toMatch(/(^|[^0-9])42([^0-9]|$)/);
 
     await cleanup(host, sandbox);
     await artifacts.writeJson("scenario-result.json", {

From 5c053773c5a7f7cb82b528bea86a99c1de604a3b Mon Sep 17 00:00:00 2001
From: Carlos Villela <cvillela@nvidia.com>
Date: Tue, 16 Jun 2026 10:19:45 -0700
Subject: [PATCH 6/6] test(e2e): restore GPU double-onboard parity checks

---
 .../live/gpu-double-onboard.test.ts           | 76 ++++++++++++++-----
 1 file changed, 59 insertions(+), 17 deletions(-)

diff --git a/test/e2e-scenario/live/gpu-double-onboard.test.ts b/test/e2e-scenario/live/gpu-double-onboard.test.ts
index 45481e5a49..e6091ddfc8 100644
--- a/test/e2e-scenario/live/gpu-double-onboard.test.ts
+++ b/test/e2e-scenario/live/gpu-double-onboard.test.ts
@@ -109,6 +109,45 @@ function parseReplyCommand(): string {
   return String.raw`python3 -c 'import json,sys; d=json.load(sys.stdin); m=d["choices"][0]["message"]; print((m.get("content") or m.get("reasoning_content") or m.get("reasoning") or "").strip())'`;
 }
 
+function fileMode(pathname: string): string {
+  return (fs.statSync(pathname).mode & 0o777).toString(8).padStart(3, "0"); // rwx bits as octal, e.g. "600"
+}
+
+/** Serialise the chat-completions request body used by the sandbox inference check. */
+function chatRequest(model: string): string {
+  // Arithmetic prompt: the expected answer (42) does not occur in the prompt text,
+  // so a reply that merely repeats the prompt cannot satisfy the caller's match.
+  const prompt = "What is 6 multiplied by 7? Reply with only the integer, no extra words.";
+  const payload = {
+    model,
+    messages: [{ role: "user", content: prompt }],
+    max_tokens: 200,
+  };
+  return JSON.stringify(payload);
+}
+
+/** Send the 6-times-7 prompt from inside the sandbox and require a standalone 42 in the reply. */
+async function expectSandboxInference42(
+  sandbox: SandboxClient,
+  model: string,
+  artifactName: string,
+): Promise<void> {
+  const endpoint = "https://inference.local/v1/chat/completions";
+  const body = chatRequest(model);
+  const fetchReply = `curl -fsS --max-time 120 ${endpoint} -H 'Content-Type: application/json' --data '${body}'`;
+  // Pipe the raw completion JSON through the command that prints only the reply text.
+  const script = `${fetchReply} | ${parseReplyCommand()}`;
+  const response = await sandbox.exec(SANDBOX_NAME, ["sh", "-lc", script], {
+    artifactName,
+    env: env(),
+    timeoutMs: 150_000,
+  });
+  const detail = resultText(response);
+  expect(response.exitCode, detail).toBe(0);
+  // Non-digit guards on both sides so "142" or "420" cannot satisfy the check.
+  expect(response.stdout).toMatch(/(^|[^0-9])42([^0-9]|$)/);
+}
+
 liveTest(
   "gpu double onboard keeps Ollama auth proxy token consistent after re-onboard",
   { timeout: LIVE_TIMEOUT_MS },
@@ -187,6 +226,9 @@ liveTest(
     expect(fs.existsSync(TOKEN_FILE), `${TOKEN_FILE} missing`).toBe(true);
     const tokenAfterFirst = fs.readFileSync(TOKEN_FILE, "utf8").trim();
     expect(tokenAfterFirst.length).toBeGreaterThan(10);
+    expect(fileMode(TOKEN_FILE)).toBe("600");
+
+    const model = process.env.NEMOCLAW_MODEL ?? "llama3.2:1b";
 
     const firstTokenStatus = await httpStatus(
       host,
@@ -195,6 +237,7 @@ liveTest(
       tokenAfterFirst,
     );
     expect(firstTokenStatus.stdout.trim(), resultText(firstTokenStatus)).toBe("200");
+    await expectSandboxInference42(sandbox, model, "phase-3-sandbox-inference-first-onboard");
 
     const reonboard = await nemoclaw(
       host,
@@ -207,6 +250,8 @@ liveTest(
     expect(fs.existsSync(TOKEN_FILE), `${TOKEN_FILE} missing after re-onboard`).toBe(true);
     const tokenAfterSecond = fs.readFileSync(TOKEN_FILE, "utf8").trim();
     expect(tokenAfterSecond.length).toBeGreaterThan(10);
+    expect(fileMode(TOKEN_FILE)).toBe("600");
+    expect(tokenAfterSecond).toBe(tokenAfterFirst);
 
     const liveStatus = await httpStatus(
       host,
@@ -218,9 +263,18 @@ liveTest(
       host,
       `http://127.0.0.1:${PROXY_PORT}/v1/models`,
       "phase-5-proxy-persisted-token-status",
-      tokenAfterSecond,
+      tokenAfterFirst,
     );
     expect(authStatus.stdout.trim(), resultText(authStatus)).toBe("200");
+    const unauthPost = await host.command(
+      "bash",
+      [
+        "-lc",
+        `curl -s -o /dev/null -w '%{http_code}' --connect-timeout 5 -X POST 'http://127.0.0.1:${PROXY_PORT}/api/generate' -d '{}'`,
+      ],
+      { artifactName: "phase-5-proxy-unauth-post-status", env: env(), timeoutMs: 30_000 },
+    );
+    expect(unauthPost.stdout.trim()).toBe("401");
     const wrongStatus = await httpStatus(
       host,
       `http://127.0.0.1:${PROXY_PORT}/v1/models`,
@@ -229,24 +283,12 @@ liveTest(
     );
     expect(wrongStatus.stdout.trim()).toBe("401");
 
-    const model = process.env.NEMOCLAW_MODEL ?? "llama3.2:1b";
-    const response = await sandbox.exec(
-      SANDBOX_NAME,
-      [
-        "sh",
-        "-lc",
-        `curl -fsS --max-time 120 https://inference.local/v1/chat/completions -H 'Content-Type: application/json' --data '${JSON.stringify({ model, messages: [{ role: "user", content: "What is 6 multiplied by 7? Reply with only the integer, no extra words." }], max_tokens: 200 })}' | ${parseReplyCommand()}`,
-      ],
-      {
-        artifactName: "phase-6-sandbox-inference-after-reonboard",
-        env: env(),
-        timeoutMs: 150_000,
-      },
-    );
-    expect(response.exitCode, resultText(response)).toBe(0);
-    expect(response.stdout).toMatch(/(^|[^0-9])42([^0-9]|$)/);
+    await expectSandboxInference42(sandbox, model, "phase-6-sandbox-inference-after-reonboard");
 
     await cleanup(host, sandbox);
+    const registryFile = path.join(os.homedir(), ".nemoclaw", "sandboxes.json");
+    const registryText = fs.existsSync(registryFile) ? fs.readFileSync(registryFile, "utf8") : "";
+    expect(registryText).not.toContain(SANDBOX_NAME);
     await artifacts.writeJson("scenario-result.json", {
       id: "gpu-double-onboard",
       status: "passed",