From 9ddb891fc72976dd2029dd2e0056175dcb1f3b21 Mon Sep 17 00:00:00 2001
From: Chengjie Wang <chengjiew@nvidia.com>
Date: Sat, 23 May 2026 20:52:07 +0800
Subject: [PATCH 1/6] fix(sandbox): auto-unlock shields during rebuild

Signed-off-by: Chengjie Wang <chengjiew@nvidia.com>
---
 src/lib/actions/sandbox/rebuild.ts       |  73 +++++
 src/lib/shields/index.ts                 | 236 +++++++++-------
 test/rebuild-shields-auto-unlock.test.ts | 327 +++++++++++++++++++++++
 3 files changed, 540 insertions(+), 96 deletions(-)
 create mode 100644 test/rebuild-shields-auto-unlock.test.ts

diff --git a/src/lib/actions/sandbox/rebuild.ts b/src/lib/actions/sandbox/rebuild.ts
index d2ef0ffc76..33e6ad8f4d 100644
--- a/src/lib/actions/sandbox/rebuild.ts
+++ b/src/lib/actions/sandbox/rebuild.ts
@@ -58,6 +58,7 @@ import {
   createSystemDeps as createSessionDeps,
   getActiveSandboxSessions,
 } from "../../state/sandbox-session";
+import * as shields from "../../shields";
 import { removeSandboxRegistryEntry } from "./destroy";
 import { executeSandboxCommand } from "./process-recovery";
 
@@ -438,6 +439,65 @@ export async function rebuildSandbox(
     }
   }
 
+  // Step 1b: Auto-unlock shields if locked. Without this, the sandbox-state
+  // backup tar fails because high-risk state dirs are owned by root:root.
+  // We re-apply the lockdown after a successful rebuild (#3113).
+  const shieldsWereLocked = !shields.isShieldsDown(sandboxName);
+  if (shieldsWereLocked) {
+    console.log("");
+    console.log(`  ${YW}Shields are UP${R} — temporarily unlocking for rebuild backup...`);
+    try {
+      shields.shieldsDown(sandboxName, {
+        reason: "auto-unlock for rebuild",
+        skipTimer: true,
+        throwOnError: true,
+      });
+    } catch (err) {
+      const message = err instanceof Error ? err.message : String(err);
+      console.error("");
+      console.error(`  ${_RD}Failed to auto-unlock shields:${R} ${message}`);
+      console.error("  Sandbox is untouched — no data was lost.");
+      console.error(
+        `  Run \`${CLI_NAME} ${sandboxName} shields down\` manually, then retry rebuild.`,
+      );
+      bail(`Failed to auto-unlock shields: ${message}`);
+      return;
+    }
+  }
+
+  // Re-lock shields if we auto-unlocked. Idempotent across multiple call sites
+  // (success path + bail-before-destroy path). After destroy, the sandbox is
+  // gone — we surface manual recovery instructions instead.
+  let shieldsRelocked = false;
+  const relockShieldsIfNeeded = (sandboxStillExists: boolean): void => {
+    if (!shieldsWereLocked || shieldsRelocked) return;
+    if (!sandboxStillExists) {
+      console.warn("");
+      console.warn(
+        `  ${YW}⚠${R} Cannot re-apply shields lockdown — sandbox no longer exists.`,
+      );
+      console.warn(
+        `  After recovery, run \`${CLI_NAME} ${sandboxName} shields up\` to restore lockdown.`,
+      );
+      return;
+    }
+    console.log("");
+    console.log("  Re-applying shields lockdown...");
+    try {
+      shields.shieldsUp(sandboxName, { throwOnError: true });
+      console.log(`  ${G}✓${R} Shields restored to UP`);
+      shieldsRelocked = true;
+    } catch (err) {
+      const message = err instanceof Error ? err.message : String(err);
+      console.error(
+        `  ${YW}⚠${R} Failed to re-apply shields lockdown: ${message}`,
+      );
+      console.error(
+        `  Run \`${CLI_NAME} ${sandboxName} shields up\` manually to restore lockdown.`,
+      );
+    }
+  };
+
   // Step 2: Backup
   console.log("  Backing up sandbox state...");
   log(`Agent type: ${sb.agent || "openclaw"}, stateDirs from manifest`);
@@ -456,6 +516,7 @@ export async function rebuildSandbox(
       console.error(`  Failed files: ${backup.failedFiles.join(", ")}`);
     }
     console.error("  Aborting rebuild to prevent data loss.");
+    relockShieldsIfNeeded(true);
     bail("Failed to back up sandbox state.");
     return;
   }
@@ -463,6 +524,7 @@ export async function rebuildSandbox(
   if (!backupManifest) {
     console.error("  Failed to record backup metadata.");
     console.error("  Aborting rebuild to prevent data loss.");
+    relockShieldsIfNeeded(true);
     bail("Failed to record backup metadata.");
     return;
   }
@@ -515,6 +577,7 @@ export async function rebuildSandbox(
   if (deleteResult.status !== 0 && !alreadyGone) {
     console.error("  Failed to delete sandbox. Aborting rebuild.");
     console.error("  State backup is preserved at: " + backupManifest.backupPath);
+    relockShieldsIfNeeded(true);
     bail("Failed to delete sandbox.", deleteResult.status || 1);
     return;
   }
@@ -715,7 +778,12 @@ export async function rebuildSandbox(
     console.error(
       `       ${CLI_NAME} ${sandboxName} snapshot restore "${backupManifest.timestamp}"`,
     );
+    if (shieldsWereLocked) {
+      console.error(`    4. Restore shields lockdown:`);
+      console.error(`       ${CLI_NAME} ${sandboxName} shields up`);
+    }
     console.error("");
+    relockShieldsIfNeeded(false);
     bail(
       `Recreate failed (sandbox destroyed). Backup: ${backupManifest.backupPath}`,
       onboardExitCode,
@@ -855,6 +923,11 @@ export async function rebuildSandbox(
   });
   log(`Registry updated: agentVersion=${agentDef.expectedVersion}`);
 
+  // Step 8: Re-apply shields lockdown if it was active before rebuild (#3113).
+  // Sandbox now exists and policy presets have been re-applied, so this is
+  // the correct point to restore the restrictive policy + lock config files.
+  relockShieldsIfNeeded(true);
+
   console.log("");
   if (restore.success) {
     console.log(`  ${G}\u2713${R} Sandbox '${sandboxName}' rebuilt successfully`);
diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts
index 051b27a430..bae208d1fe 100644
--- a/src/lib/shields/index.ts
+++ b/src/lib/shields/index.ts
@@ -910,10 +910,22 @@ interface ShieldsDownOpts {
   timeout?: string | null;
   reason?: string | null;
   policy?: string;
+  // Internal: skip the auto-restore timer fork. Used by `rebuild` which
+  // re-locks immediately after backup/recreate, so a detached timer racing
+  // with a destroyed-then-recreated sandbox is undesirable (#3113).
+  skipTimer?: boolean;
+  // Internal: throw on error instead of process.exit(1). Lets system-level
+  // operations (e.g., rebuild) recover instead of dying mid-flow.
+  throwOnError?: boolean;
 }
 
 function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
   validateName(sandboxName, "sandbox name");
+  const fail = opts.throwOnError
+    ? (msg: string) => {
+        throw new Error(msg);
+      }
+    : (_msg: string) => process.exit(1);
 
   const state = loadShieldsState(sandboxName);
   if (state.shieldsDown) {
@@ -923,7 +935,8 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
     console.error(
       "  Run `nemoclaw shields up` first, or use --extend (not yet implemented).",
     );
-    process.exit(1);
+    fail(`Config is already unlocked for ${sandboxName}`);
+    return;
   }
 
   // Kill stale auto-restore markers only when this command will actually
@@ -951,7 +964,8 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
   const policyYaml = parseCurrentPolicy(rawPolicy);
   if (!policyYaml) {
     console.error("  Cannot capture current policy. Is the sandbox running?");
-    process.exit(1);
+    fail("Cannot capture current policy");
+    return;
   }
 
   const ts = Date.now();
@@ -983,7 +997,8 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
     console.error(
       `  Unknown policy "${policyName}". Use "permissive" or a path to a YAML file.`,
     );
-    process.exit(1);
+    fail(`Unknown policy "${policyName}"`);
+    return;
   }
 
   console.log(`  Applying ${policyName} policy...`);
@@ -1013,7 +1028,8 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
     console.error(
       `  Re-run \`nemoclaw ${sandboxName} shields down\` after correcting file ownership.`,
     );
-    process.exit(1);
+    fail(message);
+    return;
   }
 
   // 3. Update state
@@ -1027,92 +1043,100 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
     shieldsPolicySnapshotPath: snapshotPath,
   });
 
-  // 4. Start auto-restore timer (detached child process)
+  // 4. Start auto-restore timer (detached child process), unless skipped.
   //    Pass the absolute restore time, not a relative timeout. Steps 1-2b
   //    can take minutes (policy apply + kubectl chmod), so a relative timeout
   //    passed at fork time would fire too early.
-  const restoreAt = new Date(Date.now() + timeoutSeconds * 1000);
-  const processToken = randomBytes(16).toString("hex");
-  const timerScript = path.join(__dirname, "timer.ts");
-  const timerScriptJs = timerScript.replace(/\.ts$/, ".js");
-  const actualScript = fs.existsSync(timerScriptJs)
-    ? timerScriptJs
-    : timerScript;
+  //
+  //    skipTimer is used by `rebuild` (#3113), which re-locks via shieldsUp
+  //    immediately after backup/recreate. Forking a detached timer in that
+  //    flow risks racing the auto-restore against a destroyed-then-recreated
+  //    sandbox.
+  if (!opts.skipTimer) {
+    const restoreAt = new Date(Date.now() + timeoutSeconds * 1000);
+    const processToken = randomBytes(16).toString("hex");
+    const timerScript = path.join(__dirname, "timer.ts");
+    const timerScriptJs = timerScript.replace(/\.ts$/, ".js");
+    const actualScript = fs.existsSync(timerScriptJs)
+      ? timerScriptJs
+      : timerScript;
 
-  try {
-    const child = fork(
-      actualScript,
-      [
-        sandboxName,
-        snapshotPath,
-        restoreAt.toISOString(),
-        target.configPath,
-        target.configDir,
-        processToken,
-      ],
-      {
-        detached: true,
-        stdio: ["ignore", "ignore", "ignore", "ipc"],
-      },
-    );
-    child.disconnect();
-    child.unref();
-
-    // Write timer marker
-    const markerPath = timerMarkerPath(sandboxName);
-    fs.writeFileSync(
-      markerPath,
-      JSON.stringify({
-        pid: child.pid,
-        sandboxName,
-        snapshotPath,
-        restoreAt: restoreAt.toISOString(),
-        processToken,
-      }),
-      { mode: 0o600 },
-    );
-  } catch (err) {
-    const message = err instanceof Error ? err.message : String(err);
-    console.error(`  Cannot start auto-restore timer: ${message}`);
-    console.error("  Rolling back — restoring policy from snapshot...");
-    const rollbackResult = run(
-      buildPolicySetCommand(snapshotPath, sandboxName),
-      {
-        ignoreError: true,
-      },
-    );
-    let rollbackLocked = false;
-    if (rollbackResult.status === 0) {
-      try {
-        lockAgentConfig(sandboxName, target);
-        rollbackLocked = true;
-      } catch {
+    try {
+      const child = fork(
+        actualScript,
+        [
+          sandboxName,
+          snapshotPath,
+          restoreAt.toISOString(),
+          target.configPath,
+          target.configDir,
+          processToken,
+        ],
+        {
+          detached: true,
+          stdio: ["ignore", "ignore", "ignore", "ipc"],
+        },
+      );
+      child.disconnect();
+      child.unref();
+
+      // Write timer marker
+      const markerPath = timerMarkerPath(sandboxName);
+      fs.writeFileSync(
+        markerPath,
+        JSON.stringify({
+          pid: child.pid,
+          sandboxName,
+          snapshotPath,
+          restoreAt: restoreAt.toISOString(),
+          processToken,
+        }),
+        { mode: 0o600 },
+      );
+    } catch (err) {
+      const message = err instanceof Error ? err.message : String(err);
+      console.error(`  Cannot start auto-restore timer: ${message}`);
+      console.error("  Rolling back — restoring policy from snapshot...");
+      const rollbackResult = run(
+        buildPolicySetCommand(snapshotPath, sandboxName),
+        {
+          ignoreError: true,
+        },
+      );
+      let rollbackLocked = false;
+      if (rollbackResult.status === 0) {
+        try {
+          lockAgentConfig(sandboxName, target);
+          rollbackLocked = true;
+        } catch {
+          console.error(
+            "  Warning: Rollback re-lock could not be verified. Check config manually.",
+          );
+        }
+      } else {
+        console.error("  Warning: Policy restore failed during rollback.");
+      }
+      if (rollbackLocked) {
+        saveShieldsState(sandboxName, {
+          shieldsDown: false,
+          shieldsDownAt: null,
+          shieldsDownTimeout: null,
+          shieldsDownReason: null,
+          shieldsDownPolicy: null,
+        });
+        console.error("  Lockdown restored. Config was never left unguarded.");
+      } else {
+        // Leave state as shieldsDown: true — don't lie about protection level
         console.error(
-          "  Warning: Rollback re-lock could not be verified. Check config manually.",
+          "  Config remains unlocked — manual intervention required.",
+        );
+        console.error(
+          `  Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`,
         );
       }
-    } else {
-      console.error("  Warning: Policy restore failed during rollback.");
-    }
-    if (rollbackLocked) {
-      saveShieldsState(sandboxName, {
-        shieldsDown: false,
-        shieldsDownAt: null,
-        shieldsDownTimeout: null,
-        shieldsDownReason: null,
-        shieldsDownPolicy: null,
-      });
-      console.error("  Lockdown restored. Config was never left unguarded.");
-    } else {
-      // Leave state as shieldsDown: true — don't lie about protection level
-      console.error(
-        "  Config remains unlocked — manual intervention required.",
-      );
-      console.error(
-        `  Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`,
-      );
+      fail(`Cannot start auto-restore timer: ${message}`);
+      return;
     }
-    process.exit(1);
   }
 
   // 5. Audit log
@@ -1127,16 +1151,22 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
   });
 
   // 6. Output
-  const mins = Math.floor(timeoutSeconds / 60);
-  const secs = timeoutSeconds % 60;
-  console.log(
-    `  Config unlocked for ${sandboxName} (auto-lockdown in: ${mins}m${secs ? ` ${secs}s` : ""})`,
-  );
-  console.log("");
-  console.log("  Sandbox is in default (mutable) state.");
-  console.log(
-    `  Run \`nemoclaw ${sandboxName} shields up\` to opt into lockdown.`,
-  );
+  if (opts.skipTimer) {
+    console.log(
+      `  Config unlocked for ${sandboxName} (no auto-lockdown timer; caller will re-lock).`,
+    );
+  } else {
+    const mins = Math.floor(timeoutSeconds / 60);
+    const secs = timeoutSeconds % 60;
+    console.log(
+      `  Config unlocked for ${sandboxName} (auto-lockdown in: ${mins}m${secs ? ` ${secs}s` : ""})`,
+    );
+    console.log("");
+    console.log("  Sandbox is in default (mutable) state.");
+    console.log(
+      `  Run \`nemoclaw ${sandboxName} shields up\` to opt into lockdown.`,
+    );
+  }
 }
 
 // ---------------------------------------------------------------------------
@@ -1146,8 +1176,19 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
 // hardening step that restricts the sandbox beyond its default state.
 // ---------------------------------------------------------------------------
 
-function shieldsUp(sandboxName: string): void {
+interface ShieldsUpOpts {
+  // Internal: throw on error instead of process.exit(1). Lets system-level
+  // operations (e.g., rebuild) recover instead of dying mid-flow (#3113).
+  throwOnError?: boolean;
+}
+
+function shieldsUp(sandboxName: string, opts: ShieldsUpOpts = {}): void {
   validateName(sandboxName, "sandbox name");
+  const fail = opts.throwOnError
+    ? (msg: string) => {
+        throw new Error(msg);
+      }
+    : (_msg: string) => process.exit(1);
 
   const state = loadShieldsState(sandboxName);
   // shieldsDown === false means explicitly locked by a previous shields-up.
@@ -1174,7 +1215,8 @@ function shieldsUp(sandboxName: string): void {
     console.error(
       "  Sandbox remains unlocked; recapture shields-down state before running shields up.",
     );
-    process.exit(1);
+    fail("Saved policy snapshot is missing");
+    return;
   }
   if (snapshotPath) {
     console.log("  Restoring restrictive policy from snapshot...");
@@ -1187,7 +1229,8 @@ function shieldsUp(sandboxName: string): void {
       console.error(
         `  Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`,
       );
-      process.exit(1);
+      fail(activation.error ?? "unknown restore error");
+      return;
     }
   } else {
     // 2b. Lock config file to read-only.
@@ -1209,7 +1252,8 @@ function shieldsUp(sandboxName: string): void {
       console.error(
         `  Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`,
       );
-      process.exit(1);
+      fail(message);
+      return;
     }
   }
 
diff --git a/test/rebuild-shields-auto-unlock.test.ts b/test/rebuild-shields-auto-unlock.test.ts
new file mode 100644
index 0000000000..36a41d80d5
--- /dev/null
+++ b/test/rebuild-shields-auto-unlock.test.ts
@@ -0,0 +1,327 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Tests for issue #3113: rebuild should auto-unlock when shields are UP.
+ *
+ * When the user has opted into shields-up (lockdown), rebuild used to abort
+ * at the backup step because state dirs are root-owned. Rebuild must
+ * temporarily unlock for backup, complete the rebuild, then re-lock.
+ */
+
+import { spawnSync } from "node:child_process";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { afterEach, describe, expect, it } from "vitest";
+
+const REPO_ROOT = path.join(import.meta.dirname, "..");
+const NODE_BIN = path.dirname(process.execPath);
+const tmpFixtures: string[] = [];
+
+afterEach(() => {
+  for (const dir of tmpFixtures.splice(0)) {
+    try {
+      fs.rmSync(dir, { recursive: true, force: true });
+    } catch {
+      /* */
+    }
+  }
+});
+
+function createFixture(opts: { shieldsLocked: boolean }) {
+  const sandboxName = "my-assistant";
+  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-3113-"));
+  tmpFixtures.push(tmpDir);
+  const nemoclawDir = path.join(tmpDir, ".nemoclaw");
+  fs.mkdirSync(nemoclawDir, { recursive: true, mode: 0o700 });
+  const stateDir = path.join(nemoclawDir, "state");
+  fs.mkdirSync(stateDir, { recursive: true, mode: 0o700 });
+  const usageNotice = JSON.parse(
+    fs.readFileSync(path.join(REPO_ROOT, "bin", "lib", "usage-notice.json"), "utf-8"),
+  );
+  fs.writeFileSync(
+    path.join(nemoclawDir, "usage-notice.json"),
+    JSON.stringify(
+      {
+        acceptedVersion: usageNotice.version,
+        acceptedAt: "2026-01-01T00:00:00.000Z",
+      },
+      null,
+      2,
+    ),
+    { mode: 0o600 },
+  );
+
+  // Pre-write a saved policy snapshot so the relock path can find it.
+  const snapshotPath = path.join(stateDir, "policy-snapshot-prior.yaml");
+  fs.writeFileSync(snapshotPath, "version: 1\nnetwork_policies:\n  test: {}\n", { mode: 0o600 });
+
+  if (opts.shieldsLocked) {
+    fs.writeFileSync(
+      path.join(stateDir, `shields-${sandboxName}.json`),
+      JSON.stringify(
+        {
+          shieldsDown: false,
+          shieldsDownAt: null,
+          shieldsDownTimeout: null,
+          shieldsDownReason: null,
+          shieldsDownPolicy: null,
+          shieldsPolicySnapshotPath: snapshotPath,
+          updatedAt: new Date().toISOString(),
+        },
+        null,
+        2,
+      ),
+      { mode: 0o600 },
+    );
+  }
+
+  fs.writeFileSync(
+    path.join(nemoclawDir, "sandboxes.json"),
+    JSON.stringify({
+      defaultSandbox: sandboxName,
+      sandboxes: {
+        [sandboxName]: {
+          name: sandboxName,
+          model: "meta/llama-3.3-70b-instruct",
+          provider: "nvidia-prod",
+          gpuEnabled: false,
+          policies: [],
+          agent: null,
+          messagingChannels: null,
+        },
+      },
+    }),
+    { mode: 0o600 },
+  );
+
+  fs.writeFileSync(
+    path.join(nemoclawDir, "credentials.json"),
+    JSON.stringify({ NVIDIA_API_KEY: "nvapi-test" }),
+    { mode: 0o600 },
+  );
+
+  fs.writeFileSync(
+    path.join(nemoclawDir, "onboard-session.json"),
+    JSON.stringify({
+      version: 1,
+      sessionId: "s",
+      resumable: true,
+      status: "complete",
+      mode: "interactive",
+      startedAt: "2026-01-01",
+      updatedAt: "2026-01-01",
+      lastStepStarted: null,
+      lastCompletedStep: "policies",
+      failure: null,
+      agent: null,
+      sandboxName,
+      provider: "nvidia-prod",
+      model: "meta/llama-3.3-70b-instruct",
+      endpointUrl: null,
+      credentialEnv: "NVIDIA_API_KEY",
+      preferredInferenceApi: null,
+      nimContainer: null,
+      webSearchConfig: null,
+      policyPresets: [],
+      messagingChannels: null,
+      metadata: { gatewayName: "nemoclaw", fromDockerfile: null },
+      steps: {
+        preflight: { status: "complete", startedAt: null, completedAt: null, error: null },
+        gateway: { status: "complete", startedAt: null, completedAt: null, error: null },
+        sandbox: { status: "complete", startedAt: null, completedAt: null, error: null },
+        provider_selection: { status: "complete", startedAt: null, completedAt: null, error: null },
+        inference: { status: "complete", startedAt: null, completedAt: null, error: null },
+        openclaw: { status: "complete", startedAt: null, completedAt: null, error: null },
+        agent_setup: { status: "pending", startedAt: null, completedAt: null, error: null },
+        policies: { status: "complete", startedAt: null, completedAt: null, error: null },
+      },
+    }),
+    { mode: 0o600 },
+  );
+
+  // Workspace dir for the backup tar
+  const fakeRoot = path.join(tmpDir, "fake-sandbox-root");
+  fs.mkdirSync(path.join(fakeRoot, "workspace"), { recursive: true });
+  fs.writeFileSync(path.join(fakeRoot, "workspace", "marker.txt"), "test");
+  const lockStatePath = path.join(tmpDir, "config-lock-state.txt");
+  fs.writeFileSync(lockStatePath, opts.shieldsLocked ? "locked" : "unlocked");
+
+  // Fake openshell — also returns a parseable YAML policy for the
+  // shields-down policy snapshot capture path.
+  const sshConfig = [
+    `Host openshell-${sandboxName}`,
+    "  HostName 127.0.0.1",
+    "  Port 2222",
+    "  User sandbox",
+    "  StrictHostKeyChecking no",
+    "  UserKnownHostsFile /dev/null",
+  ].join("\\n");
+  fs.writeFileSync(
+    path.join(tmpDir, "openshell"),
+    `#!/usr/bin/env node
+const a = process.argv.slice(2);
+if (a[0]==="sandbox" && a[1]==="list")       { process.stdout.write("${sandboxName}\\n"); process.exit(0); }
+if (a[0]==="sandbox" && a[1]==="ssh-config") { process.stdout.write("${sshConfig}\\n"); process.exit(0); }
+if (a[0]==="sandbox" && a[1]==="delete")     { process.exit(0); }
+if (a[0]==="policy" && a[1]==="get")         { process.stdout.write("version: 1\\nnetwork_policies:\\n  test: {}\\n"); process.exit(0); }
+if (a[0]==="policy" && a[1]==="set")         { process.exit(0); }
+if (a[0]==="status")                         { process.stdout.write("running\\n"); process.exit(0); }
+if (a[0]==="gateway" && a[1]==="info")       { process.stdout.write("nemoclaw\\n"); process.exit(0); }
+if (a[0]==="gateway" && a[1]==="select")     { process.exit(0); }
+if (a[0]==="inference" && a[1]==="get")      { process.stdout.write('{"provider":"nvidia-prod","model":"meta/llama-3.3-70b-instruct"}\\n'); process.exit(0); }
+if (a[0]==="inference" && a[1]==="set")      { process.exit(0); }
+if (a[0]==="provider")                       { process.exit(0); }
+if (a[0]==="forward")                        { process.exit(0); }
+process.exit(0);
+`,
+    { mode: 0o755 },
+  );
+
+  // Fake docker — covers both the basic cases and kubectl exec proxying.
+  // For shields lock/unlock, we return zero exit with the data shields.ts
+  // verification expects (stat for unlock returns sandbox:sandbox 660/2770).
+  fs.writeFileSync(
+    path.join(tmpDir, "docker"),
+    `#!/usr/bin/env node
+const fs = require("fs");
+const a = process.argv.slice(2);
+const lockStatePath = ${JSON.stringify(lockStatePath)};
+function readLockState() {
+  try { return fs.readFileSync(lockStatePath, "utf8").trim(); } catch { return "unlocked"; }
+}
+function writeLockState(state) {
+  fs.writeFileSync(lockStatePath, state);
+}
+if (a[0]==="build")  { process.exit(0); }
+if (a[0]==="image" && a[1]==="inspect") { process.exit(0); }
+if (a[0]==="inspect") { process.stdout.write("true\\n"); process.exit(0); }
+if (a[0]==="ps")     { process.exit(0); }
+// kubectl exec proxy via "docker exec <k3s> kubectl exec -n openshell <sb> -c agent -- <cmd...>"
+if (a[0]==="exec") {
+  // Find the kubectl '--' delimiter; everything after is the actual command.
+  const dashDash = a.indexOf("--");
+  const cmd = dashDash >= 0 ? a.slice(dashDash + 1) : [];
+  // Verification reads:
+  //   stat -c '%a %U:%G' <path>      → expect "660 sandbox:sandbox" or "2770 sandbox:sandbox"
+  //   lsattr -d <path>               → "----i------" (locked) or no immutable bit (unlocked)
+  // shields-up lock verification expects:
+  //   stat → "444 root:root" / "755 root:root"
+  //   lsattr → "----i------"
+  // We are testing the auto-unlock path: shields-down is called on a locked sandbox,
+  // verification should look like 660 sandbox:sandbox / 2770 sandbox:sandbox.
+  if (cmd[0]==="chattr" && cmd[1]==="-i") { writeLockState("unlocked"); process.exit(0); }
+  if (cmd[0]==="chattr" && cmd[1]==="+i") { writeLockState("locked"); process.exit(0); }
+  if (cmd[0]==="chown" && cmd[1]==="sandbox:sandbox") { writeLockState("unlocked"); process.exit(0); }
+  if (cmd[0]==="chown" && cmd[1]==="root:root") { writeLockState("locked"); process.exit(0); }
+  if (cmd[0]==="chmod" && (cmd[1]==="660" || cmd[1]==="2770")) { writeLockState("unlocked"); process.exit(0); }
+  if (cmd[0]==="chmod" && cmd[1]==="444") { writeLockState("locked"); process.exit(0); }
+  if (cmd[0]==="stat") {
+    const target = cmd[cmd.length-1];
+    const locked = readLockState() === "locked";
+    // Heuristic: directories tend to end with .openclaw or have no extension
+    if (target.endsWith(".openclaw") || target.endsWith(".hermes") || /\\/(workspace|skills|hooks|cron|agents|extensions|plugins|memory|credentials|identity|devices|canvas|telegram)$/.test(target)) {
+      process.stdout.write(locked ? "755 root:root\\n" : "2770 sandbox:sandbox\\n");
+    } else {
+      process.stdout.write(locked ? "444 root:root\\n" : "660 sandbox:sandbox\\n");
+    }
+    process.exit(0);
+  }
+  if (cmd[0]==="lsattr") {
+    const flags = readLockState() === "locked" ? "----i----------" : "---------------";
+    process.stdout.write(flags + " " + cmd[cmd.length-1] + "\\n");
+    process.exit(0);
+  }
+  if (cmd[0]==="chattr" || cmd[0]==="chown" || cmd[0]==="chmod" || cmd[0]==="sh" || cmd[0]==="find") { process.exit(0); }
+  process.exit(0);
+}
+process.exit(0);
+`,
+    { mode: 0o755 },
+  );
+
+  // Fake ssh — backup tars from the real fakeRoot
+  fs.writeFileSync(
+    path.join(tmpDir, "ssh"),
+    `#!/usr/bin/env node
+const cmd = process.argv[process.argv.length - 1] || "";
+if (cmd.includes("[ -d")) {
+  process.stdout.write("workspace\\n");
+  process.exit(0);
+}
+if (cmd.includes("tar")) {
+  const { spawnSync } = require("child_process");
+  const r = spawnSync("tar", ["-cf", "-", "-C", ${JSON.stringify(fakeRoot)}, "workspace"], {
+    stdio: ["ignore", "pipe", "pipe"],
+  });
+  if (r.stdout) process.stdout.write(r.stdout);
+  process.exit(r.status || 0);
+}
+if (cmd.includes("rm -rf")) { process.exit(0); }
+if (cmd.includes("chown"))  { process.exit(0); }
+process.exit(0);
+`,
+    { mode: 0o755 },
+  );
+
+  return { tmpDir, nemoclawDir, sandboxName, snapshotPath };
+}
+
+function runRebuild(fixture: ReturnType<typeof createFixture>) {
+  return spawnSync(
+    process.execPath,
+    [path.join(REPO_ROOT, "bin", "nemoclaw.js"), fixture.sandboxName, "rebuild", "--yes"],
+    {
+      cwd: REPO_ROOT,
+      encoding: "utf-8",
+      env: {
+        HOME: fixture.tmpDir,
+        PATH: fixture.tmpDir + ":" + NODE_BIN + ":/usr/bin:/bin",
+        NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1",
+        NEMOCLAW_NON_INTERACTIVE: "1",
+        NEMOCLAW_NO_CONNECT_HINT: "1",
+        NO_COLOR: "1",
+      },
+      timeout: 30_000,
+    },
+  );
+}
+
+describe("Issue #3113: rebuild auto-unlocks when shields are UP", () => {
+  it(
+    "detects locked shields and prints auto-unlock notice",
+    { timeout: 60_000 },
+    () => {
+      const f = createFixture({ shieldsLocked: true });
+      const r = runRebuild(f);
+      const output = (r.stdout || "") + (r.stderr || "");
+
+      // Without the fix this would be:
+      //   "Failed to back up sandbox state. Aborting rebuild to prevent data loss."
+      expect(output).not.toContain("Aborting rebuild to prevent data loss");
+      // With the fix, rebuild detects shields-up and unlocks before backup.
+      expect(output).toContain("Shields are UP");
+      expect(output).toContain("temporarily unlocking for rebuild backup");
+      // Shields-down was invoked programmatically (no permissive policy printout
+      // is required to assert; we just verify the snapshot capture step ran).
+      expect(output).toContain("Capturing current policy snapshot");
+      // Backup proceeds.
+      expect(output).toContain("Backing up sandbox state");
+    },
+  );
+
+  it(
+    "skips auto-unlock when shields are not configured",
+    { timeout: 60_000 },
+    () => {
+      const f = createFixture({ shieldsLocked: false });
+      const r = runRebuild(f);
+      const output = (r.stdout || "") + (r.stderr || "");
+
+      expect(output).not.toContain("Shields are UP");
+      expect(output).not.toContain("temporarily unlocking for rebuild backup");
+      expect(output).toContain("Backing up sandbox state");
+    },
+  );
+});

From b97772f7580d1d8905cf01698693ecda927ba4d2 Mon Sep 17 00:00:00 2001
From: Chengjie Wang <chengjiew@nvidia.com>
Date: Sat, 23 May 2026 21:33:13 +0800
Subject: [PATCH 2/6] fix(sandbox): fail closed when rebuild cannot relock
 shields

Signed-off-by: Chengjie Wang <chengjiew@nvidia.com>
---
 src/lib/actions/sandbox/rebuild-shields.ts | 76 ++++++++++++++++++++
 src/lib/actions/sandbox/rebuild.ts         | 84 +++++++---------------
 src/lib/shields/index.ts                   | 35 +++------
 test/rebuild-shields-window.test.ts        | 80 +++++++++++++++++++++
 4 files changed, 191 insertions(+), 84 deletions(-)
 create mode 100644 src/lib/actions/sandbox/rebuild-shields.ts
 create mode 100644 test/rebuild-shields-window.test.ts

diff --git a/src/lib/actions/sandbox/rebuild-shields.ts b/src/lib/actions/sandbox/rebuild-shields.ts
new file mode 100644
index 0000000000..3139e825dd
--- /dev/null
+++ b/src/lib/actions/sandbox/rebuild-shields.ts
@@ -0,0 +1,76 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { G, R, RD as _RD, YW } from "../../cli/terminal-style";
+import * as shields from "../../shields";
+
+export interface RebuildShieldsWindow {
+  relocked: boolean;
+  wasLocked: boolean;
+}
+
+export function openRebuildShieldsWindow(
+  sandboxName: string,
+  cliName: string,
+): RebuildShieldsWindow {
+  const window = {
+    relocked: false,
+    wasLocked: !shields.isShieldsDown(sandboxName),
+  };
+  if (!window.wasLocked) return window;
+
+  console.log("");
+  console.log(`  ${YW}Shields are UP${R} — temporarily unlocking for rebuild backup...`);
+  try {
+    shields.shieldsDown(sandboxName, {
+      reason: "auto-unlock for rebuild",
+      skipTimer: true,
+      throwOnError: true,
+    });
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    console.error("");
+    console.error(`  ${_RD}Failed to auto-unlock shields:${R} ${message}`);
+    console.error("  Sandbox is untouched — no data was lost.");
+    console.error(
+      `  Run \`${cliName} ${sandboxName} shields down\` manually, then retry rebuild.`,
+    );
+    throw new Error(`Failed to auto-unlock shields: ${message}`);
+  }
+  return window;
+}
+
+export function relockRebuildShieldsWindow(
+  sandboxName: string,
+  window: RebuildShieldsWindow,
+  sandboxStillExists: boolean,
+  cliName: string,
+): boolean {
+  if (!window.wasLocked || window.relocked) return true;
+  if (!sandboxStillExists) {
+    console.warn("");
+    console.warn(
+      `  ${YW}⚠${R} Cannot re-apply shields lockdown — sandbox no longer exists.`,
+    );
+    console.warn(
+      `  After recovery, run \`${cliName} ${sandboxName} shields up\` to restore lockdown.`,
+    );
+    return false;
+  }
+
+  console.log("");
+  console.log("  Re-applying shields lockdown...");
+  try {
+    shields.shieldsUp(sandboxName, { throwOnError: true });
+    console.log(`  ${G}✓${R} Shields restored to UP`);
+    window.relocked = true;
+    return true;
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    console.error(`  ${YW}⚠${R} Failed to re-apply shields lockdown: ${message}`);
+    console.error(
+      `  Run \`${cliName} ${sandboxName} shields up\` manually to restore lockdown.`,
+    );
+    return false;
+  }
+}
diff --git a/src/lib/actions/sandbox/rebuild.ts b/src/lib/actions/sandbox/rebuild.ts
index 33e6ad8f4d..e53f8c54b5 100644
--- a/src/lib/actions/sandbox/rebuild.ts
+++ b/src/lib/actions/sandbox/rebuild.ts
@@ -58,9 +58,13 @@ import {
   createSystemDeps as createSessionDeps,
   getActiveSandboxSessions,
 } from "../../state/sandbox-session";
-import * as shields from "../../shields";
 import { removeSandboxRegistryEntry } from "./destroy";
 import { executeSandboxCommand } from "./process-recovery";
+import {
+  openRebuildShieldsWindow,
+  relockRebuildShieldsWindow,
+  type RebuildShieldsWindow,
+} from "./rebuild-shields";
 
 /**
  * Emit timestamped rebuild diagnostics when verbose rebuild logging is enabled.
@@ -439,64 +443,21 @@ export async function rebuildSandbox(
     }
   }
 
-  // Step 1b: Auto-unlock shields if locked. Without this, the sandbox-state
-  // backup tar fails because high-risk state dirs are owned by root:root.
-  // We re-apply the lockdown after a successful rebuild (#3113).
-  const shieldsWereLocked = !shields.isShieldsDown(sandboxName);
-  if (shieldsWereLocked) {
-    console.log("");
-    console.log(`  ${YW}Shields are UP${R} — temporarily unlocking for rebuild backup...`);
-    try {
-      shields.shieldsDown(sandboxName, {
-        reason: "auto-unlock for rebuild",
-        skipTimer: true,
-        throwOnError: true,
-      });
-    } catch (err) {
-      const message = err instanceof Error ? err.message : String(err);
-      console.error("");
-      console.error(`  ${_RD}Failed to auto-unlock shields:${R} ${message}`);
-      console.error("  Sandbox is untouched — no data was lost.");
-      console.error(
-        `  Run \`${CLI_NAME} ${sandboxName} shields down\` manually, then retry rebuild.`,
-      );
-      bail(`Failed to auto-unlock shields: ${message}`);
-      return;
-    }
+  let rebuildShieldsWindow: RebuildShieldsWindow;
+  try {
+    rebuildShieldsWindow = openRebuildShieldsWindow(sandboxName, CLI_NAME);
+  } catch (err) {
+    bail(err instanceof Error ? err.message : String(err));
+    return;
   }
 
-  // Re-lock shields if we auto-unlocked. Idempotent across multiple call sites
-  // (success path + bail-before-destroy path). After destroy, the sandbox is
-  // gone — we surface manual recovery instructions instead.
-  let shieldsRelocked = false;
-  const relockShieldsIfNeeded = (sandboxStillExists: boolean): void => {
-    if (!shieldsWereLocked || shieldsRelocked) return;
-    if (!sandboxStillExists) {
-      console.warn("");
-      console.warn(
-        `  ${YW}⚠${R} Cannot re-apply shields lockdown — sandbox no longer exists.`,
-      );
-      console.warn(
-        `  After recovery, run \`${CLI_NAME} ${sandboxName} shields up\` to restore lockdown.`,
-      );
-      return;
-    }
-    console.log("");
-    console.log("  Re-applying shields lockdown...");
-    try {
-      shields.shieldsUp(sandboxName, { throwOnError: true });
-      console.log(`  ${G}✓${R} Shields restored to UP`);
-      shieldsRelocked = true;
-    } catch (err) {
-      const message = err instanceof Error ? err.message : String(err);
-      console.error(
-        `  ${YW}⚠${R} Failed to re-apply shields lockdown: ${message}`,
-      );
-      console.error(
-        `  Run \`${CLI_NAME} ${sandboxName} shields up\` manually to restore lockdown.`,
-      );
-    }
-  };
+  const relockShieldsIfNeeded = (sandboxStillExists: boolean): boolean =>
+    relockRebuildShieldsWindow(
+      sandboxName,
+      rebuildShieldsWindow,
+      sandboxStillExists,
+      CLI_NAME,
+    );
 
   // Step 2: Backup
   console.log("  Backing up sandbox state...");
@@ -778,7 +739,7 @@ export async function rebuildSandbox(
     console.error(
       `       ${CLI_NAME} ${sandboxName} snapshot restore "${backupManifest.timestamp}"`,
     );
-    if (shieldsWereLocked) {
+    if (rebuildShieldsWindow.wasLocked) {
       console.error(`    4. Restore shields lockdown:`);
       console.error(`       ${CLI_NAME} ${sandboxName} shields up`);
     }
@@ -926,7 +887,12 @@ export async function rebuildSandbox(
   // Step 8: Re-apply shields lockdown if it was active before rebuild (#3113).
   // Sandbox now exists and policy presets have been re-applied, so this is
   // the correct point to restore the restrictive policy + lock config files.
-  relockShieldsIfNeeded(true);
+  if (!relockShieldsIfNeeded(true)) {
+    console.error("");
+    console.error("  Sandbox rebuilt, but shields lockdown could not be restored.");
+    bail("Failed to re-apply shields lockdown.");
+    return;
+  }
 
   console.log("");
   if (restore.success) {
diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts
index bae208d1fe..1262195a5f 100644
--- a/src/lib/shields/index.ts
+++ b/src/lib/shields/index.ts
@@ -910,18 +910,13 @@ interface ShieldsDownOpts {
   timeout?: string | null;
   reason?: string | null;
   policy?: string;
-  // Internal: skip the auto-restore timer fork. Used by `rebuild` which
-  // re-locks immediately after backup/recreate, so a detached timer racing
-  // with a destroyed-then-recreated sandbox is undesirable (#3113).
   skipTimer?: boolean;
-  // Internal: throw on error instead of process.exit(1). Lets system-level
-  // operations (e.g., rebuild) recover instead of dying mid-flow.
   throwOnError?: boolean;
 }
 
 function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
   validateName(sandboxName, "sandbox name");
-  const fail = opts.throwOnError
+  const fail: (msg: string) => never = opts.throwOnError
     ? (msg: string) => {
         throw new Error(msg);
       }
@@ -935,8 +930,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
     console.error(
       "  Run `nemoclaw shields up` first, or use --extend (not yet implemented).",
     );
-    fail(`Config is already unlocked for ${sandboxName}`);
-    return;
+    return fail(`Config is already unlocked for ${sandboxName}`);
   }
 
   // Kill stale auto-restore markers only when this command will actually
@@ -964,8 +958,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
   const policyYaml = parseCurrentPolicy(rawPolicy);
   if (!policyYaml) {
     console.error("  Cannot capture current policy. Is the sandbox running?");
-    fail("Cannot capture current policy");
-    return;
+    return fail("Cannot capture current policy");
   }
 
   const ts = Date.now();
@@ -997,8 +990,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
     console.error(
       `  Unknown policy "${policyName}". Use "permissive" or a path to a YAML file.`,
     );
-    fail(`Unknown policy "${policyName}"`);
-    return;
+    return fail(`Unknown policy "${policyName}"`);
   }
 
   console.log(`  Applying ${policyName} policy...`);
@@ -1028,8 +1020,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
     console.error(
       `  Re-run \`nemoclaw ${sandboxName} shields down\` after correcting file ownership.`,
     );
-    fail(message);
-    return;
+    return fail(message);
   }
 
   // 3. Update state
@@ -1134,8 +1125,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
           `  Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`,
         );
       }
-      fail(`Cannot start auto-restore timer: ${message}`);
-      return;
+      return fail(`Cannot start auto-restore timer: ${message}`);
     }
   }
 
@@ -1177,14 +1167,12 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
 // ---------------------------------------------------------------------------
 
 interface ShieldsUpOpts {
-  // Internal: throw on error instead of process.exit(1). Lets system-level
-  // operations (e.g., rebuild) recover instead of dying mid-flow (#3113).
   throwOnError?: boolean;
 }
 
 function shieldsUp(sandboxName: string, opts: ShieldsUpOpts = {}): void {
   validateName(sandboxName, "sandbox name");
-  const fail = opts.throwOnError
+  const fail: (msg: string) => never = opts.throwOnError
     ? (msg: string) => {
         throw new Error(msg);
       }
@@ -1215,8 +1203,7 @@ function shieldsUp(sandboxName: string, opts: ShieldsUpOpts = {}): void {
     console.error(
       "  Sandbox remains unlocked; recapture shields-down state before running shields up.",
     );
-    fail("Saved policy snapshot is missing");
-    return;
+    return fail("Saved policy snapshot is missing");
   }
   if (snapshotPath) {
     console.log("  Restoring restrictive policy from snapshot...");
@@ -1229,8 +1216,7 @@ function shieldsUp(sandboxName: string, opts: ShieldsUpOpts = {}): void {
       console.error(
         `  Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`,
       );
-      fail(activation.error ?? "unknown restore error");
-      return;
+      return fail(activation.error ?? "unknown restore error");
     }
   } else {
     // 2b. Lock config file to read-only.
@@ -1252,8 +1238,7 @@ function shieldsUp(sandboxName: string, opts: ShieldsUpOpts = {}): void {
       console.error(
         `  Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`,
       );
-      fail(message);
-      return;
+      return fail(message);
     }
   }
 
diff --git a/test/rebuild-shields-window.test.ts b/test/rebuild-shields-window.test.ts
new file mode 100644
index 0000000000..bc8cae491c
--- /dev/null
+++ b/test/rebuild-shields-window.test.ts
@@ -0,0 +1,80 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+const shieldsMock = vi.hoisted(() => ({
+  isShieldsDown: vi.fn(),
+  shieldsDown: vi.fn(),
+  shieldsUp: vi.fn(),
+}));
+
+vi.mock("../src/lib/shields", () => shieldsMock);
+
+import {
+  openRebuildShieldsWindow,
+  relockRebuildShieldsWindow,
+} from "../src/lib/actions/sandbox/rebuild-shields";
+
+describe("rebuild shields window", () => {
+  beforeEach(() => {
+    vi.resetAllMocks();
+    vi.spyOn(console, "log").mockImplementation(() => {});
+    vi.spyOn(console, "warn").mockImplementation(() => {});
+    vi.spyOn(console, "error").mockImplementation(() => {});
+  });
+
+  it("temporarily unlocks locked shields without starting an auto-restore timer", () => {
+    shieldsMock.isShieldsDown.mockReturnValue(false);
+
+    const window = openRebuildShieldsWindow("locked-sandbox", "nemoclaw");
+
+    expect(window.wasLocked).toBe(true);
+    expect(shieldsMock.shieldsDown).toHaveBeenCalledWith("locked-sandbox", {
+      reason: "auto-unlock for rebuild",
+      skipTimer: true,
+      throwOnError: true,
+    });
+  });
+
+  it("relocks a previously locked sandbox and records the closed window", () => {
+    const window = { relocked: false, wasLocked: true };
+
+    const relocked = relockRebuildShieldsWindow("locked-sandbox", window, true, "nemoclaw");
+
+    expect(relocked).toBe(true);
+    expect(window.relocked).toBe(true);
+    expect(shieldsMock.shieldsUp).toHaveBeenCalledWith("locked-sandbox", {
+      throwOnError: true,
+    });
+
+    expect(relockRebuildShieldsWindow("locked-sandbox", window, true, "nemoclaw")).toBe(true);
+    expect(shieldsMock.shieldsUp).toHaveBeenCalledTimes(1);
+  });
+
+  it("reports relock failure so rebuild can fail closed", () => {
+    const window = { relocked: false, wasLocked: true };
+    shieldsMock.shieldsUp.mockImplementation(() => {
+      throw new Error("cannot lock config");
+    });
+
+    const relocked = relockRebuildShieldsWindow("locked-sandbox", window, true, "nemoclaw");
+
+    expect(relocked).toBe(false);
+    expect(window.relocked).toBe(false);
+    expect(console.error).toHaveBeenCalledWith(
+      expect.stringContaining("Failed to re-apply shields lockdown"),
+    );
+  });
+
+  it("does nothing when shields were already mutable", () => {
+    shieldsMock.isShieldsDown.mockReturnValue(true);
+
+    const window = openRebuildShieldsWindow("mutable-sandbox", "nemoclaw");
+
+    expect(window.wasLocked).toBe(false);
+    expect(shieldsMock.shieldsDown).not.toHaveBeenCalled();
+    expect(relockRebuildShieldsWindow("mutable-sandbox", window, true, "nemoclaw")).toBe(true);
+    expect(shieldsMock.shieldsUp).not.toHaveBeenCalled();
+  });
+});

From 199ac5a2dfdd3dd387a9097b9e068fc847e1c77b Mon Sep 17 00:00:00 2001
From: Chengjie Wang <chengjiew@nvidia.com>
Date: Sat, 23 May 2026 21:39:00 +0800
Subject: [PATCH 3/6] fix(sandbox): narrow rebuild shields relock handling

Signed-off-by: Chengjie Wang <chengjiew@nvidia.com>
---
 src/lib/actions/sandbox/rebuild-shields.ts | 17 ++++++++---
 src/lib/actions/sandbox/rebuild.ts         | 30 ++++----------------
 src/lib/shields/index.ts                   | 33 ++++++----------------
 test/rebuild-shields-window.test.ts        | 13 ++++-----
 4 files changed, 33 insertions(+), 60 deletions(-)

diff --git a/src/lib/actions/sandbox/rebuild-shields.ts b/src/lib/actions/sandbox/rebuild-shields.ts
index 3139e825dd..a18ceaf2d6 100644
--- a/src/lib/actions/sandbox/rebuild-shields.ts
+++ b/src/lib/actions/sandbox/rebuild-shields.ts
@@ -12,7 +12,7 @@ export interface RebuildShieldsWindow {
 export function openRebuildShieldsWindow(
   sandboxName: string,
   cliName: string,
-): RebuildShieldsWindow {
+): RebuildShieldsWindow | null {
   const window = {
     relocked: false,
     wasLocked: !shields.isShieldsDown(sandboxName),
@@ -25,7 +25,6 @@ export function openRebuildShieldsWindow(
     shields.shieldsDown(sandboxName, {
       reason: "auto-unlock for rebuild",
       skipTimer: true,
-      throwOnError: true,
     });
   } catch (err) {
     const message = err instanceof Error ? err.message : String(err);
@@ -35,11 +34,21 @@ export function openRebuildShieldsWindow(
     console.error(
       `  Run \`${cliName} ${sandboxName} shields down\` manually, then retry rebuild.`,
     );
-    throw new Error(`Failed to auto-unlock shields: ${message}`);
+    return null;
   }
   return window;
 }
 
+export function printRebuildShieldsRecovery(
+  sandboxName: string,
+  window: RebuildShieldsWindow,
+  cliName: string,
+): void {
+  if (!window.wasLocked) return;
+  console.error(`    4. Restore shields lockdown:`);
+  console.error(`       ${cliName} ${sandboxName} shields up`);
+}
+
 export function relockRebuildShieldsWindow(
   sandboxName: string,
   window: RebuildShieldsWindow,
@@ -61,7 +70,7 @@ export function relockRebuildShieldsWindow(
   console.log("");
   console.log("  Re-applying shields lockdown...");
   try {
-    shields.shieldsUp(sandboxName, { throwOnError: true });
+    shields.shieldsUp(sandboxName);
     console.log(`  ${G}✓${R} Shields restored to UP`);
     window.relocked = true;
     return true;
diff --git a/src/lib/actions/sandbox/rebuild.ts b/src/lib/actions/sandbox/rebuild.ts
index e53f8c54b5..a5a820aa1f 100644
--- a/src/lib/actions/sandbox/rebuild.ts
+++ b/src/lib/actions/sandbox/rebuild.ts
@@ -60,11 +60,7 @@ import {
 } from "../../state/sandbox-session";
 import { removeSandboxRegistryEntry } from "./destroy";
 import { executeSandboxCommand } from "./process-recovery";
-import {
-  openRebuildShieldsWindow,
-  relockRebuildShieldsWindow,
-  type RebuildShieldsWindow,
-} from "./rebuild-shields";
+import { openRebuildShieldsWindow, printRebuildShieldsRecovery, relockRebuildShieldsWindow } from "./rebuild-shields";
 
 /**
  * Emit timestamped rebuild diagnostics when verbose rebuild logging is enabled.
@@ -443,13 +439,8 @@ export async function rebuildSandbox(
     }
   }
 
-  let rebuildShieldsWindow: RebuildShieldsWindow;
-  try {
-    rebuildShieldsWindow = openRebuildShieldsWindow(sandboxName, CLI_NAME);
-  } catch (err) {
-    bail(err instanceof Error ? err.message : String(err));
-    return;
-  }
+  const rebuildShieldsWindow = openRebuildShieldsWindow(sandboxName, CLI_NAME);
+  if (!rebuildShieldsWindow) return bail("Failed to auto-unlock shields.");
 
   const relockShieldsIfNeeded = (sandboxStillExists: boolean): boolean =>
     relockRebuildShieldsWindow(
@@ -739,10 +730,7 @@ export async function rebuildSandbox(
     console.error(
       `       ${CLI_NAME} ${sandboxName} snapshot restore "${backupManifest.timestamp}"`,
     );
-    if (rebuildShieldsWindow.wasLocked) {
-      console.error(`    4. Restore shields lockdown:`);
-      console.error(`       ${CLI_NAME} ${sandboxName} shields up`);
-    }
+    printRebuildShieldsRecovery(sandboxName, rebuildShieldsWindow, CLI_NAME);
     console.error("");
     relockShieldsIfNeeded(false);
     bail(
@@ -884,15 +872,7 @@ export async function rebuildSandbox(
   });
   log(`Registry updated: agentVersion=${agentDef.expectedVersion}`);
 
-  // Step 8: Re-apply shields lockdown if it was active before rebuild (#3113).
-  // Sandbox now exists and policy presets have been re-applied, so this is
-  // the correct point to restore the restrictive policy + lock config files.
-  if (!relockShieldsIfNeeded(true)) {
-    console.error("");
-    console.error("  Sandbox rebuilt, but shields lockdown could not be restored.");
-    bail("Failed to re-apply shields lockdown.");
-    return;
-  }
+  if (!relockShieldsIfNeeded(true)) return bail("Failed to re-apply shields lockdown.");
 
   console.log("");
   if (restore.success) {
diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts
index 1262195a5f..eb9c343e2e 100644
--- a/src/lib/shields/index.ts
+++ b/src/lib/shields/index.ts
@@ -911,16 +911,10 @@ interface ShieldsDownOpts {
   reason?: string | null;
   policy?: string;
   skipTimer?: boolean;
-  throwOnError?: boolean;
 }
 
 function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
   validateName(sandboxName, "sandbox name");
-  const fail: (msg: string) => never = opts.throwOnError
-    ? (msg: string) => {
-        throw new Error(msg);
-      }
-    : (_msg: string) => process.exit(1);
 
   const state = loadShieldsState(sandboxName);
   if (state.shieldsDown) {
@@ -930,7 +924,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
     console.error(
       "  Run `nemoclaw shields up` first, or use --extend (not yet implemented).",
     );
-    return fail(`Config is already unlocked for ${sandboxName}`);
+    process.exit(1);
   }
 
   // Kill stale auto-restore markers only when this command will actually
@@ -958,7 +952,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
   const policyYaml = parseCurrentPolicy(rawPolicy);
   if (!policyYaml) {
     console.error("  Cannot capture current policy. Is the sandbox running?");
-    return fail("Cannot capture current policy");
+    process.exit(1);
   }
 
   const ts = Date.now();
@@ -990,7 +984,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
     console.error(
       `  Unknown policy "${policyName}". Use "permissive" or a path to a YAML file.`,
     );
-    return fail(`Unknown policy "${policyName}"`);
+    process.exit(1);
   }
 
   console.log(`  Applying ${policyName} policy...`);
@@ -1020,7 +1014,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
     console.error(
       `  Re-run \`nemoclaw ${sandboxName} shields down\` after correcting file ownership.`,
     );
-    return fail(message);
+    process.exit(1);
   }
 
   // 3. Update state
@@ -1125,7 +1119,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
           `  Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`,
         );
       }
-      return fail(`Cannot start auto-restore timer: ${message}`);
+      process.exit(1);
     }
   }
 
@@ -1166,17 +1160,8 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
 // hardening step that restricts the sandbox beyond its default state.
 // ---------------------------------------------------------------------------
 
-interface ShieldsUpOpts {
-  throwOnError?: boolean;
-}
-
-function shieldsUp(sandboxName: string, opts: ShieldsUpOpts = {}): void {
+function shieldsUp(sandboxName: string): void {
   validateName(sandboxName, "sandbox name");
-  const fail: (msg: string) => never = opts.throwOnError
-    ? (msg: string) => {
-        throw new Error(msg);
-      }
-    : (_msg: string) => process.exit(1);
 
   const state = loadShieldsState(sandboxName);
   // shieldsDown === false means explicitly locked by a previous shields-up.
@@ -1203,7 +1188,7 @@ function shieldsUp(sandboxName: string, opts: ShieldsUpOpts = {}): void {
     console.error(
       "  Sandbox remains unlocked; recapture shields-down state before running shields up.",
     );
-    return fail("Saved policy snapshot is missing");
+    process.exit(1);
   }
   if (snapshotPath) {
     console.log("  Restoring restrictive policy from snapshot...");
@@ -1216,7 +1201,7 @@ function shieldsUp(sandboxName: string, opts: ShieldsUpOpts = {}): void {
       console.error(
         `  Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`,
       );
-      return fail(activation.error ?? "unknown restore error");
+      process.exit(1);
     }
   } else {
     // 2b. Lock config file to read-only.
@@ -1238,7 +1223,7 @@ function shieldsUp(sandboxName: string, opts: ShieldsUpOpts = {}): void {
       console.error(
         `  Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`,
       );
-      return fail(message);
+      process.exit(1);
     }
   }
 
diff --git a/test/rebuild-shields-window.test.ts b/test/rebuild-shields-window.test.ts
index bc8cae491c..0c77e67df1 100644
--- a/test/rebuild-shields-window.test.ts
+++ b/test/rebuild-shields-window.test.ts
@@ -29,11 +29,11 @@ describe("rebuild shields window", () => {
 
     const window = openRebuildShieldsWindow("locked-sandbox", "nemoclaw");
 
-    expect(window.wasLocked).toBe(true);
+    expect(window).not.toBeNull();
+    expect(window!.wasLocked).toBe(true);
     expect(shieldsMock.shieldsDown).toHaveBeenCalledWith("locked-sandbox", {
       reason: "auto-unlock for rebuild",
       skipTimer: true,
-      throwOnError: true,
     });
   });
 
@@ -44,9 +44,7 @@ describe("rebuild shields window", () => {
 
     expect(relocked).toBe(true);
     expect(window.relocked).toBe(true);
-    expect(shieldsMock.shieldsUp).toHaveBeenCalledWith("locked-sandbox", {
-      throwOnError: true,
-    });
+    expect(shieldsMock.shieldsUp).toHaveBeenCalledWith("locked-sandbox");
 
     expect(relockRebuildShieldsWindow("locked-sandbox", window, true, "nemoclaw")).toBe(true);
     expect(shieldsMock.shieldsUp).toHaveBeenCalledTimes(1);
@@ -72,9 +70,10 @@ describe("rebuild shields window", () => {
 
     const window = openRebuildShieldsWindow("mutable-sandbox", "nemoclaw");
 
-    expect(window.wasLocked).toBe(false);
+    expect(window).not.toBeNull();
+    expect(window!.wasLocked).toBe(false);
     expect(shieldsMock.shieldsDown).not.toHaveBeenCalled();
-    expect(relockRebuildShieldsWindow("mutable-sandbox", window, true, "nemoclaw")).toBe(true);
+    expect(relockRebuildShieldsWindow("mutable-sandbox", window!, true, "nemoclaw")).toBe(true);
     expect(shieldsMock.shieldsUp).not.toHaveBeenCalled();
   });
 });

From 29a5347f7e4f433e84f3621ad521890949c758a8 Mon Sep 17 00:00:00 2001
From: Chengjie Wang <chengjiew@nvidia.com>
Date: Sat, 23 May 2026 21:41:52 +0800
Subject: [PATCH 4/6] fix(shields): roll back failed rebuild unlock

Signed-off-by: Chengjie Wang <chengjiew@nvidia.com>
---
 src/lib/shields/index.ts | 100 +++++++++++++++++++--------------------
 1 file changed, 50 insertions(+), 50 deletions(-)

diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts
index eb9c343e2e..64df66a082 100644
--- a/src/lib/shields/index.ts
+++ b/src/lib/shields/index.ts
@@ -174,6 +174,13 @@ interface ShieldsPosture {
   state: LoadedShieldsState;
 }
 
+type AgentConfigTarget = {
+  agentName?: string;
+  configPath: string;
+  configDir: string;
+  sensitiveFiles?: string[];
+};
+
 /**
  * Derive the effective shields mode from persisted state.
  *
@@ -496,12 +503,7 @@ function assertNoLegacyStateLayout(
 
 function unlockAgentConfig(
   sandboxName: string,
-  target: {
-    agentName?: string;
-    configPath: string;
-    configDir: string;
-    sensitiveFiles?: string[];
-  },
+  target: AgentConfigTarget,
 ): void {
   const errors: string[] = [];
   const filesToUnlock = [target.configPath, ...(target.sensitiveFiles || [])];
@@ -632,12 +634,7 @@ function unlockAgentConfig(
 
 function lockAgentConfig(
   sandboxName: string,
-  target: {
-    agentName?: string;
-    configPath: string;
-    configDir: string;
-    sensitiveFiles?: string[];
-  },
+  target: AgentConfigTarget,
 ): void {
   const errors: string[] = [];
   const filesToLock = [target.configPath, ...(target.sensitiveFiles || [])];
@@ -774,6 +771,45 @@ function lockAgentConfig(
   }
 }
 
+function rollbackShieldsDown(
+  sandboxName: string,
+  target: AgentConfigTarget,
+  snapshotPath: string,
+): void {
+  console.error("  Rolling back — restoring policy from snapshot...");
+  const rollbackResult = run(buildPolicySetCommand(snapshotPath, sandboxName), {
+    ignoreError: true,
+  });
+  let rollbackLocked = false;
+  if (rollbackResult.status === 0) {
+    try {
+      lockAgentConfig(sandboxName, target);
+      rollbackLocked = true;
+    } catch {
+      console.error(
+        "  Warning: Rollback re-lock could not be verified. Check config manually.",
+      );
+    }
+  } else {
+    console.error("  Warning: Policy restore failed during rollback.");
+  }
+  if (rollbackLocked) {
+    saveShieldsState(sandboxName, {
+      shieldsDown: false,
+      shieldsDownAt: null,
+      shieldsDownTimeout: null,
+      shieldsDownReason: null,
+      shieldsDownPolicy: null,
+    });
+    console.error("  Lockdown restored. Config was never left unguarded.");
+  } else {
+    console.error("  Config remains unlocked — manual intervention required.");
+    console.error(
+      `  Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`,
+    );
+  }
+}
+
 interface LockdownActivationResult {
   ok: boolean;
   error?: string;
@@ -1007,6 +1043,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
     unlockAgentConfig(sandboxName, target);
   } catch (err) {
     const message = err instanceof Error ? err.message : String(err);
+    rollbackShieldsDown(sandboxName, target, snapshotPath);
     console.error(`  ERROR: ${message}`);
     console.error(
       "  Config did not reach the mutable-default state; refusing to save shields-down state.",
@@ -1081,44 +1118,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
     } catch (err) {
       const message = err instanceof Error ? err.message : String(err);
       console.error(`  Cannot start auto-restore timer: ${message}`);
-      console.error("  Rolling back — restoring policy from snapshot...");
-      const rollbackResult = run(
-        buildPolicySetCommand(snapshotPath, sandboxName),
-        {
-          ignoreError: true,
-        },
-      );
-      let rollbackLocked = false;
-      if (rollbackResult.status === 0) {
-        try {
-          lockAgentConfig(sandboxName, target);
-          rollbackLocked = true;
-        } catch {
-          console.error(
-            "  Warning: Rollback re-lock could not be verified. Check config manually.",
-          );
-        }
-      } else {
-        console.error("  Warning: Policy restore failed during rollback.");
-      }
-      if (rollbackLocked) {
-        saveShieldsState(sandboxName, {
-          shieldsDown: false,
-          shieldsDownAt: null,
-          shieldsDownTimeout: null,
-          shieldsDownReason: null,
-          shieldsDownPolicy: null,
-        });
-        console.error("  Lockdown restored. Config was never left unguarded.");
-      } else {
-        // Leave state as shieldsDown: true — don't lie about protection level
-        console.error(
-          "  Config remains unlocked — manual intervention required.",
-        );
-        console.error(
-          `  Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`,
-        );
-      }
+      rollbackShieldsDown(sandboxName, target, snapshotPath);
       process.exit(1);
     }
   }

From 9d3f649ec4a99c1276a5411402376d853e5181fa Mon Sep 17 00:00:00 2001
From: Chengjie Wang <chengjiew@nvidia.com>
Date: Sat, 23 May 2026 21:47:41 +0800
Subject: [PATCH 5/6] fix(shields): let rebuild catch unlock failures

Signed-off-by: Chengjie Wang <chengjiew@nvidia.com>
---
 src/lib/actions/sandbox/rebuild-shields.ts |  3 +-
 src/lib/shields/index.ts                   | 32 ++++++++++++----------
 test/rebuild-shields-window.test.ts        |  5 +++-
 3 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/src/lib/actions/sandbox/rebuild-shields.ts b/src/lib/actions/sandbox/rebuild-shields.ts
index a18ceaf2d6..71d597dda2 100644
--- a/src/lib/actions/sandbox/rebuild-shields.ts
+++ b/src/lib/actions/sandbox/rebuild-shields.ts
@@ -25,6 +25,7 @@ export function openRebuildShieldsWindow(
     shields.shieldsDown(sandboxName, {
       reason: "auto-unlock for rebuild",
       skipTimer: true,
+      throwOnError: true,
     });
   } catch (err) {
     const message = err instanceof Error ? err.message : String(err);
@@ -70,7 +71,7 @@ export function relockRebuildShieldsWindow(
   console.log("");
   console.log("  Re-applying shields lockdown...");
   try {
-    shields.shieldsUp(sandboxName);
+    shields.shieldsUp(sandboxName, { throwOnError: true });
     console.log(`  ${G}✓${R} Shields restored to UP`);
     window.relocked = true;
     return true;
diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts
index 64df66a082..e69ec1ba9b 100644
--- a/src/lib/shields/index.ts
+++ b/src/lib/shields/index.ts
@@ -181,6 +181,11 @@ type AgentConfigTarget = {
   sensitiveFiles?: string[];
 };
 
+function failShieldsCommand(message: string, shouldThrow?: boolean): never {
+  if (shouldThrow) throw new Error(message);
+  process.exit(1);
+}
+
 /**
  * Derive the effective shields mode from persisted state.
  *
@@ -947,6 +952,7 @@ interface ShieldsDownOpts {
   reason?: string | null;
   policy?: string;
   skipTimer?: boolean;
+  throwOnError?: boolean;
 }
 
 function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
@@ -960,7 +966,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
     console.error(
       "  Run `nemoclaw shields up` first, or use --extend (not yet implemented).",
     );
-    process.exit(1);
+    return failShieldsCommand(`Config is already unlocked for ${sandboxName}`, opts.throwOnError);
   }
 
   // Kill stale auto-restore markers only when this command will actually
@@ -988,7 +994,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
   const policyYaml = parseCurrentPolicy(rawPolicy);
   if (!policyYaml) {
     console.error("  Cannot capture current policy. Is the sandbox running?");
-    process.exit(1);
+    return failShieldsCommand("Cannot capture current policy", opts.throwOnError);
   }
 
   const ts = Date.now();
@@ -1020,7 +1026,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
     console.error(
       `  Unknown policy "${policyName}". Use "permissive" or a path to a YAML file.`,
     );
-    process.exit(1);
+    return failShieldsCommand(`Unknown policy "${policyName}"`, opts.throwOnError);
   }
 
   console.log(`  Applying ${policyName} policy...`);
@@ -1051,7 +1057,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
     console.error(
       `  Re-run \`nemoclaw ${sandboxName} shields down\` after correcting file ownership.`,
     );
-    process.exit(1);
+    return failShieldsCommand(message, opts.throwOnError);
   }
 
   // 3. Update state
@@ -1069,11 +1075,6 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
   //    Pass the absolute restore time, not a relative timeout. Steps 1-2b
   //    can take minutes (policy apply + kubectl chmod), so a relative timeout
   //    passed at fork time would fire too early.
-  //
-  //    skipTimer is used by `rebuild` (#3113), which re-locks via shieldsUp
-  //    immediately after backup/recreate. Forking a detached timer in that
-  //    flow risks racing the auto-restore against a destroyed-then-recreated
-  //    sandbox.
   if (!opts.skipTimer) {
     const restoreAt = new Date(Date.now() + timeoutSeconds * 1000);
     const processToken = randomBytes(16).toString("hex");
@@ -1119,7 +1120,10 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
       const message = err instanceof Error ? err.message : String(err);
       console.error(`  Cannot start auto-restore timer: ${message}`);
       rollbackShieldsDown(sandboxName, target, snapshotPath);
-      process.exit(1);
+      return failShieldsCommand(
+        `Cannot start auto-restore timer: ${message}`,
+        opts.throwOnError,
+      );
     }
   }
 
@@ -1160,7 +1164,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
 // hardening step that restricts the sandbox beyond its default state.
 // ---------------------------------------------------------------------------
 
-function shieldsUp(sandboxName: string): void {
+function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): void {
   validateName(sandboxName, "sandbox name");
 
   const state = loadShieldsState(sandboxName);
@@ -1188,7 +1192,7 @@ function shieldsUp(sandboxName: string): void {
     console.error(
       "  Sandbox remains unlocked; recapture shields-down state before running shields up.",
     );
-    process.exit(1);
+    return failShieldsCommand("Saved policy snapshot is missing", opts.throwOnError);
   }
   if (snapshotPath) {
     console.log("  Restoring restrictive policy from snapshot...");
@@ -1201,7 +1205,7 @@ function shieldsUp(sandboxName: string): void {
       console.error(
         `  Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`,
       );
-      process.exit(1);
+      return failShieldsCommand(activation.error ?? "unknown restore error", opts.throwOnError);
     }
   } else {
     // 2b. Lock config file to read-only.
@@ -1223,7 +1227,7 @@ function shieldsUp(sandboxName: string): void {
       console.error(
         `  Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`,
       );
-      process.exit(1);
+      return failShieldsCommand(message, opts.throwOnError);
     }
   }
 
diff --git a/test/rebuild-shields-window.test.ts b/test/rebuild-shields-window.test.ts
index 0c77e67df1..5e1328acf7 100644
--- a/test/rebuild-shields-window.test.ts
+++ b/test/rebuild-shields-window.test.ts
@@ -34,6 +34,7 @@ describe("rebuild shields window", () => {
     expect(shieldsMock.shieldsDown).toHaveBeenCalledWith("locked-sandbox", {
       reason: "auto-unlock for rebuild",
       skipTimer: true,
+      throwOnError: true,
     });
   });
 
@@ -44,7 +45,9 @@ describe("rebuild shields window", () => {
 
     expect(relocked).toBe(true);
     expect(window.relocked).toBe(true);
-    expect(shieldsMock.shieldsUp).toHaveBeenCalledWith("locked-sandbox");
+    expect(shieldsMock.shieldsUp).toHaveBeenCalledWith("locked-sandbox", {
+      throwOnError: true,
+    });
 
     expect(relockRebuildShieldsWindow("locked-sandbox", window, true, "nemoclaw")).toBe(true);
     expect(shieldsMock.shieldsUp).toHaveBeenCalledTimes(1);

From 65d77ce2ad03e4481f26c788a93d26ce6a8b92f0 Mon Sep 17 00:00:00 2001
From: Aaron Erickson <aerickson@nvidia.com>
Date: Sun, 24 May 2026 13:36:21 -0700
Subject: [PATCH 6/6] fix(rebuild): relock shields after unexpected errors

Signed-off-by: Aaron Erickson <aerickson@nvidia.com>
---
 .../sandbox/rebuild-shields-finally.test.ts   | 106 ++++++++++++++++++
 src/lib/actions/sandbox/rebuild.ts            |  13 +++
 2 files changed, 119 insertions(+)
 create mode 100644 src/lib/actions/sandbox/rebuild-shields-finally.test.ts

diff --git a/src/lib/actions/sandbox/rebuild-shields-finally.test.ts b/src/lib/actions/sandbox/rebuild-shields-finally.test.ts
new file mode 100644
index 0000000000..7b1c00b7ef
--- /dev/null
+++ b/src/lib/actions/sandbox/rebuild-shields-finally.test.ts
@@ -0,0 +1,106 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { createRequire } from "node:module";
+
+import { afterEach, beforeEach, describe, expect, it, type MockInstance, vi } from "vitest";
+
+type RebuildSandbox = typeof import("../../../../dist/lib/actions/sandbox/rebuild")["rebuildSandbox"];
+
+const requireDist = createRequire(import.meta.url);
+const rebuildModulePath = "../../../../dist/lib/actions/sandbox/rebuild.js";
+
+describe("rebuild shields relock guard", () => {
+  let rebuildSandbox: RebuildSandbox;
+  let spies: MockInstance[];
+  let errorSpy: MockInstance;
+  let logSpy: MockInstance;
+  let relockSpy: MockInstance;
+  const rebuildWindow = { relocked: false, wasLocked: true };
+
+  beforeEach(() => {
+    spies = [];
+    rebuildWindow.relocked = false;
+    delete require.cache[requireDist.resolve(rebuildModulePath)];
+
+    errorSpy = vi.spyOn(console, "error").mockImplementation(() => undefined);
+    logSpy = vi.spyOn(console, "log").mockImplementation(() => undefined);
+
+    const gatewayDrift = requireDist("../../../../dist/lib/adapters/openshell/gateway-drift.js");
+    const gatewayRuntime = requireDist("../../../../dist/lib/gateway-runtime-action.js");
+    const sandboxList = requireDist("../../../../dist/lib/openshell-sandbox-list.js");
+    const resolve = requireDist("../../../../dist/lib/adapters/openshell/resolve.js");
+    const agentRuntime = requireDist("../../../../dist/lib/agent/runtime.js");
+    const onboardSession = requireDist("../../../../dist/lib/state/onboard-session.js");
+    const registry = requireDist("../../../../dist/lib/state/registry.js");
+    const sandboxState = requireDist("../../../../dist/lib/state/sandbox.js");
+    const sandboxSession = requireDist("../../../../dist/lib/state/sandbox-session.js");
+    const sandboxVersion = requireDist("../../../../dist/lib/sandbox/version.js");
+    const rebuildShields = requireDist("../../../../dist/lib/actions/sandbox/rebuild-shields.js");
+
+    relockSpy = vi
+      .spyOn(rebuildShields, "relockRebuildShieldsWindow")
+      .mockImplementation((...args: unknown[]) => {
+        const window = args[1] as typeof rebuildWindow;
+        window.relocked = true;
+        return true;
+      });
+
+    spies.push(
+      vi.spyOn(gatewayDrift, "detectOpenShellStateRpcPreflightIssue").mockReturnValue(null),
+      vi.spyOn(gatewayDrift, "detectOpenShellStateRpcResultIssue").mockReturnValue(null),
+      vi.spyOn(gatewayRuntime, "recoverNamedGatewayRuntime").mockResolvedValue({ recovered: false }),
+      vi.spyOn(sandboxList, "captureSandboxListWithGatewayRecovery").mockResolvedValue({
+        result: { status: 0, output: "alpha Ready" },
+      }),
+      vi.spyOn(resolve, "resolveOpenshell").mockReturnValue(null),
+      vi.spyOn(agentRuntime, "getSessionAgent").mockReturnValue(null),
+      vi.spyOn(agentRuntime, "getAgentDisplayName").mockReturnValue("OpenClaw"),
+      vi.spyOn(onboardSession, "loadSession").mockReturnValue(null),
+      vi.spyOn(registry, "getSandbox").mockReturnValue({
+        name: "alpha",
+        provider: "ollama-local",
+        model: "nvidia/nemotron",
+        policies: [],
+        agent: null,
+        nimContainer: null,
+      } as never),
+      vi.spyOn(sandboxSession, "getActiveSandboxSessions").mockReturnValue({
+        detected: false,
+        sessions: [],
+      }),
+      vi.spyOn(sandboxVersion, "checkAgentVersion").mockReturnValue({
+        expectedVersion: "0.1.0",
+        sandboxVersion: "0.0.1",
+      } as never),
+      vi.spyOn(rebuildShields, "openRebuildShieldsWindow").mockReturnValue(rebuildWindow),
+      relockSpy,
+      vi.spyOn(sandboxState, "backupSandboxState").mockImplementation(() => {
+        throw new Error("unexpected backup exception");
+      }),
+    );
+
+    ({ rebuildSandbox } = requireDist(rebuildModulePath));
+  });
+
+  afterEach(() => {
+    for (const spy of spies) spy.mockRestore();
+    errorSpy.mockRestore();
+    logSpy.mockRestore();
+    delete require.cache[requireDist.resolve(rebuildModulePath)];
+  });
+
+  it("relocks shields when an unexpected exception escapes after auto-unlock", async () => {
+    await expect(rebuildSandbox("alpha", ["--yes"], { throwOnError: true })).rejects.toThrow(
+      "unexpected backup exception",
+    );
+
+    expect(relockSpy).toHaveBeenCalledWith(
+      "alpha",
+      rebuildWindow,
+      true,
+      expect.any(String),
+    );
+    expect(rebuildWindow.relocked).toBe(true);
+  });
+});
diff --git a/src/lib/actions/sandbox/rebuild.ts b/src/lib/actions/sandbox/rebuild.ts
index a5a820aa1f..9dfa54a5f3 100644
--- a/src/lib/actions/sandbox/rebuild.ts
+++ b/src/lib/actions/sandbox/rebuild.ts
@@ -450,6 +450,9 @@ export async function rebuildSandbox(
       CLI_NAME,
     );
 
+  let sandboxStillExists = true;
+
+  try {
   // Step 2: Backup
   console.log("  Backing up sandbox state...");
   log(`Agent type: ${sb.agent || "openclaw"}, stateDirs from manifest`);
@@ -533,6 +536,7 @@ export async function rebuildSandbox(
     bail("Failed to delete sandbox.", deleteResult.status || 1);
     return;
   }
+  sandboxStillExists = false;
   removeSandboxRegistryEntry(sandboxName);
   log(
     `Registry after remove: ${JSON.stringify(registry.listSandboxes().sandboxes.map((s: { name: string }) => s.name))}`,
@@ -698,6 +702,10 @@ export async function rebuildSandbox(
     process.exit = _savedExit;
   }
 
+  if (!onboardFailed) {
+    sandboxStillExists = true;
+  }
+
   if (onboardFailed) {
     // Clean up onboard's internal state that normally runs in
     // process.once("exit") listeners — those never fire because we
@@ -886,4 +894,9 @@ export async function rebuildSandbox(
     );
     console.log(`    Backup available at: ${backupManifest.backupPath}`);
   }
+  } finally {
+    if (!rebuildShieldsWindow.relocked) {
+      relockShieldsIfNeeded(sandboxStillExists);
+    }
+  }
 }