From ca6ab5cd6024b650bb1530ab4be2dcadea571cec Mon Sep 17 00:00:00 2001 From: Hung Le Date: Fri, 12 Jun 2026 01:08:26 +0000 Subject: [PATCH 1/2] fix(e2e): restore macOS Docker VM compat comment after onboard refactor Commit 27ae4c3 extracted patchStagedDockerfile() from onboard.ts into sandbox-dockerfile-patch-flow.ts but dropped the design-intent comment that documents why darwinVmCompat=false for Docker builds. The openshell-gateway-upgrade-e2e test greps for this comment in onboard.ts as a design guard. Restore the comment in the new file and update the test to grep the correct path. Signed-off-by: Hung Le --- src/lib/onboard/sandbox-dockerfile-patch-flow.ts | 2 ++ test/e2e/test-openshell-gateway-upgrade.sh | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lib/onboard/sandbox-dockerfile-patch-flow.ts b/src/lib/onboard/sandbox-dockerfile-patch-flow.ts index c39474e16f..3ee68d77a1 100644 --- a/src/lib/onboard/sandbox-dockerfile-patch-flow.ts +++ b/src/lib/onboard/sandbox-dockerfile-patch-flow.ts @@ -145,6 +145,8 @@ export async function prepareSandboxDockerfilePatch({ preferredInferenceApi, webSearchConfig, resolved ? resolved.ref : null, + // Docker-on-Colima uses normal container ownership; keep the old VM chmod + // compatibility path disabled unless a future VM-specific flow opts in. false, null, hermesToolGateways, diff --git a/test/e2e/test-openshell-gateway-upgrade.sh b/test/e2e/test-openshell-gateway-upgrade.sh index 327c096c66..dc5040c2fb 100755 --- a/test/e2e/test-openshell-gateway-upgrade.sh +++ b/test/e2e/test-openshell-gateway-upgrade.sh @@ -450,7 +450,7 @@ exercise_macos_docker_rootfs_permission_regression() { || fail "Dockerfile is missing the macOS VM rootfs compatibility ARG" grep -Fq "ARG NEMOCLAW_DARWIN_VM_COMPAT=\${sanitizeDockerArg(darwinVmCompat ? 
\"1\" : \"0\")}" src/lib/onboard/dockerfile-patch.ts \ || fail "Dockerfile patch helper does not patch the macOS VM rootfs compatibility ARG" - grep -Fq "Docker-on-Colima uses normal container ownership" src/lib/onboard.ts \ + grep -Fq "Docker-on-Colima uses normal container ownership" src/lib/onboard/sandbox-dockerfile-patch-flow.ts \ || fail "onboard does not keep macOS Docker sandbox builds out of the VM rootfs compatibility path" grep -q "chmod -R a+rwX /sandbox/.openclaw" Dockerfile \ || fail "Dockerfile does not relax OpenClaw state permissions for macOS VM rootfs remapping" From 406800f8a845eff6c6258f92b0422ba1b73ae3e1 Mon Sep 17 00:00:00 2001 From: Hung Le Date: Fri, 12 Jun 2026 01:12:42 +0000 Subject: [PATCH 2/2] fix(agent): regenerate proxy-env.sh guard chain during recovery (#2701) When /tmp/nemoclaw-proxy-env.sh is missing at recovery time (e.g. after a pod recreate), the recovery script now scans for preload .js files that still exist on disk and regenerates a minimal proxy-env.sh with the corresponding NODE_OPTIONS --require entries. This prevents the @homebridge/ciao crash loop on aarch64 / DGX Spark where the gateway respawns without the ciao-network-guard preload and hits an unhandled os.networkInterfaces() exception. Both the OpenClaw and non-OpenClaw agent recovery paths are fixed. 
NOTE(review): besides the runtime.ts change, this patch also reverts PATCH 1/2 - it removes the Docker-on-Colima comment from src/lib/onboard/sandbox-dockerfile-patch-flow.ts again and points the e2e grep in test/e2e/test-openshell-gateway-upgrade.sh back at src/lib/onboard.ts. Confirm the revert is intended; otherwise drop those two hunks from this patch. Signed-off-by: Hung Le --- src/lib/agent/runtime.ts | 13 +++++++++++-- src/lib/onboard/sandbox-dockerfile-patch-flow.ts | 2 -- test/e2e/test-openshell-gateway-upgrade.sh | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/lib/agent/runtime.ts b/src/lib/agent/runtime.ts index ef79716e78..596b76c37b 100644 --- a/src/lib/agent/runtime.ts +++ b/src/lib/agent/runtime.ts @@ -202,7 +202,12 @@ export function buildOpenClawRecoveryScript(port: number): string { buildGatewayLogSelection(), `_GATEWAY_PROC_PATTERN=${shellQuote(staleGatewayPattern)};`, 'if [ -n "$_GATEWAY_PROC_PATTERN" ]; then pkill -TERM -f "$_GATEWAY_PROC_PATTERN" 2>/dev/null || true; for _i in 1 2 3 4 5; do pgrep -f "$_GATEWAY_PROC_PATTERN" >/dev/null 2>&1 || break; sleep 1; done; pkill -KILL -f "$_GATEWAY_PROC_PATTERN" 2>/dev/null || true; for _i in 1 2 3 4 5; do pgrep -f "$_GATEWAY_PROC_PATTERN" >/dev/null 2>&1 || break; sleep 1; done; if pgrep -f "$_GATEWAY_PROC_PATTERN" >/dev/null 2>&1; then echo GATEWAY_STALE_PROCESSES; exit 1; fi; fi;', - '[ "$_PE_MISSING" = "1" ] && { _W="[gateway-recovery] WARNING: /tmp/nemoclaw-proxy-env.sh missing - gateway launching without library guards (#2478)"; echo "$_W" >&2; echo "$_W" >> "$_GATEWAY_LOG"; };', + // #2701: When proxy-env.sh is missing, attempt to regenerate it from the + // preload scripts that still exist on disk rather than launching naked. + // This prevents the @homebridge/ciao crash loop on aarch64 / DGX Spark. 
+ '[ "$_PE_MISSING" = "1" ] && { _W="[gateway-recovery] WARNING: /tmp/nemoclaw-proxy-env.sh missing — attempting guard chain regeneration (#2701)"; echo "$_W" >&2; echo "$_W" >> "$_GATEWAY_LOG"; _REGEN_OK=0; _REGEN_OPTS=""; for _f in /tmp/nemoclaw-sandbox-safety-net.js /tmp/nemoclaw-ciao-network-guard.js /tmp/nemoclaw-http-proxy-fix.js /tmp/nemoclaw-nemotron-inference-fix.js /tmp/nemoclaw-ws-proxy-fix.js /tmp/nemoclaw-seccomp-guard.js /tmp/nemoclaw-slack-channel-guard.js /tmp/nemoclaw-telegram-diagnostics.js; do [ -f "$_f" ] && _REGEN_OPTS="${_REGEN_OPTS:+$_REGEN_OPTS }--require $_f"; done; if [ -n "$_REGEN_OPTS" ]; then printf "export NODE_OPTIONS=\\"%s\\"\\n" "$_REGEN_OPTS" > /tmp/nemoclaw-proxy-env.sh && chmod 444 /tmp/nemoclaw-proxy-env.sh && . /tmp/nemoclaw-proxy-env.sh && _PE_MISSING=0 && _REGEN_OK=1; _R="[gateway-recovery] INFO: regenerated proxy-env.sh with guards: $_REGEN_OPTS"; echo "$_R" >&2; echo "$_R" >> "$_GATEWAY_LOG"; fi; [ "$_REGEN_OK" = "0" ] && { _W2="[gateway-recovery] WARNING: no preload scripts found in /tmp — launching without library guards (#2478)"; echo "$_W2" >&2; echo "$_W2" >> "$_GATEWAY_LOG"; }; };', + // Re-check guards after potential regeneration. 
+ 'if [ "$_PE_MISSING" = "0" ]; then case "${NODE_OPTIONS:-}" in *nemoclaw-sandbox-safety-net*) _SN_MISSING=0 ;; *) _SN_MISSING=1 ;; esac; case "${NODE_OPTIONS:-}" in *nemoclaw-ciao-network-guard*) _CIAO_MISSING=0 ;; *) _CIAO_MISSING=1 ;; esac; if [ "$_SN_MISSING" = "0" ] && [ "$_CIAO_MISSING" = "0" ]; then _GUARDS_MISSING=0; else _GUARDS_MISSING=1; fi; fi;', '[ "$_PE_MISSING" = "0" ] && [ "$_GUARDS_MISSING" = "1" ] && { _E="[gateway-recovery] ERROR: /tmp/nemoclaw-proxy-env.sh present but NODE_OPTIONS missing safety-net preload or ciao preload - refusing unguarded gateway relaunch (#2478)"; echo "$_E" >&2; echo "$_E" >> "$_GATEWAY_LOG"; exit 1; };', 'OPENCLAW="$(command -v openclaw)";', 'if [ -z "$OPENCLAW" ]; then echo OPENCLAW_MISSING; exit 1; fi;', @@ -276,7 +281,11 @@ export function buildRecoveryScript( ...validationSteps, "if [ -r /tmp/nemoclaw-proxy-env.sh ]; then . /tmp/nemoclaw-proxy-env.sh; _PE_MISSING=0; else _PE_MISSING=1; fi;", 'if [ "$_PE_MISSING" = "0" ]; then case "${NODE_OPTIONS:-}" in *nemoclaw-sandbox-safety-net*) _SN_MISSING=0 ;; *) _SN_MISSING=1 ;; esac; case "${NODE_OPTIONS:-}" in *nemoclaw-ciao-network-guard*) _CIAO_MISSING=0 ;; *) _CIAO_MISSING=1 ;; esac; if [ "$_SN_MISSING" = "0" ] && [ "$_CIAO_MISSING" = "0" ]; then _GUARDS_MISSING=0; else _GUARDS_MISSING=1; fi; else _GUARDS_MISSING=0; fi;', - '[ "$_PE_MISSING" = "1" ] && { _W="[gateway-recovery] WARNING: /tmp/nemoclaw-proxy-env.sh missing - gateway launching without library guards (#2478)"; echo "$_W" >&2; echo "$_W" >> "$_GATEWAY_LOG"; };', + // #2701: When proxy-env.sh is missing, attempt to regenerate it from the + // preload scripts that still exist on disk rather than launching naked. 
+ '[ "$_PE_MISSING" = "1" ] && { _W="[gateway-recovery] WARNING: /tmp/nemoclaw-proxy-env.sh missing — attempting guard chain regeneration (#2701)"; echo "$_W" >&2; echo "$_W" >> "$_GATEWAY_LOG"; _REGEN_OK=0; _REGEN_OPTS=""; for _f in /tmp/nemoclaw-sandbox-safety-net.js /tmp/nemoclaw-ciao-network-guard.js /tmp/nemoclaw-http-proxy-fix.js /tmp/nemoclaw-nemotron-inference-fix.js /tmp/nemoclaw-ws-proxy-fix.js /tmp/nemoclaw-seccomp-guard.js /tmp/nemoclaw-slack-channel-guard.js /tmp/nemoclaw-telegram-diagnostics.js; do [ -f "$_f" ] && _REGEN_OPTS="${_REGEN_OPTS:+$_REGEN_OPTS }--require $_f"; done; if [ -n "$_REGEN_OPTS" ]; then printf "export NODE_OPTIONS=\\"%s\\"\\n" "$_REGEN_OPTS" > /tmp/nemoclaw-proxy-env.sh && chmod 444 /tmp/nemoclaw-proxy-env.sh && . /tmp/nemoclaw-proxy-env.sh && _PE_MISSING=0 && _REGEN_OK=1; _R="[gateway-recovery] INFO: regenerated proxy-env.sh with guards: $_REGEN_OPTS"; echo "$_R" >&2; echo "$_R" >> "$_GATEWAY_LOG"; fi; [ "$_REGEN_OK" = "0" ] && { _W2="[gateway-recovery] WARNING: no preload scripts found in /tmp — launching without library guards (#2478)"; echo "$_W2" >&2; echo "$_W2" >> "$_GATEWAY_LOG"; }; };', + // Re-check guards after potential regeneration. + 'if [ "$_PE_MISSING" = "0" ]; then case "${NODE_OPTIONS:-}" in *nemoclaw-sandbox-safety-net*) _SN_MISSING=0 ;; *) _SN_MISSING=1 ;; esac; case "${NODE_OPTIONS:-}" in *nemoclaw-ciao-network-guard*) _CIAO_MISSING=0 ;; *) _CIAO_MISSING=1 ;; esac; if [ "$_SN_MISSING" = "0" ] && [ "$_CIAO_MISSING" = "0" ]; then _GUARDS_MISSING=0; else _GUARDS_MISSING=1; fi; fi;', '[ "$_PE_MISSING" = "0" ] && [ "$_GUARDS_MISSING" = "1" ] && { _E="[gateway-recovery] ERROR: /tmp/nemoclaw-proxy-env.sh present but NODE_OPTIONS missing safety-net preload or ciao preload - refusing unguarded gateway relaunch (#2478)"; echo "$_E" >&2; echo "$_E" >> "$_GATEWAY_LOG"; exit 1; };', ...(isHermes ? 
[buildHermesRuntimeEnvBoundaryGuard()] : []), launchCommand, diff --git a/src/lib/onboard/sandbox-dockerfile-patch-flow.ts b/src/lib/onboard/sandbox-dockerfile-patch-flow.ts index 3ee68d77a1..c39474e16f 100644 --- a/src/lib/onboard/sandbox-dockerfile-patch-flow.ts +++ b/src/lib/onboard/sandbox-dockerfile-patch-flow.ts @@ -145,8 +145,6 @@ export async function prepareSandboxDockerfilePatch({ preferredInferenceApi, webSearchConfig, resolved ? resolved.ref : null, - // Docker-on-Colima uses normal container ownership; keep the old VM chmod - // compatibility path disabled unless a future VM-specific flow opts in. false, null, hermesToolGateways, diff --git a/test/e2e/test-openshell-gateway-upgrade.sh b/test/e2e/test-openshell-gateway-upgrade.sh index dc5040c2fb..327c096c66 100755 --- a/test/e2e/test-openshell-gateway-upgrade.sh +++ b/test/e2e/test-openshell-gateway-upgrade.sh @@ -450,7 +450,7 @@ exercise_macos_docker_rootfs_permission_regression() { || fail "Dockerfile is missing the macOS VM rootfs compatibility ARG" grep -Fq "ARG NEMOCLAW_DARWIN_VM_COMPAT=\${sanitizeDockerArg(darwinVmCompat ? \"1\" : \"0\")}" src/lib/onboard/dockerfile-patch.ts \ || fail "Dockerfile patch helper does not patch the macOS VM rootfs compatibility ARG" - grep -Fq "Docker-on-Colima uses normal container ownership" src/lib/onboard/sandbox-dockerfile-patch-flow.ts \ + grep -Fq "Docker-on-Colima uses normal container ownership" src/lib/onboard.ts \ || fail "onboard does not keep macOS Docker sandbox builds out of the VM rootfs compatibility path" grep -q "chmod -R a+rwX /sandbox/.openclaw" Dockerfile \ || fail "Dockerfile does not relax OpenClaw state permissions for macOS VM rootfs remapping"