From da806928ef7a9027ae7aacf01db275857650715f Mon Sep 17 00:00:00 2001 From: Ziyang Guo <121015044+RunMarshal@users.noreply.github.com> Date: Thu, 2 Jul 2026 15:58:42 +0800 Subject: [PATCH] fix(manager): avoid top-level `local` failure during restart recovery --- changelog/current.md | 1 + manager/scripts/init/start-manager-agent.sh | 3 +- manager/tests/test-manager-startup-script.sh | 48 ++++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) create mode 100755 manager/tests/test-manager-startup-script.sh diff --git a/changelog/current.md b/changelog/current.md index 4b43a6ca..d340b235 100644 --- a/changelog/current.md +++ b/changelog/current.md @@ -6,6 +6,7 @@ Record image-affecting changes to `manager/`, `worker/`, `copaw/`, `openclaw-bas - fix(agent): update file-sharing path guidance for CoPaw and Team Leader agents to use `/root/hiclaw-fs/agents/...` instead of the retired `/root/.hiclaw-worker/...` path. - feat(controller): add per-agent `spec.resources` support for Manager, Worker, Team Leader, and Team Worker CRDs. +- fix(manager): prevent Manager restart recovery from exiting on a top-level `local` declaration while recreating missing Worker containers. - **OpenHuman runtime**: OpenHuman added as the fourth Worker runtime with native Matrix support via `channel-matrix` feature flag; includes controller routing (K8s + Docker backends), Dockerfile, entrypoint script, agent template, Helm chart integration, and Makefile build targets. - **Multi model providers**: Worker, Team, and Manager specs can now select a Higress model provider via `spec.modelProvider`; the controller resolves the provider, injects the matching gateway URL into runtime config, and authorizes consumers only on the selected AI route. 
diff --git a/manager/scripts/init/start-manager-agent.sh b/manager/scripts/init/start-manager-agent.sh index c712e538..32684239 100755 --- a/manager/scripts/init/start-manager-agent.sh +++ b/manager/scripts/init/start-manager-agent.sh @@ -1032,7 +1032,8 @@ if container_api_available; then _runtime=$(jq -r --arg w "${_worker_name}" '.workers[$w].runtime // "openclaw"' "${REGISTRY_FILE}" 2>/dev/null) _recreated=false for _attempt in 1 2 3; do - local _env_map _create_body + _env_map="" + _create_body="" _env_map=$(jq -cn \ --arg name "${_worker_name}" \ --arg fak "${_worker_name}" \ diff --git a/manager/tests/test-manager-startup-script.sh b/manager/tests/test-manager-startup-script.sh new file mode 100755 index 00000000..ad3e48a2 --- /dev/null +++ b/manager/tests/test-manager-startup-script.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Regression tests for manager/scripts/init/start-manager-agent.sh. +# +# Usage: bash manager/tests/test-manager-startup-script.sh + +set -uo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" +START_SCRIPT="${PROJECT_ROOT}/manager/scripts/init/start-manager-agent.sh" + +PASS=0 +FAIL=0 + +pass() { echo " PASS: $1"; PASS=$((PASS + 1)); } +fail() { echo " FAIL: $1"; echo " $2"; FAIL=$((FAIL + 1)); } + +echo "" +echo "=== TC1: startup script has valid bash syntax ===" +if bash -n "${START_SCRIPT}"; then + pass "bash -n start-manager-agent.sh" +else + fail "bash -n start-manager-agent.sh" "syntax check failed" +fi + +echo "" +echo "=== TC2: Worker restart recovery block has no top-level local declarations ===" +bad_lines=$( + awk ' + /Recreate Worker containers as needed after Manager restart/ { in_block = 1; next } + in_block && /Builtin files \(AGENTS.md, skills\)/ { in_block = 0 } + in_block && /^[[:space:]]*local[[:space:]]/ { print NR ":" $0 } + ' "${START_SCRIPT}" +) +if [ -z "${bad_lines}" ]; then + pass "no local declarations in top-level restart recovery block" +else + fail "no local declarations in top-level restart recovery block" "${bad_lines}" +fi + +echo "" +echo "=== Summary ===" +echo "PASS: ${PASS}" +echo "FAIL: ${FAIL}" + +if [ "${FAIL}" -ne 0 ]; then + exit 1 +fi