From 3a7a1e42e0f6845847470cd9b3710300af2fe8f7 Mon Sep 17 00:00:00 2001 From: Ziyang Guo <121015044+RunMarshal@users.noreply.github.com> Date: Fri, 3 Jul 2026 16:10:41 +0800 Subject: [PATCH] test: make multi-worker bob setup deterministic --- tests/test-06-multi-worker.sh | 43 ++++++++++++++--------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/tests/test-06-multi-worker.sh b/tests/test-06-multi-worker.sh index b2c8c59bd..b95edbb94 100755 --- a/tests/test-06-multi-worker.sh +++ b/tests/test-06-multi-worker.sh @@ -39,32 +39,23 @@ wait_for_manager_agent_ready 300 "${DM_ROOM}" "${ADMIN_TOKEN}" || { wait_for_worker_container "alice" 60 METRICS_BASELINE=$(snapshot_baseline "alice" "bob") TEST_WORKER_RUNTIME="${HICLAW_DEFAULT_WORKER_RUNTIME:-openclaw}" -# worker-management/SKILL.md tells Manager to ask admin for FOUR inputs -# (name / runtime / SOUL / skills) before running `hiclaw create worker` -# and not to invent defaults. A vague prompt that only names the worker is -# therefore a coin flip — sometimes Manager replies with a confirmation -# request, never calls the CLI, and the consumer/SOUL.md polls below -# silently time out. Spell out all four inputs and tell Manager to skip -# confirmation so this test exercises actual Worker creation. -# -# The runtime is explicit because the CI matrix runtime is the source of truth; -# rendered Manager workspace text may contain fallback defaults. -matrix_send_message "${ADMIN_TOKEN}" "${DM_ROOM}" \ - "Please create a new Worker now using these exact values — do not ask me to confirm any of them: -- name: bob -- runtime: ${TEST_WORKER_RUNTIME} (use this exact runtime; do not reinterpret it as the install default) -- SOUL/role: Backend developer specializing in REST APIs, server-side logic, and data persistence -- skills: github-operations (file-sync / task-progress / project-participation are auto-included, no need to ask) - -Proceed immediately and tell me when he is created." - -log_info "Waiting for Manager to create Worker Bob..." -REPLY=$(matrix_wait_for_reply_matching "${ADMIN_TOKEN}" "${DM_ROOM}" "@manager" \ - "bob.*(accepted|created|creating|pending|running)" 300 \ - "${ADMIN_TOKEN}" "${DM_ROOM}" "Please check if the request to create worker bob has been processed.") - -assert_not_empty "${REPLY}" "Manager replied to create bob request" -assert_contains_i "${REPLY}" "bob" "Reply mentions worker name 'bob'" +# This test verifies multi-worker collaboration after Alice and Bob exist. Bob +# creation must be deterministic setup: asking the Manager to create him via DM +# is flaky under CI because the Manager may still be processing heartbeat/task +# follow-ups from earlier shard-A tests, or may hit a tool-guard prompt before +# calling `hiclaw create worker`. Use the controller CLI directly here so the +# collaboration assertions below measure the multi-worker flow, not LLM timing. +BOB_SOUL="Backend developer specializing in REST APIs, server-side logic, and data persistence" +log_info "Creating Worker Bob via hiclaw CLI (runtime: ${TEST_WORKER_RUNTIME})..." +CREATE_OUTPUT=$(exec_in_agent hiclaw apply worker --name bob \ + --runtime "${TEST_WORKER_RUNTIME}" \ + --soul "${BOB_SOUL}" \ + --skills github-operations 2>&1) +if echo "${CREATE_OUTPUT}" | grep -qiE "worker/bob (created|configured)"; then + log_pass "hiclaw apply worker bob accepted" +else + log_fail "hiclaw apply worker bob failed: ${CREATE_OUTPUT}" +fi # Verify Bob's infrastructure. Worker creation is asynchronous, so wait on # persisted provisioning state and gateway side effects instead of sleeping.