From 4059f2ac96f24de61f27fa6932ace53636e27cae Mon Sep 17 00:00:00 2001 From: TianyeDong Date: Fri, 29 May 2026 23:51:18 -0400 Subject: [PATCH] fix(miles): keep health checks in admission mode --- miles/router/router.py | 11 +-------- tests/test_partial_sleep_wake.py | 41 +++++++++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/miles/router/router.py b/miles/router/router.py index 63b95a5fa7..37a9d86638 100644 --- a/miles/router/router.py +++ b/miles/router/router.py @@ -145,16 +145,7 @@ async def _health_check_loop(self): try: await asyncio.sleep(interval) - # Probe only enabled, non-dead workers. Disabled workers are - # parked (no traffic should reach them); failing health on - # them would fight the F2 disable lifecycle. Health-driven - # quarantine still applies once an enabled worker accumulates - # enough consecutive failures. - # - # Legacy / test compat: when admission has never been declared - # (``enabled_workers`` is empty), fall back to probing the - # full registry the way the pre-iter-6 router did. - if self.enabled_workers: + if self._admission_declared: urls = [u for u in self.enabled_workers if u not in self.dead_workers] else: urls = [u for u in self.worker_request_counts if u not in self.dead_workers] diff --git a/tests/test_partial_sleep_wake.py b/tests/test_partial_sleep_wake.py index d22f8ae951..b94ad5cb9b 100644 --- a/tests/test_partial_sleep_wake.py +++ b/tests/test_partial_sleep_wake.py @@ -20,6 +20,8 @@ import os import sys +import asyncio +import types import unittest from unittest import mock @@ -59,7 +61,24 @@ class TestRouterAdmissionLifecycle(unittest.TestCase): """ def _build_router(self): - from miles.router.router import MilesRouter + ray_stub = types.ModuleType("ray") + ray_stub.remote = lambda *args, **kwargs: ( + args[0] if args and callable(args[0]) and not kwargs else lambda obj: obj + ) + ray_util_stub = types.ModuleType("ray.util") + scheduling_stub = types.ModuleType("ray.util.scheduling_strategies") + scheduling_stub.NodeAffinitySchedulingStrategy = object + + with mock.patch.dict( + sys.modules, + { + "ray": ray_stub, + "ray.util": ray_util_stub, + "ray.util.scheduling_strategies": scheduling_stub, + }, + ): + from miles.router.router import MilesRouter + self.router_module = sys.modules[MilesRouter.__module__] args = mock.Mock() args.miles_router_max_connections = 8 @@ -96,6 +115,26 @@ def test_remove_worker_drops_all_state(self): self.assertNotIn("http://w1:8000", router.enabled_workers) self.assertNotIn("http://w1:8000", router.worker_engine_index_map) + def test_health_check_does_not_probe_disabled_workers_when_zero_active(self): + router = self._build_router() + router._add_worker_internal("http://w1:8000", engine_index=0) + router._disable_worker_internal("http://w1:8000") + router._check_worker_health = mock.AsyncMock() + + sleep_calls = 0 + + async def sleep_once_then_cancel(_interval): + nonlocal sleep_calls + sleep_calls += 1 + if sleep_calls > 1: + raise asyncio.CancelledError + + with mock.patch.object(self.router_module.asyncio, "sleep", sleep_once_then_cancel): + with self.assertRaises(asyncio.CancelledError): + asyncio.run(router._health_check_loop()) + + router._check_worker_health.assert_not_called() + class TestSchedulerPreemptClassification(unittest.TestCase): """F3 / F31 — _is_scheduler_preempt strict missing-metadata check."""