diff --git a/python/packages/jumpstarter/jumpstarter/exporter/exporter.py b/python/packages/jumpstarter/jumpstarter/exporter/exporter.py index 236651766..330c76631 100644 --- a/python/packages/jumpstarter/jumpstarter/exporter/exporter.py +++ b/python/packages/jumpstarter/jumpstarter/exporter/exporter.py @@ -210,6 +210,10 @@ def stop(self, wait_for_lease_exit=False, should_unregister=False, exit_code: in self._deferred_unregister = should_unregister logger.info("Exporter marked for stop upon lease exit") + def _has_before_lease_hook(self) -> bool: + """Return whether a real beforeLease hook is configured.""" + return bool(self.hook_executor and self.hook_executor.config.before_lease) + @property def exit_code(self) -> int | None: """Get the exit code for the exporter. @@ -594,26 +598,20 @@ async def _cleanup_after_lease(self, lease_scope: LeaseContext) -> None: running the afterLease hook if appropriate, and transitioning to AVAILABLE. """ with CancelScope(shield=True): - # Wait for beforeLease hook to complete before running afterLease. - # When a lease ends during hook execution, the hook must finish - # (subject to its configured timeout) before cleanup proceeds. - # Safety timeout: prevent permanent deadlock if before_lease_hook - # was never set due to a race (e.g. conn_tg cancelled early). - # Use the configured hook timeout (+ margin) when available so we - # never interrupt a legitimately-running beforeLease hook. - safety_timeout = 15 # generous default for no-hook / unknown cases - if ( - self.hook_executor - and self.hook_executor.config.before_lease - ): + if self._has_before_lease_hook(): + # Wait for beforeLease hook to complete before running afterLease. + # When a lease ends during hook execution, the hook must finish + # (subject to its configured timeout) before cleanup proceeds. + # Safety timeout: prevent permanent deadlock if before_lease_hook + # was never set due to a race. safety_timeout = self.hook_executor.config.before_lease.timeout + 30 - with move_on_after(safety_timeout) as timeout_scope: - await lease_scope.before_lease_hook.wait() - if timeout_scope.cancelled_caught: - logger.warning( - "Timed out waiting for before_lease_hook; forcing it set to avoid deadlock" - ) - lease_scope.before_lease_hook.set() + with move_on_after(safety_timeout) as timeout_scope: + await lease_scope.before_lease_hook.wait() + if timeout_scope.cancelled_caught: + logger.warning( + "Timed out waiting for before_lease_hook; forcing it set to avoid deadlock" + ) + lease_scope.before_lease_hook.set() if not lease_scope.after_lease_hook_started.is_set(): lease_scope.after_lease_hook_started.set() @@ -670,6 +668,7 @@ async def handle_lease(self, lease_name: str, tg: TaskGroup, lease_scope: LeaseC ends or the exporter stops. """ logger.info("Listening for incoming connection requests on lease %s", lease_name) + has_before_lease_hook = self._has_before_lease_hook() # Buffer Listen responses to avoid blocking when responses arrive before # process_connections starts iterating. This prevents a race condition where @@ -751,16 +750,10 @@ async def process_connections(): # Report LEASE_READY if no beforeLease hook is configured. # This MUST happen after Listen stream is started so the # controller can forward client Dial requests. - if not self.hook_executor: + if not has_before_lease_hook: await self._report_status(ExporterStatus.LEASE_READY, "Ready for commands") lease_scope.before_lease_hook.set() finally: - # Ensure before_lease_hook is set so _cleanup_after_lease never - # blocks forever. When conn_tg is cancelled before the no-hook - # path reaches lease_scope.before_lease_hook.set(), this flag - # remains unset and _cleanup_after_lease (shielded) deadlocks. - if not lease_scope.before_lease_hook.is_set(): - lease_scope.before_lease_hook.set() # Close the listen stream to signal termination to listen_rx await listen_tx.aclose() # Run afterLease hook before closing the session @@ -818,7 +811,7 @@ async def serve(self): # noqa: C901 # Before-lease hook when transitioning from unleased to leased if not previous_leased: - if self.hook_executor and self._lease_context: + if self._has_before_lease_hook() and self._lease_context: tg.start_soon( self.hook_executor.run_before_lease_hook, self._lease_context, @@ -897,7 +890,7 @@ async def serve_standalone_tcp( self._tg = tg tg.start_soon(self._handle_end_session, lease_scope) - if self.hook_executor: + if self._has_before_lease_hook(): await self.hook_executor.run_before_lease_hook( lease_scope, self._report_status, diff --git a/python/packages/jumpstarter/jumpstarter/exporter/exporter_test.py b/python/packages/jumpstarter/jumpstarter/exporter/exporter_test.py index a25c496e2..34e8157c4 100644 --- a/python/packages/jumpstarter/jumpstarter/exporter/exporter_test.py +++ b/python/packages/jumpstarter/jumpstarter/exporter/exporter_test.py @@ -59,6 +59,7 @@ async def test_cleanup_waits_for_before_lease_hook_before_running_after_lease(se from jumpstarter.exporter.hooks import HookExecutor hook_config = HookConfigV1Alpha1( + before_lease=HookInstanceConfigV1Alpha1(script="echo setup", timeout=10), after_lease=HookInstanceConfigV1Alpha1(script="echo cleanup", timeout=10), ) hook_executor = HookExecutor(config=hook_config) @@ -298,9 +299,16 @@ async def test_cleanup_forces_hook_set_on_safety_timeout(self): forces the event set and cleanup proceeds normally.""" from unittest.mock import patch + from jumpstarter.config.exporter import HookConfigV1Alpha1, HookInstanceConfigV1Alpha1 + from jumpstarter.exporter.hooks import HookExecutor + lease_ctx = make_lease_context() # Deliberately do NOT set before_lease_hook to simulate the race condition - exporter = make_exporter(lease_ctx) + hook_config = HookConfigV1Alpha1( + before_lease=HookInstanceConfigV1Alpha1(script="echo setup", timeout=60), + ) + hook_executor = HookExecutor(config=hook_config) + exporter = make_exporter(lease_ctx, hook_executor) statuses = [] @@ -362,33 +370,34 @@ def tracking_move_on_after(delay, *args, **kwargs): ) -class TestHandleLeaseFinally: - async def test_finally_sets_before_lease_hook_on_early_cancel(self): - """When conn_tg is cancelled before before_lease_hook.set() is - reached (no hook executor path), the finally block must ensure - the event is set so _cleanup_after_lease can proceed.""" +class TestCleanupWithoutBeforeLeaseHook: + async def test_cleanup_does_not_wait_without_before_lease_hook(self): + """When only afterLease is configured, cleanup must not wait on + before_lease_hook because there is no real beforeLease hook.""" + from jumpstarter.config.exporter import HookConfigV1Alpha1, HookInstanceConfigV1Alpha1 + from jumpstarter.exporter.hooks import HookExecutor + lease_ctx = make_lease_context() - # Verify the event starts unset - assert not lease_ctx.before_lease_hook.is_set() + hook_config = HookConfigV1Alpha1( + after_lease=HookInstanceConfigV1Alpha1(script="echo cleanup", timeout=10), + ) + hook_executor = HookExecutor(config=hook_config) + exporter = make_exporter(lease_ctx, hook_executor) - exporter = make_exporter(lease_ctx) - # Mock methods needed by handle_lease - exporter.uuid = "test-uuid" - exporter.labels = {} - exporter.tls = None - exporter.grpc_options = None - - # We test just the finally-block behavior by calling - # _cleanup_after_lease with an unset event: the primary fix is - # in handle_lease's finally, but we can verify _cleanup_after_lease - # handles the unset event via the safety timeout. - # A more direct test: simulate what the finally block does. - if not lease_ctx.before_lease_hook.is_set(): - lease_ctx.before_lease_hook.set() + statuses = [] - assert lease_ctx.before_lease_hook.is_set(), ( - "before_lease_hook must be set after the finally-block logic" - ) + async def track_status(status, message=""): + statuses.append(status) + + exporter._report_status = AsyncMock(side_effect=track_status) + + with anyio.fail_after(2.5): + await exporter._cleanup_after_lease(lease_ctx) + + assert not lease_ctx.before_lease_hook.is_set() + assert ExporterStatus.AFTER_LEASE_HOOK in statuses + assert ExporterStatus.AVAILABLE in statuses + assert lease_ctx.after_lease_hook_done.is_set() class TestIdempotentLeaseEnd: