From 18d0e1dc0a7f05b56cba073035094ba04500b53c Mon Sep 17 00:00:00 2001 From: Paul Wallrabe Date: Thu, 16 Apr 2026 20:39:19 +0200 Subject: [PATCH 1/2] fix: skip afterLease hook on client disconnect without EndSession (#238) When a client exits a shell on a pre-created lease (release=False), the client does not call EndSession. Previously, _cleanup_after_lease would unconditionally run the afterLease hook and release the lease, making the pre-created lease unusable for reconnection. Add _should_run_after_lease_cleanup guard that skips afterLease hook and lease release when all of these conditions hold: EndSession was not requested, lease has not expired, and exporter is not shutting down. This keeps the lease active for reconnection. Existing tests updated to set lease_ended where they simulate lease expiry scenarios, matching the actual runtime behavior. Generated-By: Forge/20260416_202053_681470_ddfd9aba_i238 Co-Authored-By: Claude Opus 4.6 --- .../jumpstarter/exporter/exporter.py | 37 +++ .../jumpstarter/exporter/exporter_test.py | 226 ++++++++++++++++-- 2 files changed, 249 insertions(+), 14 deletions(-) diff --git a/python/packages/jumpstarter/jumpstarter/exporter/exporter.py b/python/packages/jumpstarter/jumpstarter/exporter/exporter.py index 236651766..f8a1092e5 100644 --- a/python/packages/jumpstarter/jumpstarter/exporter/exporter.py +++ b/python/packages/jumpstarter/jumpstarter/exporter/exporter.py @@ -587,11 +587,38 @@ async def session_for_lease(self): yield session, main_path, hook_path logger.info("Session closed") + def _should_run_after_lease_cleanup(self, lease_scope: LeaseContext) -> bool: + """Determine whether afterLease hook and lease release should run. + + Returns False when a client simply disconnects from a pre-created + lease without calling EndSession and the lease has not expired. + In that case the lease stays active for reconnection. + + Returns True when any of: + - EndSession was requested (auto-created lease teardown) + - Lease has ended (expired or released by controller) + - Exporter is shutting down (needs cleanup) + - Running in standalone mode (no controller, always clean up) + """ + if lease_scope.end_session_requested.is_set(): + return True + if lease_scope.lease_ended.is_set(): + return True + if self._stop_requested: + return True + if self._standalone: + return True + return False + async def _cleanup_after_lease(self, lease_scope: LeaseContext) -> None: """Run afterLease hook cleanup when handle_lease exits. This handles the finally-block logic: shielding from cancellation, running the afterLease hook if appropriate, and transitioning to AVAILABLE. + + When a client disconnects without calling EndSession on a still-active + lease (pre-created lease scenario), the afterLease hook is skipped and + the lease remains active for reconnection (issue #238). """ with CancelScope(shield=True): # Wait for beforeLease hook to complete before running afterLease. @@ -615,6 +642,16 @@ async def _cleanup_after_lease(self, lease_scope: LeaseContext) -> None: ) lease_scope.before_lease_hook.set() + if not self._should_run_after_lease_cleanup(lease_scope): + logger.info( + "Client disconnected without EndSession on active lease %s, " + "skipping afterLease hook to keep lease available for reconnection", + lease_scope.lease_name, + ) + if not lease_scope.after_lease_hook_done.is_set(): + lease_scope.after_lease_hook_done.set() + return + if not lease_scope.after_lease_hook_started.is_set(): lease_scope.after_lease_hook_started.set() if (self.hook_executor diff --git a/python/packages/jumpstarter/jumpstarter/exporter/exporter_test.py b/python/packages/jumpstarter/jumpstarter/exporter/exporter_test.py index a25c496e2..5781ad073 100644 --- a/python/packages/jumpstarter/jumpstarter/exporter/exporter_test.py +++ b/python/packages/jumpstarter/jumpstarter/exporter/exporter_test.py @@ -52,6 +52,7 @@ async def test_cleanup_waits_for_before_lease_hook_before_running_after_lease(se complete before starting the afterLease hook. This prevents running afterLease while beforeLease is still in progress.""" lease_ctx = make_lease_context() + lease_ctx.lease_ended.set() after_lease_started_before_hook_done = False @@ -94,6 +95,7 @@ async def test_exporter_returns_to_available_after_premature_lease_end(self): must transition to AVAILABLE once hooks complete.""" lease_ctx = make_lease_context() lease_ctx.before_lease_hook.set() + lease_ctx.lease_ended.set() statuses = [] @@ -113,6 +115,7 @@ async def test_new_lease_accepted_after_recovery_from_premature_end(self): can be created and the exporter processes it normally.""" lease_ctx_1 = make_lease_context(lease_name="lease-1") lease_ctx_1.before_lease_hook.set() + lease_ctx_1.lease_ended.set() statuses = [] @@ -128,6 +131,7 @@ async def track_status(status, message=""): lease_ctx_2 = make_lease_context(lease_name="lease-2") lease_ctx_2.before_lease_hook.set() + lease_ctx_2.lease_ended.set() exporter._lease_context = lease_ctx_2 statuses.clear() @@ -142,6 +146,7 @@ async def test_unused_lease_timeout_transitions_to_available(self): the exporter must transition to AVAILABLE.""" lease_ctx = make_lease_context(client_name="") lease_ctx.before_lease_hook.set() + lease_ctx.lease_ended.set() statuses = [] @@ -164,6 +169,7 @@ async def test_unused_lease_with_hooks_runs_after_lease_when_client_present(self lease_ctx = make_lease_context(client_name="some-client") lease_ctx.before_lease_hook.set() + lease_ctx.lease_ended.set() hook_config = HookConfigV1Alpha1( after_lease=HookInstanceConfigV1Alpha1(script="echo cleanup", timeout=10), @@ -189,6 +195,7 @@ async def test_new_lease_after_unused_timeout_recovery(self): can be accepted and processed.""" lease_ctx_1 = make_lease_context(lease_name="unused-lease", client_name="") lease_ctx_1.before_lease_hook.set() + lease_ctx_1.lease_ended.set() statuses = [] @@ -204,6 +211,7 @@ async def track_status(status, message=""): lease_ctx_2 = make_lease_context(lease_name="new-lease", client_name="real-client") lease_ctx_2.before_lease_hook.set() + lease_ctx_2.lease_ended.set() exporter._lease_context = lease_ctx_2 statuses.clear() @@ -218,6 +226,7 @@ async def test_after_lease_done_before_new_lease_context_created(self): previous lease's after_lease_hook_done is set.""" lease_ctx_1 = make_lease_context(lease_name="lease-1") lease_ctx_1.before_lease_hook.set() + lease_ctx_1.lease_ended.set() exporter = make_exporter(lease_ctx_1) exporter._report_status = AsyncMock() @@ -228,6 +237,7 @@ async def test_after_lease_done_before_new_lease_context_created(self): exporter._lease_context = None lease_ctx_2 = make_lease_context(lease_name="lease-2") + lease_ctx_2.lease_ended.set() exporter._lease_context = lease_ctx_2 lease_ctx_2.before_lease_hook.set() @@ -267,6 +277,7 @@ async def tracking_after(*args, **kwargs): hook_executor.run_after_lease_hook = tracking_after lease_ctx_1 = make_lease_context(lease_name="lease-1") + lease_ctx_1.lease_ended.set() exporter = make_exporter(lease_ctx_1, hook_executor) exporter._report_status = AsyncMock() @@ -276,6 +287,7 @@ async def tracking_after(*args, **kwargs): await exporter._cleanup_after_lease(lease_ctx_1) lease_ctx_2 = make_lease_context(lease_name="lease-2") + lease_ctx_2.lease_ended.set() exporter._lease_context = lease_ctx_2 await hook_executor.run_before_lease_hook( @@ -299,7 +311,6 @@ async def test_cleanup_forces_hook_set_on_safety_timeout(self): from unittest.mock import patch lease_ctx = make_lease_context() - # Deliberately do NOT set before_lease_hook to simulate the race condition exporter = make_exporter(lease_ctx) statuses = [] @@ -309,21 +320,17 @@ async def track_status(status, message=""): exporter._report_status = AsyncMock(side_effect=track_status) - # Patch move_on_after to use a tiny timeout so the test runs fast original_move_on_after = anyio.move_on_after def fast_move_on_after(delay, *args, **kwargs): - # Replace any safety timeout with 0.1s for fast testing return original_move_on_after(0.1, *args, **kwargs) with patch("jumpstarter.exporter.exporter.move_on_after", side_effect=fast_move_on_after): await exporter._cleanup_after_lease(lease_ctx) - # The event should be force-set by the timeout handler assert lease_ctx.before_lease_hook.is_set(), ( "before_lease_hook should be force-set after safety timeout" ) - # Cleanup should have completed normally assert ExporterStatus.AVAILABLE in statuses assert lease_ctx.after_lease_hook_done.is_set() @@ -342,7 +349,7 @@ async def test_safety_timeout_uses_hook_config_when_available(self): hook_executor = HookExecutor(config=hook_config) lease_ctx = make_lease_context() - lease_ctx.before_lease_hook.set() # Set so we don't actually timeout + lease_ctx.before_lease_hook.set() exporter = make_exporter(lease_ctx, hook_executor) @@ -356,7 +363,6 @@ def tracking_move_on_after(delay, *args, **kwargs): with patch("jumpstarter.exporter.exporter.move_on_after", side_effect=tracking_move_on_after): await exporter._cleanup_after_lease(lease_ctx) - # The safety timeout should be hook timeout (60) + margin (30) = 90 assert 90 in captured_timeouts, ( f"Expected safety timeout of 90s (60 + 30), got timeouts: {captured_timeouts}" ) @@ -368,21 +374,14 @@ async def test_finally_sets_before_lease_hook_on_early_cancel(self): reached (no hook executor path), the finally block must ensure the event is set so _cleanup_after_lease can proceed.""" lease_ctx = make_lease_context() - # Verify the event starts unset assert not lease_ctx.before_lease_hook.is_set() exporter = make_exporter(lease_ctx) - # Mock methods needed by handle_lease exporter.uuid = "test-uuid" exporter.labels = {} exporter.tls = None exporter.grpc_options = None - # We test just the finally-block behavior by calling - # _cleanup_after_lease with an unset event: the primary fix is - # in handle_lease's finally, but we can verify _cleanup_after_lease - # handles the unset event via the safety timeout. - # A more direct test: simulate what the finally block does. if not lease_ctx.before_lease_hook.is_set(): lease_ctx.before_lease_hook.set() @@ -391,6 +390,204 @@ async def test_finally_sets_before_lease_hook_on_early_cancel(self): ) +class TestClientDisconnectWithoutEndSession: + async def test_cleanup_skips_after_lease_hook_on_disconnect_without_end_session(self): + from jumpstarter.config.exporter import HookConfigV1Alpha1, HookInstanceConfigV1Alpha1 + from jumpstarter.exporter.hooks import HookExecutor + + hook_config = HookConfigV1Alpha1( + after_lease=HookInstanceConfigV1Alpha1(script="echo cleanup", timeout=10), + ) + hook_executor = HookExecutor(config=hook_config) + + after_hook_call_count = 0 + original_run_after = hook_executor.run_after_lease_hook + + async def counting_run_after(*args, **kwargs): + nonlocal after_hook_call_count + after_hook_call_count += 1 + return await original_run_after(*args, **kwargs) + + hook_executor.run_after_lease_hook = counting_run_after + + lease_ctx = make_lease_context() + lease_ctx.before_lease_hook.set() + exporter = make_exporter(lease_ctx, hook_executor) + exporter._report_status = AsyncMock() + + await exporter._cleanup_after_lease(lease_ctx) + + assert after_hook_call_count == 0, ( + f"afterLease hook ran {after_hook_call_count} times but should have been " + "skipped when client disconnects without EndSession on active lease" + ) + + async def test_cleanup_does_not_transition_to_available_on_disconnect(self): + from jumpstarter.config.exporter import HookConfigV1Alpha1, HookInstanceConfigV1Alpha1 + from jumpstarter.exporter.hooks import HookExecutor + + hook_config = HookConfigV1Alpha1( + after_lease=HookInstanceConfigV1Alpha1(script="echo cleanup", timeout=10), + ) + hook_executor = HookExecutor(config=hook_config) + + lease_ctx = make_lease_context() + lease_ctx.before_lease_hook.set() + + statuses = [] + + async def track_status(status, message=""): + statuses.append(status) + + exporter = make_exporter(lease_ctx, hook_executor) + exporter._report_status = AsyncMock(side_effect=track_status) + + await exporter._cleanup_after_lease(lease_ctx) + + assert ExporterStatus.AVAILABLE not in statuses, ( + f"Exporter transitioned to AVAILABLE on client disconnect without " + f"EndSession. Statuses: {statuses}" + ) + + async def test_cleanup_does_not_call_request_lease_release_on_disconnect(self): + from jumpstarter.config.exporter import HookConfigV1Alpha1, HookInstanceConfigV1Alpha1 + from jumpstarter.exporter.hooks import HookExecutor + + hook_config = HookConfigV1Alpha1( + after_lease=HookInstanceConfigV1Alpha1(script="echo cleanup", timeout=10), + ) + hook_executor = HookExecutor(config=hook_config) + + lease_ctx = make_lease_context() + lease_ctx.before_lease_hook.set() + + exporter = make_exporter(lease_ctx, hook_executor) + exporter._report_status = AsyncMock() + exporter._request_lease_release = AsyncMock() + + await exporter._cleanup_after_lease(lease_ctx) + + exporter._request_lease_release.assert_not_called() + + async def test_cleanup_runs_after_lease_hook_when_lease_ended(self): + from jumpstarter.config.exporter import HookConfigV1Alpha1, HookInstanceConfigV1Alpha1 + from jumpstarter.exporter.hooks import HookExecutor + + hook_config = HookConfigV1Alpha1( + after_lease=HookInstanceConfigV1Alpha1(script="echo cleanup", timeout=10), + ) + hook_executor = HookExecutor(config=hook_config) + + after_hook_call_count = 0 + original_run_after = hook_executor.run_after_lease_hook + + async def counting_run_after(*args, **kwargs): + nonlocal after_hook_call_count + after_hook_call_count += 1 + return await original_run_after(*args, **kwargs) + + hook_executor.run_after_lease_hook = counting_run_after + + lease_ctx = make_lease_context() + lease_ctx.before_lease_hook.set() + lease_ctx.lease_ended.set() + + exporter = make_exporter(lease_ctx, hook_executor) + exporter._report_status = AsyncMock() + + await exporter._cleanup_after_lease(lease_ctx) + + assert after_hook_call_count == 1, ( + f"afterLease hook ran {after_hook_call_count} times, expected 1 " + "when lease_ended is set" + ) + + async def test_cleanup_runs_after_lease_hook_when_end_session_requested(self): + from jumpstarter.config.exporter import HookConfigV1Alpha1, HookInstanceConfigV1Alpha1 + from jumpstarter.exporter.hooks import HookExecutor + + hook_config = HookConfigV1Alpha1( + after_lease=HookInstanceConfigV1Alpha1(script="echo cleanup", timeout=10), + ) + hook_executor = HookExecutor(config=hook_config) + + after_hook_call_count = 0 + original_run_after = hook_executor.run_after_lease_hook + + async def counting_run_after(*args, **kwargs): + nonlocal after_hook_call_count + after_hook_call_count += 1 + return await original_run_after(*args, **kwargs) + + hook_executor.run_after_lease_hook = counting_run_after + + lease_ctx = make_lease_context() + lease_ctx.before_lease_hook.set() + lease_ctx.end_session_requested.set() + + exporter = make_exporter(lease_ctx, hook_executor) + exporter._report_status = AsyncMock() + + await exporter._cleanup_after_lease(lease_ctx) + + assert after_hook_call_count == 1, ( + f"afterLease hook ran {after_hook_call_count} times, expected 1 " + "when end_session_requested is set" + ) + + async def test_cleanup_runs_after_lease_hook_when_stop_requested(self): + from jumpstarter.config.exporter import HookConfigV1Alpha1, HookInstanceConfigV1Alpha1 + from jumpstarter.exporter.hooks import HookExecutor + + hook_config = HookConfigV1Alpha1( + after_lease=HookInstanceConfigV1Alpha1(script="echo cleanup", timeout=10), + ) + hook_executor = HookExecutor(config=hook_config) + + after_hook_call_count = 0 + original_run_after = hook_executor.run_after_lease_hook + + async def counting_run_after(*args, **kwargs): + nonlocal after_hook_call_count + after_hook_call_count += 1 + return await original_run_after(*args, **kwargs) + + hook_executor.run_after_lease_hook = counting_run_after + + lease_ctx = make_lease_context() + lease_ctx.before_lease_hook.set() + + exporter = make_exporter(lease_ctx, hook_executor) + exporter._stop_requested = True + exporter._report_status = AsyncMock() + + await exporter._cleanup_after_lease(lease_ctx) + + assert after_hook_call_count == 1, ( + f"afterLease hook ran {after_hook_call_count} times, expected 1 " + "when _stop_requested is True" + ) + + async def test_cleanup_without_hooks_skips_available_on_disconnect(self): + lease_ctx = make_lease_context() + lease_ctx.before_lease_hook.set() + + statuses = [] + + async def track_status(status, message=""): + statuses.append(status) + + exporter = make_exporter(lease_ctx, hook_executor=None) + exporter._report_status = AsyncMock(side_effect=track_status) + + await exporter._cleanup_after_lease(lease_ctx) + + assert ExporterStatus.AVAILABLE not in statuses, ( + f"Exporter transitioned to AVAILABLE on client disconnect without " + f"EndSession (no hooks). Statuses: {statuses}" + ) + + class TestIdempotentLeaseEnd: async def test_duplicate_cleanup_is_noop(self): """Calling _cleanup_after_lease twice for the same LeaseContext @@ -416,6 +613,7 @@ async def counting_run_after(*args, **kwargs): lease_ctx = make_lease_context() lease_ctx.before_lease_hook.set() + lease_ctx.lease_ended.set() exporter = make_exporter(lease_ctx, hook_executor) exporter._report_status = AsyncMock() From 7dfa52f67d5ce390670022999b08927f2ddf4afd Mon Sep 17 00:00:00 2001 From: Paul Wallrabe Date: Fri, 17 Apr 2026 12:39:57 +0200 Subject: [PATCH 2/2] fix: set lease_ended in safety timeout test The _should_run_after_lease_cleanup guard requires at least one condition to be set. Set lease_ended since the test simulates a scenario where cleanup is forced after timeout. Co-Authored-By: Claude Opus 4.6 --- .../packages/jumpstarter/jumpstarter/exporter/exporter_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/packages/jumpstarter/jumpstarter/exporter/exporter_test.py b/python/packages/jumpstarter/jumpstarter/exporter/exporter_test.py index 5781ad073..541de0f76 100644 --- a/python/packages/jumpstarter/jumpstarter/exporter/exporter_test.py +++ b/python/packages/jumpstarter/jumpstarter/exporter/exporter_test.py @@ -311,6 +311,7 @@ async def test_cleanup_forces_hook_set_on_safety_timeout(self): from unittest.mock import patch lease_ctx = make_lease_context() + lease_ctx.lease_ended.set() exporter = make_exporter(lease_ctx) statuses = []