From 2ca7b0bd28c8ccc394d324ce753243862248ff6b Mon Sep 17 00:00:00 2001 From: phall1 Date: Wed, 3 Jun 2026 19:30:07 -0400 Subject: [PATCH] fix(test): stop stress_resize_extremes hanging CI forever (phux-s2iw) The e2e lane intermittently hung for >100min on `both_axes_shrink_storm_under_output_does_not_panic` (and latently `resize_degenerate_viewports_do_not_panic`). Root cause is NOT a server deadlock: the final `cap.attach_screen(client.screenshot()...)` call drains output "until 20ms of quiet", but these tests' seeds emit every 5ms / 20ms forever, so screenshot's drain loop never sees a gap and spins indefinitely. Flaky only because occasional scheduler jitter yields a >20ms gap. Confirmed by stack sample: runtime thread stuck in `ClientHandle::screenshot` (builder.rs). Add two harness helpers: `drain_output_bounded(max_frames)` (a count-bounded drain that is safe against a continuously-emitting seed) and `snapshot_text()` (read the oracle without draining). Replace the hanging `screenshot()` in both tests with a bounded drain + snapshot. The resize storm loops are left untouched -- they never deadlocked (`resize_raw` is send-only and always reached the final screenshot). Validation: both_axes 25/25 pass, slowest 1s (was a 100min hang); full file both tests 10/10, slowest 2s. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/phux-server/tests/common/builder.rs | 32 +++++++++++++++++++ .../tests/stress_resize_extremes.rs | 13 ++++++-- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/crates/phux-server/tests/common/builder.rs b/crates/phux-server/tests/common/builder.rs index c120473..3a61280 100644 --- a/crates/phux-server/tests/common/builder.rs +++ b/crates/phux-server/tests/common/builder.rs @@ -436,6 +436,38 @@ impl ClientHandle { &mut self.screen } + /// Snapshot the oracle's current text WITHOUT draining the wire. 
+ /// + /// Safe to call against a continuously-emitting seed where + /// [`Self::screenshot`] would loop forever (its "drain until quiet" never + /// terminates when output arrives faster than its idle window). Pair with + /// [`Self::drain_output_bounded`] when the latest content is wanted. + pub fn snapshot_text(&mut self) -> String { + self.screen.snapshot_text() + } + + /// Drain up to `max_frames` immediately-available frames of any kind, feeding + /// `TERMINAL_OUTPUT` into the oracle and stopping early on a brief (5ms) quiet gap. + /// + /// Unlike [`Self::screenshot`], this is BOUNDED by frame count, so it is + /// safe to call against a seed that emits continuously (e.g. an infinite + /// `printf` loop): `screenshot`'s "drain until quiet" never terminates + /// when output arrives faster than its idle window. Use this inside a + /// resize/output storm to keep the server's bounded outbound mailbox and + /// socket buffer from filling — a client that only sends and never reads + /// wedges the writer and deadlocks the shared current-thread runtime. + pub async fn drain_output_bounded(&mut self, max_frames: usize) { + for _ in 0..max_frames { + match timeout(Duration::from_millis(5), recv_typed(&mut self.stream)).await { + Ok((tb, FrameKind::TerminalOutput { bytes, .. })) if tb == TYPE_TERMINAL_OUTPUT => { + self.screen.write(&bytes); + } + Ok(_) => {} // non-output frame: ignored, but still counts toward `max_frames` + Err(_) => break, // brief quiet: backlog cleared for now + } + } + } + /// Drain `TERMINAL_OUTPUT` into the oracle until `pred` holds or /// [`WIRE_RECV_TIMEOUT`] elapses. Returns `Ok(())` if the predicate /// held, `Err` with the final screen text on timeout. 
diff --git a/crates/phux-server/tests/stress_resize_extremes.rs b/crates/phux-server/tests/stress_resize_extremes.rs index cb6b9ce..82f9fb5 100644 --- a/crates/phux-server/tests/stress_resize_extremes.rs +++ b/crates/phux-server/tests/stress_resize_extremes.rs @@ -77,7 +77,11 @@ fn resize_degenerate_viewports_do_not_panic() { client.resize(final_cols, final_rows).await; let needle = format!("{final_rows} {final_cols}"); let res = client.wait_until(|s| s.contains(&needle)).await; - cap.attach_screen(client.screenshot().await.snapshot_text()); + // Bounded drain, not `screenshot()`: the `stty size` seed loops + // every 20ms, so screenshot's "drain until 20ms quiet" can spin + // forever against it (same hang class as the both-axes storm). + client.drain_output_bounded(32).await; + cap.attach_screen(client.snapshot_text()); assert!( res.is_ok(), "PTY winsize never converged to {final_cols}x{final_rows} \ @@ -145,7 +149,12 @@ fn both_axes_shrink_storm_under_output_does_not_panic() { // server error at teardown.) client.resize(100, 30).await; let res = client.wait_until(|s| s.contains("row-")).await; - cap.attach_screen(client.screenshot().await.snapshot_text()); + // Snapshot the oracle WITHOUT `screenshot()`: the seed emits + // every 5ms, so screenshot's "drain until 20ms quiet" never + // terminates here (the pre-existing hang). wait_until already + // populated the oracle; a bounded drain refreshes it. + client.drain_output_bounded(32).await; + cap.attach_screen(client.snapshot_text()); assert!( res.is_ok(), "pane produced no output after the both-shrink storm — \