From beb85c8a4124cc2941c1716f8f3b3f444fa573a3 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 15 Jun 2026 20:39:34 +0000 Subject: [PATCH 1/2] feat(tunnels): detect intermittent listener flapping Track a short window of listener-presence observations per SSH tunnel and surface a distinct 'flapping' status when an Alive -L/-D tunnel's local LISTEN socket comes and goes across scans. This catches a tunnel that degrades slowly (listener intermittently present) rather than only the binary 'no listener' (gone right now) case. - SshTunnel records one listener observation per confirmable scan into a capped VecDeque; is_flapping() reports a window holding both present and absent samples. History clears on respawn. - Recording lives in App::refresh (only past the startup grace window and while auto-refresh is active) so stale/premature samples never forge a phantom flap. Shared LISTENER_GRACE + entry_has_listener helper between the recorder and the tunnels view. - New tunnel_health_flapping i18n string (en/ru/zh), rendered in light yellow, distinct from 'no listener' (yellow) which keeps priority. 
https://claude.ai/code/session_01BHfdoVE66Dag2SEFtzNSR3 --- crates/prt-core/src/i18n/en.rs | 1 + crates/prt-core/src/i18n/mod.rs | 1 + crates/prt-core/src/i18n/ru.rs | 1 + crates/prt-core/src/i18n/zh.rs | 1 + crates/prt/src/app.rs | 40 ++++++++++++ crates/prt/src/forward.rs | 104 ++++++++++++++++++++++++++++++++ crates/prt/src/views/tunnels.rs | 40 ++++-------- 7 files changed, 159 insertions(+), 29 deletions(-) diff --git a/crates/prt-core/src/i18n/en.rs b/crates/prt-core/src/i18n/en.rs index c393a48..02f921c 100644 --- a/crates/prt-core/src/i18n/en.rs +++ b/crates/prt-core/src/i18n/en.rs @@ -149,6 +149,7 @@ pub static STRINGS: Strings = Strings { tunnel_status_starting: "starting", tunnel_status_failed: "failed", tunnel_health_no_listener: "no listener", + tunnel_health_flapping: "flapping", tunnel_form_edit_title: " Edit SSH Tunnel ", tunnel_form_field_required: "required", tunnels_empty: " No active tunnels. Press [n] to create one.", diff --git a/crates/prt-core/src/i18n/mod.rs b/crates/prt-core/src/i18n/mod.rs index 0654701..9d626cb 100644 --- a/crates/prt-core/src/i18n/mod.rs +++ b/crates/prt-core/src/i18n/mod.rs @@ -255,6 +255,7 @@ pub struct Strings { pub tunnel_status_starting: &'static str, pub tunnel_status_failed: &'static str, pub tunnel_health_no_listener: &'static str, + pub tunnel_health_flapping: &'static str, pub tunnel_form_edit_title: &'static str, pub tunnel_form_field_required: &'static str, pub tunnels_empty: &'static str, diff --git a/crates/prt-core/src/i18n/ru.rs b/crates/prt-core/src/i18n/ru.rs index efd2c38..bf54f88 100644 --- a/crates/prt-core/src/i18n/ru.rs +++ b/crates/prt-core/src/i18n/ru.rs @@ -149,6 +149,7 @@ pub static STRINGS: Strings = Strings { tunnel_status_starting: "запускается", tunnel_status_failed: "сбой", tunnel_health_no_listener: "нет листенера", + tunnel_health_flapping: "нестабилен", tunnel_form_edit_title: " Правка SSH-туннеля ", tunnel_form_field_required: "обязательно", tunnels_empty: " Активных туннелей нет. 
Нажмите [n] чтобы создать.", diff --git a/crates/prt-core/src/i18n/zh.rs b/crates/prt-core/src/i18n/zh.rs index 1f1d3e3..8a83ca3 100644 --- a/crates/prt-core/src/i18n/zh.rs +++ b/crates/prt-core/src/i18n/zh.rs @@ -148,6 +148,7 @@ pub static STRINGS: Strings = Strings { tunnel_status_starting: "启动中", tunnel_status_failed: "失败", tunnel_health_no_listener: "无监听", + tunnel_health_flapping: "监听抖动", tunnel_form_edit_title: " 编辑 SSH 隧道 ", tunnel_form_field_required: "必填", tunnels_empty: " 无活跃隧道。按 [n] 创建。", diff --git a/crates/prt/src/app.rs b/crates/prt/src/app.rs index 07a3250..3b982a4 100644 --- a/crates/prt/src/app.rs +++ b/crates/prt/src/app.rs @@ -23,8 +23,36 @@ use ratatui::prelude::*; use std::io::stdout; use std::time::Instant; +use crate::forward::TunnelStatus; use crate::input::handle_key; use crate::ui::draw; +use prt_core::model::ConnectionState; +use std::time::Duration; + +/// Grace period after a tunnel (re)starts before its missing listener is held +/// against it. The scan backing the listener check only refreshes every +/// `TICK_RATE`, and a tunnel needs a tick to go `Starting -> Alive` plus +/// another for the scan to observe its `LISTEN` socket. Shared by the recorder +/// here and the renderer in `views::tunnels` so both agree on when a scan is +/// trustworthy. +pub(crate) const LISTENER_GRACE: Duration = TICK_RATE.saturating_mul(2); + +/// True if `ssh_pid` owns a `LISTEN` socket on `local_port` in the given scan +/// — confirms an `Alive` tunnel actually opened its own socket. Read-only: +/// reuses the data prt already scanned, opens no new connections. +/// +/// The PID match matters: OpenSSH defaults to `ExitOnForwardFailure no`, so on +/// a local-port conflict the `ssh` child keeps running while *another* process +/// owns the port. Matching `LISTEN + port` alone would then mask the bind +/// failure as healthy; requiring the listener's PID to be our `ssh` child +/// avoids that false green. 
+pub(crate) fn entry_has_listener(entries: &[TrackedEntry], local_port: u16, ssh_pid: u32) -> bool { + entries.iter().any(|e| { + e.entry.state == ConnectionState::Listen + && e.entry.local_addr.port() == local_port + && e.entry.process.pid == ssh_pid + }) +} #[derive(Clone, Copy)] pub(crate) enum SudoPurpose { @@ -307,6 +335,18 @@ impl App { self.detail_cache = None; } } + // Record one listener-presence observation per scan for flapping + // detection. Only `Alive` tunnels past the grace window are sampled, so + // the history never holds a phantom `false` from a freshly (re)started + // tunnel the scan hasn't observed yet. `refresh()` runs only while + // auto-refresh is active, so the scan here is always fresh. + let entries = &self.session.entries; + for t in &mut self.forwards.tunnels { + if t.last_status == TunnelStatus::Alive && t.uptime() >= LISTENER_GRACE { + let present = entry_has_listener(entries, t.spec.local_port, t.pid()); + t.record_listener(present); + } + } self.update_filtered_preserving(prev_key); } diff --git a/crates/prt/src/forward.rs b/crates/prt/src/forward.rs index 1824ac7..bd02563 100644 --- a/crates/prt/src/forward.rs +++ b/crates/prt/src/forward.rs @@ -5,6 +5,7 @@ use prt_core::core::ssh_config::{SshHost, SshHostSource}; use prt_core::core::ssh_tunnel::{ResolvedHost, SshTunnelSpec, TunnelKind}; +use std::collections::VecDeque; use std::process::{Child, Command, Stdio}; use std::thread; use std::time::{Duration, Instant}; @@ -36,6 +37,33 @@ const STABILITY_THRESHOLD: Duration = Duration::from_secs(30); /// remove it and lets the user restart it manually. const MAX_RECONNECT_ATTEMPTS: u32 = 10; +/// How many recent listener-presence observations to keep per tunnel for +/// flapping detection. Each sample covers one full scan (~`TICK_RATE`), so a +/// window of 6 spans roughly the last ~12s of confirmable scans. 
+const LISTENER_HISTORY_CAP: usize = 6; +/// Minimum samples before the flapping verdict is trusted — avoids flagging a +/// tunnel as unstable off a single good/bad sample pair right after it starts +/// being observed. +const LISTENER_MIN_SAMPLES: usize = 4; + +/// Push one listener-presence sample, evicting the oldest once the window is +/// full. Free function (rather than inline in `record_listener`) so the +/// capping behaviour is unit-testable without spawning an `ssh` child. +fn push_listener_sample(history: &mut VecDeque<bool>, present: bool) { + history.push_back(present); + if history.len() > LISTENER_HISTORY_CAP { + history.pop_front(); + } +} + +/// True when the observation window holds *both* a present and an absent +/// listener sample (and enough samples to be meaningful): the listener is +/// coming and going across scans rather than being stably up or stably down. +/// A pure predicate so it can be unit-tested without spawning an `ssh` child. +fn history_is_flapping(history: &VecDeque<bool>) -> bool { + history.len() >= LISTENER_MIN_SAMPLES && history.contains(&true) && history.contains(&false) +} + /// A single SSH tunnel: a running `ssh` child process plus the spec and /// resolved argument list (kept so `restart()` reuses the same resolution). pub struct SshTunnel { @@ -57,6 +85,12 @@ pub struct SshTunnel { /// Earliest instant the next reconnect attempt may run. `None` once the /// tunnel is healthy or no retry has been scheduled yet. next_retry_at: Option<Instant>, + /// Recent listener-presence observations (newest at the back, capped at + /// `LISTENER_HISTORY_CAP`). Only confirmable scans are pushed here (not + /// while paused or within the startup grace window), so a mix of `true` + /// and `false` genuinely means the local `LISTEN` socket flapped. Cleared + /// on `respawn` so a restarted tunnel starts with a clean slate. 
+ listener_history: VecDeque<bool>, } impl SshTunnel { @@ -73,6 +107,7 @@ impl SshTunnel { retry_backoff: INITIAL_BACKOFF, retry_count: 0, next_retry_at: None, + listener_history: VecDeque::with_capacity(LISTENER_HISTORY_CAP), } } @@ -177,6 +212,8 @@ impl SshTunnel { }; self.last_status = TunnelStatus::Starting; self.started_at = Instant::now(); + // The old child's listener history says nothing about the new one. + self.listener_history.clear(); Ok(()) } @@ -198,6 +235,22 @@ impl SshTunnel { self.started_at.elapsed() } + /// Record one listener-presence observation from a confirmable scan, + /// evicting the oldest sample once the window is full. Callers must only + /// invoke this when the scan can be trusted (auto-refresh running and past + /// the startup grace window); a stale or premature `false` would otherwise + /// manufacture a phantom flap. + pub fn record_listener(&mut self, present: bool) { + push_listener_sample(&mut self.listener_history, present); + } + + /// True when the local listener has been intermittently present across + /// recent scans — a "degrading slowly" signal distinct from the binary + /// "no listener" (a listener that is gone *right now*). + pub fn is_flapping(&self) -> bool { + history_is_flapping(&self.listener_history) + } + /// PID of the current `ssh` child. For `-L`/`-D` tunnels this is the /// process that binds the local port, so the listener health check can /// confirm a `LISTEN` socket really belongs to *this* tunnel. 
@@ -531,4 +584,55 @@ mod tests { fn shell_quote_quotes_empty_arg() { assert_eq!(shell_quote(""), "''"); } + + fn history(samples: &[bool]) -> VecDeque<bool> { + let mut h = VecDeque::new(); + for &s in samples { + push_listener_sample(&mut h, s); + } + h + } + + #[test] + fn stable_present_is_not_flapping() { + assert!(!history_is_flapping(&history(&[ + true, true, true, true, true, true + ]))); + } + + #[test] + fn all_absent_is_not_flapping() { + assert!(!history_is_flapping(&history(&[ + false, false, false, false, false, false + ]))); + } + + #[test] + fn alternating_presence_is_flapping() { + assert!(history_is_flapping(&history(&[true, false, true, false]))); + } + + #[test] + fn insufficient_samples_is_not_flapping() { + // Both values present, but fewer than LISTENER_MIN_SAMPLES samples. + assert!(!history_is_flapping(&history(&[true, false]))); + } + + #[test] + fn empty_history_is_not_flapping() { + assert!(!history_is_flapping(&VecDeque::new())); + } + + #[test] + fn window_caps_and_evicts_old_samples() { + // Fill with absences, then push enough presences to roll the absences + // out: a once-flapping window settles back to stably-up. + let mut h = history(&[false, false, false, false, false, false]); + assert!(!history_is_flapping(&h)); // all absent + for _ in 0..LISTENER_HISTORY_CAP { + push_listener_sample(&mut h, true); + } + assert_eq!(h.len(), LISTENER_HISTORY_CAP); + assert!(!history_is_flapping(&h)); // absences evicted, now stably up + } } diff --git a/crates/prt/src/views/tunnels.rs b/crates/prt/src/views/tunnels.rs index cd24a10..c54022d 100644 --- a/crates/prt/src/views/tunnels.rs +++ b/crates/prt/src/views/tunnels.rs @@ -1,38 +1,13 @@ //! Fullscreen SSH tunnels manager. 
-use crate::app::App; +use crate::app::{entry_has_listener, App, LISTENER_GRACE}; use crate::forward::TunnelStatus; use crossterm::event::{KeyCode, KeyEvent}; use prt_core::core::scanner::format_uptime; use prt_core::core::ssh_tunnel::TunnelKind; use prt_core::i18n; -use prt_core::model::{ConnectionState, TICK_RATE}; use ratatui::prelude::*; use ratatui::widgets::*; -use std::time::Duration; - -/// Grace period after (re)start before a missing listener is reported. The scan -/// backing `has_local_listener` only refreshes every `TICK_RATE`, and a tunnel -/// needs a tick to go `Starting -> Alive` plus another for the scan to observe -/// its `LISTEN` socket, so we'd otherwise flash a bogus "no listener". -const LISTENER_GRACE: Duration = TICK_RATE.saturating_mul(2); - -/// True if `ssh_pid` owns a `LISTEN` socket on `local_port` in the latest scan -/// — confirms an `Alive` tunnel actually opened its own socket. Read-only: -/// reuses the data prt already scanned, opens no new connections. -/// -/// The PID match matters: OpenSSH defaults to `ExitOnForwardFailure no`, so on -/// a local-port conflict the `ssh` child keeps running while *another* process -/// owns the port. Matching `LISTEN + port` alone would then mask the bind -/// failure as healthy; requiring the listener's PID to be our `ssh` child -/// avoids that false green. 
-fn has_local_listener(app: &App, local_port: u16, ssh_pid: u32) -> bool { - app.session.entries.iter().any(|e| { - e.entry.state == ConnectionState::Listen - && e.entry.local_addr.port() == local_port - && e.entry.process.pid == ssh_pid - }) -} pub fn draw(f: &mut Frame, app: &App, area: Rect) { let s = i18n::strings(); @@ -104,10 +79,17 @@ pub fn draw(f: &mut Frame, app: &App, area: Rect) { let (status, color) = match t.last_status { TunnelStatus::Alive => { let scan_can_confirm = !app.auto_refresh_paused && t.uptime() >= LISTENER_GRACE; - if !scan_can_confirm || has_local_listener(app, t.spec.local_port, t.pid()) { - (s.tunnel_status_alive.to_string(), Color::Green) - } else { + let present = + entry_has_listener(&app.session.entries, t.spec.local_port, t.pid()); + if scan_can_confirm && !present { + // Listener gone right now — the acute case wins. (s.tunnel_health_no_listener.to_string(), Color::Yellow) + } else if scan_can_confirm && t.is_flapping() { + // Listener present now but intermittently dropped across + // recent scans — degrading rather than broken. + (s.tunnel_health_flapping.to_string(), Color::LightYellow) + } else { + (s.tunnel_status_alive.to_string(), Color::Green) } } TunnelStatus::Starting => (s.tunnel_status_starting.to_string(), Color::Yellow), From 2e319c675f09ec7d172c1468aeb53e98e3266c28 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 15 Jun 2026 20:46:11 +0000 Subject: [PATCH 2/2] fix(tunnels): exclude Gone entries from listener health sampling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit diff_entries retains a vanished LISTEN socket in session.entries as EntryStatus::Gone for GONE_RETENTION (5s) before removal. entry_has_listener matched it regardless of status, so a single-refresh listener drop recorded 'true' for the missing socket — the flapping window never saw the absence and the new signal stayed hidden. 
It also kept the binary 'no listener' check green for up to 5s after the socket actually died. Filter out Gone so short drops are observable. Reported by Codex review. https://claude.ai/code/session_01BHfdoVE66Dag2SEFtzNSR3 --- crates/prt/src/app.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/crates/prt/src/app.rs b/crates/prt/src/app.rs index 3b982a4..5bf0719 100644 --- a/crates/prt/src/app.rs +++ b/crates/prt/src/app.rs @@ -26,7 +26,7 @@ use std::time::Instant; use crate::forward::TunnelStatus; use crate::input::handle_key; use crate::ui::draw; -use prt_core::model::ConnectionState; +use prt_core::model::{ConnectionState, EntryStatus}; use std::time::Duration; /// Grace period after a tunnel (re)starts before its missing listener is held @@ -46,9 +46,16 @@ pub(crate) const LISTENER_GRACE: Duration = TICK_RATE.saturating_mul(2); /// owns the port. Matching `LISTEN + port` alone would then mask the bind /// failure as healthy; requiring the listener's PID to be our `ssh` child /// avoids that false green. +/// +/// `Gone` entries are excluded: `diff_entries` keeps a vanished `LISTEN` socket +/// in `entries` (as `EntryStatus::Gone`) for `GONE_RETENTION` before removal, so +/// counting it as present would hide a short listener drop — exactly the flap +/// this signal exists to catch — and keep the binary "no listener" check green +/// for up to 5s after the socket actually died. pub(crate) fn entry_has_listener(entries: &[TrackedEntry], local_port: u16, ssh_pid: u32) -> bool { entries.iter().any(|e| { - e.entry.state == ConnectionState::Listen + e.status != EntryStatus::Gone + && e.entry.state == ConnectionState::Listen && e.entry.local_addr.port() == local_port && e.entry.process.pid == ssh_pid })