diff --git a/crates/prt-core/src/i18n/en.rs b/crates/prt-core/src/i18n/en.rs index c393a48..02f921c 100644 --- a/crates/prt-core/src/i18n/en.rs +++ b/crates/prt-core/src/i18n/en.rs @@ -149,6 +149,7 @@ pub static STRINGS: Strings = Strings { tunnel_status_starting: "starting", tunnel_status_failed: "failed", tunnel_health_no_listener: "no listener", + tunnel_health_flapping: "flapping", tunnel_form_edit_title: " Edit SSH Tunnel ", tunnel_form_field_required: "required", tunnels_empty: " No active tunnels. Press [n] to create one.", diff --git a/crates/prt-core/src/i18n/mod.rs b/crates/prt-core/src/i18n/mod.rs index 0654701..9d626cb 100644 --- a/crates/prt-core/src/i18n/mod.rs +++ b/crates/prt-core/src/i18n/mod.rs @@ -255,6 +255,7 @@ pub struct Strings { pub tunnel_status_starting: &'static str, pub tunnel_status_failed: &'static str, pub tunnel_health_no_listener: &'static str, + pub tunnel_health_flapping: &'static str, pub tunnel_form_edit_title: &'static str, pub tunnel_form_field_required: &'static str, pub tunnels_empty: &'static str, diff --git a/crates/prt-core/src/i18n/ru.rs b/crates/prt-core/src/i18n/ru.rs index efd2c38..bf54f88 100644 --- a/crates/prt-core/src/i18n/ru.rs +++ b/crates/prt-core/src/i18n/ru.rs @@ -149,6 +149,7 @@ pub static STRINGS: Strings = Strings { tunnel_status_starting: "запускается", tunnel_status_failed: "сбой", tunnel_health_no_listener: "нет листенера", + tunnel_health_flapping: "нестабилен", tunnel_form_edit_title: " Правка SSH-туннеля ", tunnel_form_field_required: "обязательно", tunnels_empty: " Активных туннелей нет. Нажмите [n] чтобы создать.", diff --git a/crates/prt-core/src/i18n/zh.rs b/crates/prt-core/src/i18n/zh.rs index 1f1d3e3..8a83ca3 100644 --- a/crates/prt-core/src/i18n/zh.rs +++ b/crates/prt-core/src/i18n/zh.rs @@ -148,6 +148,7 @@ pub static STRINGS: Strings = Strings { tunnel_status_starting: "启动中", tunnel_status_failed: "失败", tunnel_health_no_listener: "无监听", + tunnel_health_flapping: "监听抖动", tunnel_form_edit_title: " 编辑 SSH 隧道 ", tunnel_form_field_required: "必填", tunnels_empty: " 无活跃隧道。按 [n] 创建。", diff --git a/crates/prt/src/app.rs b/crates/prt/src/app.rs index 07a3250..5bf0719 100644 --- a/crates/prt/src/app.rs +++ b/crates/prt/src/app.rs @@ -23,8 +23,43 @@ use ratatui::prelude::*; use std::io::stdout; use std::time::Instant; +use crate::forward::TunnelStatus; use crate::input::handle_key; use crate::ui::draw; +use prt_core::model::{ConnectionState, EntryStatus}; +use std::time::Duration; + +/// Grace period after a tunnel (re)starts before its missing listener is held +/// against it. The scan backing the listener check only refreshes every +/// `TICK_RATE`, and a tunnel needs a tick to go `Starting -> Alive` plus +/// another for the scan to observe its `LISTEN` socket. Shared by the recorder +/// here and the renderer in `views::tunnels` so both agree on when a scan is +/// trustworthy. +pub(crate) const LISTENER_GRACE: Duration = TICK_RATE.saturating_mul(2); + +/// True if `ssh_pid` owns a `LISTEN` socket on `local_port` in the given scan +/// — confirms an `Alive` tunnel actually opened its own socket. Read-only: +/// reuses the data prt already scanned, opens no new connections. +/// +/// The PID match matters: OpenSSH defaults to `ExitOnForwardFailure no`, so on +/// a local-port conflict the `ssh` child keeps running while *another* process +/// owns the port. Matching `LISTEN + port` alone would then mask the bind +/// failure as healthy; requiring the listener's PID to be our `ssh` child +/// avoids that false green. +/// +/// `Gone` entries are excluded: `diff_entries` keeps a vanished `LISTEN` socket +/// in `entries` (as `EntryStatus::Gone`) for `GONE_RETENTION` before removal, so +/// counting it as present would hide a short listener drop — exactly the flap +/// this signal exists to catch — and keep the binary "no listener" check green +/// for up to 5s after the socket actually died. +pub(crate) fn entry_has_listener(entries: &[TrackedEntry], local_port: u16, ssh_pid: u32) -> bool { + entries.iter().any(|e| { + e.status != EntryStatus::Gone + && e.entry.state == ConnectionState::Listen + && e.entry.local_addr.port() == local_port + && e.entry.process.pid == ssh_pid + }) +} #[derive(Clone, Copy)] pub(crate) enum SudoPurpose { @@ -307,6 +342,18 @@ impl App { self.detail_cache = None; } } + // Record one listener-presence observation per scan for flapping + // detection. Only `Alive` tunnels past the grace window are sampled, so + // the history never holds a phantom `false` from a freshly (re)started + // tunnel the scan hasn't observed yet. `refresh()` runs only while + // auto-refresh is active, so the scan here is always fresh. + let entries = &self.session.entries; + for t in &mut self.forwards.tunnels { + if t.last_status == TunnelStatus::Alive && t.uptime() >= LISTENER_GRACE { + let present = entry_has_listener(entries, t.spec.local_port, t.pid()); + t.record_listener(present); + } + } self.update_filtered_preserving(prev_key); } diff --git a/crates/prt/src/forward.rs b/crates/prt/src/forward.rs index 1824ac7..bd02563 100644 --- a/crates/prt/src/forward.rs +++ b/crates/prt/src/forward.rs @@ -5,6 +5,7 @@ use prt_core::core::ssh_config::{SshHost, SshHostSource}; use prt_core::core::ssh_tunnel::{ResolvedHost, SshTunnelSpec, TunnelKind}; +use std::collections::VecDeque; use std::process::{Child, Command, Stdio}; use std::thread; use std::time::{Duration, Instant}; @@ -36,6 +37,33 @@ const STABILITY_THRESHOLD: Duration = Duration::from_secs(30); /// remove it and lets the user restart it manually. const MAX_RECONNECT_ATTEMPTS: u32 = 10; +/// How many recent listener-presence observations to keep per tunnel for +/// flapping detection. Each sample covers one full scan (~`TICK_RATE`), so a +/// window of 6 spans roughly the last ~12s of confirmable scans. +const LISTENER_HISTORY_CAP: usize = 6; +/// Minimum samples before the flapping verdict is trusted — avoids flagging a +/// tunnel as unstable off a single good/bad sample pair right after it starts +/// being observed. +const LISTENER_MIN_SAMPLES: usize = 4; + +/// Push one listener-presence sample, evicting the oldest once the window is +/// full. Free function (rather than inline in `record_listener`) so the +/// capping behaviour is unit-testable without spawning an `ssh` child. +fn push_listener_sample(history: &mut VecDeque, present: bool) { + history.push_back(present); + if history.len() > LISTENER_HISTORY_CAP { + history.pop_front(); + } +} + +/// True when the observation window holds *both* a present and an absent +/// listener sample (and enough samples to be meaningful): the listener is +/// coming and going across scans rather than being stably up or stably down. +/// A pure predicate so it can be unit-tested without spawning an `ssh` child. +fn history_is_flapping(history: &VecDeque) -> bool { + history.len() >= LISTENER_MIN_SAMPLES && history.contains(&true) && history.contains(&false) +} + /// A single SSH tunnel: a running `ssh` child process plus the spec and /// resolved argument list (kept so `restart()` reuses the same resolution). pub struct SshTunnel { @@ -57,6 +85,12 @@ pub struct SshTunnel { /// Earliest instant the next reconnect attempt may run. `None` once the /// tunnel is healthy or no retry has been scheduled yet. next_retry_at: Option, + /// Recent listener-presence observations (newest at the back, capped at + /// `LISTENER_HISTORY_CAP`). Only confirmable scans are pushed here (not + /// while paused or within the startup grace window), so a mix of `true` + /// and `false` genuinely means the local `LISTEN` socket flapped. Cleared + /// on `respawn` so a restarted tunnel starts with a clean slate. + listener_history: VecDeque, } impl SshTunnel { @@ -73,6 +107,7 @@ impl SshTunnel { retry_backoff: INITIAL_BACKOFF, retry_count: 0, next_retry_at: None, + listener_history: VecDeque::with_capacity(LISTENER_HISTORY_CAP), } } @@ -177,6 +212,8 @@ impl SshTunnel { }; self.last_status = TunnelStatus::Starting; self.started_at = Instant::now(); + // The old child's listener history says nothing about the new one. + self.listener_history.clear(); Ok(()) } @@ -198,6 +235,22 @@ impl SshTunnel { self.started_at.elapsed() } + /// Record one listener-presence observation from a confirmable scan, + /// evicting the oldest sample once the window is full. Callers must only + /// invoke this when the scan can be trusted (auto-refresh running and past + /// the startup grace window); a stale or premature `false` would otherwise + /// manufacture a phantom flap. + pub fn record_listener(&mut self, present: bool) { + push_listener_sample(&mut self.listener_history, present); + } + + /// True when the local listener has been intermittently present across + /// recent scans — a "degrading slowly" signal distinct from the binary + /// "no listener" (a listener that is gone *right now*). + pub fn is_flapping(&self) -> bool { + history_is_flapping(&self.listener_history) + } + /// PID of the current `ssh` child. For `-L`/`-D` tunnels this is the /// process that binds the local port, so the listener health check can /// confirm a `LISTEN` socket really belongs to *this* tunnel. @@ -531,4 +584,55 @@ mod tests { fn shell_quote_quotes_empty_arg() { assert_eq!(shell_quote(""), "''"); } + + fn history(samples: &[bool]) -> VecDeque { + let mut h = VecDeque::new(); + for &s in samples { + push_listener_sample(&mut h, s); + } + h + } + + #[test] + fn stable_present_is_not_flapping() { + assert!(!history_is_flapping(&history(&[ + true, true, true, true, true, true + ]))); + } + + #[test] + fn all_absent_is_not_flapping() { + assert!(!history_is_flapping(&history(&[ + false, false, false, false, false, false + ]))); + } + + #[test] + fn alternating_presence_is_flapping() { + assert!(history_is_flapping(&history(&[true, false, true, false]))); + } + + #[test] + fn insufficient_samples_is_not_flapping() { + // Both values present, but fewer than LISTENER_MIN_SAMPLES samples. + assert!(!history_is_flapping(&history(&[true, false]))); + } + + #[test] + fn empty_history_is_not_flapping() { + assert!(!history_is_flapping(&VecDeque::new())); + } + + #[test] + fn window_caps_and_evicts_old_samples() { + // Fill with absences, then push enough presences to roll the absences + // out: a once-flapping window settles back to stably-up. + let mut h = history(&[false, false, false, false, false, false]); + assert!(!history_is_flapping(&h)); // all absent + for _ in 0..LISTENER_HISTORY_CAP { + push_listener_sample(&mut h, true); + } + assert_eq!(h.len(), LISTENER_HISTORY_CAP); + assert!(!history_is_flapping(&h)); // absences evicted, now stably up + } } diff --git a/crates/prt/src/views/tunnels.rs b/crates/prt/src/views/tunnels.rs index cd24a10..c54022d 100644 --- a/crates/prt/src/views/tunnels.rs +++ b/crates/prt/src/views/tunnels.rs @@ -1,38 +1,13 @@ //! Fullscreen SSH tunnels manager. -use crate::app::App; +use crate::app::{entry_has_listener, App, LISTENER_GRACE}; use crate::forward::TunnelStatus; use crossterm::event::{KeyCode, KeyEvent}; use prt_core::core::scanner::format_uptime; use prt_core::core::ssh_tunnel::TunnelKind; use prt_core::i18n; -use prt_core::model::{ConnectionState, TICK_RATE}; use ratatui::prelude::*; use ratatui::widgets::*; -use std::time::Duration; - -/// Grace period after (re)start before a missing listener is reported. The scan -/// backing `has_local_listener` only refreshes every `TICK_RATE`, and a tunnel -/// needs a tick to go `Starting -> Alive` plus another for the scan to observe -/// its `LISTEN` socket, so we'd otherwise flash a bogus "no listener". -const LISTENER_GRACE: Duration = TICK_RATE.saturating_mul(2); - -/// True if `ssh_pid` owns a `LISTEN` socket on `local_port` in the latest scan -/// — confirms an `Alive` tunnel actually opened its own socket. Read-only: -/// reuses the data prt already scanned, opens no new connections. -/// -/// The PID match matters: OpenSSH defaults to `ExitOnForwardFailure no`, so on -/// a local-port conflict the `ssh` child keeps running while *another* process -/// owns the port. Matching `LISTEN + port` alone would then mask the bind -/// failure as healthy; requiring the listener's PID to be our `ssh` child -/// avoids that false green. -fn has_local_listener(app: &App, local_port: u16, ssh_pid: u32) -> bool { - app.session.entries.iter().any(|e| { - e.entry.state == ConnectionState::Listen - && e.entry.local_addr.port() == local_port - && e.entry.process.pid == ssh_pid - }) -} pub fn draw(f: &mut Frame, app: &App, area: Rect) { let s = i18n::strings(); @@ -104,10 +79,17 @@ pub fn draw(f: &mut Frame, app: &App, area: Rect) { let (status, color) = match t.last_status { TunnelStatus::Alive => { let scan_can_confirm = !app.auto_refresh_paused && t.uptime() >= LISTENER_GRACE; - if !scan_can_confirm || has_local_listener(app, t.spec.local_port, t.pid()) { - (s.tunnel_status_alive.to_string(), Color::Green) - } else { + let present = + entry_has_listener(&app.session.entries, t.spec.local_port, t.pid()); + if scan_can_confirm && !present { + // Listener gone right now — the acute case wins. (s.tunnel_health_no_listener.to_string(), Color::Yellow) + } else if scan_can_confirm && t.is_flapping() { + // Listener present now but intermittently dropped across + // recent scans — degrading rather than broken. + (s.tunnel_health_flapping.to_string(), Color::LightYellow) + } else { + (s.tunnel_status_alive.to_string(), Color::Green) } } TunnelStatus::Starting => (s.tunnel_status_starting.to_string(), Color::Yellow),