From 857ba5cd0017a67919a6d22db90855c317e6e4c0 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 28 May 2026 10:38:11 +0530 Subject: [PATCH 1/4] fix(core): fall back to other ports when preferred is OS-excluded (Windows WSAEACCES) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `pick_listen_port_with_policy` only fell back to the 7789–7798 pool when the preferred-port bind failed with `AddrInUse`. Any other error hit the catch-all arm and immediately returned `BindFailed`, so the embedded core never started. On Windows a loopback bind can fail with `WSAEACCES` (os error 10013) when the port sits inside a system-reserved/excluded range (Hyper-V / WinNAT / WSL2 / Docker — `netsh interface ipv4 show excludedportrange protocol=tcp`). Nothing is listening on the port, so there is no stale-listener takeover to perform, but a neighbour port outside the reserved block binds fine. Route this case to the fallback pool instead of giving up. Changes: - New `is_port_excluded_bind_error` — matches `raw_os_error() == 10013` directly (Rust's `ErrorKind` mapping for 10013 is not stable across releases, mirroring `util::is_transient_fs_error`'s raw-code approach) plus the `PermissionDenied` kind as a forward-compatible catch. - Preferred-bind match gains a WSAEACCES arm that skips the takeover probe (nothing is listening) and proceeds straight to fallbacks; the in-use retry loop also routes to fallbacks if a race lands on the exclusion code. - Extracted the fallback loop into `pick_fallback_port`, taking an `unusable_label` so the warn / `NoAvailablePort` diagnostics explain whether the preferred port was occupied or OS-excluded. Behavior is unchanged on every non-WSAEACCES path: `AddrInUse` still probes for takeover then falls back, and all other bind errors still return `BindFailed`. Targets Sentry OPENHUMAN-TAURI-500 (issue 5697): 3 events on v0.56.0, Windows, `[core] Failed to bind to 127.0.0.1:7788: … (os error 10013)`. 
Tests: unit-test `is_port_excluded_bind_error` (10013 + PermissionDenied → true; AddrInUse / other kinds / unrelated raw codes → false) and the extracted `pick_fallback_port` (binds first free candidate with `fallback_from`; all-busy → `NoAvailablePort` carrying the exclusion label). The real WSAEACCES bind can't be reproduced off-Windows, so the classifier + fallback routing are tested as units while the existing AddrInUse integration tests stay green. --- src/openhuman/connectivity/rpc.rs | 213 ++++++++++++++++++++++++++++-- 1 file changed, 199 insertions(+), 14 deletions(-) diff --git a/src/openhuman/connectivity/rpc.rs b/src/openhuman/connectivity/rpc.rs index f03af744a6..bdeb998104 100644 --- a/src/openhuman/connectivity/rpc.rs +++ b/src/openhuman/connectivity/rpc.rs @@ -166,7 +166,13 @@ async fn pick_listen_port_with_policy( fallback_ports: &[u16], retry_policy: RetryPolicy, ) -> Result { - match TcpListener::bind((host, preferred)).await { + // `None` → preferred port is occupied (AddrInUse): probe for a stale + // OpenHuman listener to take over before falling back. + // `Some` → preferred port is OS-excluded (Windows WSAEACCES / os error + // 10013): nothing is listening, so skip the takeover probe and + // go straight to the fallback ports. The string is the bind + // error rendered for the warn / NoAvailablePort surfaces. + let excluded_reason: Option = match TcpListener::bind((host, preferred)).await { Ok(listener) => { return Ok(PickListenPortResult { listener, @@ -188,6 +194,18 @@ async fn pick_listen_port_with_policy( }); } Err(retry_err) if retry_err.kind() == ErrorKind::AddrInUse => {} + Err(retry_err) if is_port_excluded_bind_error(&retry_err) => { + // Raced from in-use into an OS exclusion — treat as + // excluded and skip straight to fallbacks. 
+ return pick_fallback_port( + host, + preferred, + fallback_ports, + retry_policy, + format!("port excluded by OS ({retry_err})"), + ) + .await; + } Err(retry_err) => { return Err(PickListenPortError::BindFailed { port: preferred, @@ -196,6 +214,17 @@ async fn pick_listen_port_with_policy( } } } + None + } + // Sentry OPENHUMAN-TAURI-500 (Windows): WSAEACCES / os error 10013 — + // the preferred port sits inside a system-reserved/excluded range + // (Hyper-V / WinNAT / WSL2 / Docker). Nothing is listening, so there + // is no takeover to do, but a neighbour port outside the reserved + // block typically binds. Previously this fell into the catch-all arm + // below and gave up immediately with `BindFailed`, leaving the core + // unable to start. Route it to the fallback ports instead. + Err(err) if is_port_excluded_bind_error(&err) => { + Some(format!("port excluded by OS ({err})")) } Err(err) => { return Err(PickListenPortError::BindFailed { @@ -203,16 +232,46 @@ async fn pick_listen_port_with_policy( reason: err.to_string(), }); } - } + }; - let fingerprint = identify_listener(host, preferred).await; - if matches!(fingerprint, ListenerFingerprint::OpenHumanCore) { - return Err(PickListenPortError::WouldTakeOver { - preferred, - fingerprint: fingerprint.as_human_readable(), - }); - } + // Stale-listener takeover only applies when something is actually + // listening (AddrInUse). An OS-excluded port has no listener to identify, + // so skip the probe and synthesize a human-readable reason instead. 
+ let fingerprint_label = match excluded_reason { + None => { + let fingerprint = identify_listener(host, preferred).await; + if matches!(fingerprint, ListenerFingerprint::OpenHumanCore) { + return Err(PickListenPortError::WouldTakeOver { + preferred, + fingerprint: fingerprint.as_human_readable(), + }); + } + fingerprint.as_human_readable() + } + Some(reason) => reason, + }; + + pick_fallback_port( + host, + preferred, + fallback_ports, + retry_policy, + fingerprint_label, + ) + .await +} +/// Try each fallback port in turn, retrying transient `AddrInUse` races on +/// each candidate. `unusable_label` describes why `preferred` was rejected +/// (stale-listener fingerprint, or an OS port-exclusion reason) and is used +/// only for the warn / `NoAvailablePort` diagnostic surfaces. +async fn pick_fallback_port( + host: &str, + preferred: u16, + fallback_ports: &[u16], + retry_policy: RetryPolicy, + unusable_label: String, +) -> Result { for fallback in fallback_ports { // Retry each fallback candidate on transient AddrInUse so a brief // race on 7789–7798 (AV scanner / prior-instance teardown) doesn't @@ -241,10 +300,8 @@ async fn pick_listen_port_with_policy( } if let Some(listener) = bound { warn!( - "[CORE] preferred port {} in use by {}; bound to {}", - preferred, - fingerprint.as_human_readable(), - fallback + "[CORE] preferred port {} unusable ({}); bound to {}", + preferred, unusable_label, fallback ); return Ok(PickListenPortResult { listener, @@ -256,11 +313,29 @@ async fn pick_listen_port_with_policy( Err(PickListenPortError::NoAvailablePort { preferred, - fingerprint: fingerprint.as_human_readable(), + fingerprint: unusable_label, attempted: fallback_ports.to_vec(), }) } +/// Returns `true` when a preferred-port bind failure means *that specific +/// port* is unusable but a different port likely works — so the caller should +/// try the fallback ports rather than give up. 
+/// +/// Targets Windows `WSAEACCES` (os error 10013): the port sits inside a +/// system-reserved/excluded range (Hyper-V / WinNAT / WSL2 / Docker — visible +/// via `netsh interface ipv4 show excludedportrange protocol=tcp`). Nothing is +/// listening on it, so there is no takeover to perform, but a neighbour port +/// outside the reserved block binds fine. +/// +/// We match on `raw_os_error()` directly because Rust's `ErrorKind` mapping +/// for `10013` is not stable across releases (mirrors the raw-code approach in +/// [`crate::openhuman::util::is_transient_fs_error`]); the `PermissionDenied` +/// kind is accepted too in case a future Rust maps it. +fn is_port_excluded_bind_error(err: &std::io::Error) -> bool { + err.raw_os_error() == Some(10013) || err.kind() == ErrorKind::PermissionDenied +} + async fn identify_listener(host: &str, port: u16) -> ListenerFingerprint { let probe_host = if host == "0.0.0.0" || host == "::" { "127.0.0.1" @@ -633,6 +708,116 @@ mod tests { assert_eq!(result.fallback_from, None); } + // ── is_port_excluded_bind_error (Sentry OPENHUMAN-TAURI-500) ───────────── + + #[test] + fn port_excluded_error_matches_wsaeacces_raw_code() { + // WSAEACCES (os error 10013) — the Windows port-exclusion code from + // the Sentry event. Must classify as "try a different port" even on + // non-Windows runners, where 10013 has no special ErrorKind, because + // we match on the raw code directly. 
+ let err = std::io::Error::from_raw_os_error(10013); + assert!( + is_port_excluded_bind_error(&err), + "WSAEACCES (10013) must route to the fallback ports" + ); + } + + #[test] + fn port_excluded_error_matches_permission_denied_kind() { + let err = std::io::Error::new(ErrorKind::PermissionDenied, "access denied"); + assert!( + is_port_excluded_bind_error(&err), + "PermissionDenied kind must route to the fallback ports" + ); + } + + #[test] + fn port_excluded_error_rejects_addr_in_use_and_others() { + // AddrInUse has its own takeover path and must NOT be treated as an + // OS exclusion. Unrelated kinds (and unrelated raw codes) must fall + // through to the existing BindFailed arm so genuine bind bugs surface. + for err in [ + std::io::Error::new(ErrorKind::AddrInUse, "in use"), + std::io::Error::new(ErrorKind::ConnectionRefused, "refused"), + std::io::Error::from_raw_os_error(2), // ENOENT / ERROR_FILE_NOT_FOUND: NotFound on all OSes (5 is ERROR_ACCESS_DENIED on Windows) + ] { + assert!( + !is_port_excluded_bind_error(&err), + "non-exclusion error must not route to fallback: {err:?}" + ); + } + } + + // ── pick_fallback_port (the path WSAEACCES routes into) ────────────────── + + #[tokio::test] + async fn pick_fallback_port_binds_first_free_candidate() { + // Simulates the post-classification path: the preferred port was + // unusable (e.g. WSAEACCES), so we try the fallbacks. A free fallback + // must bind and report `fallback_from: Some(preferred)`. 
+ let preferred_holder = reserve_port(); + let preferred = preferred_holder.local_addr().unwrap().port(); + let busy_holder = reserve_port(); + let busy = busy_holder.local_addr().unwrap().port(); + let free_holder = reserve_port(); + let free = free_holder.local_addr().unwrap().port(); + drop(free_holder); + + let result = pick_fallback_port( + "127.0.0.1", + preferred, + &[busy, free], + RetryPolicy { + attempts: 1, + backoff: Duration::from_millis(10), + }, + "port excluded by OS (simulated WSAEACCES)".to_string(), + ) + .await + .expect("a free fallback must bind"); + + assert_eq!(result.port, free); + assert_eq!(result.fallback_from, Some(preferred)); + } + + #[tokio::test] + async fn pick_fallback_port_all_busy_reports_label() { + // When every fallback is occupied, NoAvailablePort must carry the + // unusable label (here the OS-exclusion reason) so the diagnostic + // surface explains *why* the preferred port was skipped. + let preferred_holder = reserve_port(); + let preferred = preferred_holder.local_addr().unwrap().port(); + let f1_holder = reserve_port(); + let f1 = f1_holder.local_addr().unwrap().port(); + let f2_holder = reserve_port(); + let f2 = f2_holder.local_addr().unwrap().port(); + + let err = pick_fallback_port( + "127.0.0.1", + preferred, + &[f1, f2], + RetryPolicy { + attempts: 1, + backoff: Duration::from_millis(10), + }, + "port excluded by OS (simulated WSAEACCES)".to_string(), + ) + .await + .expect_err("all-busy fallbacks must fail"); + + assert!( + matches!( + err, + PickListenPortError::NoAvailablePort { preferred: p, ref fingerprint, ref attempted } + if p == preferred + && attempted == &vec![f1, f2] + && fingerprint.contains("excluded by OS") + ), + "expected NoAvailablePort carrying the exclusion label, got: {err:?}" + ); + } + #[test] fn snapshot_socket_state_is_uninitialized_without_manager() { // The global SocketManager OnceLock may already be set if other From bf857728c9c56eab4e03d99d0f8a6c5d003f6497 Mon Sep 17 00:00:00 2001 
From: M3gA-Mind Date: Fri, 29 May 2026 00:01:37 +0530 Subject: [PATCH 2/4] fix(core): emit netsh hint when all fallback ports are OS-excluded When the preferred port and every fallback are all system-reserved (WSAEACCES / os error 10013), warn with the Windows diagnostic command (`netsh interface ipv4 show excludedportrange protocol=tcp`) so affected users can identify the reserved block without a support escalation. Addresses @oxoxDev review request on pick_fallback_port. --- src/openhuman/connectivity/rpc.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/openhuman/connectivity/rpc.rs b/src/openhuman/connectivity/rpc.rs index bdeb998104..dfe7956e2f 100644 --- a/src/openhuman/connectivity/rpc.rs +++ b/src/openhuman/connectivity/rpc.rs @@ -311,6 +311,18 @@ async fn pick_fallback_port( } } + // When an OS-exclusion blocked the preferred port *and* every fallback is + // also unavailable, surface the Windows diagnostic command so users can + // identify the reserved range without waiting for a support escalation. + if unusable_label.contains("excluded by OS") { + warn!( + "[CORE] preferred port {} and all fallbacks {:?} are unavailable. \ + On Windows, run `netsh interface ipv4 show excludedportrange protocol=tcp` \ + to inspect system-reserved port ranges (Hyper-V / WinNAT / WSL2 / Docker).", + preferred, fallback_ports + ); + } + Err(PickListenPortError::NoAvailablePort { preferred, fingerprint: unusable_label, From 73803358d262a8b85b49202ee1663e65500b2a6b Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Fri, 29 May 2026 00:32:53 +0530 Subject: [PATCH 3/4] fix(rpc): quote health_snapshot key in LEGACY_METHOD_ALIASES The d2f0c4b55 commit added `health_snapshot` as an unquoted identifier key, but parse_frontend_legacy_aliases expects all keys to be single- quoted (matching every other entry in the object). The Rust drift-test `frontend_legacy_aliases_match_server_alias_table` panicked at the unquoted token. 
Quote the key to match the parser contract. --- app/src/services/rpcMethods.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/src/services/rpcMethods.ts b/app/src/services/rpcMethods.ts index f6b665ea29..a53bce9e6e 100644 --- a/app/src/services/rpcMethods.ts +++ b/app/src/services/rpcMethods.ts @@ -67,7 +67,7 @@ export const LEGACY_METHOD_ALIASES: Record = { 'openhuman.local_ai_presets': CORE_RPC_METHODS.inferencePresets, 'openhuman.providers_list_models': CORE_RPC_METHODS.inferenceListModels, 'openhuman.inference_embed': CORE_RPC_METHODS.embeddingsEmbed, - health_snapshot: CORE_RPC_METHODS.healthSnapshot, + 'health_snapshot': CORE_RPC_METHODS.healthSnapshot, }; export function normalizeRpcMethod(method: string): string { From f14d856d72d95ab36ebd90ab5564cbb4dba412c1 Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Fri, 29 May 2026 00:45:22 +0530 Subject: [PATCH 4/4] fix(rpc): handle unquoted identifier keys in LEGACY_METHOD_ALIASES parser Prettier omits quotes on valid identifier keys (e.g. `health_snapshot` does not need quoting in TypeScript, so `'health_snapshot'` becomes `health_snapshot`). The drift-test parser only accepted quoted keys, causing `frontend_legacy_aliases_match_server_alias_table` to panic. Add `object_key()` which accepts both quoted strings and bare identifier keys, and use it when extracting LEGACY_METHOD_ALIASES keys. Revert the workaround that added quotes to `health_snapshot` in rpcMethods.ts. 
--- app/src/services/rpcMethods.ts | 2 +- src/core/legacy_aliases.rs | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/app/src/services/rpcMethods.ts b/app/src/services/rpcMethods.ts index a53bce9e6e..f6b665ea29 100644 --- a/app/src/services/rpcMethods.ts +++ b/app/src/services/rpcMethods.ts @@ -67,7 +67,7 @@ export const LEGACY_METHOD_ALIASES: Record = { 'openhuman.local_ai_presets': CORE_RPC_METHODS.inferencePresets, 'openhuman.providers_list_models': CORE_RPC_METHODS.inferenceListModels, 'openhuman.inference_embed': CORE_RPC_METHODS.embeddingsEmbed, - 'health_snapshot': CORE_RPC_METHODS.healthSnapshot, + health_snapshot: CORE_RPC_METHODS.healthSnapshot, }; export function normalizeRpcMethod(method: string): string { diff --git a/src/core/legacy_aliases.rs b/src/core/legacy_aliases.rs index 644e392c1c..fb4896cdca 100644 --- a/src/core/legacy_aliases.rs +++ b/src/core/legacy_aliases.rs @@ -180,6 +180,18 @@ mod tests { rest[..value_end].to_string() } + /// Extract an object key that may be quoted (`'foo'` / `"foo"`) or a bare + /// identifier (`foo`). Prettier omits quotes on valid identifiers, so the + /// parser must accept both forms. + fn object_key(text: &str) -> String { + let trimmed = text.trim(); + if trimmed.starts_with('\'') || trimmed.starts_with('"') { + quoted_value(trimmed) + } else { + trimmed.to_string() + } + } + fn parse_core_rpc_methods(source: &str) -> BTreeMap { let body = object_body_after_marker(source, "export const CORE_RPC_METHODS", "} as const;"); let mut methods = BTreeMap::new(); @@ -215,7 +227,7 @@ mod tests { let (legacy, target_expr) = entry .split_once(':') .unwrap_or_else(|| panic!("expected legacy alias entry, got `{entry}`")); - let legacy = quoted_value(legacy); + let legacy = object_key(legacy); let target_expr = target_expr.trim(); let canonical = if let Some(key) = target_expr.strip_prefix("CORE_RPC_METHODS.") { core_methods