From 99d959adf3df53d57ea7d8512f95b039bd71ab53 Mon Sep 17 00:00:00 2001 From: variablefate Date: Thu, 14 May 2026 01:06:43 -0700 Subject: [PATCH 1/2] fix(relay): force-reconnect path that survives backoff + stale sockets (#86) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "Reconnect to Relays" button could leave the app permanently unable to connect after a single press, with subsequent presses and even app restarts failing to recover. Root cause was five compounding bugs in the manual-reconnect path: 1. No WebSocket pingInterval — NAT/wifi transitions silently dropped sockets while the client thought they were still CONNECTED. 2. disconnectAll() used graceful WebSocket.close(), which can hold an OkHttp dispatcher slot for up to readTimeout (30s) while the close handshake races with the immediate connectAll() that followed. 3. reconnectAttempts only reset on a successful onOpen, so repeated auto-retry failures ramped backoff to 60s — manual presses that themselves failed just kept incrementing the counter. 4. ensureConnected() (used by the roadflare-rider button) only reconnects DISCONNECTED relays, so a connection stuck in CONNECTING couldn't recover. 5. The button's "Reconnecting…" state was a fake 2s timer disconnected from actual connection state, and it leaked a MainScope coroutine on every press. This adds a dedicated forceReconnect path: - RelayConnection.forceReconnect(): single synchronized transition that cancels (not closes) the prior socket for immediate teardown, resets reconnectAttempts to 0, bumps connectionGeneration so stale callbacks from the prior socket are filtered, and opens a new socket. - RelayManager.forceReconnectAll(newRelayUrls: List?): fan-out to every connection, plus optional relay-list sync so user edits to custom relays take effect without an app restart. - NostrService.forceReconnect(): thin facade. - OkHttp client now sends 30s pings (matches CashuWebSocket). - RelayManagementScreen button state now derives from connectionStates rather than a fixed timer, and uses LaunchedEffect rather than MainScope so no coroutine leaks. All three apps' "Reconnect" handlers (rider-app, drivestr, roadflare-rider) are migrated to the new path. Public API of disconnectAll/connectAll/ensureConnected is preserved — they're still called from MainActivity onCreate, ProfileSyncManager, LogoutManager, etc. New methods are additive. Tests: 4 new unit tests cover the forceReconnect contract (reset backoff, bump generation, transition to CONNECTING, recover after disconnect). Full common suite: 508/508 passing. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../com/ridestr/common/nostr/NostrService.kt | 14 +++ .../ridestr/common/nostr/relay/RelayConfig.kt | 6 + .../common/nostr/relay/RelayConnection.kt | 59 +++++++++ .../common/nostr/relay/RelayManager.kt | 44 +++++++ .../common/ui/RelayManagementScreen.kt | 34 ++++-- .../RelayConnectionForceReconnectTest.kt | 113 ++++++++++++++++++ .../java/com/drivestr/app/MainActivity.kt | 6 +- .../java/com/ridestr/rider/MainActivity.kt | 6 +- .../java/com/roadflare/rider/MainActivity.kt | 2 +- .../rider/viewmodels/RiderViewModel.kt | 5 + 10 files changed, 275 insertions(+), 14 deletions(-) create mode 100644 common/src/test/java/com/ridestr/common/nostr/relay/RelayConnectionForceReconnectTest.kt diff --git a/common/src/main/java/com/ridestr/common/nostr/NostrService.kt b/common/src/main/java/com/ridestr/common/nostr/NostrService.kt index 373455a9..5902c032 100644 --- a/common/src/main/java/com/ridestr/common/nostr/NostrService.kt +++ b/common/src/main/java/com/ridestr/common/nostr/NostrService.kt @@ -123,6 +123,20 @@ class NostrService internal constructor( relayManager.ensureConnected() } + /** + * Force every relay to tear down and reopen, resetting reconnect backoff. + * Use from the manual "Reconnect to Relays" UI — does NOT have the + * dispatcher-starvation / backoff-poisoning problems of disconnectAll+connectAll. + * + * @param newRelayUrls Optional updated relay list (e.g., from + * `settingsRepository.getEffectiveRelays()`) so user edits to custom relays + * take effect without an app restart. + */ + fun forceReconnect(newRelayUrls: List? = null) { + Log.d(TAG, "Force-reconnecting all relays (newRelayUrls=${newRelayUrls?.size ?: "unchanged"})") + relayManager.forceReconnectAll(newRelayUrls) + } + /** * Clear all subscriptions. Use for debugging or to reset state. */ diff --git a/common/src/main/java/com/ridestr/common/nostr/relay/RelayConfig.kt b/common/src/main/java/com/ridestr/common/nostr/relay/RelayConfig.kt index a7cd76a3..5a4a3c7a 100644 --- a/common/src/main/java/com/ridestr/common/nostr/relay/RelayConfig.kt +++ b/common/src/main/java/com/ridestr/common/nostr/relay/RelayConfig.kt @@ -33,4 +33,10 @@ object RelayConfig { * Time to wait before attempting reconnection after failure. */ const val RECONNECT_DELAY_MS = 5_000L + + /** + * WebSocket ping interval. Without pings, NAT/wifi transitions can leave + * the client believing a connection is alive when it isn't. + */ + const val PING_INTERVAL_MS = 30_000L } diff --git a/common/src/main/java/com/ridestr/common/nostr/relay/RelayConnection.kt b/common/src/main/java/com/ridestr/common/nostr/relay/RelayConnection.kt index 56ffe475..e3de4c0f 100644 --- a/common/src/main/java/com/ridestr/common/nostr/relay/RelayConnection.kt +++ b/common/src/main/java/com/ridestr/common/nostr/relay/RelayConnection.kt @@ -1,6 +1,7 @@ package com.ridestr.common.nostr.relay import android.util.Log +import androidx.annotation.VisibleForTesting import com.vitorpamplona.quartz.nip01Core.core.Event import com.vitorpamplona.quartz.nip01Core.crypto.verify import kotlinx.coroutines.CoroutineScope @@ -69,6 +70,20 @@ class RelayConnection( private val _state = MutableStateFlow(RelayConnectionState.DISCONNECTED) val state: StateFlow = _state.asStateFlow() + /** Test-only accessor for the auto-reconnect backoff counter. */ + @VisibleForTesting + internal fun reconnectAttemptsForTest(): Int = reconnectAttempts.get() + + /** Test-only mutator to simulate prior auto-retry failures. */ + @VisibleForTesting + internal fun setReconnectAttemptsForTest(value: Int) { + reconnectAttempts.set(value) + } + + /** Test-only accessor for the generation counter. */ + @VisibleForTesting + internal fun connectionGenerationForTest(): Long = synchronized(this) { connectionGeneration } + private val activeSubscriptions = ConcurrentHashMap() // subId -> filterJson private val pendingEvents = ConcurrentHashMap() // eventId -> event @@ -115,6 +130,50 @@ class RelayConnection( Log.d(TAG, "Disconnected from $url") } + /** + * Tear down the current socket and reopen immediately, resetting the reconnect backoff. + * + * Used by the manual "Reconnect to Relays" UI. Differs from `disconnect()+connect()`: + * 1. `cancel()` (not graceful `close()`) — frees the OkHttp dispatcher slot immediately + * even if the prior socket was mid-handshake. Graceful close can linger up to + * `readTimeout` (30s) while the close handshake races with new connection attempts. + * 2. Resets `reconnectAttempts` so the auto-retry backoff (capped at 60s) restarts at 0, + * preventing manual reconnects from being shadowed by long-pending scheduled retries. + * 3. Single-locked transition keeps `socket`, `state`, and `connectionGeneration` + * coherent so the stale-callback guards in `RelayWebSocketListener` work correctly. + */ + fun forceReconnect() { + val socketToCancel: WebSocket? + + synchronized(this) { + socketToCancel = socket + socket = null + + // Bump generation so any in-flight messages/callbacks from the prior socket + // are recognized as stale. + connectionGeneration++ + + // Reset backoff — any scheduled-reconnect coroutine still sleeping will see + // shouldReconnect=true and either complete its connect() (no-op because we're + // now CONNECTING) or be preempted by ours. + reconnectAttempts.set(0) + shouldReconnect.set(true) + _state.value = RelayConnectionState.CONNECTING + + val request = Request.Builder() + .url(url) + .build() + socket = client.newWebSocket(request, RelayWebSocketListener()) + } + + // Cancel outside the lock to avoid holding it during network I/O. + // `cancel()` is non-graceful and releases resources immediately, unlike `close()` + // which initiates a close handshake that can hang for up to readTimeout. + socketToCancel?.cancel() + + Log.d(TAG, "Force-reconnecting to $url (generation $connectionGeneration)") + } + /** * Send a subscription request to the relay. */ diff --git a/common/src/main/java/com/ridestr/common/nostr/relay/RelayManager.kt b/common/src/main/java/com/ridestr/common/nostr/relay/RelayManager.kt index d47da858..e9b2b5c4 100644 --- a/common/src/main/java/com/ridestr/common/nostr/relay/RelayManager.kt +++ b/common/src/main/java/com/ridestr/common/nostr/relay/RelayManager.kt @@ -52,6 +52,10 @@ class RelayManager( .connectTimeout(RelayConfig.CONNECT_TIMEOUT_MS, TimeUnit.MILLISECONDS) .readTimeout(RelayConfig.READ_TIMEOUT_MS, TimeUnit.MILLISECONDS) .writeTimeout(RelayConfig.WRITE_TIMEOUT_MS, TimeUnit.MILLISECONDS) + // Send WebSocket pings so NAT/firewall transitions don't leave zombie sockets + // that the client thinks are CONNECTED but won't deliver messages. + // Matches CashuWebSocket's 30s interval. + .pingInterval(RelayConfig.PING_INTERVAL_MS, TimeUnit.MILLISECONDS) .build() private val connections = ConcurrentHashMap() @@ -126,6 +130,46 @@ class RelayManager( connections.values.forEach { it.disconnect() } } + /** + * Force-reconnect every relay, resetting backoff and replacing any stale socket. + * + * Use this from the manual "Reconnect to Relays" UI rather than + * `disconnectAll()+connectAll()`. The compound call is not equivalent: + * + * - `disconnectAll()` uses graceful `WebSocket.close()`, which can keep an OkHttp + * dispatcher slot occupied for up to `readTimeout` (30s) while the close handshake + * completes. A rapid `connectAll()` afterwards starts new sockets that compete with + * the dying ones, and the backoff state (`reconnectAttempts`) isn't reset. + * - `forceReconnectAll()` per-relay: `cancel()`s the old socket (immediate teardown), + * resets `reconnectAttempts` to 0, and opens a new socket in a single synchronized + * transition so stale-callback guards in the listener fire reliably. + * + * @param newRelayUrls Optional updated relay list. If provided, relays not in the list + * are removed and new relays are added BEFORE force-reconnecting. Use this when the + * user has edited custom relays in settings so the changes take effect without an + * app restart. + */ + fun forceReconnectAll(newRelayUrls: List? = null) { + if (newRelayUrls != null) { + syncRelayList(newRelayUrls) + } + Log.d(TAG, "Force-reconnecting all ${connections.size} relay(s)") + connections.values.forEach { it.forceReconnect() } + } + + /** + * Reconcile the connection set with a target list. Removes connections that aren't + * in the target and adds new ones. Existing connections present in the target stay. + */ + private fun syncRelayList(target: List) { + val targetSet = target.toSet() + // Remove any that are no longer wanted. + val toRemove = connections.keys - targetSet + toRemove.forEach { removeRelay(it) } + // Add any new ones (addRelay is idempotent for existing entries). + target.forEach { url -> addRelay(url) } + } + /** * Publish an event to all connected relays. */ diff --git a/common/src/main/java/com/ridestr/common/ui/RelayManagementScreen.kt b/common/src/main/java/com/ridestr/common/ui/RelayManagementScreen.kt index 642e9394..f9537fd0 100644 --- a/common/src/main/java/com/ridestr/common/ui/RelayManagementScreen.kt +++ b/common/src/main/java/com/ridestr/common/ui/RelayManagementScreen.kt @@ -18,9 +18,7 @@ import androidx.compose.ui.unit.dp import androidx.compose.foundation.background import androidx.compose.foundation.shape.CircleShape import com.ridestr.common.nostr.relay.RelayConnectionState -import kotlinx.coroutines.MainScope import kotlinx.coroutines.delay -import kotlinx.coroutines.launch /** * Standalone relay management screen accessible from: @@ -46,7 +44,30 @@ fun RelayManagementScreen( ) { BackHandler(onBack = onBack) - var isReconnecting by remember { mutableStateOf(false) } + // Tracks "we just kicked off a reconnect" so the button doesn't no-op visually. + // Auto-clears either when at least one relay reports CONNECTED, or after a hard cap + // so a totally unreachable relay set doesn't lock the button forever. + var reconnectInitiatedAt by remember { mutableStateOf(null) } + val isReconnecting = reconnectInitiatedAt != null + LaunchedEffect(reconnectInitiatedAt, connectedCount) { + val started = reconnectInitiatedAt ?: return@LaunchedEffect + // Clear immediately on success. + if (connectedCount > 0) { + reconnectInitiatedAt = null + return@LaunchedEffect + } + // Hard cap so the button doesn't stay disabled if all relays are unreachable. + // 15s ≈ ample for fresh OkHttp WebSocket handshake (10s connectTimeout + slack). + val elapsed = System.currentTimeMillis() - started + val remaining = 15_000L - elapsed + if (remaining > 0) { + delay(remaining) + } + // Re-check on resume — connectedCount may have updated during the delay. + if (connectedCount == 0) { + reconnectInitiatedAt = null + } + } var newRelayInput by remember { mutableStateOf("") } Scaffold( @@ -122,13 +143,8 @@ fun RelayManagementScreen( if (onReconnect != null) { Button( onClick = { - isReconnecting = true + reconnectInitiatedAt = System.currentTimeMillis() onReconnect() - // Reset after a brief delay (reconnection is async) - MainScope().launch { - delay(2000) - isReconnecting = false - } }, enabled = !isReconnecting, modifier = Modifier diff --git a/common/src/test/java/com/ridestr/common/nostr/relay/RelayConnectionForceReconnectTest.kt b/common/src/test/java/com/ridestr/common/nostr/relay/RelayConnectionForceReconnectTest.kt new file mode 100644 index 00000000..61fa6a37 --- /dev/null +++ b/common/src/test/java/com/ridestr/common/nostr/relay/RelayConnectionForceReconnectTest.kt @@ -0,0 +1,113 @@ +package com.ridestr.common.nostr.relay + +import okhttp3.OkHttpClient +import org.junit.Assert.assertEquals +import org.junit.Assert.assertTrue +import org.junit.Test +import org.junit.runner.RunWith +import org.robolectric.RobolectricTestRunner +import java.util.concurrent.TimeUnit + +/** + * Regression coverage for the manual-reconnect bug + * (variablefate/ridestr#86): "Reconnect to Relays" disconnected from all and never recovered. + * + * Verifies the two invariants that the broken `disconnectAll() + connectAll()` path + * was violating: + * 1. Force-reconnect resets the auto-retry backoff counter so a manual press + * isn't shadowed by long-pending scheduled retries. + * 2. Force-reconnect bumps the connection generation so any in-flight + * callbacks/messages from the prior socket are recognized as stale. + * + * These tests use a real OkHttpClient pointed at an invalid URL so no + * network I/O is needed — we only assert on in-memory state mutations. + */ +@RunWith(RobolectricTestRunner::class) +class RelayConnectionForceReconnectTest { + + /** Cheap client; we never actually open a working connection in this test. */ + private val client: OkHttpClient = OkHttpClient.Builder() + .connectTimeout(100, TimeUnit.MILLISECONDS) + .readTimeout(100, TimeUnit.MILLISECONDS) + .build() + + private fun newConnection(): RelayConnection = RelayConnection( + url = "wss://invalid.localhost.invalid:1/", + client = client, + onEvent = { _, _, _ -> }, + onEose = { _, _ -> }, + onOk = { _, _, _, _ -> }, + onNotice = { _, _ -> } + ) + + @Test + fun `forceReconnect resets reconnectAttempts to zero`() { + val connection = newConnection() + + // Simulate prior auto-retry failures having ramped backoff up. + connection.setReconnectAttemptsForTest(12) + assertEquals(12, connection.reconnectAttemptsForTest()) + + connection.forceReconnect() + + // Backoff must be reset so the user's manual press triggers an immediate retry + // rather than waiting on the 60s scheduled-retry slot. + assertEquals( + "forceReconnect must reset auto-retry backoff so manual reconnects aren't " + + "shadowed by long-pending scheduled retries", + 0, + connection.reconnectAttemptsForTest() + ) + } + + @Test + fun `forceReconnect bumps connection generation`() { + val connection = newConnection() + val genBefore = connection.connectionGenerationForTest() + + connection.forceReconnect() + + val genAfter = connection.connectionGenerationForTest() + assertTrue( + "Generation must strictly increase so stale callbacks from prior socket " + + "are filtered out (before=$genBefore, after=$genAfter)", + genAfter > genBefore + ) + } + + @Test + fun `forceReconnect transitions state to CONNECTING`() { + val connection = newConnection() + + // Initial state is DISCONNECTED. + assertEquals(RelayConnectionState.DISCONNECTED, connection.state.value) + + connection.forceReconnect() + + assertEquals( + "forceReconnect must immediately move state to CONNECTING — the prior " + + "disconnectAll+connectAll path could leave it in DISCONNECTED if a " + + "scheduled-retry coroutine raced with the user's press", + RelayConnectionState.CONNECTING, + connection.state.value + ) + } + + @Test + fun `forceReconnect after disconnect re-enables auto-reconnect`() { + val connection = newConnection() + + // disconnect() sets shouldReconnect=false. Anything left over from a logout + // path could otherwise sneak in and suppress future retries. + connection.disconnect() + assertEquals(RelayConnectionState.DISCONNECTED, connection.state.value) + + // Bump backoff to verify reset happens even from a fully-torn-down state. + connection.setReconnectAttemptsForTest(7) + + connection.forceReconnect() + + assertEquals(0, connection.reconnectAttemptsForTest()) + assertEquals(RelayConnectionState.CONNECTING, connection.state.value) + } +} diff --git a/drivestr/src/main/java/com/drivestr/app/MainActivity.kt b/drivestr/src/main/java/com/drivestr/app/MainActivity.kt index 6e5ea962..b32d43a1 100644 --- a/drivestr/src/main/java/com/drivestr/app/MainActivity.kt +++ b/drivestr/src/main/java/com/drivestr/app/MainActivity.kt @@ -1004,8 +1004,10 @@ fun DrivestrApp(settingsRepository: SettingsRepository) { connectionStates = connectionStates, onBack = { currentScreen = Screen.MAIN }, onReconnect = { - nostrService.relayManager.disconnectAll() - nostrService.relayManager.connectAll() + // Pass the current effective relay list so user edits take effect + // without an app restart, and use the dedicated force-reconnect path + // (cancels stale sockets immediately + resets backoff). + nostrService.forceReconnect(settingsRepository.getEffectiveRelays()) }, modifier = Modifier.padding(innerPadding) ) diff --git a/rider-app/src/main/java/com/ridestr/rider/MainActivity.kt b/rider-app/src/main/java/com/ridestr/rider/MainActivity.kt index 99a4c2d8..6086519c 100644 --- a/rider-app/src/main/java/com/ridestr/rider/MainActivity.kt +++ b/rider-app/src/main/java/com/ridestr/rider/MainActivity.kt @@ -802,8 +802,10 @@ fun RidestrApp(settingsRepository: SettingsRepository) { connectionStates = connectionStates, onBack = { currentScreen = Screen.MAIN }, onReconnect = { - nostrService.relayManager.disconnectAll() - nostrService.relayManager.connectAll() + // Pass the current effective relay list so user edits take effect + // without an app restart, and use the dedicated force-reconnect path + // (cancels stale sockets immediately + resets backoff). + nostrService.forceReconnect(settingsRepository.getEffectiveRelays()) }, modifier = Modifier.padding(innerPadding) ) diff --git a/roadflare-rider/src/main/java/com/roadflare/rider/MainActivity.kt b/roadflare-rider/src/main/java/com/roadflare/rider/MainActivity.kt index 79636407..cd414f9a 100644 --- a/roadflare-rider/src/main/java/com/roadflare/rider/MainActivity.kt +++ b/roadflare-rider/src/main/java/com/roadflare/rider/MainActivity.kt @@ -376,7 +376,7 @@ private fun MainTabScreen() { totalRelays = connectionStates.size, connectionStates = connectionStates, onBack = { secondaryScreen = SecondaryScreen.None }, - onReconnect = { viewModel.nostrService.relayManager.ensureConnected() } + onReconnect = { viewModel.onForceReconnect() } ) return } diff --git a/roadflare-rider/src/main/java/com/roadflare/rider/viewmodels/RiderViewModel.kt b/roadflare-rider/src/main/java/com/roadflare/rider/viewmodels/RiderViewModel.kt index db371321..34bd22aa 100644 --- a/roadflare-rider/src/main/java/com/roadflare/rider/viewmodels/RiderViewModel.kt +++ b/roadflare-rider/src/main/java/com/roadflare/rider/viewmodels/RiderViewModel.kt @@ -393,6 +393,11 @@ class RiderViewModel @Inject constructor( fun onAddRelay(url: String) = viewModelScope.launch { settingsRepository.addRelay(url) } fun onRemoveRelay(url: String) = viewModelScope.launch { settingsRepository.removeRelay(url) } fun onResetRelays() = viewModelScope.launch { settingsRepository.resetRelaysToDefault() } + fun onForceReconnect() { + // Reads the effective relay list synchronously and passes it to forceReconnect so + // user edits to custom relays take effect without an app restart. + nostrService.forceReconnect(settingsRepository.getEffectiveRelays()) + } fun onToggleUseGeocodingSearch() = viewModelScope.launch { settingsRepository.toggleUseGeocodingSearch() } fun onSetUseManualDriverLocation(enabled: Boolean) = viewModelScope.launch { settingsRepository.setUseManualDriverLocation(enabled) } fun onSetManualDriverLocation(lat: Double, lon: Double) = viewModelScope.launch { settingsRepository.setManualDriverLocation(lat, lon) } From 8492fb02730b9a259fb08ff550a0be1dda434e12 Mon Sep 17 00:00:00 2001 From: variablefate Date: Thu, 14 May 2026 08:51:42 -0700 Subject: [PATCH 2/2] fix(relay): close two follow-ups surfaced by PR #87 self-review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address two findings flagged by the self-review on PR #87 that scored just below the strict-merge threshold but were verified as real: 1. Subscription-seeding gap on dynamically-added relays --------------------------------------------------- `RelayManager.addRelay()` created a new `RelayConnection` and added it to `connections`, but never pushed the manager-level `subscriptions` registry to it. `subscribe()` only fans out to connections that existed at call time, and `RelayConnection.resubscribeAll()` only sends entries from the connection's own local map. Result: a relay added mid-session (which the new `forceReconnectAll(newRelayUrls)` path now does when the user edits custom relays then taps Reconnect) would open CONNECTED but receive zero events — a silent data-loss footgun. Fix: in `addRelay()`, after inserting the new connection, push every active `subscriptions[*]` entry into it. At construction time this is a no-op because `subscriptions` is empty. 2. Spinner-clears-too-early race on partial connectivity ----------------------------------------------------- `connection.state` collectors run on `Dispatchers.IO` (RelayManager line 49). The button onClick runs on Main: it writes `reconnectInitiatedAt`, calls `forceReconnect()` (which mutates each `RelayConnection._state.value = CONNECTING` synchronously), then returns. Compose then recomposes for the `reconnectInitiatedAt` change BEFORE the IO→Main hop has propagated the new state through `collectAsState()`. The LaunchedEffect fires with stale `connectedCount > 0`, clears `reconnectInitiatedAt`, and the spinner never appears. Fix: replace the single key+early-clear pattern with a two-phase `snapshotFlow` wait keyed only on `reconnectInitiatedAt`: - Phase 1: wait for `connectedCount` to reach 0 (absorbs the IO→Main propagation window — without this, pre-existing CONNECTED state would let Phase 2 resolve instantly). - Phase 2: wait for `connectedCount > 0` (reconnect succeeded) or hit the 15s hard cap. `rememberUpdatedState` exposes the parameter to the snapshot tracker so the flow observes recomposition updates without re-keying the effect. Tests: 3 new `RelayManagerAddRelayTest` cases covering subscription seeding under three conditions (existing subs at addRelay time, empty subs at construction time, end-to-end via forceReconnectAll). Total common suite: 511/511 passing. Test-only accessors added: - `RelayConnection.activeSubscriptionIdsForTest()` - `RelayManager.connectionForTest(url)` Co-Authored-By: Claude Opus 4.7 (1M context) --- .../common/nostr/relay/RelayConnection.kt | 4 + .../common/nostr/relay/RelayManager.kt | 17 ++- .../common/ui/RelayManagementScreen.kt | 51 +++++--- .../nostr/relay/RelayManagerAddRelayTest.kt | 109 ++++++++++++++++++ 4 files changed, 162 insertions(+), 19 deletions(-) create mode 100644 common/src/test/java/com/ridestr/common/nostr/relay/RelayManagerAddRelayTest.kt diff --git a/common/src/main/java/com/ridestr/common/nostr/relay/RelayConnection.kt b/common/src/main/java/com/ridestr/common/nostr/relay/RelayConnection.kt index e3de4c0f..ad77b90f 100644 --- a/common/src/main/java/com/ridestr/common/nostr/relay/RelayConnection.kt +++ b/common/src/main/java/com/ridestr/common/nostr/relay/RelayConnection.kt @@ -84,6 +84,10 @@ class RelayConnection( @VisibleForTesting internal fun connectionGenerationForTest(): Long = synchronized(this) { connectionGeneration } + /** Test-only accessor for the set of subscription IDs this connection has stored. */ + @VisibleForTesting + internal fun activeSubscriptionIdsForTest(): Set = activeSubscriptions.keys.toSet() + private val activeSubscriptions = ConcurrentHashMap() // subId -> filterJson private val pendingEvents = ConcurrentHashMap() // eventId -> event diff --git a/common/src/main/java/com/ridestr/common/nostr/relay/RelayManager.kt b/common/src/main/java/com/ridestr/common/nostr/relay/RelayManager.kt index e9b2b5c4..85a061b4 100644 --- a/common/src/main/java/com/ridestr/common/nostr/relay/RelayManager.kt +++ b/common/src/main/java/com/ridestr/common/nostr/relay/RelayManager.kt @@ -94,6 +94,17 @@ class RelayManager( connections[url] = connection + // Seed the new connection with every active subscription so when it opens, + // resubscribeAll() actually has something to send. Without this, a relay + // added mid-session (e.g., via forceReconnectAll's syncRelayList) would + // come up CONNECTED but receive zero events — `subscriptions` is the + // authoritative registry, but `subscribe()` only fans out to connections + // that existed at call time. At construction time this loop is a no-op + // because subscriptions is empty. + subscriptions.values.forEach { subscription -> + connection.subscribe(subscription.id, subscription.filters) + } + // Watch connection state changes scope.launch { connection.state.collect { state -> @@ -102,7 +113,7 @@ class RelayManager( } updateConnectionStates() - Log.d(TAG, "Added relay: $url") + Log.d(TAG, "Added relay: $url (seeded with ${subscriptions.size} subscription(s))") } /** @@ -392,6 +403,10 @@ class RelayManager( */ fun getRelayUrls(): List = connections.keys.toList() + /** Test-only accessor for a specific connection. */ + @androidx.annotation.VisibleForTesting + internal fun connectionForTest(url: String): RelayConnection? = connections[url] + private fun handleEvent(event: Event, subscriptionId: String, relayUrl: String) { Log.d(TAG, "Received event ${event.id} (kind ${event.kind}) from $relayUrl") diff --git a/common/src/main/java/com/ridestr/common/ui/RelayManagementScreen.kt b/common/src/main/java/com/ridestr/common/ui/RelayManagementScreen.kt index f9537fd0..e3a16bb0 100644 --- a/common/src/main/java/com/ridestr/common/ui/RelayManagementScreen.kt +++ b/common/src/main/java/com/ridestr/common/ui/RelayManagementScreen.kt @@ -18,7 +18,8 @@ import androidx.compose.ui.unit.dp import androidx.compose.foundation.background import androidx.compose.foundation.shape.CircleShape import com.ridestr.common.nostr.relay.RelayConnectionState -import kotlinx.coroutines.delay +import kotlinx.coroutines.flow.first +import kotlinx.coroutines.withTimeoutOrNull /** * Standalone relay management screen accessible from: @@ -45,26 +46,40 @@ fun RelayManagementScreen( BackHandler(onBack = onBack) // Tracks "we just kicked off a reconnect" so the button doesn't no-op visually. - // Auto-clears either when at least one relay reports CONNECTED, or after a hard cap - // so a totally unreachable relay set doesn't lock the button forever. + // Auto-clears either when at least one relay comes back CONNECTED after the + // forced disconnect propagates, or after a hard cap so a totally unreachable + // relay set doesn't lock the button forever. var reconnectInitiatedAt by remember { mutableStateOf(null) } val isReconnecting = reconnectInitiatedAt != null - LaunchedEffect(reconnectInitiatedAt, connectedCount) { + + // `connectedCount` is a regular composable parameter, recomputed per + // recomposition. Wrap it in `rememberUpdatedState` so `snapshotFlow` inside + // the LaunchedEffect can observe its changes without re-keying the effect + // (which would cancel/restart and lose the "transition seen" state). + val currentConnectedCount by rememberUpdatedState(connectedCount) + + LaunchedEffect(reconnectInitiatedAt) { val started = reconnectInitiatedAt ?: return@LaunchedEffect - // Clear immediately on success. - if (connectedCount > 0) { - reconnectInitiatedAt = null - return@LaunchedEffect - } - // Hard cap so the button doesn't stay disabled if all relays are unreachable. - // 15s ≈ ample for fresh OkHttp WebSocket handshake (10s connectTimeout + slack). - val elapsed = System.currentTimeMillis() - started - val remaining = 15_000L - elapsed - if (remaining > 0) { - delay(remaining) - } - // Re-check on resume — connectedCount may have updated during the delay. - if (connectedCount == 0) { + val deadline = started + 15_000L + val timeBudget = { (deadline - System.currentTimeMillis()).coerceAtLeast(0) } + try { + // Phase 1: wait for the forced disconnect to be visible + // (connectedCount drops to 0). Without this, pre-existing CONNECTED + // state would let Phase 2 resolve instantly and the spinner would + // never appear. Cross-dispatcher (Dispatchers.IO -> Main) state + // propagation means `connectedCount` can briefly look stale right + // after the press; this wait absorbs that window. + withTimeoutOrNull(timeBudget()) { + snapshotFlow { currentConnectedCount }.first { it == 0 } + } + // Phase 2: wait for at least one relay to come back online, or hit + // the cap. If Phase 1 timed out (e.g., reconnect was a no-op), + // `currentConnectedCount` is already non-zero and this resolves + // immediately. + withTimeoutOrNull(timeBudget()) { + snapshotFlow { currentConnectedCount }.first { it > 0 } + } + } finally { reconnectInitiatedAt = null } } diff --git a/common/src/test/java/com/ridestr/common/nostr/relay/RelayManagerAddRelayTest.kt b/common/src/test/java/com/ridestr/common/nostr/relay/RelayManagerAddRelayTest.kt new file mode 100644 index 00000000..e8f40a02 --- /dev/null +++ b/common/src/test/java/com/ridestr/common/nostr/relay/RelayManagerAddRelayTest.kt @@ -0,0 +1,109 @@ +package com.ridestr.common.nostr.relay + +import org.junit.Assert.assertEquals +import org.junit.Assert.assertTrue +import org.junit.Test +import org.junit.runner.RunWith +import org.robolectric.RobolectricTestRunner + +/** + * Regression coverage for the subscription-seeding gap surfaced by + * `forceReconnectAll(newRelayUrls)`. Without this fix, a relay added + * mid-session (e.g., a user editing custom relays then tapping Reconnect) + * would connect successfully but receive zero events, because + * `subscribe()` only fans out to connections that existed at call time + * and `RelayConnection.resubscribeAll()` only sends subscriptions stored + * in its own local map. + */ +@RunWith(RobolectricTestRunner::class) +class RelayManagerAddRelayTest { + + /** Use an unreachable URL pool; we never let any of these actually connect. */ + private val initialRelays = listOf( + "wss://invalid.localhost.invalid:1/a", + "wss://invalid.localhost.invalid:1/b" + ) + + private fun newSubscriptionFilter() = listOf( + mapOf("kinds" to listOf(1), "limit" to 10) + ) + + @Test + fun `addRelay seeds the new connection with every existing subscription`() { + val manager = RelayManager(initialRelays) + + // Pre-existing subscriptions registered before the new relay is added. + val subId1 = manager.subscribe(kinds = listOf(1)) { _, _ -> } + val subId2 = manager.subscribe(kinds = listOf(30173), authors = listOf("abc")) { _, _ -> } + + // Both existing connections should have both subscriptions. + val initialConnections = initialRelays.map { url -> + // Use the manager's internal map via getRelayUrls + state — but the + // simplest verification is to peek at the connection set we know. + // We need access to RelayConnection instances; use the test accessor + // approach via reflection-free path: call addRelay on a NEW url and + // verify it receives the seeds. + url + } + + // Add a new relay mid-session. + val newUrl = "wss://invalid.localhost.invalid:1/new" + manager.addRelay(newUrl) + + // The manager should report the new relay alongside the originals. + val urls = manager.getRelayUrls() + assertTrue("New relay must be present in the manager", newUrl in urls) + assertEquals(initialRelays.size + 1, urls.size) + + // Verify the new connection has both subscriptions seeded. + val newConnection = manager.connectionForTest(newUrl) + ?: error("New connection should exist after addRelay") + val seededIds = newConnection.activeSubscriptionIdsForTest() + assertTrue( + "New connection must have subId1=$subId1 seeded so resubscribeAll() " + + "actually sends it on socket open. Found: $seededIds", + subId1 in seededIds + ) + assertTrue( + "New connection must have subId2=$subId2 seeded. Found: $seededIds", + subId2 in seededIds + ) + } + + @Test + fun `addRelay with no existing subscriptions is a no-op for seeding`() { + // Construction-time addRelay calls (in init) hit this path before any + // subscribe() — must not error or do weird things. + val manager = RelayManager(initialRelays) + + val url = "wss://invalid.localhost.invalid:1/c" + manager.addRelay(url) + + val conn = manager.connectionForTest(url) + ?: error("Connection should exist after addRelay") + assertEquals( + "With no existing subscriptions, seeded map must be empty", + emptySet(), + conn.activeSubscriptionIdsForTest() + ) + } + + @Test + fun `forceReconnectAll with new relay url seeds subscriptions into the newcomer`() { + val manager = RelayManager(initialRelays) + val existingSubId = manager.subscribe(kinds = listOf(1)) { _, _ -> } + + // Simulate the user adding a relay in settings, then pressing Reconnect. + val expandedRelays = initialRelays + "wss://invalid.localhost.invalid:1/added-via-ui" + manager.forceReconnectAll(expandedRelays) + + val newConn = manager.connectionForTest(expandedRelays.last()) + ?: error("Newly-synced connection should exist") + assertTrue( + "forceReconnectAll's new relay must inherit the existing subscription " + + "($existingSubId). Without this, the relay would open CONNECTED but " + + "deliver no events.", + existingSubId in newConn.activeSubscriptionIdsForTest() + ) + } +}